From 1f5026a7e21e409c2b9dd54f6dfb9446511fb7c5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 12 Jul 2011 09:58:09 +0200
Subject: memblock: Kill MEMBLOCK_ERROR

25818f0f28 (memblock: Make MEMBLOCK_ERROR be 0) thankfully made
MEMBLOCK_ERROR 0 and there already are codes which expect error return
to be 0.  There's no point in keeping MEMBLOCK_ERROR around.  End its
misery.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1310457490-3356-6-git-send-email-tj@kernel.org
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/memblock.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 7525e38c434d..d235ec5fe678 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -2,8 +2,6 @@
 #define _LINUX_MEMBLOCK_H
 #ifdef __KERNEL__
 
-#define MEMBLOCK_ERROR	0
-
 #ifdef CONFIG_HAVE_MEMBLOCK
 /*
  * Logical memory blocks.
@@ -164,7 +162,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
 #else
 static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
 {
-	return MEMBLOCK_ERROR;
+	return 0;
 }
 
 #endif /* CONFIG_HAVE_MEMBLOCK */
-- 
cgit v1.2.3


From fc769a8e70a3348d5de49e5f69f6aff810157360 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 12 Jul 2011 09:58:10 +0200
Subject: memblock: Replace memblock_find_base() with memblock_find_in_range()

memblock_find_base() is a static function with two callers in
memblock.c and memblock_find_in_range() is a wrapper around it which
just changes the types and order of parameters.

Make memblock_find_in_range() take phys_addr_t instead of u64 for
consistency and replace memblock_find_base() with it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1310457490-3356-7-git-send-email-tj@kernel.org
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/memblock.h |  3 ++-
 mm/memblock.c            | 19 +++++++------------
 2 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index d235ec5fe678..349688899cb0 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -46,7 +46,8 @@ extern int memblock_can_resize;
 #define memblock_dbg(fmt, ...) \
 	if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 
-u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align);
+phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
+				   phys_addr_t size, phys_addr_t align);
 int memblock_free_reserved_regions(void);
 int memblock_reserve_reserved_regions(void);
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 196993661346..0f9626f01b5e 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -97,8 +97,11 @@ static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_
 	return 0;
 }
 
-static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size,
-			phys_addr_t align, phys_addr_t start, phys_addr_t end)
+/*
+ * Find a free area with specified alignment in a specific range.
+ */
+phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, phys_addr_t end,
+					phys_addr_t size, phys_addr_t align)
 {
 	long i;
 
@@ -132,14 +135,6 @@ static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size,
 	return 0;
 }
 
-/*
- * Find a free area with specified alignment in a specific range.
- */
-u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 align)
-{
-	return memblock_find_base(size, align, start, end);
-}
-
 /*
  * Free memblock.reserved.regions
  */
@@ -216,7 +211,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
 		new_array = kmalloc(new_size, GFP_KERNEL);
 		addr = new_array ? __pa(new_array) : 0;
 	} else
-		addr = memblock_find_base(new_size, sizeof(phys_addr_t), 0, MEMBLOCK_ALLOC_ACCESSIBLE);
+		addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t));
 	if (!addr) {
 		pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
 		       memblock_type_name(type), type->max, type->max * 2);
@@ -477,7 +472,7 @@ phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, ph
 	 */
 	size = round_up(size, align);
 
-	found = memblock_find_base(size, align, 0, max_addr);
+	found = memblock_find_in_range(0, max_addr, size, align);
 	if (found && !memblock_add_region(&memblock.reserved, found, size))
 		return found;
 
-- 
cgit v1.2.3


From 5dfe8660a3d7f1ee1265c3536433ee53da3f98a3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 14 Jul 2011 09:46:10 +0200
Subject: bootmem: Replace work_with_active_regions() with
 for_each_mem_pfn_range()

Callback based iteration is cumbersome and much less useful than
for_each_*() iterator.  This patch implements for_each_mem_pfn_range()
which replaces work_with_active_regions().  All the current users of
work_with_active_regions() are converted.

This simplifies walking over early_node_map and will allow converting
internal logics in page_alloc to use iterator instead of walking
early_node_map directly, which in turn will enable moving node
information to memblock.

powerpc change is only compile tested.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/20110714074610.GD3455@htj.dyndns.org
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/powerpc/mm/numa.c    | 50 ++++++++++++++---------------------------------
 arch/x86/mm/memblock.c    | 23 ++++------------------
 drivers/pci/intel-iommu.c | 24 +++++++++--------------
 include/linux/mm.h        | 22 +++++++++++++++++++--
 mm/page_alloc.c           | 40 +++++++++++++++++++++++++------------
 5 files changed, 76 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 2164006fe170..6f06ea53bca2 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -127,45 +127,25 @@ static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
 }
 
 /*
- * get_active_region_work_fn - A helper function for get_node_active_region
- *	Returns datax set to the start_pfn and end_pfn if they contain
- *	the initial value of datax->start_pfn between them
- * @start_pfn: start page(inclusive) of region to check
- * @end_pfn: end page(exclusive) of region to check
- * @datax: comes in with ->start_pfn set to value to search for and
- *	goes out with active range if it contains it
- * Returns 1 if search value is in range else 0
- */
-static int __init get_active_region_work_fn(unsigned long start_pfn,
-					unsigned long end_pfn, void *datax)
-{
-	struct node_active_region *data;
-	data = (struct node_active_region *)datax;
-
-	if (start_pfn <= data->start_pfn && end_pfn > data->start_pfn) {
-		data->start_pfn = start_pfn;
-		data->end_pfn = end_pfn;
-		return 1;
-	}
-	return 0;
-
-}
-
-/*
- * get_node_active_region - Return active region containing start_pfn
+ * get_node_active_region - Return active region containing pfn
  * Active range returned is empty if none found.
- * @start_pfn: The page to return the region for.
- * @node_ar: Returned set to the active region containing start_pfn
+ * @pfn: The page to return the region for
+ * @node_ar: Returned set to the active region containing @pfn
  */
-static void __init get_node_active_region(unsigned long start_pfn,
-		       struct node_active_region *node_ar)
+static void __init get_node_active_region(unsigned long pfn,
+					  struct node_active_region *node_ar)
 {
-	int nid = early_pfn_to_nid(start_pfn);
+	unsigned long start_pfn, end_pfn;
+	int i, nid;
 
-	node_ar->nid = nid;
-	node_ar->start_pfn = start_pfn;
-	node_ar->end_pfn = start_pfn;
-	work_with_active_regions(nid, get_active_region_work_fn, node_ar);
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
+		if (pfn >= start_pfn && pfn < end_pfn) {
+			node_ar->nid = nid;
+			node_ar->start_pfn = start_pfn;
+			node_ar->end_pfn = end_pfn;
+			break;
+		}
+	}
 }
 
 static void map_cpu_to_node(int cpu, int node)
diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c
index e126117d1b03..da0d5c84586e 100644
--- a/arch/x86/mm/memblock.c
+++ b/arch/x86/mm/memblock.c
@@ -115,28 +115,13 @@ static void __init memblock_x86_subtract_reserved(struct range *range, int az)
 	memblock_reserve_reserved_regions();
 }
 
-struct count_data {
-	int nr;
-};
-
-static int __init count_work_fn(unsigned long start_pfn,
-				unsigned long end_pfn, void *datax)
-{
-	struct count_data *data = datax;
-
-	data->nr++;
-
-	return 0;
-}
-
 static int __init count_early_node_map(int nodeid)
 {
-	struct count_data data;
-
-	data.nr = 0;
-	work_with_active_regions(nodeid, count_work_fn, &data);
+	int i, cnt = 0;
 
-	return data.nr;
+	for_each_mem_pfn_range(i, nodeid, NULL, NULL, NULL)
+		cnt++;
+	return cnt;
 }
 
 int __init __get_free_all_memory_range(struct range **rangep, int nodeid,
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index f02c34d26d1b..8ec352077e1a 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2178,18 +2178,6 @@ static inline void iommu_prepare_isa(void)
 
 static int md_domain_init(struct dmar_domain *domain, int guest_width);
 
-static int __init si_domain_work_fn(unsigned long start_pfn,
-				    unsigned long end_pfn, void *datax)
-{
-	int *ret = datax;
-
-	*ret = iommu_domain_identity_map(si_domain,
-					 (uint64_t)start_pfn << PAGE_SHIFT,
-					 (uint64_t)end_pfn << PAGE_SHIFT);
-	return *ret;
-
-}
-
 static int __init si_domain_init(int hw)
 {
 	struct dmar_drhd_unit *drhd;
@@ -2221,9 +2209,15 @@ static int __init si_domain_init(int hw)
 		return 0;
 
 	for_each_online_node(nid) {
-		work_with_active_regions(nid, si_domain_work_fn, &ret);
-		if (ret)
-			return ret;
+		unsigned long start_pfn, end_pfn;
+		int i;
+
+		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
+			ret = iommu_domain_identity_map(si_domain,
+					PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
+			if (ret)
+				return ret;
+		}
 	}
 
 	return 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c70a326b8f26..57e4c9ffdff8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1327,9 +1327,27 @@ int add_from_early_node_map(struct range *range, int az,
 				   int nr_range, int nid);
 u64 __init find_memory_core_early(int nid, u64 size, u64 align,
 					u64 goal, u64 limit);
-typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
-extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
 extern void sparse_memory_present_with_active_regions(int nid);
+
+extern void __next_mem_pfn_range(int *idx, int nid,
+				 unsigned long *out_start_pfn,
+				 unsigned long *out_end_pfn, int *out_nid);
+
+/**
+ * for_each_mem_pfn_range - early memory pfn range iterator
+ * @i: an integer used as loop variable
+ * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @p_start: ptr to ulong for start pfn of the range, can be %NULL
+ * @p_end: ptr to ulong for end pfn of the range, can be %NULL
+ * @p_nid: ptr to int for nid of the range, can be %NULL
+ *
+ * Walks over configured memory ranges.  Available after early_node_map is
+ * populated.
+ */
+#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid)		\
+	for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
+	     i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
+
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 
 #if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c7f0e5be4a31..69fffabf61b7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3903,18 +3903,6 @@ int __init add_from_early_node_map(struct range *range, int az,
 	return nr_range;
 }
 
-void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
-{
-	int i;
-	int ret;
-
-	for_each_active_range_index_in_nid(i, nid) {
-		ret = work_fn(early_node_map[i].start_pfn,
-			      early_node_map[i].end_pfn, data);
-		if (ret)
-			break;
-	}
-}
 /**
  * sparse_memory_present_with_active_regions - Call memory_present for each active range
  * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
@@ -4421,6 +4409,34 @@ static inline void setup_nr_node_ids(void)
 }
 #endif
 
+/*
+ * Common iterator interface used to define for_each_mem_pfn_range().
+ */
+void __meminit __next_mem_pfn_range(int *idx, int nid,
+				    unsigned long *out_start_pfn,
+				    unsigned long *out_end_pfn, int *out_nid)
+{
+	struct node_active_region *r = NULL;
+
+	while (++*idx < nr_nodemap_entries) {
+		if (nid == MAX_NUMNODES || nid == early_node_map[*idx].nid) {
+			r = &early_node_map[*idx];
+			break;
+		}
+	}
+	if (!r) {
+		*idx = -1;
+		return;
+	}
+
+	if (out_start_pfn)
+		*out_start_pfn = r->start_pfn;
+	if (out_end_pfn)
+		*out_end_pfn = r->end_pfn;
+	if (out_nid)
+		*out_nid = r->nid;
+}
+
 /**
  * add_active_range - Register a range of PFNs backed by physical memory
  * @nid: The node ID the range resides on
-- 
cgit v1.2.3


From f9b18db3b1cedc75e5d002a4d7097891c3399736 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 12 Jul 2011 10:46:32 +0200
Subject: memblock: Don't allow archs to override memblock_nid_range()

memblock_nid_range() is used to implement memblock_[try_]alloc_nid().
The generic version determines the range by walking early_node_map
with for_each_mem_pfn_range().  The generic version is defined __weak
to allow arch override.

Currently, only sparc overrides it; however, with the previous update
to the generic implementation, there isn't much to be gained with arch
override.  Sparc would behave exactly the same with the generic
implementation.

This patch disallows arch override for memblock_nid_range() and make
both generic and sparc versions static.

sparc is only compile tested.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1310460395-30913-6-git-send-email-tj@kernel.org
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/sparc/mm/init_64.c  | 4 ++--
 include/linux/memblock.h | 1 -
 mm/memblock.c            | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 3fd8e18bed80..8415f614ce0c 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -785,7 +785,7 @@ static int find_node(unsigned long addr)
 	return -1;
 }
 
-u64 memblock_nid_range(u64 start, u64 end, int *nid)
+static u64 memblock_nid_range(u64 start, u64 end, int *nid)
 {
 	*nid = find_node(start);
 	start += PAGE_SIZE;
@@ -803,7 +803,7 @@ u64 memblock_nid_range(u64 start, u64 end, int *nid)
 	return start;
 }
 #else
-u64 memblock_nid_range(u64 start, u64 end, int *nid)
+static u64 memblock_nid_range(u64 start, u64 end, int *nid)
 {
 	*nid = 0;
 	return end;
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 349688899cb0..329ffb26c1c9 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -89,7 +89,6 @@ extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
 extern void memblock_dump_all(void);
 
 /* Provided by the architecture */
-extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid);
 extern int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1,
 				   phys_addr_t addr2, phys_addr_t size2);
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 97f3486ce4d6..22cd999b0d4e 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -508,7 +508,7 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
  * have been done to populate it.
  */
 
-phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid)
+static phys_addr_t __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid)
 {
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
 	unsigned long start_pfn, end_pfn;
-- 
cgit v1.2.3


From e64980405cc6aa74ef178d8d9aa4018c867ceed1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 12 Jul 2011 10:46:34 +0200
Subject: memblock: Separate out memblock_find_in_range_node()

Node affine memblock allocation logic is currently implemented across
memblock_alloc_nid() and memblock_alloc_nid_region().  This
reorganizes it such that it resembles that of non-NUMA allocation API.

Area finding is collected and moved into new exported function
memblock_find_in_range_node() which is symmetrical to non-NUMA
counterpart - it handles @start/@end and understands ANYWHERE and
ACCESSIBLE.  memblock_alloc_nid() now simply calls
memblock_find_in_range_node() and reserves the returned area.

This makes memblock_alloc[_try]_nid() observe ACCESSIBLE limit on node
affine allocations too (again, this doesn't make any difference for
the current sole user - sparc64).

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1310460395-30913-8-git-send-email-tj@kernel.org
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/memblock.h |  4 ++++
 mm/memblock.c            | 57 +++++++++++++++++++++++++++---------------------
 2 files changed, 36 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 329ffb26c1c9..7400d029df48 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -61,6 +61,10 @@ extern long memblock_reserve(phys_addr_t base, phys_addr_t size);
 /* The numa aware allocator is only available if
  * CONFIG_ARCH_POPULATES_NODE_MAP is set
  */
+extern phys_addr_t memblock_find_in_range_node(phys_addr_t start,
+					       phys_addr_t end,
+					       phys_addr_t size,
+					       phys_addr_t align, int nid);
 extern phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align,
 					int nid);
 extern phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align,
diff --git a/mm/memblock.c b/mm/memblock.c
index 447cf64304ba..a8edb422795b 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -521,49 +521,56 @@ static phys_addr_t __init memblock_nid_range_rev(phys_addr_t start,
 	return start;
 }
 
-static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp,
+phys_addr_t __init memblock_find_in_range_node(phys_addr_t start,
+					       phys_addr_t end,
 					       phys_addr_t size,
 					       phys_addr_t align, int nid)
 {
-	phys_addr_t start, end;
+	struct memblock_type *mem = &memblock.memory;
+	int i;
 
-	start = mp->base;
-	end = start + mp->size;
+	BUG_ON(0 == size);
 
-	while (start < end) {
-		phys_addr_t this_start;
-		int this_nid;
+	/* Pump up max_addr */
+	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
+		end = memblock.current_limit;
 
-		this_start = memblock_nid_range_rev(start, end, &this_nid);
-		if (this_nid == nid) {
-			phys_addr_t ret = memblock_find_region(this_start, end, size, align);
-			if (ret &&
-			    !memblock_add_region(&memblock.reserved, ret, size))
-				return ret;
+	for (i = mem->cnt - 1; i >= 0; i--) {
+		struct memblock_region *r = &mem->regions[i];
+		phys_addr_t base = max(start, r->base);
+		phys_addr_t top = min(end, r->base + r->size);
+
+		while (base < top) {
+			phys_addr_t tbase, ret;
+			int tnid;
+
+			tbase = memblock_nid_range_rev(base, top, &tnid);
+			if (nid == MAX_NUMNODES || tnid == nid) {
+				ret = memblock_find_region(tbase, top, size, align);
+				if (ret)
+					return ret;
+			}
+			top = tbase;
 		}
-		end = this_start;
 	}
+
 	return 0;
 }
 
 phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
 {
-	struct memblock_type *mem = &memblock.memory;
-	int i;
-
-	BUG_ON(0 == size);
+	phys_addr_t found;
 
-	/* We align the size to limit fragmentation. Without this, a lot of
+	/*
+	 * We align the size to limit fragmentation. Without this, a lot of
 	 * small allocs quickly eat up the whole reserve array on sparc
 	 */
 	size = round_up(size, align);
 
-	for (i = mem->cnt - 1; i >= 0; i--) {
-		phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i],
-					       size, align, nid);
-		if (ret)
-			return ret;
-	}
+	found = memblock_find_in_range_node(0, MEMBLOCK_ALLOC_ACCESSIBLE,
+					    size, align, nid);
+	if (found && !memblock_add_region(&memblock.reserved, found, size))
+		return found;
 
 	return 0;
 }
-- 
cgit v1.2.3


From eb40c4c27f1722f058e4713ccfedebac577d5190 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 12 Jul 2011 10:46:35 +0200
Subject: memblock, x86: Replace memblock_x86_find_in_range_node() with generic
 memblock calls

With the previous changes, generic NUMA aware memblock API has feature
parity with memblock_x86_find_in_range_node().  There currently are
two users - x86 setup_node_data() and __alloc_memory_core_early() in
nobootmem.c.

This patch converts the former to use memblock_alloc_nid() and the
latter memblock_find_range_in_node(), and kills
memblock_x86_find_in_range_node() and related functions including
find_memory_early_core_early() in page_alloc.c.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1310460395-30913-9-git-send-email-tj@kernel.org
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/memblock.h |  1 -
 arch/x86/mm/memblock.c          | 15 ---------
 arch/x86/mm/numa.c              |  9 +-----
 include/linux/mm.h              |  2 --
 mm/nobootmem.c                  |  3 +-
 mm/page_alloc.c                 | 67 -----------------------------------------
 6 files changed, 2 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h
index 0cd3800f33b9..161792ec524f 100644
--- a/arch/x86/include/asm/memblock.h
+++ b/arch/x86/include/asm/memblock.h
@@ -15,7 +15,6 @@ int get_free_all_memory_range(struct range **rangep, int nodeid);
 void memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
 					 unsigned long last_pfn);
 u64 memblock_x86_hole_size(u64 start, u64 end);
-u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align);
 u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit);
 u64 memblock_x86_memory_in_range(u64 addr, u64 limit);
 bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align);
diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c
index da0d5c84586e..e4569f85b390 100644
--- a/arch/x86/mm/memblock.c
+++ b/arch/x86/mm/memblock.c
@@ -251,21 +251,6 @@ void __init memblock_x86_free_range(u64 start, u64 end)
 	memblock_free(start, end - start);
 }
 
-/*
- * Need to call this function after memblock_x86_register_active_regions,
- * so early_node_map[] is filled already.
- */
-u64 __init memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align)
-{
-	u64 addr;
-	addr = find_memory_core_early(nid, size, align, start, end);
-	if (addr)
-		return addr;
-
-	/* Fallback, should already have start end within node range */
-	return memblock_find_in_range(start, end, size, align);
-}
-
 /*
  * Finds an active region in the address range from start_pfn to last_pfn and
  * returns its range in ei_startpfn and ei_endpfn for the memblock entry.
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index fa1015de5cc0..824efadc5741 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -192,8 +192,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
 /* Initialize NODE_DATA for a node on the local memory */
 static void __init setup_node_data(int nid, u64 start, u64 end)
 {
-	const u64 nd_low = PFN_PHYS(MAX_DMA_PFN);
-	const u64 nd_high = PFN_PHYS(max_pfn_mapped);
 	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	bool remapped = false;
 	u64 nd_pa;
@@ -224,17 +222,12 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
 		nd_pa = __pa(nd);
 		remapped = true;
 	} else {
-		nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high,
-						nd_size, SMP_CACHE_BYTES);
-		if (!nd_pa)
-			nd_pa = memblock_find_in_range(nd_low, nd_high,
-						nd_size, SMP_CACHE_BYTES);
+		nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
 		if (!nd_pa) {
 			pr_err("Cannot find %zu bytes in node %d\n",
 			       nd_size, nid);
 			return;
 		}
-		memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
 		nd = __va(nd_pa);
 	}
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 57e4c9ffdff8..9ebc65ae6863 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1325,8 +1325,6 @@ extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
 int add_from_early_node_map(struct range *range, int az,
 				   int nr_range, int nid);
-u64 __init find_memory_core_early(int nid, u64 size, u64 align,
-					u64 goal, u64 limit);
 extern void sparse_memory_present_with_active_regions(int nid);
 
 extern void __next_mem_pfn_range(int *idx, int nid,
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 5b0eb06ecb4e..c78162668bc4 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -41,8 +41,7 @@ static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
 	if (limit > memblock.current_limit)
 		limit = memblock.current_limit;
 
-	addr = find_memory_core_early(nid, size, align, goal, limit);
-
+	addr = memblock_find_in_range_node(goal, limit, size, align, nid);
 	if (!addr)
 		return NULL;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 902f03a4fd6b..8ab5e5e7fdad 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3779,73 +3779,6 @@ void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
 	}
 }
 
-#ifdef CONFIG_HAVE_MEMBLOCK
-/*
- * Basic iterator support. Return the last range of PFNs for a node
- * Note: nid == MAX_NUMNODES returns last region regardless of node
- */
-static int __meminit last_active_region_index_in_nid(int nid)
-{
-	int i;
-
-	for (i = nr_nodemap_entries - 1; i >= 0; i--)
-		if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
-			return i;
-
-	return -1;
-}
-
-/*
- * Basic iterator support. Return the previous active range of PFNs for a node
- * Note: nid == MAX_NUMNODES returns next region regardless of node
- */
-static int __meminit previous_active_region_index_in_nid(int index, int nid)
-{
-	for (index = index - 1; index >= 0; index--)
-		if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
-			return index;
-
-	return -1;
-}
-
-#define for_each_active_range_index_in_nid_reverse(i, nid) \
-	for (i = last_active_region_index_in_nid(nid); i != -1; \
-				i = previous_active_region_index_in_nid(i, nid))
-
-u64 __init find_memory_core_early(int nid, u64 size, u64 align,
-					u64 goal, u64 limit)
-{
-	int i;
-
-	/* Need to go over early_node_map to find out good range for node */
-	for_each_active_range_index_in_nid_reverse(i, nid) {
-		u64 addr;
-		u64 ei_start, ei_last;
-		u64 final_start, final_end;
-
-		ei_last = early_node_map[i].end_pfn;
-		ei_last <<= PAGE_SHIFT;
-		ei_start = early_node_map[i].start_pfn;
-		ei_start <<= PAGE_SHIFT;
-
-		final_start = max(ei_start, goal);
-		final_end = min(ei_last, limit);
-
-		if (final_start >= final_end)
-			continue;
-
-		addr = memblock_find_in_range(final_start, final_end, size, align);
-
-		if (!addr)
-			continue;
-
-		return addr;
-	}
-
-	return 0;
-}
-#endif
-
 int __init add_from_early_node_map(struct range *range, int az,
 				   int nr_range, int nid)
 {
-- 
cgit v1.2.3


From ed7b56a799cade11f458cd83e1150af54a66b7e8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 12 Jul 2011 11:15:54 +0200
Subject: memblock: Remove memblock_memory_can_coalesce()

Arch could implement memblock_memor_can_coalesce() to veto merging of
adjacent or overlapping memblock regions; however, no arch did and any
vetoing would trigger WARN_ON().  Memblock regions are supposed to
deal with proper memory anyway.  Remove the unused hook.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1310462166-31469-2-git-send-email-tj@kernel.org
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/memblock.h |  4 ----
 mm/memblock.c            | 29 -----------------------------
 2 files changed, 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 7400d029df48..aa5df9e11fff 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -92,10 +92,6 @@ extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
 
 extern void memblock_dump_all(void);
 
-/* Provided by the architecture */
-extern int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1,
-				   phys_addr_t addr2, phys_addr_t size2);
-
 /**
  * memblock_set_current_limit - Set the current allocation limit to allow
  *                         limiting allocations to what is currently
diff --git a/mm/memblock.c b/mm/memblock.c
index a8edb422795b..bd3a3a9591d4 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -251,12 +251,6 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
 	return 0;
 }
 
-extern int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1,
-					  phys_addr_t addr2, phys_addr_t size2)
-{
-	return 1;
-}
-
 static long __init_memblock memblock_add_region(struct memblock_type *type,
 						phys_addr_t base, phys_addr_t size)
 {
@@ -282,17 +276,6 @@ static long __init_memblock memblock_add_region(struct memblock_type *type,
 		 * of a block.
 		 */
 		if (base < rgn->base && end >= rgn->base) {
-			/* If we can't coalesce, create a new block */
-			if (!memblock_memory_can_coalesce(base, size,
-							  rgn->base,
-							  rgn->size)) {
-				/* Overlap & can't coalesce are mutually
-				 * exclusive, if you do that, be prepared
-				 * for trouble
-				 */
-				WARN_ON(end != rgn->base);
-				goto new_block;
-			}
 			/* We extend the bottom of the block down to our
 			 * base
 			 */
@@ -316,17 +299,6 @@ static long __init_memblock memblock_add_region(struct memblock_type *type,
 		 * top of a block
 		 */
 		if (base <= rend && end >= rend) {
-			/* If we can't coalesce, create a new block */
-			if (!memblock_memory_can_coalesce(rgn->base,
-							  rgn->size,
-							  base, size)) {
-				/* Overlap & can't coalesce are mutually
-				 * exclusive, if you do that, be prepared
-				 * for trouble
-				 */
-				WARN_ON(rend != base);
-				goto new_block;
-			}
 			/* We adjust our base down to enclose the
 			 * original block and destroy it. It will be
 			 * part of our new allocation. Since we've
@@ -349,7 +321,6 @@ static long __init_memblock memblock_add_region(struct memblock_type *type,
 		return 0;
 	}
 
- new_block:
 	/* If we are out of space, we fail. It's too late to resize the array
 	 * but then this shouldn't have happened in the first place.
 	 */
-- 
cgit v1.2.3


From 67e24bcb725cabd15ef577bf301275d03d6086d7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 14 Jul 2011 11:42:03 +0200
Subject: memblock: Use __meminit[data] instead of __init[data]

From 19ab281ed67b87a6623d725237a7333ca79f1e75 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 14 Jul 2011 11:22:16 +0200

memblock will be extended to include early_node_map[], which is also
used during memory hotplug.  Make memblock use __meminit[data] instead
of __init[data] so that memory hotplug code can safely reference it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/20110714094203.GE3455@htj.dyndns.org
Reported-by: Yinghai Lu <yinghai@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/memblock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index aa5df9e11fff..434b958a4f5f 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -152,8 +152,8 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
 
 
 #ifdef ARCH_DISCARD_MEMBLOCK
-#define __init_memblock __init
-#define __initdata_memblock __initdata
+#define __init_memblock __meminit
+#define __initdata_memblock __meminitdata
 #else
 #define __init_memblock
 #define __initdata_memblock
-- 
cgit v1.2.3


From 7c0caeb866b0f648d91bb75b8bc6f86af95bb033 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 14 Jul 2011 11:43:42 +0200
Subject: memblock: Add optional region->nid

From 83103b92f3234ec830852bbc5c45911bd6cbdb20 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 14 Jul 2011 11:22:16 +0200

Add optional region->nid which can be enabled by arch using
CONFIG_HAVE_MEMBLOCK_NODE_MAP.  When enabled, memblock also carries
NUMA node information and replaces early_node_map[].

Newly added memblocks have MAX_NUMNODES as nid.  Arch can then call
memblock_set_node() to set node information.  memblock takes care of
merging and node affine allocations w.r.t. node information.

When MEMBLOCK_NODE_MAP is enabled, early_node_map[], related data
structures and functions to manipulate and iterate it are disabled.
memblock version of __next_mem_pfn_range() is provided such that
for_each_mem_pfn_range() behaves the same and its users don't have to
be updated.

-v2: Yinghai spotted section mismatch caused by missing
     __init_memblock in memblock_set_node().  Fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/20110714094342.GF3455@htj.dyndns.org
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/memblock.h |  26 +++++++++
 include/linux/mm.h       |   2 +
 mm/Kconfig               |   3 +
 mm/memblock.c            | 142 +++++++++++++++++++++++++++++++++++++++++------
 mm/page_alloc.c          |  47 +++++++++-------
 5 files changed, 183 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 434b958a4f5f..c36a55d3c1c2 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -24,6 +24,9 @@
 struct memblock_region {
 	phys_addr_t base;
 	phys_addr_t size;
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+	int nid;
+#endif
 };
 
 struct memblock_type {
@@ -58,6 +61,29 @@ extern long memblock_remove(phys_addr_t base, phys_addr_t size);
 extern long memblock_free(phys_addr_t base, phys_addr_t size);
 extern long memblock_reserve(phys_addr_t base, phys_addr_t size);
 
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+extern int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid);
+
+static inline void memblock_set_region_node(struct memblock_region *r, int nid)
+{
+	r->nid = nid;
+}
+
+static inline int memblock_get_region_node(const struct memblock_region *r)
+{
+	return r->nid;
+}
+#else
+static inline void memblock_set_region_node(struct memblock_region *r, int nid)
+{
+}
+
+static inline int memblock_get_region_node(const struct memblock_region *r)
+{
+	return 0;
+}
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+
 /* The numa aware allocator is only available if
  * CONFIG_ARCH_POPULATES_NODE_MAP is set
  */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9ebc65ae6863..ceb1e4a1a736 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1307,12 +1307,14 @@ extern void free_area_init_node(int nid, unsigned long * zones_size,
  * CONFIG_ARCH_POPULATES_NODE_MAP
  */
 extern void free_area_init_nodes(unsigned long *max_zone_pfn);
+#ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 extern void add_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
 extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
 void sort_node_map(void);
+#endif
 unsigned long node_map_pfn_alignment(void);
 unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
 						unsigned long end_pfn);
diff --git a/mm/Kconfig b/mm/Kconfig
index 8ca47a5ee9c8..30a5d4792b83 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -131,6 +131,9 @@ config SPARSEMEM_VMEMMAP
 config HAVE_MEMBLOCK
 	boolean
 
+config HAVE_MEMBLOCK_NODE_MAP
+	boolean
+
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
diff --git a/mm/memblock.c b/mm/memblock.c
index 992aa1807473..e815f4b75809 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -161,12 +161,8 @@ int __init_memblock memblock_reserve_reserved_regions(void)
 
 static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
 {
-	unsigned long i;
-
-	for (i = r; i < type->cnt - 1; i++) {
-		type->regions[i].base = type->regions[i + 1].base;
-		type->regions[i].size = type->regions[i + 1].size;
-	}
+	memmove(&type->regions[r], &type->regions[r + 1],
+		(type->cnt - (r + 1)) * sizeof(type->regions[r]));
 	type->cnt--;
 
 	/* Special case for empty arrays */
@@ -174,6 +170,7 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
 		type->cnt = 1;
 		type->regions[0].base = 0;
 		type->regions[0].size = 0;
+		memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
 	}
 }
 
@@ -266,7 +263,9 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type)
 		struct memblock_region *this = &type->regions[i];
 		struct memblock_region *next = &type->regions[i + 1];
 
-		if (this->base + this->size != next->base) {
+		if (this->base + this->size != next->base ||
+		    memblock_get_region_node(this) !=
+		    memblock_get_region_node(next)) {
 			BUG_ON(this->base + this->size > next->base);
 			i++;
 			continue;
@@ -290,7 +289,7 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type)
  */
 static void __init_memblock memblock_insert_region(struct memblock_type *type,
 						   int idx, phys_addr_t base,
-						   phys_addr_t size)
+						   phys_addr_t size, int nid)
 {
 	struct memblock_region *rgn = &type->regions[idx];
 
@@ -298,6 +297,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
 	memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn));
 	rgn->base = base;
 	rgn->size = size;
+	memblock_set_region_node(rgn, nid);
 	type->cnt++;
 }
 
@@ -327,6 +327,7 @@ static long __init_memblock memblock_add_region(struct memblock_type *type,
 		WARN_ON(type->cnt != 1);
 		type->regions[0].base = base;
 		type->regions[0].size = size;
+		memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
 		return 0;
 	}
 repeat:
@@ -355,7 +356,7 @@ repeat:
 			nr_new++;
 			if (insert)
 				memblock_insert_region(type, i++, base,
-						       rbase - base);
+						rbase - base, MAX_NUMNODES);
 		}
 		/* area below @rend is dealt with, forget about it */
 		base = min(rend, end);
@@ -365,7 +366,8 @@ repeat:
 	if (base < end) {
 		nr_new++;
 		if (insert)
-			memblock_insert_region(type, i, base, end - base);
+			memblock_insert_region(type, i, base, end - base,
+					       MAX_NUMNODES);
 	}
 
 	/*
@@ -459,6 +461,101 @@ long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
 	return memblock_add_region(_rgn, base, size);
 }
 
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+/*
+ * Common iterator interface used to define for_each_mem_range().
+ */
+void __init_memblock __next_mem_pfn_range(int *idx, int nid,
+				unsigned long *out_start_pfn,
+				unsigned long *out_end_pfn, int *out_nid)
+{
+	struct memblock_type *type = &memblock.memory;
+	struct memblock_region *r;
+
+	while (++*idx < type->cnt) {
+		r = &type->regions[*idx];
+
+		if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
+			continue;
+		if (nid == MAX_NUMNODES || nid == r->nid)
+			break;
+	}
+	if (*idx >= type->cnt) {
+		*idx = -1;
+		return;
+	}
+
+	if (out_start_pfn)
+		*out_start_pfn = PFN_UP(r->base);
+	if (out_end_pfn)
+		*out_end_pfn = PFN_DOWN(r->base + r->size);
+	if (out_nid)
+		*out_nid = r->nid;
+}
+
+/**
+ * memblock_set_node - set node ID on memblock regions
+ * @base: base of area to set node ID for
+ * @size: size of area to set node ID for
+ * @nid: node ID to set
+ *
+ * Set the nid of memblock memory regions in [@base,@base+@size) to @nid.
+ * Regions which cross the area boundaries are split as necessary.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
+				      int nid)
+{
+	struct memblock_type *type = &memblock.memory;
+	phys_addr_t end = base + size;
+	int i;
+
+	/* we'll create at most two more regions */
+	while (type->cnt + 2 > type->max)
+		if (memblock_double_array(type) < 0)
+			return -ENOMEM;
+
+	for (i = 0; i < type->cnt; i++) {
+		struct memblock_region *rgn = &type->regions[i];
+		phys_addr_t rbase = rgn->base;
+		phys_addr_t rend = rbase + rgn->size;
+
+		if (rbase >= end)
+			break;
+		if (rend <= base)
+			continue;
+
+		if (rbase < base) {
+			/*
+			 * @rgn intersects from below.  Split and continue
+			 * to process the next region - the new top half.
+			 */
+			rgn->base = base;
+			rgn->size = rend - rgn->base;
+			memblock_insert_region(type, i, rbase, base - rbase,
+					       rgn->nid);
+		} else if (rend > end) {
+			/*
+			 * @rgn intersects from above.  Split and redo the
+			 * current region - the new bottom half.
+			 */
+			rgn->base = end;
+			rgn->size = rend - rgn->base;
+			memblock_insert_region(type, i--, rbase, end - rbase,
+					       rgn->nid);
+		} else {
+			/* @rgn is fully contained, set ->nid */
+			rgn->nid = nid;
+		}
+	}
+
+	memblock_merge_regions(type);
+	return 0;
+}
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+
 phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
 {
 	phys_addr_t found;
@@ -689,19 +786,26 @@ void __init_memblock memblock_set_current_limit(phys_addr_t limit)
 	memblock.current_limit = limit;
 }
 
-static void __init_memblock memblock_dump(struct memblock_type *region, char *name)
+static void __init_memblock memblock_dump(struct memblock_type *type, char *name)
 {
 	unsigned long long base, size;
 	int i;
 
-	pr_info(" %s.cnt  = 0x%lx\n", name, region->cnt);
-
-	for (i = 0; i < region->cnt; i++) {
-		base = region->regions[i].base;
-		size = region->regions[i].size;
+	pr_info(" %s.cnt  = 0x%lx\n", name, type->cnt);
 
-		pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes\n",
-		    name, i, base, base + size - 1, size);
+	for (i = 0; i < type->cnt; i++) {
+		struct memblock_region *rgn = &type->regions[i];
+		char nid_buf[32] = "";
+
+		base = rgn->base;
+		size = rgn->size;
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+		if (memblock_get_region_node(rgn) != MAX_NUMNODES)
+			snprintf(nid_buf, sizeof(nid_buf), " on node %d",
+				 memblock_get_region_node(rgn));
+#endif
+		pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s\n",
+			name, i, base, base + size - 1, size, nid_buf);
 	}
 }
 
@@ -759,11 +863,13 @@ void __init memblock_init(void)
 	 */
 	memblock.memory.regions[0].base = 0;
 	memblock.memory.regions[0].size = 0;
+	memblock_set_region_node(&memblock.memory.regions[0], MAX_NUMNODES);
 	memblock.memory.cnt = 1;
 
 	/* Ditto. */
 	memblock.reserved.regions[0].base = 0;
 	memblock.reserved.regions[0].size = 0;
+	memblock_set_region_node(&memblock.reserved.regions[0], MAX_NUMNODES);
 	memblock.reserved.cnt = 1;
 
 	memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8ab5e5e7fdad..3c7ea45ffba9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -182,28 +182,31 @@ static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
 
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
-  /*
-   * MAX_ACTIVE_REGIONS determines the maximum number of distinct
-   * ranges of memory (RAM) that may be registered with add_active_range().
-   * Ranges passed to add_active_range() will be merged if possible
-   * so the number of times add_active_range() can be called is
-   * related to the number of nodes and the number of holes
-   */
-  #ifdef CONFIG_MAX_ACTIVE_REGIONS
-    /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */
-    #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS
-  #else
-    #if MAX_NUMNODES >= 32
-      /* If there can be many nodes, allow up to 50 holes per node */
-      #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50)
+  #ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+    /*
+     * MAX_ACTIVE_REGIONS determines the maximum number of distinct ranges
+     * of memory (RAM) that may be registered with add_active_range().
+     * Ranges passed to add_active_range() will be merged if possible so
+     * the number of times add_active_range() can be called is related to
+     * the number of nodes and the number of holes
+     */
+    #ifdef CONFIG_MAX_ACTIVE_REGIONS
+      /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */
+      #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS
     #else
-      /* By default, allow up to 256 distinct regions */
-      #define MAX_ACTIVE_REGIONS 256
+      #if MAX_NUMNODES >= 32
+        /* If there can be many nodes, allow up to 50 holes per node */
+        #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50)
+      #else
+        /* By default, allow up to 256 distinct regions */
+        #define MAX_ACTIVE_REGIONS 256
+      #endif
     #endif
-  #endif
 
-  static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS];
-  static int __meminitdata nr_nodemap_entries;
+    static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS];
+    static int __meminitdata nr_nodemap_entries;
+#endif /* !CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+
   static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
   static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
   static unsigned long __initdata required_kernelcore;
@@ -4268,6 +4271,7 @@ static inline void setup_nr_node_ids(void)
 }
 #endif
 
+#ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 /*
  * Common iterator interface used to define for_each_mem_pfn_range().
  */
@@ -4456,6 +4460,11 @@ void __init sort_node_map(void)
 			sizeof(struct node_active_region),
 			cmp_node_active_region, NULL);
 }
+#else /* !CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+static inline void sort_node_map(void)
+{
+}
+#endif
 
 /**
  * node_map_pfn_alignment - determine the maximum internode alignment
-- 
cgit v1.2.3


From 35fd0808d7d8d001cd72f112e3bca84664b596a3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 12 Jul 2011 11:15:59 +0200
Subject: memblock: Implement for_each_free_mem_range()

Implement for_each_free_mem_range() which iterates over free memory
areas according to memblock (memory && !reserved).  This will be used
to simplify memblock users.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1310462166-31469-7-git-send-email-tj@kernel.org
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/memblock.h | 20 +++++++++++++
 mm/memblock.c            | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index c36a55d3c1c2..31def584cceb 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -61,6 +61,26 @@ extern long memblock_remove(phys_addr_t base, phys_addr_t size);
 extern long memblock_free(phys_addr_t base, phys_addr_t size);
 extern long memblock_reserve(phys_addr_t base, phys_addr_t size);
 
+extern void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
+				  phys_addr_t *out_end, int *out_nid);
+
+/**
+ * for_each_free_mem_range - iterate through free memblock areas
+ * @i: u64 used as loop variable
+ * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @p_nid: ptr to int for nid of the range, can be %NULL
+ *
+ * Walks over free (memory && !reserved) areas of memblock.  Available as
+ * soon as memblock is initialized.
+ */
+#define for_each_free_mem_range(i, nid, p_start, p_end, p_nid)		\
+	for (i = 0,							\
+	     __next_free_mem_range(&i, nid, p_start, p_end, p_nid);	\
+	     i != (u64)ULLONG_MAX;					\
+	     __next_free_mem_range(&i, nid, p_start, p_end, p_nid))
+
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 extern int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid);
 
diff --git a/mm/memblock.c b/mm/memblock.c
index e815f4b75809..c4a8750406fc 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -461,6 +461,82 @@ long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
 	return memblock_add_region(_rgn, base, size);
 }
 
+/**
+ * __next_free_mem_range - next function for for_each_free_mem_range()
+ * @idx: pointer to u64 loop variable
+ * @nid: nid: node selector, %MAX_NUMNODES for all nodes
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @p_nid: ptr to int for nid of the range, can be %NULL
+ *
+ * Find the first free area from *@idx which matches @nid, fill the out
+ * parameters, and update *@idx for the next iteration.  The lower 32bit of
+ * *@idx contains index into memory region and the upper 32bit indexes the
+ * areas before each reserved region.  For example, if reserved regions
+ * look like the following,
+ *
+ *	0:[0-16), 1:[32-48), 2:[128-130)
+ *
+ * The upper 32bit indexes the following regions.
+ *
+ *	0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
+ *
+ * As both region arrays are sorted, the function advances the two indices
+ * in lockstep and returns each intersection.
+ */
+void __init_memblock __next_free_mem_range(u64 *idx, int nid,
+					   phys_addr_t *out_start,
+					   phys_addr_t *out_end, int *out_nid)
+{
+	struct memblock_type *mem = &memblock.memory;
+	struct memblock_type *rsv = &memblock.reserved;
+	int mi = *idx & 0xffffffff;
+	int ri = *idx >> 32;
+
+	for ( ; mi < mem->cnt; mi++) {
+		struct memblock_region *m = &mem->regions[mi];
+		phys_addr_t m_start = m->base;
+		phys_addr_t m_end = m->base + m->size;
+
+		/* only memory regions are associated with nodes, check it */
+		if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m))
+			continue;
+
+		/* scan areas before each reservation for intersection */
+		for ( ; ri < rsv->cnt + 1; ri++) {
+			struct memblock_region *r = &rsv->regions[ri];
+			phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0;
+			phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX;
+
+			/* if ri advanced past mi, break out to advance mi */
+			if (r_start >= m_end)
+				break;
+			/* if the two regions intersect, we're done */
+			if (m_start < r_end) {
+				if (out_start)
+					*out_start = max(m_start, r_start);
+				if (out_end)
+					*out_end = min(m_end, r_end);
+				if (out_nid)
+					*out_nid = memblock_get_region_node(m);
+				/*
+				 * The region which ends first is advanced
+				 * for the next iteration.
+				 */
+				if (m_end <= r_end)
+					mi++;
+				else
+					ri++;
+				*idx = (u32)mi | (u64)ri << 32;
+				return;
+			}
+		}
+	}
+
+	/* signal end of iteration */
+	*idx = ULLONG_MAX;
+}
+
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 /*
  * Common iterator interface used to define for_each_mem_range().
-- 
cgit v1.2.3


From 64a02daacbc880bac1d6b3aeefbcd226a9341fa7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 12 Jul 2011 11:16:01 +0200
Subject: memblock, x86: Make free_all_memory_core_early() explicitly free
 lowmem only

nomemblock is currently used only by x86 and on x86_32
free_all_memory_core_early() silently freed only the low mem because
get_free_all_memory_range() in arch/x86/mm/memblock.c implicitly
limited range to max_low_pfn.

Rename free_all_memory_core_early() to free_low_memory_core_early()
and make it call __get_free_all_memory_range() and limit the range to
max_low_pfn explicitly.  This makes things clearer and also is
consistent with the bootmem behavior.

This leaves get_free_all_memory_range() without any user.  Kill it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1310462166-31469-9-git-send-email-tj@kernel.org
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/memblock.h |  1 -
 arch/x86/mm/memblock.c          | 10 ----------
 arch/x86/mm/numa_64.c           |  2 +-
 include/linux/bootmem.h         |  2 +-
 mm/nobootmem.c                  |  8 ++++----
 5 files changed, 6 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h
index d2a5a59bd358..6c72ecaee577 100644
--- a/arch/x86/include/asm/memblock.h
+++ b/arch/x86/include/asm/memblock.h
@@ -8,7 +8,6 @@ void memblock_x86_free_range(u64 start, u64 end);
 struct range;
 int __get_free_all_memory_range(struct range **range, int nodeid,
 			 unsigned long start_pfn, unsigned long end_pfn);
-int get_free_all_memory_range(struct range **rangep, int nodeid);
 
 u64 memblock_x86_hole_size(u64 start, u64 end);
 u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit);
diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c
index 648d47d52a86..0e8442a9baff 100644
--- a/arch/x86/mm/memblock.c
+++ b/arch/x86/mm/memblock.c
@@ -89,16 +89,6 @@ int __init __get_free_all_memory_range(struct range **rangep, int nodeid,
 	return nr_range;
 }
 
-int __init get_free_all_memory_range(struct range **rangep, int nodeid)
-{
-	unsigned long end_pfn = -1UL;
-
-#ifdef CONFIG_X86_32
-	end_pfn = max_low_pfn;
-#endif
-	return __get_free_all_memory_range(rangep, nodeid, 0, end_pfn);
-}
-
 static u64 __init __memblock_x86_memory_in_range(u64 addr, u64 limit, bool get_free)
 {
 	int i, count;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index dd27f401f0a0..92e27119ee1a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -19,7 +19,7 @@ unsigned long __init numa_free_all_bootmem(void)
 	for_each_online_node(i)
 		pages += free_all_bootmem_node(NODE_DATA(i));
 
-	pages += free_all_memory_core_early(MAX_NUMNODES);
+	pages += free_low_memory_core_early(MAX_NUMNODES);
 
 	return pages;
 }
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index ab344a521105..66d3e954eb6c 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -44,7 +44,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
 				       unsigned long endpfn);
 extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
 
-unsigned long free_all_memory_core_early(int nodeid);
+extern unsigned long free_low_memory_core_early(int nodeid);
 extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
 extern unsigned long free_all_bootmem(void);
 
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index c78162668bc4..2037a8a04761 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -106,7 +106,7 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
 		__free_pages_bootmem(pfn_to_page(i), 0);
 }
 
-unsigned long __init free_all_memory_core_early(int nodeid)
+unsigned long __init free_low_memory_core_early(int nodeid)
 {
 	int i;
 	u64 start, end;
@@ -114,7 +114,7 @@ unsigned long __init free_all_memory_core_early(int nodeid)
 	struct range *range = NULL;
 	int nr_range;
 
-	nr_range = get_free_all_memory_range(&range, nodeid);
+	nr_range = __get_free_all_memory_range(&range, nodeid, 0, max_low_pfn);
 
 	for (i = 0; i < nr_range; i++) {
 		start = range[i].start;
@@ -136,7 +136,7 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
 {
 	register_page_bootmem_info_node(pgdat);
 
-	/* free_all_memory_core_early(MAX_NUMNODES) will be called later */
+	/* free_low_memory_core_early(MAX_NUMNODES) will be called later */
 	return 0;
 }
 
@@ -154,7 +154,7 @@ unsigned long __init free_all_bootmem(void)
 	 * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
 	 *  will be used instead of only Node0 related
 	 */
-	return free_all_memory_core_early(MAX_NUMNODES);
+	return free_low_memory_core_early(MAX_NUMNODES);
 }
 
 /**
-- 
cgit v1.2.3


From c378ddd53f9b8832a46fd4fec050a97fc2269858 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 14 Jul 2011 11:46:03 +0200
Subject: memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option

From 6839454ae63f1eb21e515c10229ca95c22955fec Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 14 Jul 2011 11:22:17 +0200

Make ARCH_DISCARD_MEMBLOCK a config option so that it can be handled
together with other MEMBLOCK options.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/20110714094603.GH3455@htj.dyndns.org
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/Kconfig                | 1 +
 arch/x86/include/asm/memblock.h | 2 --
 include/linux/memblock.h        | 2 +-
 mm/Kconfig                      | 3 +++
 mm/memblock.c                   | 2 +-
 5 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 97f08941dd79..28116d4f7b64 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,6 +26,7 @@ config X86
 	select HAVE_KPROBES
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
+	select ARCH_DISCARD_MEMBLOCK
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_FRAME_POINTERS
 	select HAVE_DMA_ATTRS
diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h
index 17a882e90ada..bc5667081aea 100644
--- a/arch/x86/include/asm/memblock.h
+++ b/arch/x86/include/asm/memblock.h
@@ -1,8 +1,6 @@
 #ifndef _X86_MEMBLOCK_H
 #define _X86_MEMBLOCK_H
 
-#define ARCH_DISCARD_MEMBLOCK
-
 void memblock_x86_reserve_range(u64 start, u64 end, char *name);
 void memblock_x86_free_range(u64 start, u64 end);
 
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 31def584cceb..2491355bb6e4 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -197,7 +197,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
 	     region++)
 
 
-#ifdef ARCH_DISCARD_MEMBLOCK
+#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
 #define __init_memblock __meminit
 #define __initdata_memblock __meminitdata
 #else
diff --git a/mm/Kconfig b/mm/Kconfig
index 30a5d4792b83..7c5697116fcf 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -134,6 +134,9 @@ config HAVE_MEMBLOCK
 config HAVE_MEMBLOCK_NODE_MAP
 	boolean
 
+config ARCH_DISCARD_MEMBLOCK
+	boolean
+
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
diff --git a/mm/memblock.c b/mm/memblock.c
index c4a8750406fc..ebc6119f1280 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -959,7 +959,7 @@ static int __init early_memblock(char *p)
 }
 early_param("memblock", early_memblock);
 
-#if defined(CONFIG_DEBUG_FS) && !defined(ARCH_DISCARD_MEMBLOCK)
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_ARCH_DISCARD_MEMBLOCK)
 
 static int memblock_debug_show(struct seq_file *m, void *private)
 {
-- 
cgit v1.2.3


From 24aa07882b672fff2da2f5c955759f0bd13d32d5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 12 Jul 2011 11:16:06 +0200
Subject: memblock, x86: Replace memblock_x86_reserve/free_range() with generic
 ones

Other than sanity check and debug message, the x86 specific version of
memblock reserve/free functions are simple wrappers around the generic
versions - memblock_reserve/free().

This patch adds debug messages with caller identification to the
generic versions and replaces x86 specific ones and kills them.
arch/x86/include/asm/memblock.h and arch/x86/mm/memblock.c are empty
after this change and removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1310462166-31469-14-git-send-email-tj@kernel.org
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/memblock.h |  7 -------
 arch/x86/kernel/aperture_64.c   |  2 +-
 arch/x86/kernel/check.c         |  2 +-
 arch/x86/kernel/head.c          |  2 +-
 arch/x86/kernel/head32.c        |  5 +++--
 arch/x86/kernel/head64.c        |  5 +++--
 arch/x86/kernel/mpparse.c       |  6 ++----
 arch/x86/kernel/setup.c         | 17 ++++++++---------
 arch/x86/kernel/trampoline.c    |  2 +-
 arch/x86/mm/Makefile            |  2 --
 arch/x86/mm/init.c              |  6 +++---
 arch/x86/mm/memblock.c          | 34 ----------------------------------
 arch/x86/mm/memtest.c           |  2 +-
 arch/x86/mm/numa.c              |  5 ++---
 arch/x86/mm/numa_32.c           |  6 +++---
 arch/x86/mm/numa_emulation.c    |  4 ++--
 arch/x86/platform/efi/efi.c     |  6 ++----
 arch/x86/xen/mmu.c              | 12 ++++--------
 arch/x86/xen/setup.c            |  7 +++----
 include/linux/memblock.h        |  2 --
 mm/memblock.c                   |  5 +++++
 mm/nobootmem.c                  |  6 +++---
 22 files changed, 48 insertions(+), 97 deletions(-)
 delete mode 100644 arch/x86/include/asm/memblock.h
 delete mode 100644 arch/x86/mm/memblock.c

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h
deleted file mode 100644
index bc5667081aea..000000000000
--- a/arch/x86/include/asm/memblock.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _X86_MEMBLOCK_H
-#define _X86_MEMBLOCK_H
-
-void memblock_x86_reserve_range(u64 start, u64 end, char *name);
-void memblock_x86_free_range(u64 start, u64 end);
-
-#endif
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 56363082bbdf..6e76c191a835 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -94,7 +94,7 @@ static u32 __init allocate_aperture(void)
 				addr, aper_size>>10);
 		return 0;
 	}
-	memblock_x86_reserve_range(addr, addr + aper_size, "aperture64");
+	memblock_reserve(addr, aper_size);
 	/*
 	 * Kmemleak should not scan this block as it may not be mapped via the
 	 * kernel direct mapping.
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 621cd23bb4e7..5da1269e8ddc 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -91,7 +91,7 @@ void __init setup_bios_corruption_check(void)
 		if (start >= end)
 			continue;
 
-		memblock_x86_reserve_range(start, end, "SCAN RAM");
+		memblock_reserve(start, end - start);
 		scan_areas[num_scan_areas].addr = start;
 		scan_areas[num_scan_areas].size = end - start;
 
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index af0699ba48cf..48d9d4ea1020 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -52,5 +52,5 @@ void __init reserve_ebda_region(void)
 		lowmem = 0x9f000;
 
 	/* reserve all memory between lowmem and the 1MB mark */
-	memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved");
+	memblock_reserve(lowmem, 0x100000 - lowmem);
 }
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 3bb08509a7a1..be9282bcda72 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -33,7 +33,8 @@ void __init i386_start_kernel(void)
 {
 	memblock_init();
 
-	memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
+	memblock_reserve(__pa_symbol(&_text),
+			 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/* Reserve INITRD */
@@ -42,7 +43,7 @@ void __init i386_start_kernel(void)
 		u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 		u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
 		u64 ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);
-		memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK");
+		memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
 	}
 #endif
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5655c2272adb..fd25b11549b8 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -100,7 +100,8 @@ void __init x86_64_start_reservations(char *real_mode_data)
 
 	memblock_init();
 
-	memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
+	memblock_reserve(__pa_symbol(&_text),
+			 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/* Reserve INITRD */
@@ -109,7 +110,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
 		unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
 		unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size;
 		unsigned long ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);
-		memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK");
+		memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
 	}
 #endif
 
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 8faeaa0ed2cc..a6b79c16ec78 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -564,9 +564,7 @@ void __init default_get_smp_config(unsigned int early)
 
 static void __init smp_reserve_memory(struct mpf_intel *mpf)
 {
-	unsigned long size = get_mpc_size(mpf->physptr);
-
-	memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
+	memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr));
 }
 
 static int __init smp_scan_config(unsigned long base, unsigned long length)
@@ -595,7 +593,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
 			       mpf, (u64)virt_to_phys(mpf));
 
 			mem = virt_to_phys(mpf);
-			memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf");
+			memblock_reserve(mem, sizeof(*mpf));
 			if (mpf->physptr)
 				smp_reserve_memory(mpf);
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 31ffe20d5d27..97d227ec995d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -306,7 +306,8 @@ static void __init cleanup_highmap(void)
 static void __init reserve_brk(void)
 {
 	if (_brk_end > _brk_start)
-		memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK");
+		memblock_reserve(__pa(_brk_start),
+				 __pa(_brk_end) - __pa(_brk_start));
 
 	/* Mark brk area as locked down and no longer taking any
 	   new allocations */
@@ -337,7 +338,7 @@ static void __init relocate_initrd(void)
 
 	/* Note: this includes all the lowmem currently occupied by
 	   the initrd, we rely on that fact to keep the data intact. */
-	memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK");
+	memblock_reserve(ramdisk_here, area_size);
 	initrd_start = ramdisk_here + PAGE_OFFSET;
 	initrd_end   = initrd_start + ramdisk_size;
 	printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -393,7 +394,7 @@ static void __init reserve_initrd(void)
 	initrd_start = 0;
 
 	if (ramdisk_size >= (end_of_lowmem>>1)) {
-		memblock_x86_free_range(ramdisk_image, ramdisk_end);
+		memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
 		printk(KERN_ERR "initrd too large to handle, "
 		       "disabling initrd\n");
 		return;
@@ -416,7 +417,7 @@ static void __init reserve_initrd(void)
 
 	relocate_initrd();
 
-	memblock_x86_free_range(ramdisk_image, ramdisk_end);
+	memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
 }
 #else
 static void __init reserve_initrd(void)
@@ -490,15 +491,13 @@ static void __init memblock_x86_reserve_range_setup_data(void)
 {
 	struct setup_data *data;
 	u64 pa_data;
-	char buf[32];
 
 	if (boot_params.hdr.version < 0x0209)
 		return;
 	pa_data = boot_params.hdr.setup_data;
 	while (pa_data) {
 		data = early_memremap(pa_data, sizeof(*data));
-		sprintf(buf, "setup data %x", data->type);
-		memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
+		memblock_reserve(pa_data, sizeof(*data) + data->len);
 		pa_data = data->next;
 		early_iounmap(data, sizeof(*data));
 	}
@@ -568,7 +567,7 @@ static void __init reserve_crashkernel(void)
 			return;
 		}
 	}
-	memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL");
+	memblock_reserve(crash_base, crash_size);
 
 	printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
 			"for crashkernel (System RAM: %ldMB)\n",
@@ -626,7 +625,7 @@ static __init void reserve_ibft_region(void)
 	addr = find_ibft_region(&size);
 
 	if (size)
-		memblock_x86_reserve_range(addr, addr + size, "* ibft");
+		memblock_reserve(addr, size);
 }
 
 static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index a1f13ddb06e0..a73b61055ad6 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -18,7 +18,7 @@ void __init setup_trampolines(void)
 		panic("Cannot allocate trampoline\n");
 
 	x86_trampoline_base = __va(mem);
-	memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE");
+	memblock_reserve(mem, size);
 
 	printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
 	       x86_trampoline_base, (unsigned long long)mem, size);
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3d11327c9ab4..23d8e5fecf76 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -27,6 +27,4 @@ obj-$(CONFIG_AMD_NUMA)		+= amdtopology.o
 obj-$(CONFIG_ACPI_NUMA)		+= srat.o
 obj-$(CONFIG_NUMA_EMU)		+= numa_emulation.o
 
-obj-$(CONFIG_HAVE_MEMBLOCK)		+= memblock.o
-
 obj-$(CONFIG_MEMTEST)		+= memtest.o
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 13cf05a61605..0b736b99d925 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -81,7 +81,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
 
 void __init native_pagetable_reserve(u64 start, u64 end)
 {
-	memblock_x86_reserve_range(start, end, "PGTABLE");
+	memblock_reserve(start, end - start);
 }
 
 struct map_range {
@@ -280,8 +280,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 	 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
 	 * so that they can be reused for other purposes.
 	 *
-	 * On native it just means calling memblock_x86_reserve_range, on Xen it
-	 * also means marking RW the pagetable pages that we allocated before
+	 * On native it just means calling memblock_reserve, on Xen it also
+	 * means marking RW the pagetable pages that we allocated before
 	 * but that haven't been used.
 	 *
 	 * In fact on xen we mark RO the whole range pgt_buf_start -
diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c
deleted file mode 100644
index 7325c5d8ace5..000000000000
--- a/arch/x86/mm/memblock.c
+++ /dev/null
@@ -1,34 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <linux/memblock.h>
-#include <linux/bootmem.h>
-#include <linux/mm.h>
-#include <linux/range.h>
-
-void __init memblock_x86_reserve_range(u64 start, u64 end, char *name)
-{
-	if (start == end)
-		return;
-
-	if (WARN_ONCE(start > end, "memblock_x86_reserve_range: wrong range [%#llx, %#llx)\n", start, end))
-		return;
-
-	memblock_dbg("    memblock_x86_reserve_range: [%#010llx-%#010llx] %16s\n", start, end - 1, name);
-
-	memblock_reserve(start, end - start);
-}
-
-void __init memblock_x86_free_range(u64 start, u64 end)
-{
-	if (start == end)
-		return;
-
-	if (WARN_ONCE(start > end, "memblock_x86_free_range: wrong range [%#llx, %#llx)\n", start, end))
-		return;
-
-	memblock_dbg("       memblock_x86_free_range: [%#010llx-%#010llx]\n", start, end - 1);
-
-	memblock_free(start, end - start);
-}
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 46a5ff25eda4..c80b9fb95734 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -34,7 +34,7 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
 	       (unsigned long long) pattern,
 	       (unsigned long long) start_bad,
 	       (unsigned long long) end_bad);
-	memblock_x86_reserve_range(start_bad, end_bad, "BAD RAM");
+	memblock_reserve(start_bad, end_bad - start_bad);
 }
 
 static void __init memtest(u64 pattern, u64 start_phys, u64 size)
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 88e562729967..496f494593bf 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -364,8 +364,7 @@ void __init numa_reset_distance(void)
 
 	/* numa_distance could be 1LU marking allocation failure, test cnt */
 	if (numa_distance_cnt)
-		memblock_x86_free_range(__pa(numa_distance),
-					__pa(numa_distance) + size);
+		memblock_free(__pa(numa_distance), size);
 	numa_distance_cnt = 0;
 	numa_distance = NULL;	/* enable table creation */
 }
@@ -394,7 +393,7 @@ static int __init numa_alloc_distance(void)
 		numa_distance = (void *)1LU;
 		return -ENOMEM;
 	}
-	memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
+	memblock_reserve(phys, size);
 
 	numa_distance = __va(phys);
 	numa_distance_cnt = cnt;
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 58878b536ef2..534255a36b6b 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -204,7 +204,7 @@ void __init init_alloc_remap(int nid, u64 start, u64 end)
 			   size, nid);
 		return;
 	}
-	memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
+	memblock_reserve(node_pa, size);
 
 	remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
 					  max_low_pfn << PAGE_SHIFT,
@@ -212,10 +212,10 @@ void __init init_alloc_remap(int nid, u64 start, u64 end)
 	if (!remap_pa) {
 		pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
 			   size, nid);
-		memblock_x86_free_range(node_pa, node_pa + size);
+		memblock_free(node_pa, size);
 		return;
 	}
-	memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
+	memblock_reserve(remap_pa, size);
 	remap_va = phys_to_virt(remap_pa);
 
 	/* perform actual remap */
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 971fe70549b3..46db56845f18 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -361,7 +361,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 			pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
 			goto no_emu;
 		}
-		memblock_x86_reserve_range(phys, phys + phys_size, "TMP NUMA DIST");
+		memblock_reserve(phys, phys_size);
 		phys_dist = __va(phys);
 
 		for (i = 0; i < numa_dist_cnt; i++)
@@ -430,7 +430,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 
 	/* free the copied physical distance table */
 	if (phys_dist)
-		memblock_x86_free_range(__pa(phys_dist), __pa(phys_dist) + phys_size);
+		memblock_free(__pa(phys_dist), phys_size);
 	return;
 
 no_emu:
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index a4c322ca1a5d..3b4e86bda3cb 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -280,8 +280,7 @@ void __init efi_memblock_x86_reserve_range(void)
 		boot_params.efi_info.efi_memdesc_size;
 	memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
 	memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
-	memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size,
-		      "EFI memmap");
+	memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
 }
 
 #if EFI_DEBUG
@@ -332,8 +331,7 @@ void __init efi_reserve_boot_services(void)
 					"[0x%010llx-0x%010llx]\n",
 						start, start+size-1);
 		} else
-			memblock_x86_reserve_range(start, start+size,
-							"EFI Boot");
+			memblock_reserve(start, size);
 	}
 }
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 0ccccb67a993..ad54fa10f8a2 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1720,10 +1720,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	__xen_write_cr3(true, __pa(pgd));
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 
-	memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
-		      __pa(xen_start_info->pt_base +
-			   xen_start_info->nr_pt_frames * PAGE_SIZE),
-		      "XEN PAGETABLES");
+	memblock_reserve(__pa(xen_start_info->pt_base),
+			 xen_start_info->nr_pt_frames * PAGE_SIZE);
 
 	return pgd;
 }
@@ -1799,10 +1797,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 			  PFN_DOWN(__pa(initial_page_table)));
 	xen_write_cr3(__pa(initial_page_table));
 
-	memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
-		      __pa(xen_start_info->pt_base +
-			   xen_start_info->nr_pt_frames * PAGE_SIZE),
-		      "XEN PAGETABLES");
+	memblock_reserve(__pa(xen_start_info->pt_base),
+			 xen_start_info->nr_pt_frames * PAGE_SIZE));
 
 	return initial_page_table;
 }
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 60aeeb56948f..73daaf75801a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -63,7 +63,7 @@ static void __init xen_add_extra_mem(unsigned long pages)
 	e820_add_region(extra_start, size, E820_RAM);
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
-	memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");
+	memblock_reserve(extra_start, size);
 
 	xen_extra_mem_size += size;
 
@@ -287,9 +287,8 @@ char * __init xen_memory_setup(void)
 	 *  - xen_start_info
 	 * See comment above "struct start_info" in <xen/interface/xen.h>
 	 */
-	memblock_x86_reserve_range(__pa(xen_start_info->mfn_list),
-		      __pa(xen_start_info->pt_base),
-			"XEN START INFO");
+	memblock_reserve(__pa(xen_start_info->mfn_list),
+			 xen_start_info->pt_base - xen_start_info->mfn_list);
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 2491355bb6e4..90746318cec4 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -17,8 +17,6 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 
-#include <asm/memblock.h>
-
 #define INIT_MEMBLOCK_REGIONS	128
 
 struct memblock_region {
diff --git a/mm/memblock.c b/mm/memblock.c
index ebc6119f1280..0cb4da657b9d 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -449,6 +449,9 @@ long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
 
 long __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
 {
+	memblock_dbg("   memblock_free: [%#016llx-%#016llx] %pF\n",
+		     base, base + size, (void *)_RET_IP_);
+
 	return __memblock_remove(&memblock.reserved, base, size);
 }
 
@@ -456,6 +459,8 @@ long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
 {
 	struct memblock_type *_rgn = &memblock.reserved;
 
+	memblock_dbg("memblock_reserve: [%#016llx-%#016llx] %pF\n",
+		     base, base + size, (void *)_RET_IP_);
 	BUG_ON(0 == size);
 
 	return memblock_add_region(_rgn, base, size);
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 7075bc00fa84..29d948ce6d0f 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -47,7 +47,7 @@ static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
 
 	ptr = phys_to_virt(addr);
 	memset(ptr, 0, size);
-	memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
+	memblock_reserve(addr, size);
 	/*
 	 * The min_count is set to 0 so that bootmem allocated blocks
 	 * are never reported as leaks.
@@ -175,7 +175,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 			      unsigned long size)
 {
 	kmemleak_free_part(__va(physaddr), size);
-	memblock_x86_free_range(physaddr, physaddr + size);
+	memblock_free(physaddr, size);
 }
 
 /**
@@ -190,7 +190,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 void __init free_bootmem(unsigned long addr, unsigned long size)
 {
 	kmemleak_free_part(__va(addr), size);
-	memblock_x86_free_range(addr, addr + size);
+	memblock_free(addr, size);
 }
 
 static void * __init ___alloc_bootmem_nopanic(unsigned long size,
-- 
cgit v1.2.3


From 9ad63986c606c60e2e916b1b96f22991f966d9cc Mon Sep 17 00:00:00 2001
From: Dima Zavin <dima@android.com>
Date: Sun, 10 Jul 2011 16:01:15 -0700
Subject: pda_power: Add support for using otg transceiver events

If the platform data sets the use_otg_notifier flag,
the driver will now register an otg notifier callback and listen
to transceiver events for AC/USB plug-in events instead. This would
normally be used by not specifying is_xx_online callbacks and
not specifying any irqs so the state machine is completely driven
from OTG xceiver events.

Signed-off-by: Dima Zavin <dima@android.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/pda_power.c | 71 +++++++++++++++++++++++++++++++++++++++--------
 include/linux/pda_power.h |  2 ++
 2 files changed, 61 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/pda_power.c b/drivers/power/pda_power.c
index 69f8aa3a6a4b..81b720107c3a 100644
--- a/drivers/power/pda_power.c
+++ b/drivers/power/pda_power.c
@@ -14,6 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
+#include <linux/notifier.h>
 #include <linux/power_supply.h>
 #include <linux/pda_power.h>
 #include <linux/regulator/consumer.h>
@@ -38,9 +39,8 @@ static struct timer_list supply_timer;
 static struct timer_list polling_timer;
 static int polling;
 
-#ifdef CONFIG_USB_OTG_UTILS
 static struct otg_transceiver *transceiver;
-#endif
+static struct notifier_block otg_nb;
 static struct regulator *ac_draw;
 
 enum {
@@ -222,7 +222,42 @@ static void polling_timer_func(unsigned long unused)
 #ifdef CONFIG_USB_OTG_UTILS
 static int otg_is_usb_online(void)
 {
-	return (transceiver->state == OTG_STATE_B_PERIPHERAL);
+	return (transceiver->last_event == USB_EVENT_VBUS ||
+		transceiver->last_event == USB_EVENT_ENUMERATED);
+}
+
+static int otg_is_ac_online(void)
+{
+	return (transceiver->last_event == USB_EVENT_CHARGER);
+}
+
+static int otg_handle_notification(struct notifier_block *nb,
+		unsigned long event, void *unused)
+{
+	switch (event) {
+	case USB_EVENT_CHARGER:
+		ac_status = PDA_PSY_TO_CHANGE;
+		break;
+	case USB_EVENT_VBUS:
+	case USB_EVENT_ENUMERATED:
+		usb_status = PDA_PSY_TO_CHANGE;
+		break;
+	case USB_EVENT_NONE:
+		ac_status = PDA_PSY_TO_CHANGE;
+		usb_status = PDA_PSY_TO_CHANGE;
+		break;
+	default:
+		return NOTIFY_OK;
+	}
+
+	/*
+	 * Wait a bit before reading ac/usb line status and setting charger,
+	 * because ac/usb status readings may lag from irq.
+	 */
+	mod_timer(&charger_timer,
+		  jiffies + msecs_to_jiffies(pdata->wait_for_status));
+
+	return NOTIFY_OK;
 }
 #endif
 
@@ -282,6 +317,14 @@ static int pda_power_probe(struct platform_device *pdev)
 		ret = PTR_ERR(ac_draw);
 	}
 
+	transceiver = otg_get_transceiver();
+	if (transceiver && !pdata->is_usb_online) {
+		pdata->is_usb_online = otg_is_usb_online;
+	}
+	if (transceiver && !pdata->is_ac_online) {
+		pdata->is_ac_online = otg_is_ac_online;
+	}
+
 	if (pdata->is_ac_online) {
 		ret = power_supply_register(&pdev->dev, &pda_psy_ac);
 		if (ret) {
@@ -303,13 +346,6 @@ static int pda_power_probe(struct platform_device *pdev)
 		}
 	}
 
-#ifdef CONFIG_USB_OTG_UTILS
-	transceiver = otg_get_transceiver();
-	if (transceiver && !pdata->is_usb_online) {
-		pdata->is_usb_online = otg_is_usb_online;
-	}
-#endif
-
 	if (pdata->is_usb_online) {
 		ret = power_supply_register(&pdev->dev, &pda_psy_usb);
 		if (ret) {
@@ -331,6 +367,16 @@ static int pda_power_probe(struct platform_device *pdev)
 		}
 	}
 
+	if (transceiver && pdata->use_otg_notifier) {
+		otg_nb.notifier_call = otg_handle_notification;
+		ret = otg_register_notifier(transceiver, &otg_nb);
+		if (ret) {
+			dev_err(dev, "failure to register otg notifier\n");
+			goto otg_reg_notifier_failed;
+		}
+		polling = 0;
+	}
+
 	if (polling) {
 		dev_dbg(dev, "will poll for status\n");
 		setup_timer(&polling_timer, polling_timer_func, 0);
@@ -343,16 +389,17 @@ static int pda_power_probe(struct platform_device *pdev)
 
 	return 0;
 
+otg_reg_notifier_failed:
+	if (pdata->is_usb_online && usb_irq)
+		free_irq(usb_irq->start, &pda_psy_usb);
 usb_irq_failed:
 	if (pdata->is_usb_online)
 		power_supply_unregister(&pda_psy_usb);
 usb_supply_failed:
 	if (pdata->is_ac_online && ac_irq)
 		free_irq(ac_irq->start, &pda_psy_ac);
-#ifdef CONFIG_USB_OTG_UTILS
 	if (transceiver)
 		otg_put_transceiver(transceiver);
-#endif
 ac_irq_failed:
 	if (pdata->is_ac_online)
 		power_supply_unregister(&pda_psy_ac);
diff --git a/include/linux/pda_power.h b/include/linux/pda_power.h
index c9e4d814ff77..2bb62bf296ac 100644
--- a/include/linux/pda_power.h
+++ b/include/linux/pda_power.h
@@ -35,6 +35,8 @@ struct pda_power_pdata {
 	unsigned int polling_interval; /* msecs, default is 2000 */
 
 	unsigned long ac_max_uA; /* current to draw when on AC */
+
+	bool use_otg_notifier;
 };
 
 #endif /* __PDA_POWER_H__ */
-- 
cgit v1.2.3


From 854a68521badc48460c9cbcdf37b220865836ac3 Mon Sep 17 00:00:00 2001
From: Mark Salter <msalter@redhat.com>
Date: Tue, 27 Sep 2011 12:35:21 -0400
Subject: add ELF machine define for TI C6X DSPs

Signed-off-by: Mark Salter <msalter@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/elf-em.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h
index 18bea78fe47b..8e2b7bac4378 100644
--- a/include/linux/elf-em.h
+++ b/include/linux/elf-em.h
@@ -33,6 +33,7 @@
 #define EM_H8_300	46	/* Renesas H8/300,300H,H8S */
 #define EM_MN10300	89	/* Panasonic/MEI MN10300, AM33 */
 #define EM_BLACKFIN     106     /* ADI Blackfin Processor */
+#define EM_TI_C6000	140	/* TI C6X DSPs */
 #define EM_FRV		0x5441	/* Fujitsu FR-V */
 #define EM_AVR32	0x18ad	/* Atmel AVR32 */
 
-- 
cgit v1.2.3


From a390e85cfe91c346ff4745bcd45ad0a7e7101aa2 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Thu, 6 Oct 2011 10:07:44 +0300
Subject: wl12xx: move common init code from bus modules to main

Move all common parts from sdio.c and spi.c to main.c, since they now
can be handled as part of the platform driver.

Signed-off-by: Felipe Balbi <balbi@ti.com>
[forward-ported, cleaned-up and rephrased commit message]
[added a bunch of fixes and a new pdata element]
[moved some new code into main.c as well]
Signed-off-by: Luciano Coelho <coelho@ti.com>
---
 drivers/net/wireless/wl12xx/io.c                   |  11 +-
 drivers/net/wireless/wl12xx/io.h                   |  14 +-
 drivers/net/wireless/wl12xx/main.c                 | 110 ++++++++++++-
 drivers/net/wireless/wl12xx/sdio.c                 | 174 ++++-----------------
 drivers/net/wireless/wl12xx/spi.c                  | 161 +++----------------
 drivers/net/wireless/wl12xx/wl12xx.h               |  17 +-
 drivers/net/wireless/wl12xx/wl12xx_platform_data.c |   4 +-
 include/linux/wl12xx.h                             |   5 +-
 8 files changed, 184 insertions(+), 312 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/wl12xx/io.c b/drivers/net/wireless/wl12xx/io.c
index c2da66f45046..1a7df8a7ed2d 100644
--- a/drivers/net/wireless/wl12xx/io.c
+++ b/drivers/net/wireless/wl12xx/io.c
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/spi/spi.h>
+#include <linux/interrupt.h>
 
 #include "wl12xx.h"
 #include "wl12xx_80211.h"
@@ -46,7 +47,7 @@
 bool wl1271_set_block_size(struct wl1271 *wl)
 {
 	if (wl->if_ops->set_block_size) {
-		wl->if_ops->set_block_size(wl, WL12XX_BUS_BLOCK_SIZE);
+		wl->if_ops->set_block_size(wl->dev, WL12XX_BUS_BLOCK_SIZE);
 		return true;
 	}
 
@@ -55,12 +56,12 @@ bool wl1271_set_block_size(struct wl1271 *wl)
 
 void wl1271_disable_interrupts(struct wl1271 *wl)
 {
-	wl->if_ops->disable_irq(wl);
+	disable_irq(wl->irq);
 }
 
 void wl1271_enable_interrupts(struct wl1271 *wl)
 {
-	wl->if_ops->enable_irq(wl);
+	enable_irq(wl->irq);
 }
 
 /* Set the SPI partitions to access the chip addresses
@@ -128,13 +129,13 @@ EXPORT_SYMBOL_GPL(wl1271_set_partition);
 void wl1271_io_reset(struct wl1271 *wl)
 {
 	if (wl->if_ops->reset)
-		wl->if_ops->reset(wl);
+		wl->if_ops->reset(wl->dev);
 }
 
 void wl1271_io_init(struct wl1271 *wl)
 {
 	if (wl->if_ops->init)
-		wl->if_ops->init(wl);
+		wl->if_ops->init(wl->dev);
 }
 
 void wl1271_top_reg_write(struct wl1271 *wl, int addr, u16 val)
diff --git a/drivers/net/wireless/wl12xx/io.h b/drivers/net/wireless/wl12xx/io.h
index e839341dfafe..e82dad19aa30 100644
--- a/drivers/net/wireless/wl12xx/io.h
+++ b/drivers/net/wireless/wl12xx/io.h
@@ -51,23 +51,17 @@ void wl1271_enable_interrupts(struct wl1271 *wl);
 void wl1271_io_reset(struct wl1271 *wl);
 void wl1271_io_init(struct wl1271 *wl);
 
-static inline struct device *wl1271_wl_to_dev(struct wl1271 *wl)
-{
-	return wl->if_ops->dev(wl);
-}
-
-
 /* Raw target IO, address is not translated */
 static inline void wl1271_raw_write(struct wl1271 *wl, int addr, void *buf,
 				    size_t len, bool fixed)
 {
-	wl->if_ops->write(wl, addr, buf, len, fixed);
+	wl->if_ops->write(wl->dev, addr, buf, len, fixed);
 }
 
 static inline void wl1271_raw_read(struct wl1271 *wl, int addr, void *buf,
 				   size_t len, bool fixed)
 {
-	wl->if_ops->read(wl, addr, buf, len, fixed);
+	wl->if_ops->read(wl->dev, addr, buf, len, fixed);
 }
 
 static inline u32 wl1271_raw_read32(struct wl1271 *wl, int addr)
@@ -155,13 +149,13 @@ static inline void wl1271_write32(struct wl1271 *wl, int addr, u32 val)
 
 static inline void wl1271_power_off(struct wl1271 *wl)
 {
-	wl->if_ops->power(wl, false);
+	wl->if_ops->power(wl->dev, false);
 	clear_bit(WL1271_FLAG_GPIO_POWER, &wl->flags);
 }
 
 static inline int wl1271_power_on(struct wl1271 *wl)
 {
-	int ret = wl->if_ops->power(wl, true);
+	int ret = wl->if_ops->power(wl->dev, true);
 	if (ret == 0)
 		set_bit(WL1271_FLAG_GPIO_POWER, &wl->flags);
 
diff --git a/drivers/net/wireless/wl12xx/main.c b/drivers/net/wireless/wl12xx/main.c
index 3262e8a6c475..1cf987785053 100644
--- a/drivers/net/wireless/wl12xx/main.c
+++ b/drivers/net/wireless/wl12xx/main.c
@@ -32,6 +32,7 @@
 #include <linux/slab.h>
 #include <linux/wl12xx.h>
 #include <linux/sched.h>
+#include <linux/interrupt.h>
 
 #include "wl12xx.h"
 #include "wl12xx_80211.h"
@@ -1067,7 +1068,7 @@ static int wl1271_fetch_firmware(struct wl1271 *wl)
 
 	wl1271_debug(DEBUG_BOOT, "booting firmware %s", fw_name);
 
-	ret = request_firmware(&fw, fw_name, wl1271_wl_to_dev(wl));
+	ret = request_firmware(&fw, fw_name, wl->dev);
 
 	if (ret < 0) {
 		wl1271_error("could not get firmware: %d", ret);
@@ -1105,7 +1106,7 @@ static int wl1271_fetch_nvs(struct wl1271 *wl)
 	const struct firmware *fw;
 	int ret;
 
-	ret = request_firmware(&fw, WL12XX_NVS_NAME, wl1271_wl_to_dev(wl));
+	ret = request_firmware(&fw, WL12XX_NVS_NAME, wl->dev);
 
 	if (ret < 0) {
 		wl1271_error("could not get nvs file: %d", ret);
@@ -4979,7 +4980,7 @@ int wl1271_init_ieee80211(struct wl1271 *wl)
 
 	wl->hw->wiphy->reg_notifier = wl1271_reg_notify;
 
-	SET_IEEE80211_DEV(wl->hw, wl1271_wl_to_dev(wl));
+	SET_IEEE80211_DEV(wl->hw, wl->dev);
 
 	wl->hw->sta_data_size = sizeof(struct wl1271_station);
 	wl->hw->vif_data_size = sizeof(struct wl12xx_vif);
@@ -5200,13 +5201,116 @@ int wl1271_free_hw(struct wl1271 *wl)
 }
 EXPORT_SYMBOL_GPL(wl1271_free_hw);
 
+static irqreturn_t wl12xx_hardirq(int irq, void *cookie)
+{
+	struct wl1271 *wl = cookie;
+	unsigned long flags;
+
+	wl1271_debug(DEBUG_IRQ, "IRQ");
+
+	/* complete the ELP completion */
+	spin_lock_irqsave(&wl->wl_lock, flags);
+	set_bit(WL1271_FLAG_IRQ_RUNNING, &wl->flags);
+	if (wl->elp_compl) {
+		complete(wl->elp_compl);
+		wl->elp_compl = NULL;
+	}
+
+	if (test_bit(WL1271_FLAG_SUSPENDED, &wl->flags)) {
+		/* don't enqueue a work right now. mark it as pending */
+		set_bit(WL1271_FLAG_PENDING_WORK, &wl->flags);
+		wl1271_debug(DEBUG_IRQ, "should not enqueue work");
+		disable_irq_nosync(wl->irq);
+		pm_wakeup_event(wl->dev, 0);
+		spin_unlock_irqrestore(&wl->wl_lock, flags);
+		return IRQ_HANDLED;
+	}
+	spin_unlock_irqrestore(&wl->wl_lock, flags);
+
+	return IRQ_WAKE_THREAD;
+}
+
 static int __devinit wl12xx_probe(struct platform_device *pdev)
 {
+	struct wl12xx_platform_data *pdata = pdev->dev.platform_data;
+	struct ieee80211_hw *hw;
+	struct wl1271 *wl;
+	unsigned long irqflags;
+	int ret = -ENODEV;
+
+	hw = wl1271_alloc_hw();
+	if (IS_ERR(hw)) {
+		wl1271_error("can't allocate hw");
+		ret = PTR_ERR(hw);
+		goto out;
+	}
+
+	wl = hw->priv;
+	wl->irq = platform_get_irq(pdev, 0);
+	wl->ref_clock = pdata->board_ref_clock;
+	wl->tcxo_clock = pdata->board_tcxo_clock;
+	wl->platform_quirks = pdata->platform_quirks;
+	wl->set_power = pdata->set_power;
+	wl->dev = &pdev->dev;
+	wl->if_ops = pdata->ops;
+
+	platform_set_drvdata(pdev, wl);
+
+	if (wl->platform_quirks & WL12XX_PLATFORM_QUIRK_EDGE_IRQ)
+		irqflags = IRQF_TRIGGER_RISING;
+	else
+		irqflags = IRQF_TRIGGER_HIGH | IRQF_ONESHOT;
+
+	ret = request_threaded_irq(wl->irq, wl12xx_hardirq, wl1271_irq,
+				   irqflags,
+				   pdev->name, wl);
+	if (ret < 0) {
+		wl1271_error("request_irq() failed: %d", ret);
+		goto out_free_hw;
+	}
+
+	ret = enable_irq_wake(wl->irq);
+	if (!ret) {
+		wl->irq_wake_enabled = true;
+		device_init_wakeup(wl->dev, 1);
+		if (pdata->pwr_in_suspend)
+			hw->wiphy->wowlan.flags = WIPHY_WOWLAN_ANY;
+
+	}
+	disable_irq(wl->irq);
+
+	ret = wl1271_init_ieee80211(wl);
+	if (ret)
+		goto out_irq;
+
+	ret = wl1271_register_hw(wl);
+	if (ret)
+		goto out_irq;
+
 	return 0;
+
+out_irq:
+	free_irq(wl->irq, wl);
+
+out_free_hw:
+	wl1271_free_hw(wl);
+
+out:
+	return ret;
 }
 
 static int __devexit wl12xx_remove(struct platform_device *pdev)
 {
+	struct wl1271 *wl = platform_get_drvdata(pdev);
+
+	if (wl->irq_wake_enabled) {
+		device_init_wakeup(wl->dev, 0);
+		disable_irq_wake(wl->irq);
+	}
+	wl1271_unregister_hw(wl);
+	free_irq(wl->irq, wl);
+	wl1271_free_hw(wl);
+
 	return 0;
 }
 
diff --git a/drivers/net/wireless/wl12xx/sdio.c b/drivers/net/wireless/wl12xx/sdio.c
index e7ee5d155d34..78e5352c4037 100644
--- a/drivers/net/wireless/wl12xx/sdio.c
+++ b/drivers/net/wireless/wl12xx/sdio.c
@@ -47,7 +47,6 @@
 
 struct wl12xx_sdio_glue {
 	struct device *dev;
-	struct wl1271 *wl;
 	struct platform_device *core;
 };
 
@@ -57,67 +56,22 @@ static const struct sdio_device_id wl1271_devices[] __devinitconst = {
 };
 MODULE_DEVICE_TABLE(sdio, wl1271_devices);
 
-static void wl1271_sdio_set_block_size(struct wl1271 *wl, unsigned int blksz)
+static void wl1271_sdio_set_block_size(struct device *child,
+				       unsigned int blksz)
 {
-	sdio_claim_host(wl->if_priv);
-	sdio_set_block_size(wl->if_priv, blksz);
-	sdio_release_host(wl->if_priv);
-}
-
-static inline struct wl12xx_sdio_glue *wl_to_glue(struct wl1271 *wl)
-{
-	return wl->if_priv;
-}
-
-static struct device *wl1271_sdio_wl_to_dev(struct wl1271 *wl)
-{
-	return wl_to_glue(wl)->dev;
-}
-
-static irqreturn_t wl1271_hardirq(int irq, void *cookie)
-{
-	struct wl1271 *wl = cookie;
-	unsigned long flags;
-
-	wl1271_debug(DEBUG_IRQ, "IRQ");
-
-	/* complete the ELP completion */
-	spin_lock_irqsave(&wl->wl_lock, flags);
-	set_bit(WL1271_FLAG_IRQ_RUNNING, &wl->flags);
-	if (wl->elp_compl) {
-		complete(wl->elp_compl);
-		wl->elp_compl = NULL;
-	}
-
-	if (test_bit(WL1271_FLAG_SUSPENDED, &wl->flags)) {
-		/* don't enqueue a work right now. mark it as pending */
-		set_bit(WL1271_FLAG_PENDING_WORK, &wl->flags);
-		wl1271_debug(DEBUG_IRQ, "should not enqueue work");
-		disable_irq_nosync(wl->irq);
-		pm_wakeup_event(wl1271_sdio_wl_to_dev(wl), 0);
-		spin_unlock_irqrestore(&wl->wl_lock, flags);
-		return IRQ_HANDLED;
-	}
-	spin_unlock_irqrestore(&wl->wl_lock, flags);
-
-	return IRQ_WAKE_THREAD;
-}
-
-static void wl1271_sdio_disable_interrupts(struct wl1271 *wl)
-{
-	disable_irq(wl->irq);
-}
+	struct wl12xx_sdio_glue *glue = dev_get_drvdata(child->parent);
+	struct sdio_func *func = dev_to_sdio_func(glue->dev);
 
-static void wl1271_sdio_enable_interrupts(struct wl1271 *wl)
-{
-	enable_irq(wl->irq);
+	sdio_claim_host(func);
+	sdio_set_block_size(func, blksz);
+	sdio_release_host(func);
 }
 
-static void wl1271_sdio_raw_read(struct wl1271 *wl, int addr, void *buf,
+static void wl12xx_sdio_raw_read(struct device *child, int addr, void *buf,
 				 size_t len, bool fixed)
 {
 	int ret;
-	struct wl12xx_sdio_glue *glue = wl_to_glue(wl);
+	struct wl12xx_sdio_glue *glue = dev_get_drvdata(child->parent);
 	struct sdio_func *func = dev_to_sdio_func(glue->dev);
 
 	if (unlikely(addr == HW_ACCESS_ELP_CTRL_REG_ADDR)) {
@@ -139,11 +93,11 @@ static void wl1271_sdio_raw_read(struct wl1271 *wl, int addr, void *buf,
 		wl1271_error("sdio read failed (%d)", ret);
 }
 
-static void wl1271_sdio_raw_write(struct wl1271 *wl, int addr, void *buf,
+static void wl12xx_sdio_raw_write(struct device *child, int addr, void *buf,
 				  size_t len, bool fixed)
 {
 	int ret;
-	struct wl12xx_sdio_glue *glue = wl_to_glue(wl);
+	struct wl12xx_sdio_glue *glue = dev_get_drvdata(child->parent);
 	struct sdio_func *func = dev_to_sdio_func(glue->dev);
 
 	if (unlikely(addr == HW_ACCESS_ELP_CTRL_REG_ADDR)) {
@@ -165,10 +119,9 @@ static void wl1271_sdio_raw_write(struct wl1271 *wl, int addr, void *buf,
 		wl1271_error("sdio write failed (%d)", ret);
 }
 
-static int wl1271_sdio_power_on(struct wl1271 *wl)
+static int wl12xx_sdio_power_on(struct wl12xx_sdio_glue *glue)
 {
 	int ret;
-	struct wl12xx_sdio_glue *glue = wl_to_glue(wl);
 	struct sdio_func *func = dev_to_sdio_func(glue->dev);
 
 	/* If enabled, tell runtime PM not to power off the card */
@@ -190,10 +143,9 @@ out:
 	return ret;
 }
 
-static int wl1271_sdio_power_off(struct wl1271 *wl)
+static int wl12xx_sdio_power_off(struct wl12xx_sdio_glue *glue)
 {
 	int ret;
-	struct wl12xx_sdio_glue *glue = wl_to_glue(wl);
 	struct sdio_func *func = dev_to_sdio_func(glue->dev);
 
 	sdio_disable_func(func);
@@ -211,33 +163,29 @@ static int wl1271_sdio_power_off(struct wl1271 *wl)
 	return ret;
 }
 
-static int wl1271_sdio_set_power(struct wl1271 *wl, bool enable)
+static int wl12xx_sdio_set_power(struct device *child, bool enable)
 {
+	struct wl12xx_sdio_glue *glue = dev_get_drvdata(child->parent);
+
 	if (enable)
-		return wl1271_sdio_power_on(wl);
+		return wl12xx_sdio_power_on(glue);
 	else
-		return wl1271_sdio_power_off(wl);
+		return wl12xx_sdio_power_off(glue);
 }
 
 static struct wl1271_if_operations sdio_ops = {
-	.read		= wl1271_sdio_raw_read,
-	.write		= wl1271_sdio_raw_write,
-	.power		= wl1271_sdio_set_power,
-	.dev		= wl1271_sdio_wl_to_dev,
-	.enable_irq	= wl1271_sdio_enable_interrupts,
-	.disable_irq	= wl1271_sdio_disable_interrupts,
+	.read		= wl12xx_sdio_raw_read,
+	.write		= wl12xx_sdio_raw_write,
+	.power		= wl12xx_sdio_set_power,
 	.set_block_size = wl1271_sdio_set_block_size,
 };
 
 static int __devinit wl1271_probe(struct sdio_func *func,
 				  const struct sdio_device_id *id)
 {
-	struct ieee80211_hw *hw;
-	const struct wl12xx_platform_data *wlan_data;
-	struct wl1271 *wl;
+	struct wl12xx_platform_data *wlan_data;
 	struct wl12xx_sdio_glue *glue;
 	struct resource res[1];
-	unsigned long irqflags;
 	mmc_pm_flag_t mmcflags;
 	int ret = -ENOMEM;
 
@@ -251,20 +199,7 @@ static int __devinit wl1271_probe(struct sdio_func *func,
 		goto out;
 	}
 
-	hw = wl1271_alloc_hw();
-	if (IS_ERR(hw)) {
-		wl1271_error("can't allocate hw");
-		ret = PTR_ERR(hw);
-		goto out_free_glue;
-	}
-
-	wl = hw->priv;
-
 	glue->dev = &func->dev;
-	glue->wl = wl;
-
-	wl->if_priv = glue;
-	wl->if_ops = &sdio_ops;
 
 	/* Grab access to FN0 for ELP reg. */
 	func->card->quirks |= MMC_QUIRK_LENIENT_FN0;
@@ -276,48 +211,17 @@ static int __devinit wl1271_probe(struct sdio_func *func,
 	if (IS_ERR(wlan_data)) {
 		ret = PTR_ERR(wlan_data);
 		wl1271_error("missing wlan platform data: %d", ret);
-		goto out_free_hw;
-	}
-
-	wl->irq = wlan_data->irq;
-	wl->ref_clock = wlan_data->board_ref_clock;
-	wl->tcxo_clock = wlan_data->board_tcxo_clock;
-	wl->platform_quirks = wlan_data->platform_quirks;
-
-	if (wl->platform_quirks & WL12XX_PLATFORM_QUIRK_EDGE_IRQ)
-		irqflags = IRQF_TRIGGER_RISING;
-	else
-		irqflags = IRQF_TRIGGER_HIGH | IRQF_ONESHOT;
-
-	ret = request_threaded_irq(wl->irq, wl1271_hardirq, wl1271_irq,
-				   irqflags,
-				   DRIVER_NAME, wl);
-	if (ret < 0) {
-		wl1271_error("request_irq() failed: %d", ret);
-		goto out_free_hw;
+		goto out_free_glue;
 	}
 
-	ret = enable_irq_wake(wl->irq);
-	if (!ret) {
-		wl->irq_wake_enabled = true;
-		device_init_wakeup(wl1271_sdio_wl_to_dev(wl), 1);
+	/* if sdio can keep power while host is suspended, enable wow */
+	mmcflags = sdio_get_host_pm_caps(func);
+	wl1271_debug(DEBUG_SDIO, "sdio PM caps = 0x%x", mmcflags);
 
-		/* if sdio can keep power while host is suspended, enable wow */
-		mmcflags = sdio_get_host_pm_caps(func);
-		wl1271_debug(DEBUG_SDIO, "sdio PM caps = 0x%x", mmcflags);
+	if (mmcflags & MMC_PM_KEEP_POWER)
+		wlan_data->pwr_in_suspend = true;
 
-		if (mmcflags & MMC_PM_KEEP_POWER)
-			hw->wiphy->wowlan.flags = WIPHY_WOWLAN_ANY;
-	}
-	disable_irq(wl->irq);
-
-	ret = wl1271_init_ieee80211(wl);
-	if (ret)
-		goto out_irq;
-
-	ret = wl1271_register_hw(wl);
-	if (ret)
-		goto out_irq;
+	wlan_data->ops = &sdio_ops;
 
 	sdio_set_drvdata(func, glue);
 
@@ -328,7 +232,7 @@ static int __devinit wl1271_probe(struct sdio_func *func,
 	if (!glue->core) {
 		wl1271_error("can't allocate platform_device");
 		ret = -ENOMEM;
-		goto out_unreg_hw;
+		goto out_free_glue;
 	}
 
 	glue->core->dev.parent = &func->dev;
@@ -362,17 +266,9 @@ static int __devinit wl1271_probe(struct sdio_func *func,
 out_dev_put:
 	platform_device_put(glue->core);
 
-out_unreg_hw:
-	wl1271_unregister_hw(wl);
-
-out_irq:
-	free_irq(wl->irq, wl);
-
-out_free_hw:
-	wl1271_free_hw(wl);
-
 out_free_glue:
 	kfree(glue);
+
 out:
 	return ret;
 }
@@ -380,18 +276,10 @@ out:
 static void __devexit wl1271_remove(struct sdio_func *func)
 {
 	struct wl12xx_sdio_glue *glue = sdio_get_drvdata(func);
-	struct wl1271 *wl = glue->wl;
 
 	/* Undo decrement done above in wl1271_probe */
 	pm_runtime_get_noresume(&func->dev);
 
-	wl1271_unregister_hw(wl);
-	if (wl->irq_wake_enabled) {
-		device_init_wakeup(wl1271_sdio_wl_to_dev(wl), 0);
-		disable_irq_wake(wl->irq);
-	}
-	free_irq(wl->irq, wl);
-	wl1271_free_hw(wl);
 	platform_device_del(glue->core);
 	platform_device_put(glue->core);
 	kfree(glue);
diff --git a/drivers/net/wireless/wl12xx/spi.c b/drivers/net/wireless/wl12xx/spi.c
index 2dd659886a04..22c1337ba883 100644
--- a/drivers/net/wireless/wl12xx/spi.c
+++ b/drivers/net/wireless/wl12xx/spi.c
@@ -72,33 +72,12 @@
 
 struct wl12xx_spi_glue {
 	struct device *dev;
-	struct wl1271 *wl;
 	struct platform_device *core;
 };
 
-static inline struct wl12xx_spi_glue *wl_to_glue(struct wl1271 *wl)
+static void wl12xx_spi_reset(struct device *child)
 {
-	return wl->if_priv;
-}
-
-static struct device *wl1271_spi_wl_to_dev(struct wl1271 *wl)
-{
-	return wl_to_glue(wl)->dev;
-}
-
-static void wl1271_spi_disable_interrupts(struct wl1271 *wl)
-{
-	disable_irq(wl->irq);
-}
-
-static void wl1271_spi_enable_interrupts(struct wl1271 *wl)
-{
-	enable_irq(wl->irq);
-}
-
-static void wl1271_spi_reset(struct wl1271 *wl)
-{
-	struct wl12xx_spi_glue *glue = wl_to_glue(wl);
+	struct wl12xx_spi_glue *glue = dev_get_drvdata(child->parent);
 	u8 *cmd;
 	struct spi_transfer t;
 	struct spi_message m;
@@ -124,9 +103,9 @@ static void wl1271_spi_reset(struct wl1271 *wl)
 	kfree(cmd);
 }
 
-static void wl1271_spi_init(struct wl1271 *wl)
+static void wl12xx_spi_init(struct device *child)
 {
-	struct wl12xx_spi_glue *glue = wl_to_glue(wl);
+	struct wl12xx_spi_glue *glue = dev_get_drvdata(child->parent);
 	u8 crc[WSPI_INIT_CMD_CRC_LEN], *cmd;
 	struct spi_transfer t;
 	struct spi_message m;
@@ -181,9 +160,10 @@ static void wl1271_spi_init(struct wl1271 *wl)
 
 #define WL1271_BUSY_WORD_TIMEOUT 1000
 
-static int wl1271_spi_read_busy(struct wl1271 *wl)
+static int wl12xx_spi_read_busy(struct device *child)
 {
-	struct wl12xx_spi_glue *glue = wl_to_glue(wl);
+	struct wl12xx_spi_glue *glue = dev_get_drvdata(child->parent);
+	struct wl1271 *wl = dev_get_drvdata(child);
 	struct spi_transfer t[1];
 	struct spi_message m;
 	u32 *busy_buf;
@@ -215,10 +195,11 @@ static int wl1271_spi_read_busy(struct wl1271 *wl)
 	return -ETIMEDOUT;
 }
 
-static void wl1271_spi_raw_read(struct wl1271 *wl, int addr, void *buf,
+static void wl12xx_spi_raw_read(struct device *child, int addr, void *buf,
 				size_t len, bool fixed)
 {
-	struct wl12xx_spi_glue *glue = wl_to_glue(wl);
+	struct wl12xx_spi_glue *glue = dev_get_drvdata(child->parent);
+	struct wl1271 *wl = dev_get_drvdata(child);
 	struct spi_transfer t[2];
 	struct spi_message m;
 	u32 *busy_buf;
@@ -257,7 +238,7 @@ static void wl1271_spi_raw_read(struct wl1271 *wl, int addr, void *buf,
 		spi_sync(to_spi_device(glue->dev), &m);
 
 		if (!(busy_buf[WL1271_BUSY_WORD_CNT - 1] & 0x1) &&
-		    wl1271_spi_read_busy(wl)) {
+		    wl12xx_spi_read_busy(child)) {
 			memset(buf, 0, chunk_len);
 			return;
 		}
@@ -282,10 +263,10 @@ static void wl1271_spi_raw_read(struct wl1271 *wl, int addr, void *buf,
 	}
 }
 
-static void wl1271_spi_raw_write(struct wl1271 *wl, int addr, void *buf,
-			  size_t len, bool fixed)
+static void wl12xx_spi_raw_write(struct device *child, int addr, void *buf,
+				 size_t len, bool fixed)
 {
-	struct wl12xx_spi_glue *glue = wl_to_glue(wl);
+	struct wl12xx_spi_glue *glue = dev_get_drvdata(child->parent);
 	struct spi_transfer t[2 * WSPI_MAX_NUM_OF_CHUNKS];
 	struct spi_message m;
 	u32 commands[WSPI_MAX_NUM_OF_CHUNKS];
@@ -333,42 +314,11 @@ static void wl1271_spi_raw_write(struct wl1271 *wl, int addr, void *buf,
 	spi_sync(to_spi_device(glue->dev), &m);
 }
 
-static irqreturn_t wl1271_hardirq(int irq, void *cookie)
-{
-	struct wl1271 *wl = cookie;
-	unsigned long flags;
-
-	wl1271_debug(DEBUG_IRQ, "IRQ");
-
-	/* complete the ELP completion */
-	spin_lock_irqsave(&wl->wl_lock, flags);
-	set_bit(WL1271_FLAG_IRQ_RUNNING, &wl->flags);
-	if (wl->elp_compl) {
-		complete(wl->elp_compl);
-		wl->elp_compl = NULL;
-	}
-	spin_unlock_irqrestore(&wl->wl_lock, flags);
-
-	return IRQ_WAKE_THREAD;
-}
-
-static int wl1271_spi_set_power(struct wl1271 *wl, bool enable)
-{
-	if (wl->set_power)
-		wl->set_power(enable);
-
-	return 0;
-}
-
 static struct wl1271_if_operations spi_ops = {
-	.read		= wl1271_spi_raw_read,
-	.write		= wl1271_spi_raw_write,
-	.reset		= wl1271_spi_reset,
-	.init		= wl1271_spi_init,
-	.power		= wl1271_spi_set_power,
-	.dev		= wl1271_spi_wl_to_dev,
-	.enable_irq	= wl1271_spi_enable_interrupts,
-	.disable_irq	= wl1271_spi_disable_interrupts,
+	.read		= wl12xx_spi_raw_read,
+	.write		= wl12xx_spi_raw_write,
+	.reset		= wl12xx_spi_reset,
+	.init		= wl12xx_spi_init,
 	.set_block_size = NULL,
 };
 
@@ -376,10 +326,7 @@ static int __devinit wl1271_probe(struct spi_device *spi)
 {
 	struct wl12xx_spi_glue *glue;
 	struct wl12xx_platform_data *pdata;
-	struct ieee80211_hw *hw;
-	struct wl1271 *wl;
 	struct resource res[1];
-	unsigned long irqflags;
 	int ret = -ENOMEM;
 
 	pdata = spi->dev.platform_data;
@@ -388,27 +335,17 @@ static int __devinit wl1271_probe(struct spi_device *spi)
 		return -ENODEV;
 	}
 
+	pdata->ops = &spi_ops;
+
 	glue = kzalloc(sizeof(*glue), GFP_KERNEL);
 	if (!glue) {
 		wl1271_error("can't allocate glue");
 		goto out;
 	}
 
-	hw = wl1271_alloc_hw();
-	if (IS_ERR(hw)) {
-		ret = PTR_ERR(hw);
-		goto out_free_glue;
-	}
-
-	wl = hw->priv;
-
 	glue->dev = &spi->dev;
-	glue->wl = wl;
 
 	spi_set_drvdata(spi, glue);
-	wl->if_priv = glue;
-
-	wl->if_ops = &spi_ops;
 
 	/* This is the only SPI value that we need to set here, the rest
 	 * comes from the board-peripherals file */
@@ -417,55 +354,14 @@ static int __devinit wl1271_probe(struct spi_device *spi)
 	ret = spi_setup(spi);
 	if (ret < 0) {
 		wl1271_error("spi_setup failed");
-		goto out_free_hw;
-	}
-
-	wl->set_power = pdata->set_power;
-	if (!wl->set_power) {
-		wl1271_error("set power function missing in platform data");
-		ret = -ENODEV;
-		goto out_free_hw;
-	}
-
-	wl->ref_clock = pdata->board_ref_clock;
-	wl->tcxo_clock = pdata->board_tcxo_clock;
-	wl->platform_quirks = pdata->platform_quirks;
-
-	if (wl->platform_quirks & WL12XX_PLATFORM_QUIRK_EDGE_IRQ)
-		irqflags = IRQF_TRIGGER_RISING;
-	else
-		irqflags = IRQF_TRIGGER_HIGH | IRQF_ONESHOT;
-
-	wl->irq = spi->irq;
-	if (wl->irq < 0) {
-		wl1271_error("irq missing in platform data");
-		ret = -ENODEV;
-		goto out_free_hw;
-	}
-
-	ret = request_threaded_irq(wl->irq, wl1271_hardirq, wl1271_irq,
-				   irqflags,
-				   DRIVER_NAME, wl);
-	if (ret < 0) {
-		wl1271_error("request_irq() failed: %d", ret);
-		goto out_free_hw;
+		goto out_free_glue;
 	}
 
-	disable_irq(wl->irq);
-
-	ret = wl1271_init_ieee80211(wl);
-	if (ret)
-		goto out_irq;
-
-	ret = wl1271_register_hw(wl);
-	if (ret)
-		goto out_irq;
-
 	glue->core = platform_device_alloc("wl12xx-spi", -1);
 	if (!glue->core) {
 		wl1271_error("can't allocate platform_device");
 		ret = -ENOMEM;
-		goto out_unreg_hw;
+		goto out_free_glue;
 	}
 
 	glue->core->dev.parent = &spi->dev;
@@ -499,15 +395,6 @@ static int __devinit wl1271_probe(struct spi_device *spi)
 out_dev_put:
 	platform_device_put(glue->core);
 
-out_unreg_hw:
-	wl1271_unregister_hw(wl);
-
-out_irq:
-	free_irq(wl->irq, wl);
-
-out_free_hw:
-	wl1271_free_hw(wl);
-
 out_free_glue:
 	kfree(glue);
 out:
@@ -517,11 +404,7 @@ out:
 static int __devexit wl1271_remove(struct spi_device *spi)
 {
 	struct wl12xx_spi_glue *glue = spi_get_drvdata(spi);
-	struct wl1271 *wl = glue->wl;
 
-	wl1271_unregister_hw(wl);
-	free_irq(wl->irq, wl);
-	wl1271_free_hw(wl);
 	platform_device_del(glue->core);
 	platform_device_put(glue->core);
 	kfree(glue);
diff --git a/drivers/net/wireless/wl12xx/wl12xx.h b/drivers/net/wireless/wl12xx/wl12xx.h
index 8815fd9a0f47..d2028939eee5 100644
--- a/drivers/net/wireless/wl12xx/wl12xx.h
+++ b/drivers/net/wireless/wl12xx/wl12xx.h
@@ -288,17 +288,14 @@ struct wl1271_scan {
 };
 
 struct wl1271_if_operations {
-	void (*read)(struct wl1271 *wl, int addr, void *buf, size_t len,
+	void (*read)(struct device *child, int addr, void *buf, size_t len,
 		     bool fixed);
-	void (*write)(struct wl1271 *wl, int addr, void *buf, size_t len,
+	void (*write)(struct device *child, int addr, void *buf, size_t len,
 		     bool fixed);
-	void (*reset)(struct wl1271 *wl);
-	void (*init)(struct wl1271 *wl);
-	int (*power)(struct wl1271 *wl, bool enable);
-	struct device* (*dev)(struct wl1271 *wl);
-	void (*enable_irq)(struct wl1271 *wl);
-	void (*disable_irq)(struct wl1271 *wl);
-	void (*set_block_size) (struct wl1271 *wl, unsigned int blksz);
+	void (*reset)(struct device *child);
+	void (*init)(struct device *child);
+	int (*power)(struct device *child, bool enable);
+	void (*set_block_size) (struct device *child, unsigned int blksz);
 };
 
 #define MAX_NUM_KEYS 14
@@ -362,6 +359,8 @@ struct wl1271 {
 	struct ieee80211_hw *hw;
 	bool mac80211_registered;
 
+	struct device *dev;
+
 	void *if_priv;
 
 	struct wl1271_if_operations *if_ops;
diff --git a/drivers/net/wireless/wl12xx/wl12xx_platform_data.c b/drivers/net/wireless/wl12xx/wl12xx_platform_data.c
index 973b11060a8f..3c96b332184e 100644
--- a/drivers/net/wireless/wl12xx/wl12xx_platform_data.c
+++ b/drivers/net/wireless/wl12xx/wl12xx_platform_data.c
@@ -2,7 +2,7 @@
 #include <linux/err.h>
 #include <linux/wl12xx.h>
 
-static const struct wl12xx_platform_data *platform_data;
+static struct wl12xx_platform_data *platform_data;
 
 int __init wl12xx_set_platform_data(const struct wl12xx_platform_data *data)
 {
@@ -18,7 +18,7 @@ int __init wl12xx_set_platform_data(const struct wl12xx_platform_data *data)
 	return 0;
 }
 
-const struct wl12xx_platform_data *wl12xx_get_platform_data(void)
+struct wl12xx_platform_data *wl12xx_get_platform_data(void)
 {
 	if (!platform_data)
 		return ERR_PTR(-ENODEV);
diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h
index 4b697395326e..0d6373195d32 100644
--- a/include/linux/wl12xx.h
+++ b/include/linux/wl12xx.h
@@ -54,6 +54,9 @@ struct wl12xx_platform_data {
 	int board_ref_clock;
 	int board_tcxo_clock;
 	unsigned long platform_quirks;
+	bool pwr_in_suspend;
+
+	struct wl1271_if_operations *ops;
 };
 
 /* Platform does not support level trigger interrupts */
@@ -73,6 +76,6 @@ int wl12xx_set_platform_data(const struct wl12xx_platform_data *data)
 
 #endif
 
-const struct wl12xx_platform_data *wl12xx_get_platform_data(void);
+struct wl12xx_platform_data *wl12xx_get_platform_data(void);
 
 #endif
-- 
cgit v1.2.3


From 49920bc66984a512f4bcc7735a61642cd0e4d6f2 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@linux.intel.com>
Date: Thu, 13 Oct 2011 15:15:27 +0530
Subject: dmaengine: add new enum dma_transfer_direction

This new enum removes usage of dma_data_direction for dma direction. The new
enum cleans tells the DMA direction and mode
This further paves way for merging the dmaengine _prep operations and also for
interleaved dma

Suggested-by: Jassi Brar <jaswinder.singh@linaro.org>
Reviewed-by: Barry Song <Baohua.Song@csr.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 include/linux/dmaengine.h | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index ace51af4369f..d946ef7f5e67 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -23,7 +23,6 @@
 
 #include <linux/device.h>
 #include <linux/uio.h>
-#include <linux/dma-direction.h>
 #include <linux/scatterlist.h>
 
 /**
@@ -75,6 +74,19 @@ enum dma_transaction_type {
 /* last transaction type for creation of the capabilities mask */
 #define DMA_TX_TYPE_END (DMA_CYCLIC + 1)
 
+/**
+ * enum dma_transfer_direction - dma transfer mode and direction indicator
+ * @DMA_MEM_TO_MEM: Async/Memcpy mode
+ * @DMA_MEM_TO_DEV: Slave mode & From Memory to Device
+ * @DMA_DEV_TO_MEM: Slave mode & From Device to Memory
+ * @DMA_DEV_TO_DEV: Slave mode & From Device to Device
+ */
+enum dma_transfer_direction {
+	DMA_MEM_TO_MEM,
+	DMA_MEM_TO_DEV,
+	DMA_DEV_TO_MEM,
+	DMA_DEV_TO_DEV,
+};
 
 /**
  * enum dma_ctrl_flags - DMA flags to augment operation preparation,
@@ -267,7 +279,7 @@ enum dma_slave_buswidth {
  * struct, if applicable.
  */
 struct dma_slave_config {
-	enum dma_data_direction direction;
+	enum dma_transfer_direction direction;
 	dma_addr_t src_addr;
 	dma_addr_t dst_addr;
 	enum dma_slave_buswidth src_addr_width;
@@ -490,11 +502,11 @@ struct dma_device {
 
 	struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)(
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction);
+		size_t period_len, enum dma_transfer_direction direction);
 	int (*device_control)(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		unsigned long arg);
 
@@ -520,7 +532,7 @@ static inline int dmaengine_slave_config(struct dma_chan *chan,
 
 static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
 	struct dma_chan *chan, void *buf, size_t len,
-	enum dma_data_direction dir, unsigned long flags)
+	enum dma_transfer_direction dir, unsigned long flags)
 {
 	struct scatterlist sg;
 	sg_init_one(&sg, buf, len);
-- 
cgit v1.2.3


From db8196df4bb6f117caa163aa73b0f16fd62290bd Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@linux.intel.com>
Date: Thu, 13 Oct 2011 22:34:23 +0530
Subject: dmaengine: move drivers to dma_transfer_direction

fixup usage of dma direction by introducing dma_transfer_direction,
this patch moves dma/drivers/* to use new enum

Cc: Jassi Brar <jaswinder.singh@linaro.org>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Viresh Kumar <viresh.kumar@st.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Mika Westerberg <mika.westerberg@iki.fi>
Cc: H Hartley Sweeten <hartleys@visionengravers.com>
Cc: Li Yang <leoli@freescale.com>
Cc: Zhang Wei <zw@zh-kernel.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Cc: Shawn Guo <shawn.guo@freescale.com>
Cc: Yong Wang <yong.y.wang@intel.com>
Cc: Tomoya MORINAGA <tomoya-linux@dsn.lapis-semi.com>
Cc: Boojin Kim <boojin.kim@samsung.com>
Cc: Barry Song <Baohua.Song@csr.com>
Acked-by: Mika Westerberg <mika.westerberg@iki.fi>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@st.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 arch/arm/mach-ep93xx/include/mach/dma.h        |  6 +++---
 arch/arm/plat-nomadik/include/plat/ste_dma40.h |  4 ++--
 drivers/dma/amba-pl08x.c                       | 24 ++++++++++++------------
 drivers/dma/at_hdmac.c                         | 22 +++++++++++-----------
 drivers/dma/coh901318.c                        | 12 ++++++------
 drivers/dma/coh901318_lli.c                    | 23 +++++++++++------------
 drivers/dma/coh901318_lli.h                    |  4 ++--
 drivers/dma/dw_dmac.c                          | 14 +++++++-------
 drivers/dma/ep93xx_dma.c                       | 22 +++++++++++-----------
 drivers/dma/fsldma.c                           |  4 ++--
 drivers/dma/imx-dma.c                          | 10 +++++-----
 drivers/dma/imx-sdma.c                         | 10 +++++-----
 drivers/dma/intel_mid_dma.c                    | 14 +++++++-------
 drivers/dma/intel_mid_dma_regs.h               |  2 +-
 drivers/dma/ipu/ipu_idmac.c                    |  4 ++--
 drivers/dma/mxs-dma.c                          |  8 ++++----
 drivers/dma/pch_dma.c                          | 12 ++++++------
 drivers/dma/pl330.c                            | 18 +++++++++---------
 drivers/dma/shdma.c                            | 25 ++++++++++++-------------
 drivers/dma/ste_dma40.c                        | 26 +++++++++++++-------------
 drivers/dma/timb_dma.c                         | 18 +++++++++---------
 drivers/dma/txx9dmac.c                         | 12 ++++++------
 include/linux/amba/pl08x.h                     |  4 ++--
 include/linux/dw_dmac.h                        |  2 +-
 include/linux/sh_dma.h                         |  2 +-
 25 files changed, 150 insertions(+), 152 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-ep93xx/include/mach/dma.h b/arch/arm/mach-ep93xx/include/mach/dma.h
index 46d4d876e6fb..e82c642fa53c 100644
--- a/arch/arm/mach-ep93xx/include/mach/dma.h
+++ b/arch/arm/mach-ep93xx/include/mach/dma.h
@@ -37,7 +37,7 @@
  */
 struct ep93xx_dma_data {
 	int				port;
-	enum dma_data_direction		direction;
+	enum dma_transfer_direction	direction;
 	const char			*name;
 };
 
@@ -80,14 +80,14 @@ static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan)
  * channel supports given DMA direction. Only M2P channels have such
  * limitation, for M2M channels the direction is configurable.
  */
-static inline enum dma_data_direction
+static inline enum dma_transfer_direction
 ep93xx_dma_chan_direction(struct dma_chan *chan)
 {
 	if (!ep93xx_dma_chan_is_m2p(chan))
 		return DMA_NONE;
 
 	/* even channels are for TX, odd for RX */
-	return (chan->chan_id % 2 == 0) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+	return (chan->chan_id % 2 == 0) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
 }
 
 #endif /* __ASM_ARCH_DMA_H */
diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h
index 685c78716d95..38b041a40db4 100644
--- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h
+++ b/arch/arm/plat-nomadik/include/plat/ste_dma40.h
@@ -187,7 +187,7 @@ static inline struct
 dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan,
 					    dma_addr_t addr,
 					    unsigned int size,
-					    enum dma_data_direction direction,
+					    enum dma_transfer_direction direction,
 					    unsigned long flags)
 {
 	struct scatterlist sg;
@@ -209,7 +209,7 @@ static inline struct
 dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan,
 					    dma_addr_t addr,
 					    unsigned int size,
-					    enum dma_data_direction direction,
+					    enum dma_transfer_direction direction,
 					    unsigned long flags)
 {
 	return NULL;
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index b7cbd1ab1db1..41c62fd0680d 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -882,9 +882,9 @@ static int prep_phy_channel(struct pl08x_dma_chan *plchan,
 		ch->signal = ret;
 
 		/* Assign the flow control signal to this channel */
-		if (txd->direction == DMA_TO_DEVICE)
+		if (txd->direction == DMA_MEM_TO_DEV)
 			txd->ccfg |= ch->signal << PL080_CONFIG_DST_SEL_SHIFT;
-		else if (txd->direction == DMA_FROM_DEVICE)
+		else if (txd->direction == DMA_DEV_TO_MEM)
 			txd->ccfg |= ch->signal << PL080_CONFIG_SRC_SEL_SHIFT;
 	}
 
@@ -1102,10 +1102,10 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 
 	/* Transfer direction */
 	plchan->runtime_direction = config->direction;
-	if (config->direction == DMA_TO_DEVICE) {
+	if (config->direction == DMA_MEM_TO_DEV) {
 		addr_width = config->dst_addr_width;
 		maxburst = config->dst_maxburst;
-	} else if (config->direction == DMA_FROM_DEVICE) {
+	} else if (config->direction == DMA_DEV_TO_MEM) {
 		addr_width = config->src_addr_width;
 		maxburst = config->src_maxburst;
 	} else {
@@ -1136,7 +1136,7 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 	cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT;
 	cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT;
 
-	if (plchan->runtime_direction == DMA_FROM_DEVICE) {
+	if (plchan->runtime_direction == DMA_DEV_TO_MEM) {
 		plchan->src_addr = config->src_addr;
 		plchan->src_cctl = pl08x_cctl(cctl) | PL080_CONTROL_DST_INCR |
 			pl08x_select_bus(plchan->cd->periph_buses,
@@ -1152,7 +1152,7 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 		"configured channel %s (%s) for %s, data width %d, "
 		"maxburst %d words, LE, CCTL=0x%08x\n",
 		dma_chan_name(chan), plchan->name,
-		(config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
+		(config->direction == DMA_DEV_TO_MEM) ? "RX" : "TX",
 		addr_width,
 		maxburst,
 		cctl);
@@ -1322,7 +1322,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
 
 static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
@@ -1354,10 +1354,10 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 	 */
 	txd->direction = direction;
 
-	if (direction == DMA_TO_DEVICE) {
+	if (direction == DMA_MEM_TO_DEV) {
 		txd->cctl = plchan->dst_cctl;
 		slave_addr = plchan->dst_addr;
-	} else if (direction == DMA_FROM_DEVICE) {
+	} else if (direction == DMA_DEV_TO_MEM) {
 		txd->cctl = plchan->src_cctl;
 		slave_addr = plchan->src_addr;
 	} else {
@@ -1368,10 +1368,10 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 	}
 
 	if (plchan->cd->device_fc)
-		tmp = (direction == DMA_TO_DEVICE) ? PL080_FLOW_MEM2PER_PER :
+		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER :
 			PL080_FLOW_PER2MEM_PER;
 	else
-		tmp = (direction == DMA_TO_DEVICE) ? PL080_FLOW_MEM2PER :
+		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER :
 			PL080_FLOW_PER2MEM;
 
 	txd->ccfg |= tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT;
@@ -1387,7 +1387,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 		list_add_tail(&dsg->node, &txd->dsg_list);
 
 		dsg->len = sg_dma_len(sg);
-		if (direction == DMA_TO_DEVICE) {
+		if (direction == DMA_MEM_TO_DEV) {
 			dsg->src_addr = sg_phys(sg);
 			dsg->dst_addr = slave_addr;
 		} else {
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index fcfa0a8b5c59..7e76574e83ec 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -660,7 +660,7 @@ err_desc_get:
  */
 static struct dma_async_tx_descriptor *
 atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
@@ -678,7 +678,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 	dev_vdbg(chan2dev(chan), "prep_slave_sg (%d): %s f0x%lx\n",
 			sg_len,
-			direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE",
+			direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
 			flags);
 
 	if (unlikely(!atslave || !sg_len)) {
@@ -692,7 +692,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	ctrlb = ATC_IEN;
 
 	switch (direction) {
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		ctrla |=  ATC_DST_WIDTH(reg_width);
 		ctrlb |=  ATC_DST_ADDR_MODE_FIXED
 			| ATC_SRC_ADDR_MODE_INCR
@@ -725,7 +725,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			total_len += len;
 		}
 		break;
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		ctrla |=  ATC_SRC_WIDTH(reg_width);
 		ctrlb |=  ATC_DST_ADDR_MODE_INCR
 			| ATC_SRC_ADDR_MODE_FIXED
@@ -787,7 +787,7 @@ err_desc_get:
  */
 static int
 atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	if (period_len > (ATC_BTSIZE_MAX << reg_width))
 		goto err_out;
@@ -795,7 +795,7 @@ atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr,
 		goto err_out;
 	if (unlikely(buf_addr & ((1 << reg_width) - 1)))
 		goto err_out;
-	if (unlikely(!(direction & (DMA_TO_DEVICE | DMA_FROM_DEVICE))))
+	if (unlikely(!(direction & (DMA_DEV_TO_MEM | DMA_MEM_TO_DEV))))
 		goto err_out;
 
 	return 0;
@@ -810,7 +810,7 @@ err_out:
 static int
 atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
 		unsigned int period_index, dma_addr_t buf_addr,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	u32		ctrla;
 	unsigned int	reg_width = atslave->reg_width;
@@ -822,7 +822,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
 		| period_len >> reg_width;
 
 	switch (direction) {
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		desc->lli.saddr = buf_addr + (period_len * period_index);
 		desc->lli.daddr = atslave->tx_reg;
 		desc->lli.ctrla = ctrla;
@@ -833,7 +833,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
 				| ATC_DIF(AT_DMA_PER_IF);
 		break;
 
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		desc->lli.saddr = atslave->rx_reg;
 		desc->lli.daddr = buf_addr + (period_len * period_index);
 		desc->lli.ctrla = ctrla;
@@ -861,7 +861,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
  */
 static struct dma_async_tx_descriptor *
 atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma_slave	*atslave = chan->private;
@@ -872,7 +872,7 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 	unsigned int		i;
 
 	dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@0x%08x - %d (%d/%d)\n",
-			direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE",
+			direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
 			buf_addr,
 			periods, buf_len, period_len);
 
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index 4234f416ef11..d65a718c0f9b 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -39,7 +39,7 @@ struct coh901318_desc {
 	struct scatterlist *sg;
 	unsigned int sg_len;
 	struct coh901318_lli *lli;
-	enum dma_data_direction dir;
+	enum dma_transfer_direction dir;
 	unsigned long flags;
 	u32 head_config;
 	u32 head_ctrl;
@@ -1034,7 +1034,7 @@ coh901318_prep_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 
 static struct dma_async_tx_descriptor *
 coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-			unsigned int sg_len, enum dma_data_direction direction,
+			unsigned int sg_len, enum dma_transfer_direction direction,
 			unsigned long flags)
 {
 	struct coh901318_chan *cohc = to_coh901318_chan(chan);
@@ -1077,7 +1077,7 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	ctrl_last |= cohc->runtime_ctrl;
 	ctrl |= cohc->runtime_ctrl;
 
-	if (direction == DMA_TO_DEVICE) {
+	if (direction == DMA_MEM_TO_DEV) {
 		u32 tx_flags = COH901318_CX_CTRL_PRDD_SOURCE |
 			COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE;
 
@@ -1085,7 +1085,7 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		ctrl_chained |= tx_flags;
 		ctrl_last |= tx_flags;
 		ctrl |= tx_flags;
-	} else if (direction == DMA_FROM_DEVICE) {
+	} else if (direction == DMA_DEV_TO_MEM) {
 		u32 rx_flags = COH901318_CX_CTRL_PRDD_DEST |
 			COH901318_CX_CTRL_DST_ADDR_INC_ENABLE;
 
@@ -1274,11 +1274,11 @@ static void coh901318_dma_set_runtimeconfig(struct dma_chan *chan,
 	int i = 0;
 
 	/* We only support mem to per or per to mem transfers */
-	if (config->direction == DMA_FROM_DEVICE) {
+	if (config->direction == DMA_DEV_TO_MEM) {
 		addr = config->src_addr;
 		addr_width = config->src_addr_width;
 		maxburst = config->src_maxburst;
-	} else if (config->direction == DMA_TO_DEVICE) {
+	} else if (config->direction == DMA_MEM_TO_DEV) {
 		addr = config->dst_addr;
 		addr_width = config->dst_addr_width;
 		maxburst = config->dst_maxburst;
diff --git a/drivers/dma/coh901318_lli.c b/drivers/dma/coh901318_lli.c
index 9f7e0e6a7eea..6c0e2d4c6682 100644
--- a/drivers/dma/coh901318_lli.c
+++ b/drivers/dma/coh901318_lli.c
@@ -7,11 +7,10 @@
  * Author: Per Friden <per.friden@stericsson.com>
  */
 
-#include <linux/dma-mapping.h>
 #include <linux/spinlock.h>
-#include <linux/dmapool.h>
 #include <linux/memory.h>
 #include <linux/gfp.h>
+#include <linux/dmapool.h>
 #include <mach/coh901318.h>
 
 #include "coh901318_lli.h"
@@ -177,18 +176,18 @@ coh901318_lli_fill_single(struct coh901318_pool *pool,
 			  struct coh901318_lli *lli,
 			  dma_addr_t buf, unsigned int size,
 			  dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_eom,
-			  enum dma_data_direction dir)
+			  enum dma_transfer_direction dir)
 {
 	int s = size;
 	dma_addr_t src;
 	dma_addr_t dst;
 
 
-	if (dir == DMA_TO_DEVICE) {
+	if (dir == DMA_MEM_TO_DEV) {
 		src = buf;
 		dst = dev_addr;
 
-	} else if (dir == DMA_FROM_DEVICE) {
+	} else if (dir == DMA_DEV_TO_MEM) {
 
 		src = dev_addr;
 		dst = buf;
@@ -215,9 +214,9 @@ coh901318_lli_fill_single(struct coh901318_pool *pool,
 
 		lli = coh901318_lli_next(lli);
 
-		if (dir == DMA_TO_DEVICE)
+		if (dir == DMA_MEM_TO_DEV)
 			src += block_size;
-		else if (dir == DMA_FROM_DEVICE)
+		else if (dir == DMA_DEV_TO_MEM)
 			dst += block_size;
 	}
 
@@ -234,7 +233,7 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 		      struct scatterlist *sgl, unsigned int nents,
 		      dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl,
 		      u32 ctrl_last,
-		      enum dma_data_direction dir, u32 ctrl_irq_mask)
+		      enum dma_transfer_direction dir, u32 ctrl_irq_mask)
 {
 	int i;
 	struct scatterlist *sg;
@@ -249,9 +248,9 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 
 	spin_lock(&pool->lock);
 
-	if (dir == DMA_TO_DEVICE)
+	if (dir == DMA_MEM_TO_DEV)
 		dst = dev_addr;
-	else if (dir == DMA_FROM_DEVICE)
+	else if (dir == DMA_DEV_TO_MEM)
 		src = dev_addr;
 	else
 		goto err;
@@ -269,7 +268,7 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 			ctrl_sg = ctrl ? ctrl : ctrl_last;
 
 
-		if (dir == DMA_TO_DEVICE)
+		if (dir == DMA_MEM_TO_DEV)
 			/* increment source address */
 			src = sg_phys(sg);
 		else
@@ -293,7 +292,7 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 			lli->src_addr = src;
 			lli->dst_addr = dst;
 
-			if (dir == DMA_FROM_DEVICE)
+			if (dir == DMA_DEV_TO_MEM)
 				dst += elem_size;
 			else
 				src += elem_size;
diff --git a/drivers/dma/coh901318_lli.h b/drivers/dma/coh901318_lli.h
index 7a5c80990e9e..abff3714fdda 100644
--- a/drivers/dma/coh901318_lli.h
+++ b/drivers/dma/coh901318_lli.h
@@ -97,7 +97,7 @@ coh901318_lli_fill_single(struct coh901318_pool *pool,
 			  struct coh901318_lli *lli,
 			  dma_addr_t buf, unsigned int size,
 			  dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_last,
-			  enum dma_data_direction dir);
+			  enum dma_transfer_direction dir);
 
 /**
  * coh901318_lli_fill_single() - Prepares the lli:s for dma scatter list transfer
@@ -119,6 +119,6 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 		      struct scatterlist *sg, unsigned int nents,
 		      dma_addr_t dev_addr, u32 ctrl_chained,
 		      u32 ctrl, u32 ctrl_last,
-		      enum dma_data_direction dir, u32 ctrl_irq_mask);
+		      enum dma_transfer_direction dir, u32 ctrl_irq_mask);
 
 #endif /* COH901318_LLI_H */
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 9bfd6d360718..decca1c3c83d 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -696,7 +696,7 @@ err_desc_get:
 
 static struct dma_async_tx_descriptor *
 dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
@@ -720,7 +720,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	prev = first = NULL;
 
 	switch (direction) {
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		ctllo = (DWC_DEFAULT_CTLLO(chan->private)
 				| DWC_CTLL_DST_WIDTH(reg_width)
 				| DWC_CTLL_DST_FIX
@@ -777,7 +777,7 @@ slave_sg_todev_fill_desc:
 				goto slave_sg_todev_fill_desc;
 		}
 		break;
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		ctllo = (DWC_DEFAULT_CTLLO(chan->private)
 				| DWC_CTLL_SRC_WIDTH(reg_width)
 				| DWC_CTLL_DST_INC
@@ -1165,7 +1165,7 @@ EXPORT_SYMBOL(dw_dma_cyclic_stop);
  */
 struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 		dma_addr_t buf_addr, size_t buf_len, size_t period_len,
-		enum dma_data_direction direction)
+		enum dma_transfer_direction direction)
 {
 	struct dw_dma_chan		*dwc = to_dw_dma_chan(chan);
 	struct dw_cyclic_desc		*cdesc;
@@ -1206,7 +1206,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 		goto out_err;
 	if (unlikely(buf_addr & ((1 << reg_width) - 1)))
 		goto out_err;
-	if (unlikely(!(direction & (DMA_TO_DEVICE | DMA_FROM_DEVICE))))
+	if (unlikely(!(direction & (DMA_MEM_TO_DEV | DMA_DEV_TO_MEM))))
 		goto out_err;
 
 	retval = ERR_PTR(-ENOMEM);
@@ -1228,7 +1228,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 			goto out_err_desc_get;
 
 		switch (direction) {
-		case DMA_TO_DEVICE:
+		case DMA_MEM_TO_DEV:
 			desc->lli.dar = dws->tx_reg;
 			desc->lli.sar = buf_addr + (period_len * i);
 			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private)
@@ -1239,7 +1239,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 					| DWC_CTLL_FC(dws->fc)
 					| DWC_CTLL_INT_EN);
 			break;
-		case DMA_FROM_DEVICE:
+		case DMA_DEV_TO_MEM:
 			desc->lli.dar = buf_addr + (period_len * i);
 			desc->lli.sar = dws->rx_reg;
 			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private)
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index b47e2b803faf..009851b2aeea 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -330,7 +330,7 @@ static void m2p_fill_desc(struct ep93xx_dma_chan *edmac)
 	struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac);
 	u32 bus_addr;
 
-	if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_TO_DEVICE)
+	if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_MEM_TO_DEV)
 		bus_addr = desc->src_addr;
 	else
 		bus_addr = desc->dst_addr;
@@ -443,7 +443,7 @@ static int m2m_hw_setup(struct ep93xx_dma_chan *edmac)
 		control = (5 << M2M_CONTROL_PWSC_SHIFT);
 		control |= M2M_CONTROL_NO_HDSK;
 
-		if (data->direction == DMA_TO_DEVICE) {
+		if (data->direction == DMA_MEM_TO_DEV) {
 			control |= M2M_CONTROL_DAH;
 			control |= M2M_CONTROL_TM_TX;
 			control |= M2M_CONTROL_RSS_SSPTX;
@@ -463,7 +463,7 @@ static int m2m_hw_setup(struct ep93xx_dma_chan *edmac)
 		control |= M2M_CONTROL_RSS_IDE;
 		control |= M2M_CONTROL_PW_16;
 
-		if (data->direction == DMA_TO_DEVICE) {
+		if (data->direction == DMA_MEM_TO_DEV) {
 			/* Worst case from the UG */
 			control = (3 << M2M_CONTROL_PWSC_SHIFT);
 			control |= M2M_CONTROL_DAH;
@@ -803,8 +803,8 @@ static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan)
 			switch (data->port) {
 			case EP93XX_DMA_SSP:
 			case EP93XX_DMA_IDE:
-				if (data->direction != DMA_TO_DEVICE &&
-				    data->direction != DMA_FROM_DEVICE)
+				if (data->direction != DMA_MEM_TO_DEV &&
+				    data->direction != DMA_DEV_TO_MEM)
 					return -EINVAL;
 				break;
 			default:
@@ -952,7 +952,7 @@ fail:
  */
 static struct dma_async_tx_descriptor *
 ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-			 unsigned int sg_len, enum dma_data_direction dir,
+			 unsigned int sg_len, enum dma_transfer_direction dir,
 			 unsigned long flags)
 {
 	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
@@ -988,7 +988,7 @@ ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			goto fail;
 		}
 
-		if (dir == DMA_TO_DEVICE) {
+		if (dir == DMA_MEM_TO_DEV) {
 			desc->src_addr = sg_dma_address(sg);
 			desc->dst_addr = edmac->runtime_addr;
 		} else {
@@ -1032,7 +1032,7 @@ fail:
 static struct dma_async_tx_descriptor *
 ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 			   size_t buf_len, size_t period_len,
-			   enum dma_data_direction dir)
+			   enum dma_transfer_direction dir)
 {
 	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
 	struct ep93xx_dma_desc *desc, *first;
@@ -1065,7 +1065,7 @@ ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 			goto fail;
 		}
 
-		if (dir == DMA_TO_DEVICE) {
+		if (dir == DMA_MEM_TO_DEV) {
 			desc->src_addr = dma_addr + offset;
 			desc->dst_addr = edmac->runtime_addr;
 		} else {
@@ -1133,12 +1133,12 @@ static int ep93xx_dma_slave_config(struct ep93xx_dma_chan *edmac,
 		return -EINVAL;
 
 	switch (config->direction) {
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		width = config->src_addr_width;
 		addr = config->src_addr;
 		break;
 
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		width = config->dst_addr_width;
 		addr = config->dst_addr;
 		break;
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 8a781540590c..b98070c33ca9 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -772,7 +772,7 @@ fail:
  */
 static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
 	struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_data_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	/*
 	 * This operation is not supported on the Freescale DMA controller
@@ -819,7 +819,7 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 			return -ENXIO;
 
 		/* we set the controller burst size depending on direction */
-		if (config->direction == DMA_TO_DEVICE)
+		if (config->direction == DMA_MEM_TO_DEV)
 			size = config->dst_addr_width * config->dst_maxburst;
 		else
 			size = config->src_addr_width * config->src_maxburst;
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index d746899f36e1..678cd01dc42c 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -106,7 +106,7 @@ static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		imx_dma_disable(imxdmac->imxdma_channel);
 		return 0;
 	case DMA_SLAVE_CONFIG:
-		if (dmaengine_cfg->direction == DMA_FROM_DEVICE) {
+		if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
 			imxdmac->per_address = dmaengine_cfg->src_addr;
 			imxdmac->watermark_level = dmaengine_cfg->src_maxburst;
 			imxdmac->word_size = dmaengine_cfg->src_addr_width;
@@ -223,7 +223,7 @@ static void imxdma_free_chan_resources(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
@@ -240,7 +240,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 		dma_length += sg->length;
 	}
 
-	if (direction == DMA_FROM_DEVICE)
+	if (direction == DMA_DEV_TO_MEM)
 		dmamode = DMA_MODE_READ;
 	else
 		dmamode = DMA_MODE_WRITE;
@@ -270,7 +270,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 
 static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
 	struct imxdma_engine *imxdma = imxdmac->imxdma;
@@ -316,7 +316,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
 	imxdmac->sg_list[periods].page_link =
 		((unsigned long)imxdmac->sg_list | 0x01) & ~0x02;
 
-	if (direction == DMA_FROM_DEVICE)
+	if (direction == DMA_DEV_TO_MEM)
 		dmamode = DMA_MODE_READ;
 	else
 		dmamode = DMA_MODE_WRITE;
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index eab1fe71259e..065de5442c93 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -246,7 +246,7 @@ struct sdma_engine;
 struct sdma_channel {
 	struct sdma_engine		*sdma;
 	unsigned int			channel;
-	enum dma_data_direction		direction;
+	enum dma_transfer_direction		direction;
 	enum sdma_peripheral_type	peripheral_type;
 	unsigned int			event_id0;
 	unsigned int			event_id1;
@@ -649,7 +649,7 @@ static int sdma_load_context(struct sdma_channel *sdmac)
 	struct sdma_buffer_descriptor *bd0 = sdma->channel[0].bd;
 	int ret;
 
-	if (sdmac->direction == DMA_FROM_DEVICE) {
+	if (sdmac->direction == DMA_DEV_TO_MEM) {
 		load_address = sdmac->pc_from_device;
 	} else {
 		load_address = sdmac->pc_to_device;
@@ -910,7 +910,7 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
@@ -1007,7 +1007,7 @@ err_out:
 
 static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
@@ -1092,7 +1092,7 @@ static int sdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		sdma_disable_channel(sdmac);
 		return 0;
 	case DMA_SLAVE_CONFIG:
-		if (dmaengine_cfg->direction == DMA_FROM_DEVICE) {
+		if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
 			sdmac->per_address = dmaengine_cfg->src_addr;
 			sdmac->watermark_level = dmaengine_cfg->src_maxburst;
 			sdmac->word_size = dmaengine_cfg->src_addr_width;
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index 9e96c43a846a..6deda25fd0a8 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -394,10 +394,10 @@ static int midc_lli_fill_sg(struct intel_mid_dma_chan *midc,
 							midc->dma->block_size);
 		/*Populate SAR and DAR values*/
 		sg_phy_addr = sg_phys(sg);
-		if (desc->dirn ==  DMA_TO_DEVICE) {
+		if (desc->dirn ==  DMA_MEM_TO_DEV) {
 			lli_bloc_desc->sar  = sg_phy_addr;
 			lli_bloc_desc->dar  = mids->dma_slave.dst_addr;
-		} else if (desc->dirn ==  DMA_FROM_DEVICE) {
+		} else if (desc->dirn ==  DMA_DEV_TO_MEM) {
 			lli_bloc_desc->sar  = mids->dma_slave.src_addr;
 			lli_bloc_desc->dar  = sg_phy_addr;
 		}
@@ -631,13 +631,13 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 		if (midc->dma->pimr_mask) {
 			cfg_hi.cfgx.protctl = 0x0; /*default value*/
 			cfg_hi.cfgx.fifo_mode = 1;
-			if (mids->dma_slave.direction == DMA_TO_DEVICE) {
+			if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
 				cfg_hi.cfgx.src_per = 0;
 				if (mids->device_instance == 0)
 					cfg_hi.cfgx.dst_per = 3;
 				if (mids->device_instance == 1)
 					cfg_hi.cfgx.dst_per = 1;
-			} else if (mids->dma_slave.direction == DMA_FROM_DEVICE) {
+			} else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
 				if (mids->device_instance == 0)
 					cfg_hi.cfgx.src_per = 2;
 				if (mids->device_instance == 1)
@@ -681,11 +681,11 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 		ctl_lo.ctlx.sinc = 0;
 		ctl_lo.ctlx.dinc = 0;
 	} else {
-		if (mids->dma_slave.direction == DMA_TO_DEVICE) {
+		if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
 			ctl_lo.ctlx.sinc = 0;
 			ctl_lo.ctlx.dinc = 2;
 			ctl_lo.ctlx.tt_fc = 1;
-		} else if (mids->dma_slave.direction == DMA_FROM_DEVICE) {
+		} else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
 			ctl_lo.ctlx.sinc = 2;
 			ctl_lo.ctlx.dinc = 0;
 			ctl_lo.ctlx.tt_fc = 2;
@@ -731,7 +731,7 @@ err_desc_get:
  */
 static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
 			struct dma_chan *chan, struct scatterlist *sgl,
-			unsigned int sg_len, enum dma_data_direction direction,
+			unsigned int sg_len, enum dma_transfer_direction direction,
 			unsigned long flags)
 {
 	struct intel_mid_dma_chan *midc = NULL;
diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h
index aea5ee88ce03..c6de919a6401 100644
--- a/drivers/dma/intel_mid_dma_regs.h
+++ b/drivers/dma/intel_mid_dma_regs.h
@@ -262,7 +262,7 @@ struct intel_mid_dma_desc {
 	unsigned int			lli_length;
 	unsigned int			current_lli;
 	dma_addr_t			next;
-	enum dma_data_direction		dirn;
+	enum dma_transfer_direction		dirn;
 	enum dma_status			status;
 	enum dma_slave_buswidth		width; /*width of DMA txn*/
 	enum intel_mid_dma_mode		cfg_mode; /*mode configuration*/
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index 6815905a772f..0cee3b30cd77 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -1362,7 +1362,7 @@ static void ipu_gc_tasklet(unsigned long arg)
 /* Allocate and initialise a transfer descriptor. */
 static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan,
 		struct scatterlist *sgl, unsigned int sg_len,
-		enum dma_data_direction direction, unsigned long tx_flags)
+		enum dma_transfer_direction direction, unsigned long tx_flags)
 {
 	struct idmac_channel *ichan = to_idmac_chan(chan);
 	struct idmac_tx_desc *desc = NULL;
@@ -1374,7 +1374,7 @@ static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan
 	    chan->chan_id != IDMAC_IC_7)
 		return NULL;
 
-	if (direction != DMA_FROM_DEVICE && direction != DMA_TO_DEVICE) {
+	if (direction != DMA_DEV_TO_MEM && direction != DMA_MEM_TO_DEV) {
 		dev_err(chan->device->dev, "Invalid DMA direction %d!\n", direction);
 		return NULL;
 	}
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index b4588bdd98bb..bdf4672b2553 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -377,7 +377,7 @@ static void mxs_dma_free_chan_resources(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long append)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
@@ -450,7 +450,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
 			ccw->bits |= CCW_CHAIN;
 			ccw->bits |= CCW_HALT_ON_TERM;
 			ccw->bits |= CCW_TERM_FLUSH;
-			ccw->bits |= BF_CCW(direction == DMA_FROM_DEVICE ?
+			ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
 					MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ,
 					COMMAND);
 
@@ -472,7 +472,7 @@ err_out:
 
 static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
@@ -515,7 +515,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
 		ccw->bits |= CCW_IRQ;
 		ccw->bits |= CCW_HALT_ON_TERM;
 		ccw->bits |= CCW_TERM_FLUSH;
-		ccw->bits |= BF_CCW(direction == DMA_FROM_DEVICE ?
+		ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
 				MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND);
 
 		dma_addr += period_len;
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index a6d0e3dbed07..9944e8295498 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -99,7 +99,7 @@ struct pch_dma_desc {
 struct pch_dma_chan {
 	struct dma_chan		chan;
 	void __iomem *membase;
-	enum dma_data_direction	dir;
+	enum dma_transfer_direction dir;
 	struct tasklet_struct	tasklet;
 	unsigned long		err_status;
 
@@ -224,7 +224,7 @@ static void pdc_set_dir(struct dma_chan *chan)
 		mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
 				       (DMA_CTL0_BITS_PER_CH * chan->chan_id));
 		val &= mask_mode;
-		if (pd_chan->dir == DMA_TO_DEVICE)
+		if (pd_chan->dir == DMA_MEM_TO_DEV)
 			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
 				       DMA_CTL0_DIR_SHIFT_BITS);
 		else
@@ -242,7 +242,7 @@ static void pdc_set_dir(struct dma_chan *chan)
 		mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
 						 (DMA_CTL0_BITS_PER_CH * ch));
 		val &= mask_mode;
-		if (pd_chan->dir == DMA_TO_DEVICE)
+		if (pd_chan->dir == DMA_MEM_TO_DEV)
 			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch +
 				       DMA_CTL0_DIR_SHIFT_BITS);
 		else
@@ -607,7 +607,7 @@ static void pd_issue_pending(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan,
 			struct scatterlist *sgl, unsigned int sg_len,
-			enum dma_data_direction direction, unsigned long flags)
+			enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
 	struct pch_dma_slave *pd_slave = chan->private;
@@ -623,9 +623,9 @@ static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan,
 		return NULL;
 	}
 
-	if (direction == DMA_FROM_DEVICE)
+	if (direction == DMA_DEV_TO_MEM)
 		reg = pd_slave->rx_reg;
-	else if (direction == DMA_TO_DEVICE)
+	else if (direction == DMA_MEM_TO_DEV)
 		reg = pd_slave->tx_reg;
 	else
 		return NULL;
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 571041477ab2..1e58eeb030d8 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -320,14 +320,14 @@ static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned
 	case DMA_SLAVE_CONFIG:
 		slave_config = (struct dma_slave_config *)arg;
 
-		if (slave_config->direction == DMA_TO_DEVICE) {
+		if (slave_config->direction == DMA_MEM_TO_DEV) {
 			if (slave_config->dst_addr)
 				pch->fifo_addr = slave_config->dst_addr;
 			if (slave_config->dst_addr_width)
 				pch->burst_sz = __ffs(slave_config->dst_addr_width);
 			if (slave_config->dst_maxburst)
 				pch->burst_len = slave_config->dst_maxburst;
-		} else if (slave_config->direction == DMA_FROM_DEVICE) {
+		} else if (slave_config->direction == DMA_DEV_TO_MEM) {
 			if (slave_config->src_addr)
 				pch->fifo_addr = slave_config->src_addr;
 			if (slave_config->src_addr_width)
@@ -597,7 +597,7 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len)
 
 static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct dma_pl330_desc *desc;
 	struct dma_pl330_chan *pch = to_pchan(chan);
@@ -612,13 +612,13 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 	}
 
 	switch (direction) {
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		desc->rqcfg.src_inc = 1;
 		desc->rqcfg.dst_inc = 0;
 		src = dma_addr;
 		dst = pch->fifo_addr;
 		break;
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		desc->rqcfg.src_inc = 0;
 		desc->rqcfg.dst_inc = 1;
 		src = pch->fifo_addr;
@@ -687,7 +687,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
 
 static struct dma_async_tx_descriptor *
 pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flg)
 {
 	struct dma_pl330_desc *first, *desc = NULL;
@@ -702,9 +702,9 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		return NULL;
 
 	/* Make sure the direction is consistent */
-	if ((direction == DMA_TO_DEVICE &&
+	if ((direction == DMA_MEM_TO_DEV &&
 				peri->rqtype != MEMTODEV) ||
-			(direction == DMA_FROM_DEVICE &&
+			(direction == DMA_DEV_TO_MEM &&
 				peri->rqtype != DEVTOMEM)) {
 		dev_err(pch->dmac->pif.dev, "%s:%d Invalid Direction\n",
 				__func__, __LINE__);
@@ -747,7 +747,7 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		else
 			list_add_tail(&desc->node, &first->node);
 
-		if (direction == DMA_TO_DEVICE) {
+		if (direction == DMA_MEM_TO_DEV) {
 			desc->rqcfg.src_inc = 1;
 			desc->rqcfg.dst_inc = 0;
 			fill_px(&desc->px,
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 81809c2b46ab..592304fb41a6 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -23,7 +23,6 @@
 #include <linux/interrupt.h>
 #include <linux/dmaengine.h>
 #include <linux/delay.h>
-#include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/sh_dma.h>
@@ -479,19 +478,19 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan)
  * @sh_chan:	DMA channel
  * @flags:	DMA transfer flags
  * @dest:	destination DMA address, incremented when direction equals
- *		DMA_FROM_DEVICE or DMA_BIDIRECTIONAL
+ *		DMA_DEV_TO_MEM
  * @src:	source DMA address, incremented when direction equals
- *		DMA_TO_DEVICE or DMA_BIDIRECTIONAL
+ *		DMA_MEM_TO_DEV
  * @len:	DMA transfer length
  * @first:	if NULL, set to the current descriptor and cookie set to -EBUSY
  * @direction:	needed for slave DMA to decide which address to keep constant,
- *		equals DMA_BIDIRECTIONAL for MEMCPY
+ *		equals DMA_MEM_TO_MEM for MEMCPY
  * Returns 0 or an error
  * Locks: called with desc_lock held
  */
 static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
 	unsigned long flags, dma_addr_t *dest, dma_addr_t *src, size_t *len,
-	struct sh_desc **first, enum dma_data_direction direction)
+	struct sh_desc **first, enum dma_transfer_direction direction)
 {
 	struct sh_desc *new;
 	size_t copy_size;
@@ -531,9 +530,9 @@ static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
 	new->direction = direction;
 
 	*len -= copy_size;
-	if (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE)
+	if (direction == DMA_MEM_TO_MEM || direction == DMA_MEM_TO_DEV)
 		*src += copy_size;
-	if (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE)
+	if (direction == DMA_MEM_TO_MEM || direction == DMA_DEV_TO_MEM)
 		*dest += copy_size;
 
 	return new;
@@ -546,12 +545,12 @@ static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
  * converted to scatter-gather to guarantee consistent locking and a correct
  * list manipulation. For slave DMA direction carries the usual meaning, and,
  * logically, the SG list is RAM and the addr variable contains slave address,
- * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_BIDIRECTIONAL
+ * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_MEM_TO_MEM
  * and the SG list contains only one element and points at the source buffer.
  */
 static struct dma_async_tx_descriptor *sh_dmae_prep_sg(struct sh_dmae_chan *sh_chan,
 	struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr,
-	enum dma_data_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct scatterlist *sg;
 	struct sh_desc *first = NULL, *new = NULL /* compiler... */;
@@ -592,7 +591,7 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_sg(struct sh_dmae_chan *sh_c
 			dev_dbg(sh_chan->dev, "Add SG #%d@%p[%d], dma %llx\n",
 				i, sg, len, (unsigned long long)sg_addr);
 
-			if (direction == DMA_FROM_DEVICE)
+			if (direction == DMA_DEV_TO_MEM)
 				new = sh_dmae_add_desc(sh_chan, flags,
 						&sg_addr, addr, &len, &first,
 						direction);
@@ -646,13 +645,13 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
 	sg_dma_address(&sg) = dma_src;
 	sg_dma_len(&sg) = len;
 
-	return sh_dmae_prep_sg(sh_chan, &sg, 1, &dma_dest, DMA_BIDIRECTIONAL,
+	return sh_dmae_prep_sg(sh_chan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM,
 			       flags);
 }
 
 static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg(
 	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_data_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct sh_dmae_slave *param;
 	struct sh_dmae_chan *sh_chan;
@@ -996,7 +995,7 @@ static void dmae_do_tasklet(unsigned long data)
 	spin_lock_irq(&sh_chan->desc_lock);
 	list_for_each_entry(desc, &sh_chan->ld_queue, node) {
 		if (desc->mark == DESC_SUBMITTED &&
-		    ((desc->direction == DMA_FROM_DEVICE &&
+		    ((desc->direction == DMA_DEV_TO_MEM &&
 		      (desc->hw.dar + desc->hw.tcr) == dar_buf) ||
 		     (desc->hw.sar + desc->hw.tcr) == sar_buf)) {
 			dev_dbg(sh_chan->dev, "done #%d@%p dst %u\n",
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 467e4dcb20a0..0c6cbacb8321 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -216,7 +216,7 @@ struct d40_chan {
 	struct d40_log_lli_full		*lcpa;
 	/* Runtime reconfiguration */
 	dma_addr_t			runtime_addr;
-	enum dma_data_direction		runtime_direction;
+	enum dma_transfer_direction	runtime_direction;
 };
 
 /**
@@ -1854,7 +1854,7 @@ err:
 }
 
 static dma_addr_t
-d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction)
+d40_get_dev_addr(struct d40_chan *chan, enum dma_transfer_direction direction)
 {
 	struct stedma40_platform_data *plat = chan->base->plat_data;
 	struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
@@ -1863,9 +1863,9 @@ d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction)
 	if (chan->runtime_addr)
 		return chan->runtime_addr;
 
-	if (direction == DMA_FROM_DEVICE)
+	if (direction == DMA_DEV_TO_MEM)
 		addr = plat->dev_rx[cfg->src_dev_type];
-	else if (direction == DMA_TO_DEVICE)
+	else if (direction == DMA_MEM_TO_DEV)
 		addr = plat->dev_tx[cfg->dst_dev_type];
 
 	return addr;
@@ -1874,7 +1874,7 @@ d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction)
 static struct dma_async_tx_descriptor *
 d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src,
 	    struct scatterlist *sg_dst, unsigned int sg_len,
-	    enum dma_data_direction direction, unsigned long dma_flags)
+	    enum dma_transfer_direction direction, unsigned long dma_flags)
 {
 	struct d40_chan *chan = container_of(dchan, struct d40_chan, chan);
 	dma_addr_t src_dev_addr = 0;
@@ -1901,9 +1901,9 @@ d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src,
 	if (direction != DMA_NONE) {
 		dma_addr_t dev_addr = d40_get_dev_addr(chan, direction);
 
-		if (direction == DMA_FROM_DEVICE)
+		if (direction == DMA_DEV_TO_MEM)
 			src_dev_addr = dev_addr;
-		else if (direction == DMA_TO_DEVICE)
+		else if (direction == DMA_MEM_TO_DEV)
 			dst_dev_addr = dev_addr;
 	}
 
@@ -2107,10 +2107,10 @@ d40_prep_memcpy_sg(struct dma_chan *chan,
 static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
 							 struct scatterlist *sgl,
 							 unsigned int sg_len,
-							 enum dma_data_direction direction,
+							 enum dma_transfer_direction direction,
 							 unsigned long dma_flags)
 {
-	if (direction != DMA_FROM_DEVICE && direction != DMA_TO_DEVICE)
+	if (direction != DMA_DEV_TO_MEM && direction != DMA_MEM_TO_DEV)
 		return NULL;
 
 	return d40_prep_sg(chan, sgl, sgl, sg_len, direction, dma_flags);
@@ -2119,7 +2119,7 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
 static struct dma_async_tx_descriptor *
 dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 		     size_t buf_len, size_t period_len,
-		     enum dma_data_direction direction)
+		     enum dma_transfer_direction direction)
 {
 	unsigned int periods = buf_len / period_len;
 	struct dma_async_tx_descriptor *txd;
@@ -2268,7 +2268,7 @@ static int d40_set_runtime_config(struct dma_chan *chan,
 	dst_addr_width = config->dst_addr_width;
 	dst_maxburst = config->dst_maxburst;
 
-	if (config->direction == DMA_FROM_DEVICE) {
+	if (config->direction == DMA_DEV_TO_MEM) {
 		dma_addr_t dev_addr_rx =
 			d40c->base->plat_data->dev_rx[cfg->src_dev_type];
 
@@ -2291,7 +2291,7 @@ static int d40_set_runtime_config(struct dma_chan *chan,
 		if (dst_maxburst == 0)
 			dst_maxburst = src_maxburst;
 
-	} else if (config->direction == DMA_TO_DEVICE) {
+	} else if (config->direction == DMA_MEM_TO_DEV) {
 		dma_addr_t dev_addr_tx =
 			d40c->base->plat_data->dev_tx[cfg->dst_dev_type];
 
@@ -2356,7 +2356,7 @@ static int d40_set_runtime_config(struct dma_chan *chan,
 		"configured channel %s for %s, data width %d/%d, "
 		"maxburst %d/%d elements, LE, no flow control\n",
 		dma_chan_name(chan),
-		(config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
+		(config->direction == DMA_DEV_TO_MEM) ? "RX" : "TX",
 		src_addr_width, dst_addr_width,
 		src_maxburst, dst_maxburst);
 
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index a4a398f2ef61..8c880729b094 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -90,7 +90,7 @@ struct timb_dma_chan {
 	struct list_head	queue;
 	struct list_head	free_list;
 	unsigned int		bytes_per_line;
-	enum dma_data_direction	direction;
+	enum dma_transfer_direction	direction;
 	unsigned int		descs; /* Descriptors to allocate */
 	unsigned int		desc_elems; /* number of elems per descriptor */
 };
@@ -235,7 +235,7 @@ static void __td_start_dma(struct timb_dma_chan *td_chan)
 		"td_chan: %p, chan: %d, membase: %p\n",
 		td_chan, td_chan->chan.chan_id, td_chan->membase);
 
-	if (td_chan->direction == DMA_FROM_DEVICE) {
+	if (td_chan->direction == DMA_DEV_TO_MEM) {
 
 		/* descriptor address */
 		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_DHAR);
@@ -278,7 +278,7 @@ static void __td_finish(struct timb_dma_chan *td_chan)
 		txd->cookie);
 
 	/* make sure to stop the transfer */
-	if (td_chan->direction == DMA_FROM_DEVICE)
+	if (td_chan->direction == DMA_DEV_TO_MEM)
 		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_ER);
 /* Currently no support for stopping DMA transfers
 	else
@@ -398,7 +398,7 @@ static struct timb_dma_desc *td_alloc_init_desc(struct timb_dma_chan *td_chan)
 	td_desc->txd.flags = DMA_CTRL_ACK;
 
 	td_desc->txd.phys = dma_map_single(chan2dmadev(chan),
-		td_desc->desc_list, td_desc->desc_list_len, DMA_TO_DEVICE);
+		td_desc->desc_list, td_desc->desc_list_len, DMA_MEM_TO_DEV);
 
 	err = dma_mapping_error(chan2dmadev(chan), td_desc->txd.phys);
 	if (err) {
@@ -419,7 +419,7 @@ static void td_free_desc(struct timb_dma_desc *td_desc)
 {
 	dev_dbg(chan2dev(td_desc->txd.chan), "Freeing desc: %p\n", td_desc);
 	dma_unmap_single(chan2dmadev(td_desc->txd.chan), td_desc->txd.phys,
-		td_desc->desc_list_len, DMA_TO_DEVICE);
+		td_desc->desc_list_len, DMA_MEM_TO_DEV);
 
 	kfree(td_desc->desc_list);
 	kfree(td_desc);
@@ -558,7 +558,7 @@ static void td_issue_pending(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan,
 	struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_data_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct timb_dma_chan *td_chan =
 		container_of(chan, struct timb_dma_chan, chan);
@@ -606,7 +606,7 @@ static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan,
 	}
 
 	dma_sync_single_for_device(chan2dmadev(chan), td_desc->txd.phys,
-		td_desc->desc_list_len, DMA_TO_DEVICE);
+		td_desc->desc_list_len, DMA_MEM_TO_DEV);
 
 	return &td_desc->txd;
 }
@@ -775,8 +775,8 @@ static int __devinit td_probe(struct platform_device *pdev)
 		td_chan->descs = pchan->descriptors;
 		td_chan->desc_elems = pchan->descriptor_elements;
 		td_chan->bytes_per_line = pchan->bytes_per_line;
-		td_chan->direction = pchan->rx ? DMA_FROM_DEVICE :
-			DMA_TO_DEVICE;
+		td_chan->direction = pchan->rx ? DMA_DEV_TO_MEM :
+			DMA_MEM_TO_DEV;
 
 		td_chan->membase = td->membase +
 			(i / 2) * TIMBDMA_INSTANCE_OFFSET +
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index cbd83e362b5e..6122c364cf11 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -845,7 +845,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 
 static struct dma_async_tx_descriptor *
 txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
@@ -860,9 +860,9 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 	BUG_ON(!ds || !ds->reg_width);
 	if (ds->tx_reg)
-		BUG_ON(direction != DMA_TO_DEVICE);
+		BUG_ON(direction != DMA_MEM_TO_DEV);
 	else
-		BUG_ON(direction != DMA_FROM_DEVICE);
+		BUG_ON(direction != DMA_DEV_TO_MEM);
 	if (unlikely(!sg_len))
 		return NULL;
 
@@ -882,7 +882,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		mem = sg_dma_address(sg);
 
 		if (__is_dmac64(ddev)) {
-			if (direction == DMA_TO_DEVICE) {
+			if (direction == DMA_MEM_TO_DEV) {
 				desc->hwdesc.SAR = mem;
 				desc->hwdesc.DAR = ds->tx_reg;
 			} else {
@@ -891,7 +891,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			}
 			desc->hwdesc.CNTR = sg_dma_len(sg);
 		} else {
-			if (direction == DMA_TO_DEVICE) {
+			if (direction == DMA_MEM_TO_DEV) {
 				desc->hwdesc32.SAR = mem;
 				desc->hwdesc32.DAR = ds->tx_reg;
 			} else {
@@ -900,7 +900,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			}
 			desc->hwdesc32.CNTR = sg_dma_len(sg);
 		}
-		if (direction == DMA_TO_DEVICE) {
+		if (direction == DMA_MEM_TO_DEV) {
 			sai = ds->reg_width;
 			dai = 0;
 		} else {
diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 9eabffbc4e50..033f6aa670de 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -134,7 +134,7 @@ struct pl08x_txd {
 	struct dma_async_tx_descriptor tx;
 	struct list_head node;
 	struct list_head dsg_list;
-	enum dma_data_direction	direction;
+	enum dma_transfer_direction direction;
 	dma_addr_t llis_bus;
 	struct pl08x_lli *llis_va;
 	/* Default cctl value for LLIs */
@@ -197,7 +197,7 @@ struct pl08x_dma_chan {
 	dma_addr_t dst_addr;
 	u32 src_cctl;
 	u32 dst_cctl;
-	enum dma_data_direction	runtime_direction;
+	enum dma_transfer_direction runtime_direction;
 	dma_cookie_t lc;
 	struct list_head pend_list;
 	struct pl08x_txd *at;
diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
index 4bfe0a2f7d50..f2c64f92c4a0 100644
--- a/include/linux/dw_dmac.h
+++ b/include/linux/dw_dmac.h
@@ -127,7 +127,7 @@ struct dw_cyclic_desc {
 
 struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 		dma_addr_t buf_addr, size_t buf_len, size_t period_len,
-		enum dma_data_direction direction);
+		enum dma_transfer_direction direction);
 void dw_dma_cyclic_free(struct dma_chan *chan);
 int dw_dma_cyclic_start(struct dma_chan *chan);
 void dw_dma_cyclic_stop(struct dma_chan *chan);
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
index cb2dd118cc0f..62ef6938da10 100644
--- a/include/linux/sh_dma.h
+++ b/include/linux/sh_dma.h
@@ -30,7 +30,7 @@ struct sh_desc {
 	struct sh_dmae_regs hw;
 	struct list_head node;
 	struct dma_async_tx_descriptor async_tx;
-	enum dma_data_direction direction;
+	enum dma_transfer_direction direction;
 	dma_cookie_t cookie;
 	size_t partial;
 	int chunks;
-- 
cgit v1.2.3


From 0314322d1303065568f33869cbd01d7f7367efc4 Mon Sep 17 00:00:00 2001
From: Michael Witten <mfwitten@gmail.com>
Date: Fri, 29 Jul 2011 12:53:20 +0000
Subject: Docs: Pedantry: [Cc]ordic -> CORDIC

According to:

  http://en.wikipedia.org/wiki/CORDIC

it stands for:

  *CO*ordinate *R*otation *DI*gital *C*omputer

Signed-off-by: Michael Witten <mfwitten@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/cordic.h | 2 +-
 lib/Kconfig            | 4 ++--
 lib/cordic.c           | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cordic.h b/include/linux/cordic.h
index f932093e20c2..686f6bf8157d 100644
--- a/include/linux/cordic.h
+++ b/include/linux/cordic.h
@@ -36,7 +36,7 @@ struct cordic_iq {
  * @coord: function output parameter holding the i/q coordinate.
  *
  * The function calculates the i/q coordinate for a given angle using
- * cordic algorithm. The coordinate consists of a real (i) and an
+ * CORDIC algorithm. The coordinate consists of a real (i) and an
  * imaginary (q) part. The real part is essentially the cosine of the
  * angle and the imaginary part is the sine of the angle. The returned
  * values are scaled by 2^16 for precision. The range for theta is
diff --git a/lib/Kconfig b/lib/Kconfig
index 6c695ff9caba..0bb69f674486 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -270,9 +270,9 @@ config AVERAGE
 	  If unsure, say N.
 
 config CORDIC
-	tristate "Cordic function"
+	tristate "CORDIC function"
 	help
-	  The option provides arithmetic function using cordic algorithm
+	  The option provides arithmetic function using CORDIC algorithm
 	  so its calculations are in fixed point. Modules can select this
 	  when they require this function. Module will be called cordic.
 
diff --git a/lib/cordic.c b/lib/cordic.c
index aa27a88d7e04..09b34036534d 100644
--- a/lib/cordic.c
+++ b/lib/cordic.c
@@ -96,6 +96,6 @@ struct cordic_iq cordic_calc_iq(s32 theta)
 }
 EXPORT_SYMBOL(cordic_calc_iq);
 
-MODULE_DESCRIPTION("Cordic functions");
+MODULE_DESCRIPTION("CORDIC functions");
 MODULE_AUTHOR("Broadcom Corporation");
 MODULE_LICENSE("Dual BSD/GPL");
-- 
cgit v1.2.3


From c620846967f335aafb874b9043a10a8fdda68a24 Mon Sep 17 00:00:00 2001
From: Michael Witten <mfwitten@gmail.com>
Date: Fri, 29 Jul 2011 13:00:47 +0000
Subject: Docs: wording: Insert `the'

Signed-off-by: Michael Witten <mfwitten@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/cordic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cordic.h b/include/linux/cordic.h
index 686f6bf8157d..cf68ca4a508c 100644
--- a/include/linux/cordic.h
+++ b/include/linux/cordic.h
@@ -35,7 +35,7 @@ struct cordic_iq {
  * @theta: angle in degrees for which i/q coordinate is to be calculated.
  * @coord: function output parameter holding the i/q coordinate.
  *
- * The function calculates the i/q coordinate for a given angle using
+ * The function calculates the i/q coordinate for a given angle using the
  * CORDIC algorithm. The coordinate consists of a real (i) and an
  * imaginary (q) part. The real part is essentially the cosine of the
  * angle and the imaginary part is the sine of the angle. The returned
-- 
cgit v1.2.3


From b60503ba432b16fc84442a84e29a7aad2c0c363d Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Thu, 20 Jan 2011 12:50:14 -0500
Subject: NVMe: New driver

This driver is for devices that follow the NVM Express standard

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 Documentation/ioctl/ioctl-number.txt |    1 +
 drivers/block/Kconfig                |   11 +
 drivers/block/Makefile               |    1 +
 drivers/block/nvme.c                 | 1043 ++++++++++++++++++++++++++++++++++
 include/linux/nvme.h                 |  343 +++++++++++
 5 files changed, 1399 insertions(+)
 create mode 100644 drivers/block/nvme.c
 create mode 100644 include/linux/nvme.h

(limited to 'include/linux')

diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 54078ed96b37..4840334ea97b 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -149,6 +149,7 @@ Code  Seq#(hex)	Include File		Comments
 'M'	01-03	drivers/scsi/megaraid/megaraid_sas.h
 'M'	00-0F	drivers/video/fsl-diu-fb.h	conflict!
 'N'	00-1F	drivers/usb/scanner.h
+'N'	40-7F	drivers/block/nvme.c
 'O'     00-06   mtd/ubi-user.h		UBI
 'P'	all	linux/soundcard.h	conflict!
 'P'	60-6F	sound/sscape_ioctl.h	conflict!
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 6f07ec1c2f58..35e56e1c948f 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -315,6 +315,17 @@ config BLK_DEV_NBD
 
 	  If unsure, say N.
 
+config BLK_DEV_NVME
+	tristate "NVM Express block device"
+	depends on PCI
+	---help---
+	  The NVM Express driver is for solid state drives directly
+	  connected to the PCI or PCI Express bus.  If you know you
+	  don't have one of these, it is safe to answer N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called nvme.
+
 config BLK_DEV_OSD
 	tristate "OSD object-as-blkdev support"
 	depends on SCSI_OSD_ULD
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 76646e9a1c91..349539ad3ad9 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_XILINX_SYSACE)	+= xsysace.o
 obj-$(CONFIG_CDROM_PKTCDVD)	+= pktcdvd.o
 obj-$(CONFIG_MG_DISK)		+= mg_disk.o
 obj-$(CONFIG_SUNVDC)		+= sunvdc.o
+obj-$(CONFIG_BLK_DEV_NVME)	+= nvme.o
 obj-$(CONFIG_BLK_DEV_OSD)	+= osdblk.o
 
 obj-$(CONFIG_BLK_DEV_UMEM)	+= umem.o
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
new file mode 100644
index 000000000000..ef66eccc2aa2
--- /dev/null
+++ b/drivers/block/nvme.c
@@ -0,0 +1,1043 @@
+/*
+ * NVM Express device driver
+ * Copyright (c) 2011, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/nvme.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kdev_t.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+
+#define NVME_Q_DEPTH 1024
+#define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
+#define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
+#define NVME_MINORS 64
+
+static int nvme_major;
+module_param(nvme_major, int, 0);
+
+/*
+ * Represents an NVM Express device.  Each nvme_dev is a PCI function.
+ */
+struct nvme_dev {
+	struct list_head node;
+	struct nvme_queue **queues;
+	u32 __iomem *dbs;
+	struct pci_dev *pci_dev;
+	int instance;
+	int queue_count;
+	u32 ctrl_config;
+	struct msix_entry *entry;
+	struct nvme_bar __iomem *bar;
+	struct list_head namespaces;
+};
+
+/*
+ * An NVM Express namespace is equivalent to a SCSI LUN
+ */
+struct nvme_ns {
+	struct list_head list;
+
+	struct nvme_dev *dev;
+	struct request_queue *queue;
+	struct gendisk *disk;
+
+	int ns_id;
+	int lba_shift;
+};
+
+/*
+ * An NVM Express queue.  Each device has at least two (one for admin
+ * commands and one for I/O commands).
+ */
+struct nvme_queue {
+	struct device *q_dmadev;
+	spinlock_t q_lock;
+	struct nvme_command *sq_cmds;
+	volatile struct nvme_completion *cqes;
+	dma_addr_t sq_dma_addr;
+	dma_addr_t cq_dma_addr;
+	wait_queue_head_t sq_full;
+	struct bio_list sq_cong;
+	u32 __iomem *q_db;
+	u16 q_depth;
+	u16 cq_vector;
+	u16 sq_head;
+	u16 sq_tail;
+	u16 cq_head;
+	u16 cq_cycle;
+	unsigned long cmdid_data[];
+};
+
+/*
+ * Check we didin't inadvertently grow the command struct
+ */
+static inline void _nvme_check_size(void)
+{
+	BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
+	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
+}
+
+/**
+ * alloc_cmdid - Allocate a Command ID
+ * @param nvmeq The queue that will be used for this command
+ * @param ctx A pointer that will be passed to the handler
+ * @param handler The ID of the handler to call
+ *
+ * Allocate a Command ID for a queue.  The data passed in will
+ * be passed to the completion handler.  This is implemented by using
+ * the bottom two bits of the ctx pointer to store the handler ID.
+ * Passing in a pointer that's not 4-byte aligned will cause a BUG.
+ * We can change this if it becomes a problem.
+ */
+static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx, int handler)
+{
+	int depth = nvmeq->q_depth;
+	unsigned long data = (unsigned long)ctx | handler;
+	int cmdid;
+
+	BUG_ON((unsigned long)ctx & 3);
+
+	do {
+		cmdid = find_first_zero_bit(nvmeq->cmdid_data, depth);
+		if (cmdid >= depth)
+			return -EBUSY;
+	} while (test_and_set_bit(cmdid, nvmeq->cmdid_data));
+
+	nvmeq->cmdid_data[cmdid + BITS_TO_LONGS(depth)] = data;
+	return cmdid;
+}
+
+static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
+								int handler)
+{
+	int cmdid;
+	wait_event_killable(nvmeq->sq_full,
+			(cmdid = alloc_cmdid(nvmeq, ctx, handler)) >= 0);
+	return (cmdid < 0) ? -EINTR : cmdid;
+}
+
+/* If you need more than four handlers, you'll need to change how
+ * alloc_cmdid and nvme_process_cq work
+ */
+enum {
+	sync_completion_id = 0,
+	bio_completion_id,
+};
+
+static unsigned long free_cmdid(struct nvme_queue *nvmeq, int cmdid)
+{
+	unsigned long data;
+
+	data = nvmeq->cmdid_data[cmdid + BITS_TO_LONGS(nvmeq->q_depth)];
+	clear_bit(cmdid, nvmeq->cmdid_data);
+	wake_up(&nvmeq->sq_full);
+	return data;
+}
+
+static struct nvme_queue *get_nvmeq(struct nvme_ns *ns)
+{
+	return ns->dev->queues[1];
+}
+
+static void put_nvmeq(struct nvme_queue *nvmeq)
+{
+}
+
+/**
+ * nvme_submit_cmd: Copy a command into a queue and ring the doorbell
+ * @nvmeq: The queue to use
+ * @cmd: The command to send
+ *
+ * Safe to use from interrupt context
+ */
+static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+{
+	unsigned long flags;
+	u16 tail;
+	/* XXX: Need to check tail isn't going to overrun head */
+	spin_lock_irqsave(&nvmeq->q_lock, flags);
+	tail = nvmeq->sq_tail;
+	memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+	writel(tail, nvmeq->q_db);
+	if (++tail == nvmeq->q_depth)
+		tail = 0;
+	nvmeq->sq_tail = tail;
+	spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+
+	return 0;
+}
+
+struct nvme_req_info {
+	struct bio *bio;
+	int nents;
+	struct scatterlist sg[0];
+};
+
+/* XXX: use a mempool */
+static struct nvme_req_info *alloc_info(unsigned nseg, gfp_t gfp)
+{
+	return kmalloc(sizeof(struct nvme_req_info) +
+			sizeof(struct scatterlist) * nseg, gfp);
+}
+
+static void free_info(struct nvme_req_info *info)
+{
+	kfree(info);
+}
+
+static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
+						struct nvme_completion *cqe)
+{
+	struct nvme_req_info *info = ctx;
+	struct bio *bio = info->bio;
+	u16 status = le16_to_cpup(&cqe->status) >> 1;
+
+	dma_unmap_sg(nvmeq->q_dmadev, info->sg, info->nents,
+			bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+	free_info(info);
+	bio_endio(bio, status ? -EIO : 0);
+}
+
+static int nvme_map_bio(struct device *dev, struct nvme_req_info *info,
+		struct bio *bio, enum dma_data_direction dma_dir, int psegs)
+{
+	struct bio_vec *bvec;
+	struct scatterlist *sg = info->sg;
+	int i, nsegs;
+
+	sg_init_table(sg, psegs);
+	bio_for_each_segment(bvec, bio, i) {
+		sg_set_page(sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
+		/* XXX: handle non-mergable here */
+		nsegs++;
+	}
+	info->nents = nsegs;
+
+	return dma_map_sg(dev, info->sg, info->nents, dma_dir);
+}
+
+static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
+								struct bio *bio)
+{
+	struct nvme_rw_command *cmnd;
+	struct nvme_req_info *info;
+	enum dma_data_direction dma_dir;
+	int cmdid;
+	u16 control;
+	u32 dsmgmt;
+	unsigned long flags;
+	int psegs = bio_phys_segments(ns->queue, bio);
+
+	info = alloc_info(psegs, GFP_NOIO);
+	if (!info)
+		goto congestion;
+	info->bio = bio;
+
+	cmdid = alloc_cmdid(nvmeq, info, bio_completion_id);
+	if (unlikely(cmdid < 0))
+		goto free_info;
+
+	control = 0;
+	if (bio->bi_rw & REQ_FUA)
+		control |= NVME_RW_FUA;
+	if (bio->bi_rw & (REQ_FAILFAST_DEV | REQ_RAHEAD))
+		control |= NVME_RW_LR;
+
+	dsmgmt = 0;
+	if (bio->bi_rw & REQ_RAHEAD)
+		dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
+
+	spin_lock_irqsave(&nvmeq->q_lock, flags);
+	cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail].rw;
+
+	if (bio_data_dir(bio)) {
+		cmnd->opcode = nvme_cmd_write;
+		dma_dir = DMA_TO_DEVICE;
+	} else {
+		cmnd->opcode = nvme_cmd_read;
+		dma_dir = DMA_FROM_DEVICE;
+	}
+
+	nvme_map_bio(nvmeq->q_dmadev, info, bio, dma_dir, psegs);
+
+	cmnd->flags = 1;
+	cmnd->command_id = cmdid;
+	cmnd->nsid = cpu_to_le32(ns->ns_id);
+	cmnd->prp1 = cpu_to_le64(sg_phys(info->sg));
+	/* XXX: Support more than one PRP */
+	cmnd->slba = cpu_to_le64(bio->bi_sector >> (ns->lba_shift - 9));
+	cmnd->length = cpu_to_le16((bio->bi_size >> ns->lba_shift) - 1);
+	cmnd->control = cpu_to_le16(control);
+	cmnd->dsmgmt = cpu_to_le32(dsmgmt);
+
+	writel(nvmeq->sq_tail, nvmeq->q_db);
+	if (++nvmeq->sq_tail == nvmeq->q_depth)
+		nvmeq->sq_tail = 0;
+
+	spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+
+	return 0;
+
+ free_info:
+	free_info(info);
+ congestion:
+	return -EBUSY;
+}
+
+/*
+ * NB: return value of non-zero would mean that we were a stacking driver.
+ * make_request must always succeed.
+ */
+static int nvme_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct nvme_ns *ns = q->queuedata;
+	struct nvme_queue *nvmeq = get_nvmeq(ns);
+
+	if (nvme_submit_bio_queue(nvmeq, ns, bio)) {
+		blk_set_queue_congested(q, rw_is_sync(bio->bi_rw));
+		bio_list_add(&nvmeq->sq_cong, bio);
+	}
+	put_nvmeq(nvmeq);
+
+	return 0;
+}
+
+struct sync_cmd_info {
+	struct task_struct *task;
+	u32 result;
+	int status;
+};
+
+static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
+						struct nvme_completion *cqe)
+{
+	struct sync_cmd_info *cmdinfo = ctx;
+	cmdinfo->result = le32_to_cpup(&cqe->result);
+	cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
+	wake_up_process(cmdinfo->task);
+}
+
+typedef void (*completion_fn)(struct nvme_queue *, void *,
+						struct nvme_completion *);
+
+static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
+{
+	u16 head, cycle;
+
+	static const completion_fn completions[4] = {
+		[sync_completion_id] = sync_completion,
+		[bio_completion_id]  = bio_completion,
+	};
+
+	head = nvmeq->cq_head;
+	cycle = nvmeq->cq_cycle;
+
+	for (;;) {
+		unsigned long data;
+		void *ptr;
+		unsigned char handler;
+		struct nvme_completion cqe = nvmeq->cqes[head];
+		if ((le16_to_cpu(cqe.status) & 1) != cycle)
+			break;
+		nvmeq->sq_head = le16_to_cpu(cqe.sq_head);
+		if (++head == nvmeq->q_depth) {
+			head = 0;
+			cycle = !cycle;
+		}
+
+		data = free_cmdid(nvmeq, cqe.command_id);
+		handler = data & 3;
+		ptr = (void *)(data & ~3UL);
+		completions[handler](nvmeq, ptr, &cqe);
+	}
+
+	/* If the controller ignores the cq head doorbell and continuously
+	 * writes to the queue, it is theoretically possible to wrap around
+	 * the queue twice and mistakenly return IRQ_NONE.  Linux only
+	 * requires that 0.1% of your interrupts are handled, so this isn't
+	 * a big problem.
+	 */
+	if (head == nvmeq->cq_head && cycle == nvmeq->cq_cycle)
+		return IRQ_NONE;
+
+	writel(head, nvmeq->q_db + 1);
+	nvmeq->cq_head = head;
+	nvmeq->cq_cycle = cycle;
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t nvme_irq(int irq, void *data)
+{
+	return nvme_process_cq(data);
+}
+
+/*
+ * Returns 0 on success.  If the result is negative, it's a Linux error code;
+ * if the result is positive, it's an NVM Express status code
+ */
+static int nvme_submit_sync_cmd(struct nvme_queue *q, struct nvme_command *cmd,
+								u32 *result)
+{
+	int cmdid;
+	struct sync_cmd_info cmdinfo;
+
+	cmdinfo.task = current;
+	cmdinfo.status = -EINTR;
+
+	cmdid = alloc_cmdid_killable(q, &cmdinfo, sync_completion_id);
+	if (cmdid < 0)
+		return cmdid;
+	cmd->common.command_id = cmdid;
+
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	nvme_submit_cmd(q, cmd);
+	schedule();
+
+	if (result)
+		*result = cmdinfo.result;
+
+	return cmdinfo.status;
+}
+
+static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
+								u32 *result)
+{
+	return nvme_submit_sync_cmd(dev->queues[0], cmd, result);
+}
+
+static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
+{
+	int status;
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	c.delete_queue.opcode = opcode;
+	c.delete_queue.qid = cpu_to_le16(id);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+	if (status)
+		return -EIO;
+	return 0;
+}
+
+static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
+						struct nvme_queue *nvmeq)
+{
+	int status;
+	struct nvme_command c;
+	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
+
+	memset(&c, 0, sizeof(c));
+	c.create_cq.opcode = nvme_admin_create_cq;
+	c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
+	c.create_cq.cqid = cpu_to_le16(qid);
+	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
+	c.create_cq.cq_flags = cpu_to_le16(flags);
+	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+	if (status)
+		return -EIO;
+	return 0;
+}
+
+static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
+						struct nvme_queue *nvmeq)
+{
+	int status;
+	struct nvme_command c;
+	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
+
+	memset(&c, 0, sizeof(c));
+	c.create_sq.opcode = nvme_admin_create_sq;
+	c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
+	c.create_sq.sqid = cpu_to_le16(qid);
+	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
+	c.create_sq.sq_flags = cpu_to_le16(flags);
+	c.create_sq.cqid = cpu_to_le16(qid);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+	if (status)
+		return -EIO;
+	return 0;
+}
+
+static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
+{
+	return adapter_delete_queue(dev, nvme_admin_delete_cq, cqid);
+}
+
+static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
+{
+	return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
+}
+
+static void nvme_free_queue(struct nvme_dev *dev, int qid)
+{
+	struct nvme_queue *nvmeq = dev->queues[qid];
+
+	free_irq(dev->entry[nvmeq->cq_vector].vector, nvmeq);
+
+	/* Don't tell the adapter to delete the admin queue */
+	if (qid) {
+		adapter_delete_sq(dev, qid);
+		adapter_delete_cq(dev, qid);
+	}
+
+	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
+				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
+	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+	kfree(nvmeq);
+}
+
+static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
+							int depth, int vector)
+{
+	struct device *dmadev = &dev->pci_dev->dev;
+	unsigned extra = (depth + BITS_TO_LONGS(depth)) * sizeof(long);
+	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
+	if (!nvmeq)
+		return NULL;
+
+	nvmeq->cqes = dma_alloc_coherent(dmadev, CQ_SIZE(depth),
+					&nvmeq->cq_dma_addr, GFP_KERNEL);
+	if (!nvmeq->cqes)
+		goto free_nvmeq;
+	memset((void *)nvmeq->cqes, 0, CQ_SIZE(depth));
+
+	nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth),
+					&nvmeq->sq_dma_addr, GFP_KERNEL);
+	if (!nvmeq->sq_cmds)
+		goto free_cqdma;
+
+	nvmeq->q_dmadev = dmadev;
+	spin_lock_init(&nvmeq->q_lock);
+	nvmeq->cq_head = 0;
+	nvmeq->cq_cycle = 1;
+	init_waitqueue_head(&nvmeq->sq_full);
+	bio_list_init(&nvmeq->sq_cong);
+	nvmeq->q_db = &dev->dbs[qid * 2];
+	nvmeq->q_depth = depth;
+	nvmeq->cq_vector = vector;
+
+	return nvmeq;
+
+ free_cqdma:
+	dma_free_coherent(dmadev, CQ_SIZE(nvmeq->q_depth), (void *)nvmeq->cqes,
+							nvmeq->cq_dma_addr);
+ free_nvmeq:
+	kfree(nvmeq);
+	return NULL;
+}
+
+static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev,
+					int qid, int cq_size, int vector)
+{
+	int result;
+	struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector);
+
+	result = adapter_alloc_cq(dev, qid, nvmeq);
+	if (result < 0)
+		goto free_nvmeq;
+
+	result = adapter_alloc_sq(dev, qid, nvmeq);
+	if (result < 0)
+		goto release_cq;
+
+	result = request_irq(dev->entry[vector].vector, nvme_irq,
+				IRQF_DISABLED | IRQF_SHARED, "nvme", nvmeq);
+	if (result < 0)
+		goto release_sq;
+
+	return nvmeq;
+
+ release_sq:
+	adapter_delete_sq(dev, qid);
+ release_cq:
+	adapter_delete_cq(dev, qid);
+ free_nvmeq:
+	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
+				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
+	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+	kfree(nvmeq);
+	return NULL;
+}
+
+static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
+{
+	int result;
+	u32 aqa;
+	struct nvme_queue *nvmeq;
+
+	dev->dbs = ((void __iomem *)dev->bar) + 4096;
+
+	nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
+
+	aqa = nvmeq->q_depth - 1;
+	aqa |= aqa << 16;
+
+	dev->ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM;
+	dev->ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
+	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+
+	writel(aqa, &dev->bar->aqa);
+	writeq(nvmeq->sq_dma_addr, &dev->bar->asq);
+	writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
+	writel(dev->ctrl_config, &dev->bar->cc);
+
+	while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
+		msleep(100);
+		if (fatal_signal_pending(current))
+			return -EINTR;
+	}
+
+	result = request_irq(dev->entry[0].vector, nvme_irq,
+			IRQF_DISABLED | IRQF_SHARED, "nvme admin", nvmeq);
+	dev->queues[0] = nvmeq;
+	return result;
+}
+
+static int nvme_identify(struct nvme_ns *ns, void __user *addr, int cns)
+{
+	struct nvme_dev *dev = ns->dev;
+	int status;
+	struct nvme_command c;
+	void *page;
+	dma_addr_t dma_addr;
+
+	page = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
+								GFP_KERNEL);
+
+	memset(&c, 0, sizeof(c));
+	c.identify.opcode = nvme_admin_identify;
+	c.identify.nsid = cns ? 0 : cpu_to_le32(ns->ns_id);
+	c.identify.prp1 = cpu_to_le64(dma_addr);
+	c.identify.cns = cpu_to_le32(cns);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+
+	if (status)
+		status = -EIO;
+	else if (copy_to_user(addr, page, 4096))
+		status = -EFAULT;
+
+	dma_free_coherent(&dev->pci_dev->dev, 4096, page, dma_addr);
+
+	return status;
+}
+
+static int nvme_get_range_type(struct nvme_ns *ns, void __user *addr)
+{
+	struct nvme_dev *dev = ns->dev;
+	int status;
+	struct nvme_command c;
+	void *page;
+	dma_addr_t dma_addr;
+
+	page = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
+								GFP_KERNEL);
+
+	memset(&c, 0, sizeof(c));
+	c.features.opcode = nvme_admin_get_features;
+	c.features.nsid = cpu_to_le32(ns->ns_id);
+	c.features.prp1 = cpu_to_le64(dma_addr);
+	c.features.fid = cpu_to_le32(NVME_FEAT_LBA_RANGE);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+
+	/* XXX: Assuming first range for now */
+	if (status)
+		status = -EIO;
+	else if (copy_to_user(addr, page, 64))
+		status = -EFAULT;
+
+	dma_free_coherent(&dev->pci_dev->dev, 4096, page, dma_addr);
+
+	return status;
+}
+
+static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
+							unsigned long arg)
+{
+	struct nvme_ns *ns = bdev->bd_disk->private_data;
+
+	switch (cmd) {
+	case NVME_IOCTL_IDENTIFY_NS:
+		return nvme_identify(ns, (void __user *)arg, 0);
+	case NVME_IOCTL_IDENTIFY_CTRL:
+		return nvme_identify(ns, (void __user *)arg, 1);
+	case NVME_IOCTL_GET_RANGE_TYPE:
+		return nvme_get_range_type(ns, (void __user *)arg);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct block_device_operations nvme_fops = {
+	.owner		= THIS_MODULE,
+	.ioctl		= nvme_ioctl,
+};
+
+static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int index,
+			struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
+{
+	struct nvme_ns *ns;
+	struct gendisk *disk;
+	int lbaf;
+
+	if (rt->attributes & NVME_LBART_ATTRIB_HIDE)
+		return NULL;
+
+	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+	if (!ns)
+		return NULL;
+	ns->queue = blk_alloc_queue(GFP_KERNEL);
+	if (!ns->queue)
+		goto out_free_ns;
+	ns->queue->queue_flags = QUEUE_FLAG_DEFAULT | QUEUE_FLAG_NOMERGES |
+				QUEUE_FLAG_NONROT | QUEUE_FLAG_DISCARD;
+	blk_queue_make_request(ns->queue, nvme_make_request);
+	ns->dev = dev;
+	ns->queue->queuedata = ns;
+
+	disk = alloc_disk(NVME_MINORS);
+	if (!disk)
+		goto out_free_queue;
+	ns->ns_id = index;
+	ns->disk = disk;
+	lbaf = id->flbas & 0xf;
+	ns->lba_shift = id->lbaf[lbaf].ds;
+
+	disk->major = nvme_major;
+	disk->minors = NVME_MINORS;
+	disk->first_minor = NVME_MINORS * index;
+	disk->fops = &nvme_fops;
+	disk->private_data = ns;
+	disk->queue = ns->queue;
+	sprintf(disk->disk_name, "nvme%dn%d", dev->instance, index);
+	set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
+
+	return ns;
+
+ out_free_queue:
+	blk_cleanup_queue(ns->queue);
+ out_free_ns:
+	kfree(ns);
+	return NULL;
+}
+
+static void nvme_ns_free(struct nvme_ns *ns)
+{
+	put_disk(ns->disk);
+	blk_cleanup_queue(ns->queue);
+	kfree(ns);
+}
+
+static int set_queue_count(struct nvme_dev *dev, int sq_count, int cq_count)
+{
+	int status;
+	u32 result;
+	struct nvme_command c;
+	u32 q_count = (sq_count - 1) | ((cq_count - 1) << 16);
+
+	memset(&c, 0, sizeof(c));
+	c.features.opcode = nvme_admin_get_features;
+	c.features.fid = cpu_to_le32(NVME_FEAT_NUM_QUEUES);
+	c.features.dword11 = cpu_to_le32(q_count);
+
+	status = nvme_submit_admin_cmd(dev, &c, &result);
+	if (status)
+		return -EIO;
+	return min(result & 0xffff, result >> 16) + 1;
+}
+
+/* XXX: Create per-CPU queues */
+static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
+{
+	int this_cpu;
+
+	set_queue_count(dev, 1, 1);
+
+	this_cpu = get_cpu();
+	dev->queues[1] = nvme_create_queue(dev, 1, NVME_Q_DEPTH, this_cpu);
+	put_cpu();
+	if (!dev->queues[1])
+		return -ENOMEM;
+	dev->queue_count++;
+
+	return 0;
+}
+
+static void nvme_free_queues(struct nvme_dev *dev)
+{
+	int i;
+
+	for (i = dev->queue_count - 1; i >= 0; i--)
+		nvme_free_queue(dev, i);
+}
+
+static int __devinit nvme_dev_add(struct nvme_dev *dev)
+{
+	int res, nn, i;
+	struct nvme_ns *ns, *next;
+	void *id;
+	dma_addr_t dma_addr;
+	struct nvme_command cid, crt;
+
+	res = nvme_setup_io_queues(dev);
+	if (res)
+		return res;
+
+	/* XXX: Switch to a SG list once prp2 works */
+	id = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr,
+								GFP_KERNEL);
+
+	memset(&cid, 0, sizeof(cid));
+	cid.identify.opcode = nvme_admin_identify;
+	cid.identify.nsid = 0;
+	cid.identify.prp1 = cpu_to_le64(dma_addr);
+	cid.identify.cns = cpu_to_le32(1);
+
+	res = nvme_submit_admin_cmd(dev, &cid, NULL);
+	if (res) {
+		res = -EIO;
+		goto out_free;
+	}
+
+	nn = le32_to_cpup(&((struct nvme_id_ctrl *)id)->nn);
+
+	cid.identify.cns = 0;
+	memset(&crt, 0, sizeof(crt));
+	crt.features.opcode = nvme_admin_get_features;
+	crt.features.prp1 = cpu_to_le64(dma_addr + 4096);
+	crt.features.fid = cpu_to_le32(NVME_FEAT_LBA_RANGE);
+
+	for (i = 0; i < nn; i++) {
+		cid.identify.nsid = cpu_to_le32(i);
+		res = nvme_submit_admin_cmd(dev, &cid, NULL);
+		if (res)
+			continue;
+
+		if (((struct nvme_id_ns *)id)->ncap == 0)
+			continue;
+
+		crt.features.nsid = cpu_to_le32(i);
+		res = nvme_submit_admin_cmd(dev, &crt, NULL);
+		if (res)
+			continue;
+
+		ns = nvme_alloc_ns(dev, i, id, id + 4096);
+		if (ns)
+			list_add_tail(&ns->list, &dev->namespaces);
+	}
+	list_for_each_entry(ns, &dev->namespaces, list)
+		add_disk(ns->disk);
+
+	dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr);
+	return 0;
+
+ out_free:
+	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
+		list_del(&ns->list);
+		nvme_ns_free(ns);
+	}
+
+	dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr);
+	return res;
+}
+
+static int nvme_dev_remove(struct nvme_dev *dev)
+{
+	struct nvme_ns *ns, *next;
+
+	/* TODO: wait all I/O finished or cancel them */
+
+	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
+		list_del(&ns->list);
+		del_gendisk(ns->disk);
+		nvme_ns_free(ns);
+	}
+
+	nvme_free_queues(dev);
+
+	return 0;
+}
+
+/* XXX: Use an ida or something to let remove / add work correctly */
+static void nvme_set_instance(struct nvme_dev *dev)
+{
+	static int instance;
+	dev->instance = instance++;
+}
+
+static void nvme_release_instance(struct nvme_dev *dev)
+{
+}
+
+static int __devinit nvme_probe(struct pci_dev *pdev,
+						const struct pci_device_id *id)
+{
+	int result = -ENOMEM;
+	struct nvme_dev *dev;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+	dev->entry = kcalloc(num_possible_cpus(), sizeof(*dev->entry),
+								GFP_KERNEL);
+	if (!dev->entry)
+		goto free;
+	dev->queues = kcalloc(2, sizeof(void *), GFP_KERNEL);
+	if (!dev->queues)
+		goto free;
+
+	INIT_LIST_HEAD(&dev->namespaces);
+	dev->pci_dev = pdev;
+	pci_set_drvdata(pdev, dev);
+	dma_set_mask(&dev->pci_dev->dev, DMA_BIT_MASK(64));
+	nvme_set_instance(dev);
+
+	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
+	if (!dev->bar) {
+		result = -ENOMEM;
+		goto disable;
+	}
+
+	result = nvme_configure_admin_queue(dev);
+	if (result)
+		goto unmap;
+	dev->queue_count++;
+
+	result = nvme_dev_add(dev);
+	if (result)
+		goto delete;
+	return 0;
+
+ delete:
+	nvme_free_queues(dev);
+ unmap:
+	iounmap(dev->bar);
+ disable:
+	pci_disable_msix(pdev);
+	nvme_release_instance(dev);
+ free:
+	kfree(dev->queues);
+	kfree(dev->entry);
+	kfree(dev);
+	return result;
+}
+
+static void __devexit nvme_remove(struct pci_dev *pdev)
+{
+	struct nvme_dev *dev = pci_get_drvdata(pdev);
+	nvme_dev_remove(dev);
+	pci_disable_msix(pdev);
+	iounmap(dev->bar);
+	nvme_release_instance(dev);
+	kfree(dev->queues);
+	kfree(dev->entry);
+	kfree(dev);
+}
+
+/* These functions are yet to be implemented */
+#define nvme_error_detected NULL
+#define nvme_dump_registers NULL
+#define nvme_link_reset NULL
+#define nvme_slot_reset NULL
+#define nvme_error_resume NULL
+#define nvme_suspend NULL
+#define nvme_resume NULL
+
+static struct pci_error_handlers nvme_err_handler = {
+	.error_detected	= nvme_error_detected,
+	.mmio_enabled	= nvme_dump_registers,
+	.link_reset	= nvme_link_reset,
+	.slot_reset	= nvme_slot_reset,
+	.resume		= nvme_error_resume,
+};
+
+/* Move to pci_ids.h later */
+#define PCI_CLASS_STORAGE_EXPRESS	0x010802
+
+static DEFINE_PCI_DEVICE_TABLE(nvme_id_table) = {
+	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, nvme_id_table);
+
+static struct pci_driver nvme_driver = {
+	.name		= "nvme",
+	.id_table	= nvme_id_table,
+	.probe		= nvme_probe,
+	.remove		= __devexit_p(nvme_remove),
+	.suspend	= nvme_suspend,
+	.resume		= nvme_resume,
+	.err_handler	= &nvme_err_handler,
+};
+
+static int __init nvme_init(void)
+{
+	int result;
+
+	nvme_major = register_blkdev(nvme_major, "nvme");
+	if (nvme_major <= 0)
+		return -EBUSY;
+
+	result = pci_register_driver(&nvme_driver);
+	if (!result)
+		return 0;
+
+	unregister_blkdev(nvme_major, "nvme");
+	return result;
+}
+
+static void __exit nvme_exit(void)
+{
+	pci_unregister_driver(&nvme_driver);
+	unregister_blkdev(nvme_major, "nvme");
+}
+
+MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("0.1");
+module_init(nvme_init);
+module_exit(nvme_exit);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
new file mode 100644
index 000000000000..9ba53584f722
--- /dev/null
+++ b/include/linux/nvme.h
@@ -0,0 +1,343 @@
+/*
+ * Definitions for the NVM Express interface
+ * Copyright (c) 2011, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _LINUX_NVME_H
+#define _LINUX_NVME_H
+
+#include <linux/types.h>
+
+struct nvme_bar {
+	__u64			cap;	/* Controller Capabilities */
+	__u32			vs;	/* Version */
+	__u32			ims;	/* Interrupt Mask Set */
+	__u32			imc;	/* Interrupt Mask Clear */
+	__u32			cc;	/* Controller Configuration */
+	__u32			csts;	/* Controller Status */
+	__u32			aqa;	/* Admin Queue Attributes */
+	__u64			asq;	/* Admin SQ Base Address */
+	__u64			acq;	/* Admin CQ Base Address */
+};
+
+enum {
+	NVME_CC_ENABLE		= 1 << 0,
+	NVME_CC_CSS_NVM		= 0 << 4,
+	NVME_CC_MPS_SHIFT	= 7,
+	NVME_CC_ARB_RR		= 0 << 11,
+	NVME_CC_ARB_WRRU	= 1 << 11,
+	NVME_CC_ARB_VS		= 3 << 11,
+	NVME_CC_SHN_NONE	= 0 << 13,
+	NVME_CC_SHN_NORMAL	= 1 << 13,
+	NVME_CC_SHN_ABRUPT	= 2 << 13,
+	NVME_CSTS_RDY		= 1 << 0,
+	NVME_CSTS_CFS		= 1 << 1,
+	NVME_CSTS_SHST_NORMAL	= 0 << 2,
+	NVME_CSTS_SHST_OCCUR	= 1 << 2,
+	NVME_CSTS_SHST_CMPLT	= 2 << 2,
+};
+
+#define NVME_VS(major, minor)	(major << 16 | minor)
+
+struct nvme_id_ctrl {
+	__le16			vid;
+	__le16			ssvid;
+	char			sn[20];
+	char			mn[40];
+	char			fr[8];
+	__le32			nn;
+	__u8			rab;
+	__u8			rsvd77[178];
+	__le16			oacs;
+	__u8			acl;
+	__u8			aerl;
+	__u8			frmw;
+	__u8			lpa;
+	__u8			elpe;
+	__u8			npss;
+	__u8			rsvd264[248];
+	__le64			psd[32];
+	__le16			oncs;
+	__le16			fuses;
+	__u8			fna;
+	__u8			vwc;
+	__le16			awun;
+	__le16			awupf;
+	__u8			rsvd778[246];
+	__u8			cmdset[2048];
+	__u8			vs[1024];
+};
+
+struct nvme_lbaf {
+	__le16			ms;
+	__u8			ds;
+	__u8			rp;
+};
+
+struct nvme_id_ns {
+	__le64			nsze;
+	__le64			ncap;
+	__le64			nuse;
+	__u8			nsfeat;
+	__u8			nlbaf;
+	__u8			flbas;
+	__u8			mc;
+	__u8			dpc;
+	__u8			dps;
+	__u8			rsvd30[98];
+	struct nvme_lbaf	lbaf[16];
+	__u8			rsvd192[192];
+	__u8			vs[3712];
+};
+
+enum {
+	NVME_NS_FEAT_THIN	= 1 << 0,
+	NVME_LBAF_RP_BEST	= 0,
+	NVME_LBAF_RP_BETTER	= 1,
+	NVME_LBAF_RP_GOOD	= 2,
+	NVME_LBAF_RP_DEGRADED	= 3,
+};
+
+struct nvme_lba_range_type {
+	__u8			type;
+	__u8			attributes;
+	__u8			rsvd2[14];
+	__u64			slba;
+	__u64			nlb;
+	__u8			guid[16];
+	__u8			rsvd48[16];
+};
+
+enum {
+	NVME_LBART_TYPE_FS	= 0x01,
+	NVME_LBART_TYPE_RAID	= 0x02,
+	NVME_LBART_TYPE_CACHE	= 0x03,
+	NVME_LBART_TYPE_SWAP	= 0x04,
+
+	NVME_LBART_ATTRIB_TEMP	= 1 << 0,
+	NVME_LBART_ATTRIB_HIDE	= 1 << 1,
+};
+
+/* I/O commands */
+
+enum nvme_opcode {
+	nvme_cmd_flush		= 0x00,
+	nvme_cmd_write		= 0x01,
+	nvme_cmd_read		= 0x02,
+	nvme_cmd_write_uncor	= 0x04,
+	nvme_cmd_compare	= 0x05,
+	nvme_cmd_dsm		= 0x09,
+};
+
+struct nvme_rw_command {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2;
+	__le64			metadata;
+	__le64			prp1;
+	__le64			prp2;
+	__le64			slba;
+	__le16			length;
+	__le16			control;
+	__le32			dsmgmt;
+	__le32			reftag;
+	__le16			apptag;
+	__le16			appmask;
+};
+
+enum {
+	NVME_RW_LR			= 1 << 15,
+	NVME_RW_FUA			= 1 << 14,
+	NVME_RW_DSM_FREQ_UNSPEC		= 0,
+	NVME_RW_DSM_FREQ_TYPICAL	= 1,
+	NVME_RW_DSM_FREQ_RARE		= 2,
+	NVME_RW_DSM_FREQ_READS		= 3,
+	NVME_RW_DSM_FREQ_WRITES		= 4,
+	NVME_RW_DSM_FREQ_RW		= 5,
+	NVME_RW_DSM_FREQ_ONCE		= 6,
+	NVME_RW_DSM_FREQ_PREFETCH	= 7,
+	NVME_RW_DSM_FREQ_TEMP		= 8,
+	NVME_RW_DSM_LATENCY_NONE	= 0 << 4,
+	NVME_RW_DSM_LATENCY_IDLE	= 1 << 4,
+	NVME_RW_DSM_LATENCY_NORM	= 2 << 4,
+	NVME_RW_DSM_LATENCY_LOW		= 3 << 4,
+	NVME_RW_DSM_SEQ_REQ		= 1 << 6,
+	NVME_RW_DSM_COMPRESSED		= 1 << 7,
+};
+
+/* Admin commands */
+
+enum nvme_admin_opcode {
+	nvme_admin_delete_sq		= 0x00,
+	nvme_admin_create_sq		= 0x01,
+	nvme_admin_get_features		= 0x02,
+	nvme_admin_delete_cq		= 0x04,
+	nvme_admin_create_cq		= 0x05,
+	nvme_admin_identify		= 0x06,
+	nvme_admin_abort_cmd		= 0x08,
+	nvme_admin_set_features		= 0x09,
+	nvme_admin_get_log_page		= 0x0a,
+	nvme_admin_async_event		= 0x0c,
+	nvme_admin_download_fw		= 0x0d,
+	nvme_admin_security_recv	= 0x0e,
+	nvme_admin_format_nvm		= 0x10,
+	nvme_admin_security_send	= 0x11,
+	nvme_admin_activate_fw		= 0x14,
+};
+
+enum {
+	NVME_QUEUE_PHYS_CONTIG	= (1 << 0),
+	NVME_CQ_IRQ_ENABLED	= (1 << 1),
+	NVME_SQ_PRIO_URGENT	= (0 << 1),
+	NVME_SQ_PRIO_HIGH	= (1 << 1),
+	NVME_SQ_PRIO_MEDIUM	= (2 << 1),
+	NVME_SQ_PRIO_LOW	= (3 << 1),
+	NVME_FEAT_ARBITRATION	= 0x01,
+	NVME_FEAT_POWER_MGMT	= 0x02,
+	NVME_FEAT_LBA_RANGE	= 0x03,
+	NVME_FEAT_TEMP_THRESH	= 0x04,
+	NVME_FEAT_ERR_RECOVERY	= 0x05,
+	NVME_FEAT_VOLATILE_WC	= 0x06,
+	NVME_FEAT_NUM_QUEUES	= 0x07,
+	NVME_FEAT_IRQ_COALESCE	= 0x08,
+	NVME_FEAT_IRQ_CONFIG	= 0x09,
+	NVME_FEAT_WRITE_ATOMIC	= 0x0a,
+	NVME_FEAT_ASYNC_EVENT	= 0x0b,
+	NVME_FEAT_SW_PROGRESS	= 0x0c,
+};
+
+struct nvme_identify {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2[2];
+	__le64			prp1;
+	__le64			prp2;
+	__le32			cns;
+	__u32			rsvd11[5];
+};
+
+struct nvme_features {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2[2];
+	__le64			prp1;
+	__le64			prp2;
+	__le32			fid;
+	__le32			dword11;
+	__u32			rsvd12[4];
+};
+
+struct nvme_create_cq {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			rsvd1[5];
+	__le64			prp1;
+	__u64			rsvd8;
+	__le16			cqid;
+	__le16			qsize;
+	__le16			cq_flags;
+	__le16			irq_vector;
+	__u32			rsvd12[4];
+};
+
+struct nvme_create_sq {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			rsvd1[5];
+	__le64			prp1;
+	__u64			rsvd8;
+	__le16			sqid;
+	__le16			qsize;
+	__le16			sq_flags;
+	__le16			cqid;
+	__le32			rsvd12[4];
+};
+
+struct nvme_delete_queue {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__u32			rsvd1[9];
+	__le16			qid;
+	__le16			rsvd10;
+	__le32			rsvd11[5];
+};
+
+struct nvme_common_command {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u32			rsvd2[14];
+};
+
+struct nvme_command {
+	union {
+		struct nvme_common_command common;
+		struct nvme_rw_command rw;
+		struct nvme_identify identify;
+		struct nvme_features features;
+		struct nvme_create_cq create_cq;
+		struct nvme_create_sq create_sq;
+		struct nvme_delete_queue delete_queue;
+	};
+};
+
+/* XXX: Sync with spec */
+enum {
+	NVME_SC_SUCCESS			= 0x0,
+	NVME_SC_INVALID_OPCODE		= 0x1,
+	NVME_SC_INVALID_FIELD		= 0x2,
+	NVME_SC_CMDID_CONFLICT		= 0x3,
+	NVME_SC_DATA_XFER_ERROR		= 0x4,
+	NVME_SC_POWER_LOSS		= 0x5,
+	NVME_SC_INTERNAL		= 0x6,
+	NVME_SC_ABORT_REQ		= 0x7,
+	NVME_SC_ABORT_QUEUE		= 0x8,
+	NVME_SC_FUSED_FAIL		= 0x9,
+	NVME_SC_FUSED_MISSING		= 0xa,
+	NVME_SC_LBA_RANGE		= 0x80,
+	NVME_SC_CAP_EXCEEDED		= 0x81,
+	NVME_SC_NS_NOT_READY		= 0x82,
+	NVME_SC_CQ_INVALID		= 0x100,
+	NVME_SC_QID_INVALID		= 0x101,
+	NVME_SC_QUEUE_SIZE		= 0x102,
+	NVME_SC_WRITE_FAULT		= 0x280,
+	NVME_SC_READ_ERROR		= 0x281,
+};
+
+struct nvme_completion {
+	__le32	result;		/* Used by admin commands to return data */
+	__le32	rsvd;
+	__le16	sq_head;	/* how much of this queue may be reclaimed */
+	__le16	sq_id;		/* submission queue that generated this entry */
+	__u16	command_id;	/* of the command which completed */
+	__le16	status;		/* did the command fail, and if so, why? */
+};
+
+#define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
+#define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
+#define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
+
+#endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From 7b4fe9b1cb4b9a6f4ae23a12ef96d08d96e2a5da Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Wed, 26 Jan 2011 10:01:21 -0500
Subject: NVMe: Make nvme_common_command more featureful

Add prp1, prp2 and the metadata prp to the common command, since the
fields are generally used this way.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 9ba53584f722..1c0b5ef08959 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -142,6 +142,18 @@ enum nvme_opcode {
 	nvme_cmd_dsm		= 0x09,
 };
 
+struct nvme_common_command {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2;
+	__le64			metadata;
+	__le64			prp1;
+	__le64			prp2;
+	__u32			rsvd10[6];
+};
+
 struct nvme_rw_command {
 	__u8			opcode;
 	__u8			flags;
@@ -284,14 +296,6 @@ struct nvme_delete_queue {
 	__le32			rsvd11[5];
 };
 
-struct nvme_common_command {
-	__u8			opcode;
-	__u8			flags;
-	__u16			command_id;
-	__le32			nsid;
-	__u32			rsvd2[14];
-};
-
 struct nvme_command {
 	union {
 		struct nvme_common_command common;
-- 
cgit v1.2.3


From a53295b6998f62d961c29e54051c1cf1d738c2b3 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Tue, 1 Feb 2011 16:13:29 -0500
Subject: NVMe: Add NVME_IOCTL_SUBMIT_IO

Allow userspace to submit synchronous I/O like the SCSI sg interface does.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/nvme.h | 18 ++++++++++++++++++
 2 files changed, 61 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index f44d6cd87ea2..40fb2e1bdfe4 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -780,6 +780,47 @@ static int nvme_get_range_type(struct nvme_ns *ns, unsigned long addr)
 	return nvme_submit_user_admin_command(ns->dev, addr, 4096, &c);
 }
 
+static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
+{
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_queue *nvmeq;
+	struct nvme_user_io io;
+	struct nvme_command c;
+	unsigned length;
+	u32 result;
+	int nents, status;
+	struct scatterlist *sg;
+
+	if (copy_from_user(&io, uio, sizeof(io)))
+		return -EFAULT;
+	length = io.nblocks << io.block_shift;
+	nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length, &sg);
+	if (nents < 0)
+		return nents;
+
+	memset(&c, 0, sizeof(c));
+	c.rw.opcode = io.opcode;
+	c.rw.flags = io.flags;
+	c.rw.nsid = cpu_to_le32(io.nsid);
+	c.rw.slba = cpu_to_le64(io.slba);
+	c.rw.length = cpu_to_le16(io.nblocks - 1);
+	c.rw.control = cpu_to_le16(io.control);
+	c.rw.dsmgmt = cpu_to_le16(io.dsmgmt);
+	c.rw.reftag = cpu_to_le32(io.reftag);	/* XXX: endian? */
+	c.rw.apptag = cpu_to_le16(io.apptag);
+	c.rw.appmask = cpu_to_le16(io.appmask);
+	/* XXX: metadata */
+	nvme_setup_prps(&c.common, sg, length);
+
+	nvmeq = get_nvmeq(ns);
+	status = nvme_submit_sync_cmd(nvmeq, &c, &result);
+	put_nvmeq(nvmeq);
+
+	nvme_unmap_user_pages(dev, io.opcode & 1, io.addr, length, sg, nents);
+	put_user(result, &uio->result);
+	return status;
+}
+
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 							unsigned long arg)
 {
@@ -792,6 +833,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 		return nvme_identify(ns, arg, 1);
 	case NVME_IOCTL_GET_RANGE_TYPE:
 		return nvme_get_range_type(ns, arg);
+	case NVME_IOCTL_SUBMIT_IO:
+		return nvme_submit_io(ns, (void __user *)arg);
 	default:
 		return -ENOTTY;
 	}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 1c0b5ef08959..0aaecb059d14 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -340,8 +340,26 @@ struct nvme_completion {
 	__le16	status;		/* did the command fail, and if so, why? */
 };
 
+struct nvme_user_io {
+	__u8	opcode;
+	__u8	flags;
+	__u16	control;
+	__u32	nsid;
+	__u64	metadata;
+	__u64	addr;
+	__u64	slba;
+	__u16	nblocks;
+	__u16	block_shift;
+	__u32	dsmgmt;
+	__u32	reftag;
+	__u16	apptag;
+	__u16	appmask;
+	__u32	result;
+};
+
 #define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
 #define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
 #define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
+#define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_rw_command)
 
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From 7a63e07b9a98b77dd075e06b93c1d8dc871ddad5 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Thu, 3 Feb 2011 09:20:57 -0500
Subject: NVMe: Add remaining status codes

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 0aaecb059d14..dbbdc126401b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -321,14 +321,29 @@ enum {
 	NVME_SC_ABORT_QUEUE		= 0x8,
 	NVME_SC_FUSED_FAIL		= 0x9,
 	NVME_SC_FUSED_MISSING		= 0xa,
+	NVME_SC_INVALID_NS		= 0xb,
 	NVME_SC_LBA_RANGE		= 0x80,
 	NVME_SC_CAP_EXCEEDED		= 0x81,
 	NVME_SC_NS_NOT_READY		= 0x82,
 	NVME_SC_CQ_INVALID		= 0x100,
 	NVME_SC_QID_INVALID		= 0x101,
 	NVME_SC_QUEUE_SIZE		= 0x102,
+	NVME_SC_ABORT_LIMIT		= 0x103,
+	NVME_SC_ABORT_MISSING		= 0x104,
+	NVME_SC_ASYNC_LIMIT		= 0x105,
+	NVME_SC_FIRMWARE_SLOT		= 0x106,
+	NVME_SC_FIRMWARE_IMAGE		= 0x107,
+	NVME_SC_INVALID_VECTOR		= 0x108,
+	NVME_SC_INVALID_LOG_PAGE	= 0x109,
+	NVME_SC_INVALID_FORMAT		= 0x10a,
+	NVME_SC_BAD_ATTRIBUTES		= 0x180,
 	NVME_SC_WRITE_FAULT		= 0x280,
 	NVME_SC_READ_ERROR		= 0x281,
+	NVME_SC_GUARD_CHECK		= 0x282,
+	NVME_SC_APPTAG_CHECK		= 0x283,
+	NVME_SC_REFTAG_CHECK		= 0x284,
+	NVME_SC_COMPARE_FAILED		= 0x285,
+	NVME_SC_ACCESS_DENIED		= 0x286,
 };
 
 struct nvme_completion {
-- 
cgit v1.2.3


From 6ee44cdced04a53dc4f27eb97067e6cd33784726 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Thu, 3 Feb 2011 10:58:26 -0500
Subject: NVMe: Add download / activate firmware ioctls

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nvme.h | 33 +++++++++++++++++++++++++++------
 2 files changed, 72 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 744db3877c42..7cdf7f69cdcd 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -829,6 +829,47 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	return status;
 }
 
+static int nvme_download_firmware(struct nvme_ns *ns,
+						struct nvme_dlfw __user *udlfw)
+{
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_dlfw dlfw;
+	struct nvme_command c;
+	int nents, status;
+	struct scatterlist *sg;
+
+	if (copy_from_user(&dlfw, udlfw, sizeof(dlfw)))
+		return -EFAULT;
+	if (dlfw.length >= (1 << 30))
+		return -EINVAL;
+
+	nents = nvme_map_user_pages(dev, 1, dlfw.addr, dlfw.length * 4, &sg);
+	if (nents < 0)
+		return nents;
+
+	memset(&c, 0, sizeof(c));
+	c.dlfw.opcode = nvme_admin_download_fw;
+	c.dlfw.numd = cpu_to_le32(dlfw.length);
+	c.dlfw.offset = cpu_to_le32(dlfw.offset);
+	nvme_setup_prps(&c.common, sg, dlfw.length * 4);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+	nvme_unmap_user_pages(dev, 0, dlfw.addr, dlfw.length * 4, sg, nents);
+	return status;
+}
+
+static int nvme_activate_firmware(struct nvme_ns *ns, unsigned long arg)
+{
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = nvme_admin_activate_fw;
+	c.common.rsvd10[0] = cpu_to_le32(arg);
+
+	return nvme_submit_admin_cmd(dev, &c, NULL);
+}
+
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 							unsigned long arg)
 {
@@ -843,6 +884,10 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 		return nvme_get_range_type(ns, arg);
 	case NVME_IOCTL_SUBMIT_IO:
 		return nvme_submit_io(ns, (void __user *)arg);
+	case NVME_IOCTL_DOWNLOAD_FW:
+		return nvme_download_firmware(ns, (void __user *)arg);
+	case NVME_IOCTL_ACTIVATE_FW:
+		return nvme_activate_firmware(ns, arg);
 	default:
 		return -ENOTTY;
 	}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index dbbdc126401b..8eed0e432eef 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -262,7 +262,7 @@ struct nvme_create_cq {
 	__u8			opcode;
 	__u8			flags;
 	__u16			command_id;
-	__le32			rsvd1[5];
+	__u32			rsvd1[5];
 	__le64			prp1;
 	__u64			rsvd8;
 	__le16			cqid;
@@ -276,14 +276,14 @@ struct nvme_create_sq {
 	__u8			opcode;
 	__u8			flags;
 	__u16			command_id;
-	__le32			rsvd1[5];
+	__u32			rsvd1[5];
 	__le64			prp1;
 	__u64			rsvd8;
 	__le16			sqid;
 	__le16			qsize;
 	__le16			sq_flags;
 	__le16			cqid;
-	__le32			rsvd12[4];
+	__u32			rsvd12[4];
 };
 
 struct nvme_delete_queue {
@@ -292,8 +292,20 @@ struct nvme_delete_queue {
 	__u16			command_id;
 	__u32			rsvd1[9];
 	__le16			qid;
-	__le16			rsvd10;
-	__le32			rsvd11[5];
+	__u16			rsvd10;
+	__u32			rsvd11[5];
+};
+
+struct nvme_download_firmware {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__u32			rsvd1[5];
+	__le64			prp1;
+	__le64			prp2;
+	__le32			numd;
+	__le32			offset;
+	__u32			rsvd12[4];
 };
 
 struct nvme_command {
@@ -305,6 +317,7 @@ struct nvme_command {
 		struct nvme_create_cq create_cq;
 		struct nvme_create_sq create_sq;
 		struct nvme_delete_queue delete_queue;
+		struct nvme_download_firmware dlfw;
 	};
 };
 
@@ -348,7 +361,7 @@ enum {
 
 struct nvme_completion {
 	__le32	result;		/* Used by admin commands to return data */
-	__le32	rsvd;
+	__u32	rsvd;
 	__le16	sq_head;	/* how much of this queue may be reclaimed */
 	__le16	sq_id;		/* submission queue that generated this entry */
 	__u16	command_id;	/* of the command which completed */
@@ -372,9 +385,17 @@ struct nvme_user_io {
 	__u32	result;
 };
 
+struct nvme_dlfw {
+	__u64	addr;
+	__u32	length;	/* In dwords */
+	__u32	offset;	/* In dwords */
+};
+
 #define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
 #define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
 #define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
 #define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_rw_command)
+#define NVME_IOCTL_DOWNLOAD_FW	_IOR('N', 0x44, struct nvme_dlfw)
+#define NVME_IOCTL_ACTIVATE_FW	_IO('N', 0x45)
 
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From 897cfe1ce7db152fa6dde576f4213a6160bf6502 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Mon, 14 Feb 2011 12:20:15 -0500
Subject: NVMe: Update BAR structure to match the current spec

Add two reserved registers in the middle of the BAR to match the 1.0
spec plus ECN 0002.

Also rename IMC and ISC to INTMC and INTSC to conform with the spec.
We still don't need to use them :-)

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 8eed0e432eef..757faa71666e 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -24,10 +24,12 @@
 struct nvme_bar {
 	__u64			cap;	/* Controller Capabilities */
 	__u32			vs;	/* Version */
-	__u32			ims;	/* Interrupt Mask Set */
-	__u32			imc;	/* Interrupt Mask Clear */
+	__u32			intms;	/* Interrupt Mask Set */
+	__u32			intmc;	/* Interrupt Mask Clear */
 	__u32			cc;	/* Controller Configuration */
+	__u32			rsvd1;	/* Reserved */
 	__u32			csts;	/* Controller Status */
+	__u32			rsvd2;	/* Reserved */
 	__u32			aqa;	/* Admin Queue Attributes */
 	__u64			asq;	/* Admin SQ Base Address */
 	__u64			acq;	/* Admin CQ Base Address */
-- 
cgit v1.2.3


From 2ddc4f74d8adcf3e1cdec7f3e72d19b5c878597c Mon Sep 17 00:00:00 2001
From: Krzysztof Wierzbicki <krzysztof.wierzbicki@intel.com>
Date: Mon, 28 Feb 2011 08:27:13 +0100
Subject: NVMe: Update admin opcodes to match the 1.0RC spec

Signed-off-by: Krzysztof Wierzbicki <krzysztof.wierzbicki@intel.com>
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 757faa71666e..c46a9b7988fb 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -199,19 +199,19 @@ enum {
 enum nvme_admin_opcode {
 	nvme_admin_delete_sq		= 0x00,
 	nvme_admin_create_sq		= 0x01,
-	nvme_admin_get_features		= 0x02,
+	nvme_admin_get_log_page		= 0x02,
 	nvme_admin_delete_cq		= 0x04,
 	nvme_admin_create_cq		= 0x05,
 	nvme_admin_identify		= 0x06,
 	nvme_admin_abort_cmd		= 0x08,
 	nvme_admin_set_features		= 0x09,
-	nvme_admin_get_log_page		= 0x0a,
+	nvme_admin_get_features		= 0x0a,
 	nvme_admin_async_event		= 0x0c,
-	nvme_admin_download_fw		= 0x0d,
-	nvme_admin_security_recv	= 0x0e,
-	nvme_admin_format_nvm		= 0x10,
-	nvme_admin_security_send	= 0x11,
-	nvme_admin_activate_fw		= 0x14,
+	nvme_admin_activate_fw		= 0x10,
+	nvme_admin_download_fw		= 0x11,
+	nvme_admin_format_nvm		= 0x80,
+	nvme_admin_security_send	= 0x81,
+	nvme_admin_security_recv	= 0x82,
 };
 
 enum {
-- 
cgit v1.2.3


From 19e899b2f9f89f4a290dd5c9c24d15987a18ab21 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Wed, 16 Mar 2011 16:29:24 -0400
Subject: NVMe: Remove outdated comments

The head can never overrun the tail since we won't allocate enough command
IDs to let that happen.  The status codes are in sync with the spec.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 1 -
 include/linux/nvme.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 740a9c1b81aa..d4f95eb51dc1 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -245,7 +245,6 @@ static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
 {
 	unsigned long flags;
 	u16 tail;
-	/* XXX: Need to check tail isn't going to overrun head */
 	spin_lock_irqsave(&nvmeq->q_lock, flags);
 	tail = nvmeq->sq_tail;
 	memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index c46a9b7988fb..6b5a8d19daf5 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -323,7 +323,6 @@ struct nvme_command {
 	};
 };
 
-/* XXX: Sync with spec */
 enum {
 	NVME_SC_SUCCESS			= 0x0,
 	NVME_SC_INVALID_OPCODE		= 0x1,
-- 
cgit v1.2.3


From 9d4af1b7796ba02b73a79a8694399e5a3cd1c55d Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Sun, 20 Mar 2011 07:27:10 -0400
Subject: NVMe: Correct the definitions of two ioctls

NVME_IOCTL_SUBMIT_IO has a struct nvme_user_io, not a struct nvme_rw_command
as a parameter, and NVME_IOCTL_DOWNLOAD_FW is a Write, not a Read.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 6b5a8d19daf5..fd10d597cca7 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -395,8 +395,8 @@ struct nvme_dlfw {
 #define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
 #define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
 #define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
-#define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_rw_command)
-#define NVME_IOCTL_DOWNLOAD_FW	_IOR('N', 0x44, struct nvme_dlfw)
+#define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_user_io)
+#define NVME_IOCTL_DOWNLOAD_FW	_IOW('N', 0x44, struct nvme_dlfw)
 #define NVME_IOCTL_ACTIVATE_FW	_IO('N', 0x45)
 
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From 6c7d49455ceb63064f992347d9185ff5bf43497a Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Mon, 21 Mar 2011 09:48:57 -0400
Subject: NVMe: Change the definition of nvme_user_io

The read and write commands don't define a 'result', so there's no need
to copy it back to userspace.

Remove the ability of the ioctl to submit commands to a different
namespace; it's just asking for trouble, and the use case I have in mind
will be addressed througha  different ioctl in the future.  That removes
the need for both the block_shift and nsid arguments.

Check that the opcode is one of 'read' or 'write'.  Future opcodes may
be added in the future, but we will need a different structure definition
for them.

The nblocks field is redefined to be 0-based.  This allows the user to
request the full 65536 blocks.

Don't byteswap the reftag, apptag and appmask.  Martin Petersen tells
me these are calculated in big-endian and are transmitted to the device
in big-endian.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 27 +++++++++++++++++----------
 include/linux/nvme.h |  8 +++-----
 2 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index d0b52622e261..90a96ec8a596 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -1035,29 +1035,37 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	struct nvme_user_io io;
 	struct nvme_command c;
 	unsigned length;
-	u32 result;
 	int nents, status;
 	struct scatterlist *sg;
 	struct nvme_prps *prps;
 
 	if (copy_from_user(&io, uio, sizeof(io)))
 		return -EFAULT;
-	length = io.nblocks << io.block_shift;
-	nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length, &sg);
+	length = (io.nblocks + 1) << ns->lba_shift;
+
+	switch (io.opcode) {
+	case nvme_cmd_write:
+	case nvme_cmd_read:
+		nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr,
+								length, &sg);
+	default:
+		return -EFAULT;
+	}
+
 	if (nents < 0)
 		return nents;
 
 	memset(&c, 0, sizeof(c));
 	c.rw.opcode = io.opcode;
 	c.rw.flags = io.flags;
-	c.rw.nsid = cpu_to_le32(io.nsid);
+	c.rw.nsid = cpu_to_le32(ns->ns_id);
 	c.rw.slba = cpu_to_le64(io.slba);
-	c.rw.length = cpu_to_le16(io.nblocks - 1);
+	c.rw.length = cpu_to_le16(io.nblocks);
 	c.rw.control = cpu_to_le16(io.control);
 	c.rw.dsmgmt = cpu_to_le16(io.dsmgmt);
-	c.rw.reftag = cpu_to_le32(io.reftag);	/* XXX: endian? */
-	c.rw.apptag = cpu_to_le16(io.apptag);
-	c.rw.appmask = cpu_to_le16(io.appmask);
+	c.rw.reftag = io.reftag;
+	c.rw.apptag = io.apptag;
+	c.rw.appmask = io.appmask;
 	/* XXX: metadata */
 	prps = nvme_setup_prps(dev, &c.common, sg, length);
 
@@ -1069,11 +1077,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	 * additional races since q_lock already protects against other CPUs.
 	 */
 	put_nvmeq(nvmeq);
-	status = nvme_submit_sync_cmd(nvmeq, &c, &result, IO_TIMEOUT);
+	status = nvme_submit_sync_cmd(nvmeq, &c, NULL, IO_TIMEOUT);
 
 	nvme_unmap_user_pages(dev, io.opcode & 1, io.addr, length, sg, nents);
 	nvme_free_prps(dev, prps);
-	put_user(result, &uio->result);
 	return status;
 }
 
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index fd10d597cca7..347ad5f9a721 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -373,17 +373,15 @@ struct nvme_user_io {
 	__u8	opcode;
 	__u8	flags;
 	__u16	control;
-	__u32	nsid;
+	__u16	nblocks;
+	__u16	rsvd;
 	__u64	metadata;
 	__u64	addr;
 	__u64	slba;
-	__u16	nblocks;
-	__u16	block_shift;
 	__u32	dsmgmt;
 	__u32	reftag;
 	__u16	apptag;
 	__u16	appmask;
-	__u32	result;
 };
 
 struct nvme_dlfw {
@@ -395,7 +393,7 @@ struct nvme_dlfw {
 #define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
 #define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
 #define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
-#define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_user_io)
+#define NVME_IOCTL_SUBMIT_IO	_IOW('N', 0x43, struct nvme_user_io)
 #define NVME_IOCTL_DOWNLOAD_FW	_IOW('N', 0x44, struct nvme_dlfw)
 #define NVME_IOCTL_ACTIVATE_FW	_IO('N', 0x45)
 
-- 
cgit v1.2.3


From 7f53f9d2424533256ae86f7df5661a17de743de8 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Tue, 22 Mar 2011 15:55:45 -0400
Subject: NVMe: Correct the Controller Configuration settings

The arbitration field was extended by one bit, shifting the shutdown
notification bits by one.  Also, the SQ/CQ entry size was made
configurable for future extensions.

Reported-by: Paul Luse <paul.e.luse@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c |  1 +
 include/linux/nvme.h | 10 ++++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index d3eeca5a3c4c..014a7f6e39bc 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -905,6 +905,7 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 	dev->ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM;
 	dev->ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
 	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
 
 	writel(0, &dev->bar->cc);
 	writel(aqa, &dev->bar->aqa);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 347ad5f9a721..9d6febb91521 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -41,10 +41,12 @@ enum {
 	NVME_CC_MPS_SHIFT	= 7,
 	NVME_CC_ARB_RR		= 0 << 11,
 	NVME_CC_ARB_WRRU	= 1 << 11,
-	NVME_CC_ARB_VS		= 3 << 11,
-	NVME_CC_SHN_NONE	= 0 << 13,
-	NVME_CC_SHN_NORMAL	= 1 << 13,
-	NVME_CC_SHN_ABRUPT	= 2 << 13,
+	NVME_CC_ARB_VS		= 7 << 11,
+	NVME_CC_SHN_NONE	= 0 << 14,
+	NVME_CC_SHN_NORMAL	= 1 << 14,
+	NVME_CC_SHN_ABRUPT	= 2 << 14,
+	NVME_CC_IOSQES		= 6 << 16,
+	NVME_CC_IOCQES		= 4 << 20,
 	NVME_CSTS_RDY		= 1 << 0,
 	NVME_CSTS_CFS		= 1 << 1,
 	NVME_CSTS_SHST_NORMAL	= 0 << 2,
-- 
cgit v1.2.3


From 22605f96810d073eb74051d0295b6577d6a6a563 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Tue, 19 Apr 2011 15:04:20 -0400
Subject: NVMe: Time out initialisation after a few seconds

THe device reports (in its capability register) how long it will take
to initialise.  If that time elapses before the ready bit becomes set,
conclude the device is broken and refuse to initialise it.  Log a nice
error message so the user knows why we did nothing.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 10 ++++++++++
 include/linux/nvme.h |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index bcc780ac4ec0..57f2b33a47dd 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -893,6 +893,8 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
 	u32 aqa;
+	u64 cap;
+	unsigned long timeout;
 	struct nvme_queue *nvmeq;
 
 	dev->dbs = ((void __iomem *)dev->bar) + 4096;
@@ -915,10 +917,18 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 	writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
 	writel(dev->ctrl_config, &dev->bar->cc);
 
+	cap = readq(&dev->bar->cap);
+	timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+
 	while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
 		msleep(100);
 		if (fatal_signal_pending(current))
 			return -EINTR;
+		if (time_after(jiffies, timeout)) {
+			dev_err(&dev->pci_dev->dev,
+				"Device not ready; aborting initialisation\n");
+			return -ENODEV;
+		}
 	}
 
 	result = queue_request_irq(dev, nvmeq, "nvme admin");
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 9d6febb91521..a19304fefa7d 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -35,6 +35,8 @@ struct nvme_bar {
 	__u64			acq;	/* Admin CQ Base Address */
 };
 
+#define NVME_CAP_TIMEOUT(cap)	(((cap) >> 24) & 0xff)
+
 enum {
 	NVME_CC_ENABLE		= 1 << 0,
 	NVME_CC_CSS_NVM		= 0 << 4,
-- 
cgit v1.2.3


From 6bbf1acddeed0bfb345a5578f9fcada16f1e514f Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Fri, 20 May 2011 13:03:42 -0400
Subject: NVMe: Rework ioctls

Remove the special-purpose IDENTIFY, GET_RANGE_TYPE, DOWNLOAD_FIRMWARE
and ACTIVATE_FIRMWARE commands.  Replace them with a generic ADMIN_CMD
ioctl that can submit any admin command.

Add a new ID ioctl that returns the namespace ID of the queried device.
It corresponds to the SCSI Idlun ioctl.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 128 ++++++++++++++++-----------------------------------
 include/linux/nvme.h |  34 +++++++++-----
 2 files changed, 63 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index f5e51a6116e3..9e3c724b95c3 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -1033,51 +1033,6 @@ static void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
 		put_page(sg_page(&sg[i]));
 }
 
-static int nvme_submit_user_admin_command(struct nvme_dev *dev,
-					unsigned long addr, unsigned length,
-					struct nvme_command *cmd)
-{
-	int err, nents, tmplen = length;
-	struct scatterlist *sg;
-	struct nvme_prps *prps;
-
-	nents = nvme_map_user_pages(dev, 0, addr, length, &sg);
-	if (nents < 0)
-		return nents;
-	prps = nvme_setup_prps(dev, &cmd->common, sg, &tmplen, GFP_KERNEL);
-	if (tmplen != length)
-		err = -ENOMEM;
-	else
-		err = nvme_submit_admin_cmd(dev, cmd, NULL);
-	nvme_unmap_user_pages(dev, 0, addr, length, sg, nents);
-	nvme_free_prps(dev, prps);
-	return err ? -EIO : 0;
-}
-
-static int nvme_identify(struct nvme_ns *ns, unsigned long addr, int cns)
-{
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.identify.opcode = nvme_admin_identify;
-	c.identify.nsid = cns ? 0 : cpu_to_le32(ns->ns_id);
-	c.identify.cns = cpu_to_le32(cns);
-
-	return nvme_submit_user_admin_command(ns->dev, addr, 4096, &c);
-}
-
-static int nvme_get_range_type(struct nvme_ns *ns, unsigned long addr)
-{
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.features.opcode = nvme_admin_get_features;
-	c.features.nsid = cpu_to_le32(ns->ns_id);
-	c.features.fid = cpu_to_le32(NVME_FEAT_LBA_RANGE);
-
-	return nvme_submit_user_admin_command(ns->dev, addr, 4096, &c);
-}
-
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
 	struct nvme_dev *dev = ns->dev;
@@ -1096,10 +1051,11 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	switch (io.opcode) {
 	case nvme_cmd_write:
 	case nvme_cmd_read:
+	case nvme_cmd_compare:
 		nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr,
 								length, &sg);
 	default:
-		return -EFAULT;
+		return -EINVAL;
 	}
 
 	if (nents < 0)
@@ -1137,70 +1093,66 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	return status;
 }
 
-static int nvme_download_firmware(struct nvme_ns *ns,
-						struct nvme_dlfw __user *udlfw)
+static int nvme_user_admin_cmd(struct nvme_ns *ns,
+					struct nvme_admin_cmd __user *ucmd)
 {
 	struct nvme_dev *dev = ns->dev;
-	struct nvme_dlfw dlfw;
+	struct nvme_admin_cmd cmd;
 	struct nvme_command c;
-	int nents, status, length;
+	int status, length, nents = 0;
 	struct scatterlist *sg;
-	struct nvme_prps *prps;
+	struct nvme_prps *prps = NULL;
 
-	if (copy_from_user(&dlfw, udlfw, sizeof(dlfw)))
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
 		return -EFAULT;
-	if (dlfw.length >= (1 << 30))
-		return -EINVAL;
-	length = dlfw.length * 4;
-
-	nents = nvme_map_user_pages(dev, 1, dlfw.addr, length, &sg);
-	if (nents < 0)
-		return nents;
 
 	memset(&c, 0, sizeof(c));
-	c.dlfw.opcode = nvme_admin_download_fw;
-	c.dlfw.numd = cpu_to_le32(dlfw.length);
-	c.dlfw.offset = cpu_to_le32(dlfw.offset);
-	prps = nvme_setup_prps(dev, &c.common, sg, &length, GFP_KERNEL);
-	if (length != dlfw.length * 4)
+	c.common.opcode = cmd.opcode;
+	c.common.flags = cmd.flags;
+	c.common.nsid = cpu_to_le32(cmd.nsid);
+	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
+	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
+	c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
+	c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
+	c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
+	c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
+	c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
+	c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
+
+	length = cmd.data_len;
+	if (cmd.data_len) {
+		nents = nvme_map_user_pages(dev, 1, cmd.addr, length, &sg);
+		if (nents < 0)
+			return nents;
+		prps = nvme_setup_prps(dev, &c.common, sg, &length, GFP_KERNEL);
+	}
+
+	if (length != cmd.data_len)
 		status = -ENOMEM;
 	else
 		status = nvme_submit_admin_cmd(dev, &c, NULL);
-	nvme_unmap_user_pages(dev, 0, dlfw.addr, dlfw.length * 4, sg, nents);
-	nvme_free_prps(dev, prps);
+	if (cmd.data_len) {
+		nvme_unmap_user_pages(dev, 0, cmd.addr, cmd.data_len, sg,
+									nents);
+		nvme_free_prps(dev, prps);
+	}
 	return status;
 }
 
-static int nvme_activate_firmware(struct nvme_ns *ns, unsigned long arg)
-{
-	struct nvme_dev *dev = ns->dev;
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_admin_activate_fw;
-	c.common.rsvd10[0] = cpu_to_le32(arg);
-
-	return nvme_submit_admin_cmd(dev, &c, NULL);
-}
-
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 							unsigned long arg)
 {
 	struct nvme_ns *ns = bdev->bd_disk->private_data;
 
 	switch (cmd) {
-	case NVME_IOCTL_IDENTIFY_NS:
-		return nvme_identify(ns, arg, 0);
-	case NVME_IOCTL_IDENTIFY_CTRL:
-		return nvme_identify(ns, arg, 1);
-	case NVME_IOCTL_GET_RANGE_TYPE:
-		return nvme_get_range_type(ns, arg);
+	case NVME_IOCTL_ID:
+		return ns->ns_id;
+	case NVME_IOCTL_ADMIN_CMD:
+		return nvme_user_admin_cmd(ns, (void __user *)arg);
 	case NVME_IOCTL_SUBMIT_IO:
 		return nvme_submit_io(ns, (void __user *)arg);
-	case NVME_IOCTL_DOWNLOAD_FW:
-		return nvme_download_firmware(ns, (void __user *)arg);
-	case NVME_IOCTL_ACTIVATE_FW:
-		return nvme_activate_firmware(ns, arg);
 	default:
 		return -ENOTTY;
 	}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index a19304fefa7d..c96ab0f5ef6f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -153,11 +153,11 @@ struct nvme_common_command {
 	__u8			flags;
 	__u16			command_id;
 	__le32			nsid;
-	__u64			rsvd2;
+	__u32			cdw2[2];
 	__le64			metadata;
 	__le64			prp1;
 	__le64			prp2;
-	__u32			rsvd10[6];
+	__u32			cdw10[6];
 };
 
 struct nvme_rw_command {
@@ -388,17 +388,29 @@ struct nvme_user_io {
 	__u16	appmask;
 };
 
-struct nvme_dlfw {
+struct nvme_admin_cmd {
+	__u8	opcode;
+	__u8	flags;
+	__u16	rsvd1;
+	__u32	nsid;
+	__u32	cdw2;
+	__u32	cdw3;
+	__u64	metadata;
 	__u64	addr;
-	__u32	length;	/* In dwords */
-	__u32	offset;	/* In dwords */
+	__u32	metadata_len;
+	__u32	data_len;
+	__u32	cdw10;
+	__u32	cdw11;
+	__u32	cdw12;
+	__u32	cdw13;
+	__u32	cdw14;
+	__u32	cdw15;
+	__u32	timeout_ms;
+	__u32	result;
 };
 
-#define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
-#define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
-#define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
-#define NVME_IOCTL_SUBMIT_IO	_IOW('N', 0x43, struct nvme_user_io)
-#define NVME_IOCTL_DOWNLOAD_FW	_IOW('N', 0x44, struct nvme_dlfw)
-#define NVME_IOCTL_ACTIVATE_FW	_IO('N', 0x45)
+#define NVME_IOCTL_ID		_IO('N', 0x40)
+#define NVME_IOCTL_ADMIN_CMD	_IOWR('N', 0x41, struct nvme_admin_cmd)
+#define NVME_IOCTL_SUBMIT_IO	_IOW('N', 0x42, struct nvme_user_io)
 
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From f1938f6e1ee1583c87ec74dc406fdd8694e99ac8 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Thu, 20 Oct 2011 17:00:41 -0400
Subject: NVMe: Implement doorbell stride capability

The doorbell stride allows devices to spread out their doorbells instead
of packing them tightly.  This feature was added as part of ECN 003.

This patch also enables support for more than 512 queues :-)

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 17 ++++++++++++++---
 include/linux/nvme.h |  1 +
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index cfe5932821d8..a17f80fa3881 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -70,6 +70,7 @@ struct nvme_dev {
 	struct dma_pool *prp_small_pool;
 	int instance;
 	int queue_count;
+	int db_stride;
 	u32 ctrl_config;
 	struct msix_entry *entry;
 	struct nvme_bar __iomem *bar;
@@ -672,7 +673,7 @@ static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
 	if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
 		return IRQ_NONE;
 
-	writel(head, nvmeq->q_db + 1);
+	writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride));
 	nvmeq->cq_head = head;
 	nvmeq->cq_phase = phase;
 
@@ -889,7 +890,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	init_waitqueue_head(&nvmeq->sq_full);
 	init_waitqueue_entry(&nvmeq->sq_cong_wait, nvme_thread);
 	bio_list_init(&nvmeq->sq_cong);
-	nvmeq->q_db = &dev->dbs[qid * 2];
+	nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
 	nvmeq->q_depth = depth;
 	nvmeq->cq_vector = vector;
 
@@ -981,6 +982,7 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 
 	cap = readq(&dev->bar->cap);
 	timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+	dev->db_stride = NVME_CAP_STRIDE(cap);
 
 	while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
 		msleep(100);
@@ -1357,7 +1359,7 @@ static int set_queue_count(struct nvme_dev *dev, int count)
 
 static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
 {
-	int result, cpu, i, nr_io_queues;
+	int result, cpu, i, nr_io_queues, db_bar_size;
 
 	nr_io_queues = num_online_cpus();
 	result = set_queue_count(dev, nr_io_queues);
@@ -1369,6 +1371,15 @@ static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
 	/* Deregister the admin queue's interrupt */
 	free_irq(dev->entry[0].vector, dev->queues[0]);
 
+	db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
+	if (db_bar_size > 8192) {
+		iounmap(dev->bar);
+		dev->bar = ioremap(pci_resource_start(dev->pci_dev, 0),
+								db_bar_size);
+		dev->dbs = ((void __iomem *)dev->bar) + 4096;
+		dev->queues[0]->q_db = dev->dbs;
+	}
+
 	for (i = 0; i < nr_io_queues; i++)
 		dev->entry[i].entry = i;
 	for (;;) {
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index c96ab0f5ef6f..2a2c535c8345 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -36,6 +36,7 @@ struct nvme_bar {
 };
 
 #define NVME_CAP_TIMEOUT(cap)	(((cap) >> 24) & 0xff)
+#define NVME_CAP_STRIDE(cap)	(((cap) >> 32) & 0xf)
 
 enum {
 	NVME_CC_ENABLE		= 1 << 0,
-- 
cgit v1.2.3


From 010e646ba2fdfc558048a97da746381c35836280 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Fri, 4 Nov 2011 16:24:23 -0400
Subject: NVMe: Update Identify Controller data structure

The driver was still using an old definition of Identify Controller
which only came to light once we started using the 'number of namespaces'
field properly.

Reported-by: Nisheeth Bhat <nisheeth.bhat@intel.com>
Reported-by: Khosrow Panah <Khosrow.Panah@idt.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2a2c535c8345..9490a00529f4 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -57,6 +57,18 @@ enum {
 	NVME_CSTS_SHST_CMPLT	= 2 << 2,
 };
 
+struct nvme_id_power_state {
+	__le16			max_power;	/* centiwatts */
+	__u16			rsvd2;
+	__le32			entry_lat;	/* microseconds */
+	__le32			exit_lat;	/* microseconds */
+	__u8			read_tput;
+	__u8			read_lat;
+	__u8			write_tput;
+	__u8			write_lat;
+	__u8			rsvd16[16];
+};
+
 #define NVME_VS(major, minor)	(major << 16 | minor)
 
 struct nvme_id_ctrl {
@@ -65,9 +77,11 @@ struct nvme_id_ctrl {
 	char			sn[20];
 	char			mn[40];
 	char			fr[8];
-	__le32			nn;
 	__u8			rab;
-	__u8			rsvd77[178];
+	__u8			ieee[3];
+	__u8			mic;
+	__u8			mdts;
+	__u8			rsvd78[178];
 	__le16			oacs;
 	__u8			acl;
 	__u8			aerl;
@@ -76,15 +90,18 @@ struct nvme_id_ctrl {
 	__u8			elpe;
 	__u8			npss;
 	__u8			rsvd264[248];
-	__le64			psd[32];
+	__u8			sqes;
+	__u8			cqes;
+	__u8			rsvd514[2];
+	__le32			nn;
 	__le16			oncs;
 	__le16			fuses;
 	__u8			fna;
 	__u8			vwc;
 	__le16			awun;
 	__le16			awupf;
-	__u8			rsvd778[246];
-	__u8			cmdset[2048];
+	__u8			rsvd530[1518];
+	struct nvme_id_power_state	psd[32];
 	__u8			vs[1024];
 };
 
-- 
cgit v1.2.3


From 11be0b3c18d654a8d5ed441fa9e988193a57c1d2 Mon Sep 17 00:00:00 2001
From: Vsevolod Alekseev <vsevolod.alekseev@gmx.com>
Date: Sat, 5 Nov 2011 02:35:28 -0700
Subject: security.h: fix misc typos/grammar errors in comments

Fix various typos/grammar errors in include/linux/security.h comments (no code changes).

Signed-off-by: Vsevolod Alekseev <vsevolod.alekseev@gmx.com>
Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/security.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 19d8e04e1688..94c35336b86b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -186,7 +186,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * Security module identifier.
  *
  * @name:
- *	A string that acts as a unique identifeir for the LSM with max number
+ *	A string that acts as a unique identifier for the LSM with max number
  *	of characters = SECURITY_NAME_MAX.
  *
  * Security hooks for program execution operations.
@@ -275,7 +275,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@copy copied data which will be passed to the security module.
  *	Returns 0 if the copy was successful.
  * @sb_remount:
- *	Extracts security system specifc mount options and verifys no changes
+ *	Extracts security system specific mount options and verifies no changes
  *	are being made to those options.
  *	@sb superblock being remounted
  *	@data contains the filesystem-specific data.
@@ -380,15 +380,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	Return 0 if permission is granted.
  * @inode_mkdir:
  *	Check permissions to create a new directory in the existing directory
- *	associated with inode strcture @dir.
- *	@dir containst the inode structure of parent of the directory to be created.
+ *	associated with inode structure @dir.
+ *	@dir contains the inode structure of parent of the directory to be created.
  *	@dentry contains the dentry structure of new directory.
  *	@mode contains the mode of new directory.
  *	Return 0 if permission is granted.
  * @path_mkdir:
  *	Check permissions to create a new directory in the existing directory
- *	associated with path strcture @path.
- *	@dir containst the path structure of parent of the directory
+ *	associated with path structure @path.
+ *	@dir contains the path structure of parent of the directory
  *	to be created.
  *	@dentry contains the dentry structure of new directory.
  *	@mode contains the mode of new directory.
@@ -578,7 +578,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@file contains the file structure.
  *	@cmd contains the operation to perform.
  *	@arg contains the operational arguments.
- *	Check permission for an ioctl operation on @file.  Note that @arg can
+ *	Check permission for an ioctl operation on @file.  Note that @arg
  *	sometimes represents a user space pointer; in other cases, it may be a
  *	simple integer value.  When @arg represents a user space pointer, it
  *	should never be used by the security module.
@@ -606,7 +606,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	Return 0 if permission is granted.
  * @file_fcntl:
  *	Check permission before allowing the file operation specified by @cmd
- *	from being performed on the file @file.  Note that @arg can sometimes
+ *	from being performed on the file @file.  Note that @arg sometimes
  *	represents a user space pointer; in other cases, it may be a simple
  *	integer value.  When @arg represents a user space pointer, it should
  *	never be used by the security module.
@@ -793,7 +793,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	information can be saved using the eff_cap field of the
  *	netlink_skb_parms structure.  Also may be used to provide fine
  *	grained control over message transmission.
- *	@sk associated sock of task sending the message.,
+ *	@sk associated sock of task sending the message.
  *	@skb contains the sk_buff structure for the netlink message.
  *	Return 0 if the information was successfully saved and message
  *	is allowed to be transmitted.
@@ -1080,9 +1080,9 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	should free it.
  *	@key points to the key to be queried.
  *	@_buffer points to a pointer that should be set to point to the
- *	 resulting string (if no label or an error occurs).
+ *	resulting string (if no label or an error occurs).
  *	Return the length of the string (including terminating NUL) or -ve if
- *      an error.
+ *	an error.
  *	May also return 0 (and a NULL buffer pointer) if there is no label.
  *
  * Security hooks affecting all System V IPC operations.
@@ -1268,7 +1268,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	credentials.
  *	@tsk contains the task_struct for the process.
  *	@cred contains the credentials to use.
- *      @ns contains the user namespace we want the capability in
+ *	@ns contains the user namespace we want the capability in
  *	@cap contains the capability <include/linux/capability.h>.
  *	@audit: Whether to write an audit message or not
  *	Return 0 if the capability is granted for @tsk.
@@ -1370,7 +1370,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * 	@ctxlen contains the length of @ctx.
  *
  * @inode_getsecctx:
- * 	Returns a string containing all relavent security context information
+ *	Returns a string containing all relevant security context information
  *
  * 	@inode we wish to get the security context of.
  *	@ctx is a pointer in which to place the allocated security context.
-- 
cgit v1.2.3


From f8beab2bb611d735767871e0e1a12dc6a0def7b1 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 28 Oct 2011 23:50:49 +0200
Subject: regmap: Add a reusable irq_chip for regmap based interrupt
 controllers

There seem to be lots of regmap-using devices with very similar interrupt
controllers with a small bank of interrupt registers and mask registers
with an interrupt per bit. This won't cover everything but it's a good
start.

Each chip supplies a base for the status registers, a base for the mask
registers, an optional base for writing acknowledgements (which may be the
same as the status registers) and an array of bits within each of these
register banks which indicate the interrupt.

There is an assumption that the bit for each interrupt will be the same
in each of the register bank.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/Kconfig      |   3 +
 drivers/base/regmap/Makefile     |   1 +
 drivers/base/regmap/regmap-irq.c | 284 +++++++++++++++++++++++++++++++++++++++
 include/linux/regmap.h           |  47 +++++++
 4 files changed, 335 insertions(+)
 create mode 100644 drivers/base/regmap/regmap-irq.c

(limited to 'include/linux')

diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig
index 2fc6a66f39a4..0f6c7fb418e8 100644
--- a/drivers/base/regmap/Kconfig
+++ b/drivers/base/regmap/Kconfig
@@ -13,3 +13,6 @@ config REGMAP_I2C
 
 config REGMAP_SPI
 	tristate
+
+config REGMAP_IRQ
+	bool
diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile
index 0573c8a9dacb..ce2d18a6465b 100644
--- a/drivers/base/regmap/Makefile
+++ b/drivers/base/regmap/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_REGMAP) += regmap.o regcache.o regcache-indexed.o regcache-rbtree.o
 obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o
 obj-$(CONFIG_REGMAP_I2C) += regmap-i2c.o
 obj-$(CONFIG_REGMAP_SPI) += regmap-spi.o
+obj-$(CONFIG_REGMAP_IRQ) += regmap-irq.o
diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
new file mode 100644
index 000000000000..bd54f63be9ed
--- /dev/null
+++ b/drivers/base/regmap/regmap-irq.c
@@ -0,0 +1,284 @@
+/*
+ * regmap based irq_chip
+ *
+ * Copyright 2011 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/regmap.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+
+#include "internal.h"
+
+struct regmap_irq_chip_data {
+	struct mutex lock;
+
+	struct regmap *map;
+	struct regmap_irq_chip *chip;
+
+	int irq_base;
+
+	void *status_reg_buf;
+	unsigned int *status_buf;
+	unsigned int *mask_buf;
+	unsigned int *mask_buf_def;
+};
+
+static inline const
+struct regmap_irq *irq_to_regmap_irq(struct regmap_irq_chip_data *data,
+				     int irq)
+{
+	return &data->chip->irqs[irq - data->irq_base];
+}
+
+static void regmap_irq_lock(struct irq_data *data)
+{
+	struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data);
+
+	mutex_lock(&d->lock);
+}
+
+static void regmap_irq_sync_unlock(struct irq_data *data)
+{
+	struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data);
+	int i, ret;
+
+	/*
+	 * If there's been a change in the mask write it back to the
+	 * hardware.  We rely on the use of the regmap core cache to
+	 * suppress pointless writes.
+	 */
+	for (i = 0; i < d->chip->num_regs; i++) {
+		ret = regmap_update_bits(d->map, d->chip->mask_base + i,
+					 d->mask_buf_def[i], d->mask_buf[i]);
+		if (ret != 0)
+			dev_err(d->map->dev, "Failed to sync masks in %x\n",
+				d->chip->mask_base + i);
+	}
+
+	mutex_unlock(&d->lock);
+}
+
+static void regmap_irq_enable(struct irq_data *data)
+{
+	struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data);
+	const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->irq);
+
+	d->mask_buf[irq_data->reg_offset] &= ~irq_data->mask;
+}
+
+static void regmap_irq_disable(struct irq_data *data)
+{
+	struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data);
+	const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->irq);
+
+	d->mask_buf[irq_data->reg_offset] |= irq_data->mask;
+}
+
+static struct irq_chip regmap_irq_chip = {
+	.name			= "regmap",
+	.irq_bus_lock		= regmap_irq_lock,
+	.irq_bus_sync_unlock	= regmap_irq_sync_unlock,
+	.irq_disable		= regmap_irq_disable,
+	.irq_enable		= regmap_irq_enable,
+};
+
+static irqreturn_t regmap_irq_thread(int irq, void *d)
+{
+	struct regmap_irq_chip_data *data = d;
+	struct regmap_irq_chip *chip = data->chip;
+	struct regmap *map = data->map;
+	int ret, i;
+	u8 *buf8 = data->status_reg_buf;
+	u16 *buf16 = data->status_reg_buf;
+	u32 *buf32 = data->status_reg_buf;
+
+	ret = regmap_bulk_read(map, chip->status_base, data->status_reg_buf,
+			       chip->num_regs);
+	if (ret != 0) {
+		dev_err(map->dev, "Failed to read IRQ status: %d\n", ret);
+		return IRQ_NONE;
+	}
+
+	/*
+	 * Ignore masked IRQs and ack if we need to; we ack early so
+	 * there is no race between handling and acknowleding the
+	 * interrupt.  We assume that typically few of the interrupts
+	 * will fire simultaneously so don't worry about overhead from
+	 * doing a write per register.
+	 */
+	for (i = 0; i < data->chip->num_regs; i++) {
+		switch (map->format.val_bytes) {
+		case 1:
+			data->status_buf[i] = buf8[i];
+			break;
+		case 2:
+			data->status_buf[i] = buf16[i];
+			break;
+		case 4:
+			data->status_buf[i] = buf32[i];
+			break;
+		default:
+			BUG();
+			return IRQ_NONE;
+		}
+
+		data->status_buf[i] &= ~data->mask_buf[i];
+
+		if (data->status_buf[i] && chip->ack_base) {
+			ret = regmap_write(map, chip->ack_base + i,
+					   data->status_buf[i]);
+			if (ret != 0)
+				dev_err(map->dev, "Failed to ack 0x%x: %d\n",
+					chip->ack_base + i, ret);
+		}
+	}
+
+	for (i = 0; i < chip->num_irqs; i++) {
+		if (data->status_buf[chip->irqs[i].reg_offset] &
+		    chip->irqs[i].mask) {
+			handle_nested_irq(data->irq_base + i);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * regmap_add_irq_chip(): Use standard regmap IRQ controller handling
+ *
+ * map:       The regmap for the device.
+ * irq:       The IRQ the device uses to signal interrupts
+ * irq_flags: The IRQF_ flags to use for the primary interrupt.
+ * chip:      Configuration for the interrupt controller.
+ * data:      Runtime data structure for the controller, allocated on success
+ *
+ * Returns 0 on success or an errno on failure.
+ *
+ * In order for this to be efficient the chip really should use a
+ * register cache.  The chip driver is responsible for restoring the
+ * register values used by the IRQ controller over suspend and resume.
+ */
+int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
+			int irq_base, struct regmap_irq_chip *chip,
+			struct regmap_irq_chip_data **data)
+{
+	struct regmap_irq_chip_data *d;
+	int cur_irq, i;
+	int ret = -ENOMEM;
+
+	irq_base = irq_alloc_descs(irq_base, 0, chip->num_irqs, 0);
+	if (irq_base < 0) {
+		dev_warn(map->dev, "Failed to allocate IRQs: %d\n",
+			 irq_base);
+		return irq_base;
+	}
+
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->status_buf = kzalloc(sizeof(unsigned int) * chip->num_regs,
+				GFP_KERNEL);
+	if (!d->status_buf)
+		goto err_alloc;
+
+	d->status_reg_buf = kzalloc(map->format.val_bytes * chip->num_regs,
+				    GFP_KERNEL);
+	if (!d->status_reg_buf)
+		goto err_alloc;
+
+	d->mask_buf = kzalloc(sizeof(unsigned int) * chip->num_regs,
+			      GFP_KERNEL);
+	if (!d->mask_buf)
+		goto err_alloc;
+
+	d->mask_buf_def = kzalloc(sizeof(unsigned int) * chip->num_regs,
+				  GFP_KERNEL);
+	if (!d->mask_buf_def)
+		goto err_alloc;
+
+	d->map = map;
+	d->chip = chip;
+	d->irq_base = irq_base;
+	mutex_init(&d->lock);
+
+	for (i = 0; i < chip->num_irqs; i++)
+		d->mask_buf_def[chip->irqs[i].reg_offset]
+			|= chip->irqs[i].mask;
+
+	/* Mask all the interrupts by default */
+	for (i = 0; i < chip->num_regs; i++) {
+		d->mask_buf[i] = d->mask_buf_def[i];
+		ret = regmap_write(map, chip->mask_base + i, d->mask_buf[i]);
+		if (ret != 0) {
+			dev_err(map->dev, "Failed to set masks in 0x%x: %d\n",
+				chip->mask_base + i, ret);
+			goto err_alloc;
+		}
+	}
+
+	/* Register them with genirq */
+	for (cur_irq = irq_base;
+	     cur_irq < chip->num_irqs + irq_base;
+	     cur_irq++) {
+		irq_set_chip_data(cur_irq, d);
+		irq_set_chip_and_handler(cur_irq, &regmap_irq_chip,
+					 handle_edge_irq);
+		irq_set_nested_thread(cur_irq, 1);
+
+		/* ARM needs us to explicitly flag the IRQ as valid
+		 * and will set them noprobe when we do so. */
+#ifdef CONFIG_ARM
+		set_irq_flags(cur_irq, IRQF_VALID);
+#else
+		irq_set_noprobe(cur_irq);
+#endif
+	}
+
+	ret = request_threaded_irq(irq, NULL, regmap_irq_thread, irq_flags,
+				   chip->name, d);
+	if (ret != 0) {
+		dev_err(map->dev, "Failed to request IRQ %d: %d\n", irq, ret);
+		goto err_alloc;
+	}
+
+	return 0;
+
+err_alloc:
+	kfree(d->mask_buf_def);
+	kfree(d->mask_buf);
+	kfree(d->status_reg_buf);
+	kfree(d->status_buf);
+	kfree(d);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_add_irq_chip);
+
+/**
+ * regmap_del_irq_chip(): Stop interrupt handling for a regmap IRQ chip
+ *
+ * @irq: Primary IRQ for the device
+ * @d:   regmap_irq_chip_data allocated by regmap_add_irq_chip()
+ */
+void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d)
+{
+	if (!d)
+		return;
+
+	free_irq(irq, d);
+	kfree(d->mask_buf_def);
+	kfree(d->mask_buf);
+	kfree(d->status_reg_buf);
+	kfree(d->status_buf);
+	kfree(d);
+}
+EXPORT_SYMBOL_GPL(regmap_del_irq_chip);
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 690276a642cf..bd54cecdfdf8 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -144,4 +144,51 @@ int regcache_sync(struct regmap *map);
 void regcache_cache_only(struct regmap *map, bool enable);
 void regcache_cache_bypass(struct regmap *map, bool enable);
 
+/**
+ * Description of an IRQ for the generic regmap irq_chip.
+ *
+ * @reg_offset: Offset of the status/mask register within the bank
+ * @mask:       Mask used to flag/control the register.
+ */
+struct regmap_irq {
+	unsigned int reg_offset;
+	unsigned int mask;
+};
+
+/**
+ * Description of a generic regmap irq_chip.  This is not intended to
+ * handle every possible interrupt controller, but it should handle a
+ * substantial proportion of those that are found in the wild.
+ *
+ * @name:        Descriptive name for IRQ controller.
+ *
+ * @status_base: Base status register address.
+ * @mask_base:   Base mask register address.
+ * @ack_base:    Base ack address.  If zero then the chip is clear on read.
+ *
+ * @num_regs:    Number of registers in each control bank.
+ * @irqs:        Descriptors for individual IRQs.  Interrupt numbers are
+ *               assigned based on the index in the array of the interrupt.
+ * @num_irqs:    Number of descriptors.
+ */
+struct regmap_irq_chip {
+	const char *name;
+
+	unsigned int status_base;
+	unsigned int mask_base;
+	unsigned int ack_base;
+
+	int num_regs;
+
+	const struct regmap_irq *irqs;
+	int num_irqs;
+};
+
+struct regmap_irq_chip_data;
+
+int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
+			int irq_base, struct regmap_irq_chip *chip,
+			struct regmap_irq_chip_data **data);
+void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *data);
+
 #endif
-- 
cgit v1.2.3


From 8ae0d7e8a918e9603748abe9b31984fc5d96abb3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 26 Oct 2011 10:34:22 +0200
Subject: regmap: Track if the register cache is dirty and suppress unneeded
 syncs

Allow drivers to optimise out the register cache sync if they didn't need
to do one. If the hardware is desynced from the register cache (by power
loss for example) then the driver should call regcache_mark_dirty() to
let the core know about this.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/internal.h |  1 +
 drivers/base/regmap/regcache.c | 19 +++++++++++++++++++
 drivers/base/regmap/regmap.c   |  4 +++-
 include/linux/regmap.h         |  1 +
 4 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 348ff02eb93e..6483e0bda0cf 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -74,6 +74,7 @@ struct regmap {
 	struct reg_default *reg_defaults;
 	const void *reg_defaults_raw;
 	void *cache;
+	bool cache_dirty;
 };
 
 struct regcache_ops {
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index 666f6f5011dc..6ab9f0384d82 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -241,6 +241,8 @@ int regcache_sync(struct regmap *map)
 		map->cache_ops->name);
 	name = map->cache_ops->name;
 	trace_regcache_sync(map->dev, name, "start");
+	if (!map->cache_dirty)
+		goto out;
 	if (map->cache_ops->sync) {
 		ret = map->cache_ops->sync(map);
 	} else {
@@ -290,6 +292,23 @@ void regcache_cache_only(struct regmap *map, bool enable)
 }
 EXPORT_SYMBOL_GPL(regcache_cache_only);
 
+/**
+ * regcache_mark_dirty: Mark the register cache as dirty
+ *
+ * @map: map to mark
+ *
+ * Mark the register cache as dirty, for example due to the device
+ * having been powered down for suspend.  If the cache is not marked
+ * as dirty then the cache sync will be suppressed.
+ */
+void regcache_mark_dirty(struct regmap *map)
+{
+	mutex_lock(&map->lock);
+	map->cache_dirty = true;
+	mutex_unlock(&map->lock);
+}
+EXPORT_SYMBOL_GPL(regcache_mark_dirty);
+
 /**
  * regcache_cache_bypass: Put a register map into cache bypass mode
  *
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index bf441db1ee90..3aca18dbf367 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -306,8 +306,10 @@ int _regmap_write(struct regmap *map, unsigned int reg,
 		ret = regcache_write(map, reg, val);
 		if (ret != 0)
 			return ret;
-		if (map->cache_only)
+		if (map->cache_only) {
+			map->cache_dirty = true;
 			return 0;
+		}
 	}
 
 	trace_regmap_reg_write(map->dev, reg, val);
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 690276a642cf..32043a9749e6 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -143,5 +143,6 @@ int regmap_update_bits(struct regmap *map, unsigned int reg,
 int regcache_sync(struct regmap *map);
 void regcache_cache_only(struct regmap *map, bool enable);
 void regcache_cache_bypass(struct regmap *map, bool enable);
+void regcache_mark_dirty(struct regmap *map);
 
 #endif
-- 
cgit v1.2.3


From 50b776fc71c13663eb7434f634f2b796de5c9885 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 2 Nov 2011 15:00:03 +0000
Subject: regmap: Rename LZO cache type to compressed

Users probably don't care about the specific compression algorithm and
we might want to use a different algorithm (snappy being the one I'm
thinking of right now) so update the public interface to have a more
generic name.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/regcache-lzo.c | 2 +-
 include/linux/regmap.h             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regcache-lzo.c b/drivers/base/regmap/regcache-lzo.c
index 066aeece3626..854448d09293 100644
--- a/drivers/base/regmap/regcache-lzo.c
+++ b/drivers/base/regmap/regcache-lzo.c
@@ -351,7 +351,7 @@ static int regcache_lzo_sync(struct regmap *map)
 }
 
 struct regcache_ops regcache_lzo_ops = {
-	.type = REGCACHE_LZO,
+	.type = REGCACHE_COMPRESSED,
 	.name = "lzo",
 	.init = regcache_lzo_init,
 	.exit = regcache_lzo_exit,
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 32043a9749e6..bebda1481f23 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -25,7 +25,7 @@ enum regcache_type {
 	REGCACHE_NONE,
 	REGCACHE_INDEXED,
 	REGCACHE_RBTREE,
-	REGCACHE_LZO
+	REGCACHE_COMPRESSED
 };
 
 /**
-- 
cgit v1.2.3


From 5ce3e312ec5c11abce13215be70700778bf601f0 Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Date: Wed, 31 Aug 2011 14:05:16 +0300
Subject: crypto: GnuPG based MPI lib - header files (part 2)

Adds the multi-precision-integer maths library which was originally taken
from GnuPG and ported to the kernel by (among others) David Howells.
This version is taken from Fedora kernel 2.6.32-71.14.1.el6.
The difference is that checkpatch reported errors and warnings have been fixed.

This library is used to implemenet RSA digital signature verification
used in IMA/EVM integrity protection subsystem.

Due to patch size limitation, the patch is divided into 4 parts.

Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
---
 include/linux/mpi.h    |  146 +++++
 lib/mpi/longlong.h     | 1478 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/mpi/mpi-inline.h   |  122 ++++
 lib/mpi/mpi-internal.h |  261 +++++++++
 4 files changed, 2007 insertions(+)
 create mode 100644 include/linux/mpi.h
 create mode 100644 lib/mpi/longlong.h
 create mode 100644 lib/mpi/mpi-inline.h
 create mode 100644 lib/mpi/mpi-internal.h

(limited to 'include/linux')

diff --git a/include/linux/mpi.h b/include/linux/mpi.h
new file mode 100644
index 000000000000..06f88994ccaa
--- /dev/null
+++ b/include/linux/mpi.h
@@ -0,0 +1,146 @@
+/* mpi.h  -  Multi Precision Integers
+ *	Copyright (C) 1994, 1996, 1998, 1999,
+ *                    2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#ifndef G10_MPI_H
+#define G10_MPI_H
+
+#include <linux/types.h>
+
+/* DSI defines */
+
+#define SHA1_DIGEST_LENGTH   20
+
+/*end of DSI defines */
+
+#define BYTES_PER_MPI_LIMB	(BITS_PER_LONG / 8)
+#define BITS_PER_MPI_LIMB	BITS_PER_LONG
+
+typedef unsigned long int mpi_limb_t;
+typedef signed long int mpi_limb_signed_t;
+
+struct gcry_mpi {
+	int alloced;		/* array size (# of allocated limbs) */
+	int nlimbs;		/* number of valid limbs */
+	int nbits;		/* the real number of valid bits (info only) */
+	int sign;		/* indicates a negative number */
+	unsigned flags;		/* bit 0: array must be allocated in secure memory space */
+	/* bit 1: not used */
+	/* bit 2: the limb is a pointer to some m_alloced data */
+	mpi_limb_t *d;		/* array with the limbs */
+};
+
+typedef struct gcry_mpi *MPI;
+
+#define MPI_NULL NULL
+
+#define mpi_get_nlimbs(a)     ((a)->nlimbs)
+#define mpi_is_neg(a)	      ((a)->sign)
+
+/*-- mpiutil.c --*/
+MPI mpi_alloc(unsigned nlimbs);
+MPI mpi_alloc_secure(unsigned nlimbs);
+MPI mpi_alloc_like(MPI a);
+void mpi_free(MPI a);
+int mpi_resize(MPI a, unsigned nlimbs);
+int mpi_copy(MPI *copy, const MPI a);
+void mpi_clear(MPI a);
+int mpi_set(MPI w, MPI u);
+int mpi_set_ui(MPI w, ulong u);
+MPI mpi_alloc_set_ui(unsigned long u);
+void mpi_m_check(MPI a);
+void mpi_swap(MPI a, MPI b);
+
+/*-- mpicoder.c --*/
+MPI do_encode_md(const void *sha_buffer, unsigned nbits);
+MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread);
+int mpi_fromstr(MPI val, const char *str);
+u32 mpi_get_keyid(MPI a, u32 *keyid);
+void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign);
+void *mpi_get_secure_buffer(MPI a, unsigned *nbytes, int *sign);
+int mpi_set_buffer(MPI a, const void *buffer, unsigned nbytes, int sign);
+
+#define log_mpidump g10_log_mpidump
+
+/*-- mpi-add.c --*/
+int mpi_add_ui(MPI w, MPI u, ulong v);
+int mpi_add(MPI w, MPI u, MPI v);
+int mpi_addm(MPI w, MPI u, MPI v, MPI m);
+int mpi_sub_ui(MPI w, MPI u, ulong v);
+int mpi_sub(MPI w, MPI u, MPI v);
+int mpi_subm(MPI w, MPI u, MPI v, MPI m);
+
+/*-- mpi-mul.c --*/
+int mpi_mul_ui(MPI w, MPI u, ulong v);
+int mpi_mul_2exp(MPI w, MPI u, ulong cnt);
+int mpi_mul(MPI w, MPI u, MPI v);
+int mpi_mulm(MPI w, MPI u, MPI v, MPI m);
+
+/*-- mpi-div.c --*/
+ulong mpi_fdiv_r_ui(MPI rem, MPI dividend, ulong divisor);
+int mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor);
+int mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor);
+int mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor);
+int mpi_tdiv_r(MPI rem, MPI num, MPI den);
+int mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den);
+int mpi_tdiv_q_2exp(MPI w, MPI u, unsigned count);
+int mpi_divisible_ui(const MPI dividend, ulong divisor);
+
+/*-- mpi-gcd.c --*/
+int mpi_gcd(MPI g, const MPI a, const MPI b);
+
+/*-- mpi-pow.c --*/
+int mpi_pow(MPI w, MPI u, MPI v);
+int mpi_powm(MPI res, MPI base, MPI exp, MPI mod);
+
+/*-- mpi-mpow.c --*/
+int mpi_mulpowm(MPI res, MPI *basearray, MPI *exparray, MPI mod);
+
+/*-- mpi-cmp.c --*/
+int mpi_cmp_ui(MPI u, ulong v);
+int mpi_cmp(MPI u, MPI v);
+
+/*-- mpi-scan.c --*/
+int mpi_getbyte(MPI a, unsigned idx);
+void mpi_putbyte(MPI a, unsigned idx, int value);
+unsigned mpi_trailing_zeros(MPI a);
+
+/*-- mpi-bit.c --*/
+void mpi_normalize(MPI a);
+unsigned mpi_get_nbits(MPI a);
+int mpi_test_bit(MPI a, unsigned n);
+int mpi_set_bit(MPI a, unsigned n);
+int mpi_set_highbit(MPI a, unsigned n);
+void mpi_clear_highbit(MPI a, unsigned n);
+void mpi_clear_bit(MPI a, unsigned n);
+int mpi_rshift(MPI x, MPI a, unsigned n);
+
+/*-- mpi-inv.c --*/
+int mpi_invm(MPI x, MPI u, MPI v);
+
+#endif /*G10_MPI_H */
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
new file mode 100644
index 000000000000..b87487b40a8b
--- /dev/null
+++ b/lib/mpi/longlong.h
@@ -0,0 +1,1478 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+ * Note: I added some stuff for use with gnupg
+ *
+ * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
+ *	2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this file; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA. */
+
+/* You have to define the following before including this file:
+ *
+ * UWtype -- An unsigned type, default type for operations (typically a "word")
+ * UHWtype -- An unsigned type, at least half the size of UWtype.
+ * UDWtype -- An unsigned type, at least twice as large a UWtype
+ * W_TYPE_SIZE -- size in bits of UWtype
+ *
+ * SItype, USItype -- Signed and unsigned 32 bit types.
+ * DItype, UDItype -- Signed and unsigned 64 bit types.
+ *
+ * On a 32 bit machine UWtype should typically be USItype;
+ * on a 64 bit machine, UWtype should typically be UDItype.
+*/
+
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+/* This is used to make sure no undesirable sharing between different libraries
+	that use this file takes place.  */
+#ifndef __MPN
+#define __MPN(x) __##x
+#endif
+
+/* Define auxiliary asm macros.
+ *
+ * 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two
+ * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype
+ * word product in HIGH_PROD and LOW_PROD.
+ *
+ * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+ * UDWtype product.  This is just a variant of umul_ppmm.
+
+ * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator) divides a UDWtype, composed by the UWtype integers
+ * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+ * in QUOTIENT and the remainder in REMAINDER.	HIGH_NUMERATOR must be less
+ * than DENOMINATOR for correct operation.  If, in addition, the most
+ * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+ * UDIV_NEEDS_NORMALIZATION is defined to 1.
+ * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
+ * is rounded towards 0.
+ *
+ * 5) count_leading_zeros(count, x) counts the number of zero-bits from the
+ * msb to the first non-zero bit in the UWtype X.  This is the number of
+ * steps X needs to be shifted left to set the msb.  Undefined for X == 0,
+ * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+ *
+ * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+ * from the least significant end.
+ *
+ * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+ * high_addend_2, low_addend_2) adds two UWtype integers, composed by
+ * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+ * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
+ * (i.e. carry out) is not stored anywhere, and is lost.
+ *
+ * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+ * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+ * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
+ * LOW_SUBTRAHEND_2 respectively.  The result is placed in HIGH_DIFFERENCE
+ * and LOW_DIFFERENCE.	Overflow (i.e. carry out) is not stored anywhere,
+ * and is lost.
+ *
+ * If any of these macros are left undefined for a particular CPU,
+ * C macros are used.  */
+
+/* The CPUs come in alphabetical order below.
+ *
+ * Please add support for more CPUs here, or improve the current support
+ * for the CPUs below!	*/
+
+#if defined(__GNUC__) && !defined(NO_ASM)
+
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+	understood by gcc1.	Use cpp to avoid major code duplication.  */
+#if __GNUC__ < 2
+#define __CLOBBER_CC
+#define __AND_CLOBBER_CC
+#else /* __GNUC__ >= 2 */
+#define __CLOBBER_CC : "cc"
+#define __AND_CLOBBER_CC , "cc"
+#endif /* __GNUC__ < 2 */
+
+/***************************************
+	**************  A29K  *****************
+	***************************************/
+#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add %1,%4,%5\n" \
+		"addc %0,%2,%3" \
+	: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+	: "%r" ((USItype)(ah)), \
+		"rI" ((USItype)(bh)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub %1,%4,%5\n" \
+		"subc %0,%2,%3" \
+	: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+	: "r" ((USItype)(ah)), \
+		"rI" ((USItype)(bh)), \
+		"r" ((USItype)(al)), \
+		"rI" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+		USItype __m0 = (m0), __m1 = (m1); \
+		__asm__ ("multiplu %0,%1,%2" \
+		: "=r" ((USItype)(xl)) \
+		: "r" (__m0), \
+			"r" (__m1)); \
+		__asm__ ("multmu %0,%1,%2" \
+		: "=r" ((USItype)(xh)) \
+		: "r" (__m0), \
+			"r" (__m1)); \
+} while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("dividu %0,%3,%4" \
+	: "=r" ((USItype)(q)), \
+		"=q" ((USItype)(r)) \
+	: "1" ((USItype)(n1)), \
+		"r" ((USItype)(n0)), \
+		"r" ((USItype)(d)))
+
+#define count_leading_zeros(count, x) \
+	__asm__ ("clz %0,%1" \
+	: "=r" ((USItype)(count)) \
+	: "r" ((USItype)(x)))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* __a29k__ */
+
+#if defined(__alpha) && W_TYPE_SIZE == 64
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+		UDItype __m0 = (m0), __m1 = (m1); \
+		__asm__ ("umulh %r1,%2,%0" \
+		: "=r" ((UDItype) ph) \
+		: "%rJ" (__m0), \
+			"rI" (__m1)); \
+		(pl) = __m0 * __m1; \
+	} while (0)
+#define UMUL_TIME 46
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { UDItype __r; \
+	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+	(r) = __r; \
+} while (0)
+extern UDItype __udiv_qrnnd();
+#define UDIV_TIME 220
+#endif /* LONGLONG_STANDALONE */
+#endif /* __alpha */
+
+/***************************************
+	**************  ARM  ******************
+	***************************************/
+#if defined(__arm__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("adds %1, %4, %5\n" \
+		"adc  %0, %2, %3" \
+	: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+	: "%r" ((USItype)(ah)), \
+		"rI" ((USItype)(bh)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subs %1, %4, %5\n" \
+		"sbc  %0, %2, %3" \
+	: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+	: "r" ((USItype)(ah)), \
+		"rI" ((USItype)(bh)), \
+		"r" ((USItype)(al)), \
+		"rI" ((USItype)(bl)))
+#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__
+#define umul_ppmm(xh, xl, a, b) \
+	__asm__ ("%@ Inlined umul_ppmm\n" \
+		"mov	%|r0, %2, lsr #16		@ AAAA\n" \
+		"mov	%|r2, %3, lsr #16		@ BBBB\n" \
+		"bic	%|r1, %2, %|r0, lsl #16		@ aaaa\n" \
+		"bic	%0, %3, %|r2, lsl #16		@ bbbb\n" \
+		"mul	%1, %|r1, %|r2			@ aaaa * BBBB\n" \
+		"mul	%|r2, %|r0, %|r2		@ AAAA * BBBB\n" \
+		"mul	%|r1, %0, %|r1			@ aaaa * bbbb\n" \
+		"mul	%0, %|r0, %0			@ AAAA * bbbb\n" \
+		"adds	%|r0, %1, %0			@ central sum\n" \
+		"addcs	%|r2, %|r2, #65536\n" \
+		"adds	%1, %|r1, %|r0, lsl #16\n" \
+		"adc	%0, %|r2, %|r0, lsr #16" \
+	: "=&r" ((USItype)(xh)), \
+		"=r" ((USItype)(xl)) \
+	: "r" ((USItype)(a)), \
+		"r" ((USItype)(b)) \
+	: "r0", "r1", "r2")
+#else
+#define umul_ppmm(xh, xl, a, b) \
+	__asm__ ("%@ Inlined umul_ppmm\n" \
+		"umull %r1, %r0, %r2, %r3" \
+	: "=&r" ((USItype)(xh)), \
+			"=r" ((USItype)(xl)) \
+	: "r" ((USItype)(a)), \
+			"r" ((USItype)(b)) \
+	: "r0", "r1")
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 100
+#endif /* __arm__ */
+
+/***************************************
+	**************  CLIPPER  **************
+	***************************************/
+#if defined(__clipper__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+	({union {UDItype __ll; \
+		struct {USItype __l, __h; } __i; \
+	} __xx; \
+	__asm__ ("mulwux %2,%0" \
+	: "=r" (__xx.__ll) \
+	: "%0" ((USItype)(u)), \
+		"r" ((USItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define smul_ppmm(w1, w0, u, v) \
+	({union {DItype __ll; \
+		struct {SItype __l, __h; } __i; \
+	} __xx; \
+	__asm__ ("mulwx %2,%0" \
+	: "=r" (__xx.__ll) \
+	: "%0" ((SItype)(u)), \
+		"r" ((SItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+	({UDItype __w; \
+	__asm__ ("mulwux %2,%0" \
+	: "=r" (__w) \
+	: "%0" ((USItype)(u)), \
+		"r" ((USItype)(v))); \
+	__w; })
+#endif /* __clipper__ */
+
+/***************************************
+	**************  GMICRO  ***************
+	***************************************/
+#if defined(__gmicro__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add.w %5,%1\n" \
+		"addx %3,%0" \
+	: "=g" ((USItype)(sh)), \
+		"=&g" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+		"g" ((USItype)(bh)), \
+		"%1" ((USItype)(al)), \
+		"g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub.w %5,%1\n" \
+		"subx %3,%0" \
+	: "=g" ((USItype)(sh)), \
+		"=&g" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+		"g" ((USItype)(bh)), \
+		"1" ((USItype)(al)), \
+		"g" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+	__asm__ ("mulx %3,%0,%1" \
+	: "=g" ((USItype)(ph)), \
+		"=r" ((USItype)(pl)) \
+	: "%0" ((USItype)(m0)), \
+		"g" ((USItype)(m1)))
+#define udiv_qrnnd(q, r, nh, nl, d) \
+	__asm__ ("divx %4,%0,%1" \
+	: "=g" ((USItype)(q)), \
+		"=r" ((USItype)(r)) \
+	: "1" ((USItype)(nh)), \
+		"0" ((USItype)(nl)), \
+		"g" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+	__asm__ ("bsch/1 %1,%0" \
+	: "=g" (count) \
+	: "g" ((USItype)(x)), \
+	     "0" ((USItype)0))
+#endif
+
+/***************************************
+	**************  HPPA  *****************
+	***************************************/
+#if defined(__hppa) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add %4,%5,%1\n" \
+		   "addc %2,%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "%rM" ((USItype)(ah)), \
+	     "rM" ((USItype)(bh)), \
+	     "%rM" ((USItype)(al)), \
+	     "rM" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub %4,%5,%1\n" \
+	   "subb %2,%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "rM" ((USItype)(ah)), \
+	     "rM" ((USItype)(bh)), \
+	     "rM" ((USItype)(al)), \
+	     "rM" ((USItype)(bl)))
+#if defined(_PA_RISC1_1)
+#define umul_ppmm(wh, wl, u, v) \
+do { \
+	union {UDItype __ll; \
+	struct {USItype __h, __l; } __i; \
+	} __xx; \
+	__asm__ ("xmpyu %1,%2,%0" \
+	: "=*f" (__xx.__ll) \
+	: "*f" ((USItype)(u)), \
+	       "*f" ((USItype)(v))); \
+	(wh) = __xx.__i.__h; \
+	(wl) = __xx.__i.__l; \
+} while (0)
+#define UMUL_TIME 8
+#define UDIV_TIME 60
+#else
+#define UMUL_TIME 40
+#define UDIV_TIME 80
+#endif
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { USItype __r; \
+	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+	(r) = __r; \
+} while (0)
+extern USItype __udiv_qrnnd();
+#endif /* LONGLONG_STANDALONE */
+#define count_leading_zeros(count, x) \
+do { \
+	USItype __tmp; \
+	__asm__ ( \
+	"ldi             1,%0\n" \
+	"extru,=	%1,15,16,%%r0  ; Bits 31..16 zero?\n" \
+	"extru,tr	%1,15,16,%1    ; No.  Shift down, skip add.\n" \
+	"ldo		16(%0),%0      ; Yes.	Perform add.\n" \
+	"extru,=	%1,23,8,%%r0   ; Bits 15..8 zero?\n" \
+	"extru,tr	%1,23,8,%1     ; No.  Shift down, skip add.\n" \
+	"ldo		8(%0),%0       ; Yes.	Perform add.\n" \
+	"extru,=	%1,27,4,%%r0   ; Bits 7..4 zero?\n" \
+	"extru,tr	%1,27,4,%1     ; No.  Shift down, skip add.\n" \
+	"ldo		4(%0),%0       ; Yes.	Perform add.\n" \
+	"extru,=	%1,29,2,%%r0   ; Bits 3..2 zero?\n" \
+	"extru,tr	%1,29,2,%1     ; No.  Shift down, skip add.\n" \
+	"ldo		2(%0),%0       ; Yes.	Perform add.\n" \
+	"extru		%1,30,1,%1     ; Extract bit 1.\n" \
+	"sub		%0,%1,%0       ; Subtract it.              " \
+	: "=r" (count), "=r" (__tmp) : "1" (x)); \
+} while (0)
+#endif /* hppa */
+
+/***************************************
+	**************  I370  *****************
+	***************************************/
+#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+	union {UDItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __xx; \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mr %0,%3" \
+	: "=r" (__xx.__i.__h), \
+	       "=r" (__xx.__i.__l) \
+	: "%1" (__m0), \
+	       "r" (__m1)); \
+	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+	(xh) += ((((SItype) __m0 >> 31) & __m1) \
+	     + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define smul_ppmm(xh, xl, m0, m1) \
+do { \
+	union {DItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __xx; \
+	__asm__ ("mr %0,%3" \
+	: "=r" (__xx.__i.__h), \
+	       "=r" (__xx.__i.__l) \
+	: "%1" (m0), \
+	       "r" (m1)); \
+	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+} while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+do { \
+	union {DItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __xx; \
+	__xx.__i.__h = n1; __xx.__i.__l = n0; \
+	__asm__ ("dr %0,%2" \
+	: "=r" (__xx.__ll) \
+	: "0" (__xx.__ll), "r" (d)); \
+	(q) = __xx.__i.__l; (r) = __xx.__i.__h; \
+} while (0)
+#endif
+
+/***************************************
+	**************  I386  *****************
+	***************************************/
+#undef __i386__
+#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addl %5,%1\n" \
+	   "adcl %3,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	     "g" ((USItype)(bh)), \
+	     "%1" ((USItype)(al)), \
+	     "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subl %5,%1\n" \
+	   "sbbl %3,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	     "g" ((USItype)(bh)), \
+	     "1" ((USItype)(al)), \
+	     "g" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("mull %3" \
+	: "=a" ((USItype)(w0)), \
+	     "=d" ((USItype)(w1)) \
+	: "%0" ((USItype)(u)), \
+	     "rm" ((USItype)(v)))
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("divl %4" \
+	: "=a" ((USItype)(q)), \
+	     "=d" ((USItype)(r)) \
+	: "0" ((USItype)(n0)), \
+	     "1" ((USItype)(n1)), \
+	     "rm" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+do { \
+	USItype __cbtmp; \
+	__asm__ ("bsrl %1,%0" \
+	: "=r" (__cbtmp) : "rm" ((USItype)(x))); \
+	(count) = __cbtmp ^ 31; \
+} while (0)
+#define count_trailing_zeros(count, x) \
+	__asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
+#ifndef UMUL_TIME
+#define UMUL_TIME 40
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 40
+#endif
+#endif /* 80x86 */
+
+/***************************************
+	**************  I860  *****************
+	***************************************/
+#if defined(__i860__) && W_TYPE_SIZE == 32
+#define rshift_rhlc(r, h, l, c) \
+	__asm__ ("shr %3,r0,r0\n" \
+	"shrd %1,%2,%0" \
+	   "=r" (r) : "r" (h), "r" (l), "rn" (c))
+#endif /* i860 */
+
+/***************************************
+	**************  I960  *****************
+	***************************************/
+#if defined(__i960__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("cmpo 1,0\n" \
+	"addc %5,%4,%1\n" \
+	"addc %3,%2,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "%dI" ((USItype)(ah)), \
+	     "dI" ((USItype)(bh)), \
+	     "%dI" ((USItype)(al)), \
+	     "dI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("cmpo 0,0\n" \
+	"subc %5,%4,%1\n" \
+	"subc %3,%2,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "dI" ((USItype)(ah)), \
+	     "dI" ((USItype)(bh)), \
+	     "dI" ((USItype)(al)), \
+	     "dI" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+	({union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __xx; \
+	__asm__ ("emul        %2,%1,%0" \
+	: "=d" (__xx.__ll) \
+	: "%dI" ((USItype)(u)), \
+	     "dI" ((USItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+	({UDItype __w; \
+	__asm__ ("emul      %2,%1,%0" \
+	: "=d" (__w) \
+	: "%dI" ((USItype)(u)), \
+	       "dI" ((USItype)(v))); \
+	__w; })
+#define udiv_qrnnd(q, r, nh, nl, d) \
+do { \
+	union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __nn; \
+	__nn.__i.__h = (nh); __nn.__i.__l = (nl); \
+	__asm__ ("ediv %d,%n,%0" \
+	: "=d" (__rq.__ll) \
+	: "dI" (__nn.__ll), \
+	     "dI" ((USItype)(d))); \
+	(r) = __rq.__i.__l; (q) = __rq.__i.__h; \
+} while (0)
+#define count_leading_zeros(count, x) \
+do { \
+	USItype __cbtmp; \
+	__asm__ ("scanbit %1,%0" \
+	: "=r" (__cbtmp) \
+	: "r" ((USItype)(x))); \
+	(count) = __cbtmp ^ 31; \
+} while (0)
+#define COUNT_LEADING_ZEROS_0 (-32)	/* sic */
+#if defined(__i960mx)		/* what is the proper symbol to test??? */
+#define rshift_rhlc(r, h, l, c) \
+do { \
+	union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __nn; \
+	__nn.__i.__h = (h); __nn.__i.__l = (l); \
+	__asm__ ("shre %2,%1,%0" \
+	: "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
+}
+#endif /* i960mx */
+#endif /* i960 */
+
+/***************************************
+	**************  68000	****************
+	***************************************/
+#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add%.l %5,%1\n" \
+	   "addx%.l %3,%0" \
+	: "=d" ((USItype)(sh)), \
+	     "=&d" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	     "d" ((USItype)(bh)), \
+	     "%1" ((USItype)(al)), \
+	     "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub%.l %5,%1\n" \
+	   "subx%.l %3,%0" \
+	: "=d" ((USItype)(sh)), \
+	     "=&d" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	     "d" ((USItype)(bh)), \
+	     "1" ((USItype)(al)), \
+	     "g" ((USItype)(bl)))
+#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020))
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("mulu%.l %3,%1:%0" \
+	: "=d" ((USItype)(w0)), \
+	     "=d" ((USItype)(w1)) \
+	: "%0" ((USItype)(u)), \
+	     "dmi" ((USItype)(v)))
+#define UMUL_TIME 45
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("divu%.l %4,%1:%0" \
+	: "=d" ((USItype)(q)), \
+	     "=d" ((USItype)(r)) \
+	: "0" ((USItype)(n0)), \
+	     "1" ((USItype)(n1)), \
+	     "dmi" ((USItype)(d)))
+#define UDIV_TIME 90
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("divs%.l %4,%1:%0" \
+	: "=d" ((USItype)(q)), \
+	     "=d" ((USItype)(r)) \
+	: "0" ((USItype)(n0)), \
+	     "1" ((USItype)(n1)), \
+	     "dmi" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+	__asm__ ("bfffo %1{%b2:%b2},%0" \
+	: "=d" ((USItype)(count)) \
+	: "od" ((USItype)(x)), "n" (0))
+#define COUNT_LEADING_ZEROS_0 32
+#else /* not mc68020 */
+#define umul_ppmm(xh, xl, a, b) \
+do { USItype __umul_tmp1, __umul_tmp2; \
+	__asm__ ("| Inlined umul_ppmm\n" \
+	"move%.l %5,%3\n" \
+	"move%.l %2,%0\n" \
+	"move%.w %3,%1\n" \
+	"swap	%3\n" \
+	"swap	%0\n" \
+	"mulu	%2,%1\n" \
+	"mulu	%3,%0\n" \
+	"mulu	%2,%3\n" \
+	"swap	%2\n" \
+	"mulu	%5,%2\n" \
+	"add%.l	%3,%2\n" \
+	"jcc	1f\n" \
+	"add%.l	%#0x10000,%0\n" \
+	"1:	move%.l %2,%3\n" \
+	"clr%.w	%2\n" \
+	"swap	%2\n" \
+	"swap	%3\n" \
+	"clr%.w	%3\n" \
+	"add%.l	%3,%1\n" \
+	"addx%.l %2,%0\n" \
+	"| End inlined umul_ppmm" \
+	: "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
+		"=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
+	: "%2" ((USItype)(a)), "d" ((USItype)(b))); \
+} while (0)
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+#endif /* not mc68020 */
+#endif /* mc68000 */
+
+/***************************************
+	**************  88000	****************
+	***************************************/
+#if defined(__m88000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addu.co %1,%r4,%r5\n" \
+	   "addu.ci %0,%r2,%r3" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "%rJ" ((USItype)(ah)), \
+	     "rJ" ((USItype)(bh)), \
+	     "%rJ" ((USItype)(al)), \
+	     "rJ" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subu.co %1,%r4,%r5\n" \
+	   "subu.ci %0,%r2,%r3" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "rJ" ((USItype)(ah)), \
+	     "rJ" ((USItype)(bh)), \
+	     "rJ" ((USItype)(al)), \
+	     "rJ" ((USItype)(bl)))
+#define count_leading_zeros(count, x) \
+do { \
+	USItype __cbtmp; \
+	__asm__ ("ff1 %0,%1" \
+	: "=r" (__cbtmp) \
+	: "r" ((USItype)(x))); \
+	(count) = __cbtmp ^ 31; \
+} while (0)
+#define COUNT_LEADING_ZEROS_0 63	/* sic */
+#if defined(__m88110__)
+#define umul_ppmm(wh, wl, u, v) \
+do { \
+	union {UDItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __x; \
+	__asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
+	(wh) = __x.__i.__h; \
+	(wl) = __x.__i.__l; \
+} while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	({union {UDItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __x, __q; \
+	__x.__i.__h = (n1); __x.__i.__l = (n0); \
+	__asm__ ("divu.d %0,%1,%2" \
+	: "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
+	(r) = (n0) - __q.__l * (d); (q) = __q.__l; })
+#define UMUL_TIME 5
+#define UDIV_TIME 25
+#else
+#define UMUL_TIME 17
+#define UDIV_TIME 150
+#endif /* __m88110__ */
+#endif /* __m88000__ */
+
+/***************************************
+	**************  MIPS  *****************
+	***************************************/
+#if defined(__mips__) && W_TYPE_SIZE == 32
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("multu %2,%3" \
+	: "=l" ((USItype)(w0)), \
+	     "=h" ((USItype)(w1)) \
+	: "d" ((USItype)(u)), \
+	     "d" ((USItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("multu %2,%3\n" \
+	   "mflo %0\n" \
+	   "mfhi %1" \
+	: "=d" ((USItype)(w0)), \
+	     "=d" ((USItype)(w1)) \
+	: "d" ((USItype)(u)), \
+	     "d" ((USItype)(v)))
+#endif
+#define UMUL_TIME 10
+#define UDIV_TIME 100
+#endif /* __mips__ */
+
+/***************************************
+	**************  MIPS/64  **************
+	***************************************/
+#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("dmultu %2,%3" \
+	: "=l" ((UDItype)(w0)), \
+	     "=h" ((UDItype)(w1)) \
+	: "d" ((UDItype)(u)), \
+	     "d" ((UDItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("dmultu %2,%3\n" \
+	   "mflo %0\n" \
+	   "mfhi %1" \
+	: "=d" ((UDItype)(w0)), \
+	     "=d" ((UDItype)(w1)) \
+	: "d" ((UDItype)(u)), \
+	     "d" ((UDItype)(v)))
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 140
+#endif /* __mips__ */
+
+/***************************************
+	**************  32000	****************
+	***************************************/
+#if defined(__ns32000__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+	({union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __xx; \
+	__asm__ ("meid %2,%0" \
+	: "=g" (__xx.__ll) \
+	: "%0" ((USItype)(u)), \
+	     "g" ((USItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+	({UDItype __w; \
+	__asm__ ("meid %2,%0" \
+	: "=g" (__w) \
+	: "%0" ((USItype)(u)), \
+	       "g" ((USItype)(v))); \
+	__w; })
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	({union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __xx; \
+	__xx.__i.__h = (n1); __xx.__i.__l = (n0); \
+	__asm__ ("deid %2,%0" \
+	: "=g" (__xx.__ll) \
+	: "0" (__xx.__ll), \
+	     "g" ((USItype)(d))); \
+	(r) = __xx.__i.__l; (q) = __xx.__i.__h; })
+#define count_trailing_zeros(count, x) \
+do { \
+	__asm__("ffsd      %2,%0" \
+	: "=r"((USItype) (count)) \
+	: "0"((USItype) 0), "r"((USItype) (x))); \
+	} while (0)
+#endif /* __ns32000__ */
+
+/***************************************
+	**************  PPC  ******************
+	***************************************/
+#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+	if (__builtin_constant_p(bh) && (bh) == 0) \
+		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "%r" ((USItype)(ah)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl))); \
+	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
+		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "%r" ((USItype)(ah)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl))); \
+	else \
+		__asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "%r" ((USItype)(ah)), \
+		"r" ((USItype)(bh)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl))); \
+} while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+	if (__builtin_constant_p(ah) && (ah) == 0) \
+		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(bh)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+	else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \
+		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(bh)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+	else if (__builtin_constant_p(bh) && (bh) == 0) \
+		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(ah)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
+		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(ah)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+	else \
+		__asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(ah)), \
+		"r" ((USItype)(bh)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+} while (0)
+#define count_leading_zeros(count, x) \
+	__asm__ ("{cntlz|cntlzw} %0,%1" \
+	: "=r" ((USItype)(count)) \
+	: "r" ((USItype)(x)))
+#define COUNT_LEADING_ZEROS_0 32
+#if defined(_ARCH_PPC)
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mulhwu %0,%1,%2" \
+	: "=r" ((USItype) ph) \
+	: "%r" (__m0), \
+	"r" (__m1)); \
+	(pl) = __m0 * __m1; \
+} while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+do { \
+	SItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mulhw %0,%1,%2" \
+	: "=r" ((SItype) ph) \
+	: "%r" (__m0), \
+	"r" (__m1)); \
+	(pl) = __m0 * __m1; \
+} while (0)
+#define SMUL_TIME 14
+#define UDIV_TIME 120
+#else
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mul %0,%2,%3" \
+	: "=r" ((USItype)(xh)), \
+	"=q" ((USItype)(xl)) \
+	: "r" (__m0), \
+	"r" (__m1)); \
+	(xh) += ((((SItype) __m0 >> 31) & __m1) \
+	+ (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define UMUL_TIME 8
+#define smul_ppmm(xh, xl, m0, m1) \
+	__asm__ ("mul %0,%2,%3" \
+	: "=r" ((SItype)(xh)), \
+	"=q" ((SItype)(xl)) \
+	: "r" (m0), \
+	"r" (m1))
+#define SMUL_TIME 4
+#define sdiv_qrnnd(q, r, nh, nl, d) \
+	__asm__ ("div %0,%2,%4" \
+	: "=r" ((SItype)(q)), "=q" ((SItype)(r)) \
+	: "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d)))
+#define UDIV_TIME 100
+#endif
+#endif /* Power architecture variants.  */
+
+/***************************************
+	**************  PYR  ******************
+	***************************************/
+#if defined(__pyr__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addw        %5,%1\n" \
+	"addwc	%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	"g" ((USItype)(bh)), \
+	"%1" ((USItype)(al)), \
+	"g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subw        %5,%1\n" \
+	"subwb	%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	"g" ((USItype)(bh)), \
+	"1" ((USItype)(al)), \
+	"g" ((USItype)(bl)))
+	/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  */
+#define umul_ppmm(w1, w0, u, v) \
+	({union {UDItype __ll; \
+	struct {USItype __h, __l; } __i; \
+	} __xx; \
+	__asm__ ("movw %1,%R0\n" \
+	"uemul %2,%0" \
+	: "=&r" (__xx.__ll) \
+	: "g" ((USItype) (u)), \
+	"g" ((USItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#endif /* __pyr__ */
+
+/***************************************
+	**************  RT/ROMP  **************
+	***************************************/
+#if defined(__ibm032__) /* RT/ROMP */	&& W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("a %1,%5\n" \
+	"ae %0,%3" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	"r" ((USItype)(bh)), \
+	"%1" ((USItype)(al)), \
+	"r" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("s %1,%5\n" \
+	"se %0,%3" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	"r" ((USItype)(bh)), \
+	"1" ((USItype)(al)), \
+	"r" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ( \
+	"s       r2,r2\n" \
+	"mts	r10,%2\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"cas	%0,r2,r0\n" \
+	"mfs	r10,%1" \
+	: "=r" ((USItype)(ph)), \
+	"=r" ((USItype)(pl)) \
+	: "%r" (__m0), \
+	"r" (__m1) \
+	: "r2"); \
+	(ph) += ((((SItype) __m0 >> 31) & __m1) \
+	+ (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define UMUL_TIME 20
+#define UDIV_TIME 200
+#define count_leading_zeros(count, x) \
+do { \
+	if ((x) >= 0x10000) \
+		__asm__ ("clz     %0,%1" \
+		: "=r" ((USItype)(count)) \
+		: "r" ((USItype)(x) >> 16)); \
+	else { \
+		__asm__ ("clz   %0,%1" \
+		: "=r" ((USItype)(count)) \
+		: "r" ((USItype)(x))); \
+		(count) += 16; \
+	} \
+} while (0)
+#endif /* RT/ROMP */
+
+/***************************************
+	**************  SH2  ******************
+	***************************************/
+#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \
+	&& W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ( \
+	"dmulu.l %2,%3\n" \
+	"sts	macl,%1\n" \
+	"sts	mach,%0" \
+	: "=r" ((USItype)(w1)), \
+	"=r" ((USItype)(w0)) \
+	: "r" ((USItype)(u)), \
+	"r" ((USItype)(v)) \
+	: "macl", "mach")
+#define UMUL_TIME 5
+#endif
+
+/***************************************
+	**************  SPARC	****************
+	***************************************/
+#if defined(__sparc__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addcc %r4,%5,%1\n" \
+	"addx %r2,%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "%rJ" ((USItype)(ah)), \
+	"rI" ((USItype)(bh)), \
+	"%rJ" ((USItype)(al)), \
+	"rI" ((USItype)(bl)) \
+	__CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subcc %r4,%5,%1\n" \
+	"subx %r2,%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "rJ" ((USItype)(ah)), \
+	"rI" ((USItype)(bh)), \
+	"rJ" ((USItype)(al)), \
+	"rI" ((USItype)(bl)) \
+	__CLOBBER_CC)
+#if defined(__sparc_v8__)
+/* Don't match immediate range because, 1) it is not often useful,
+	2) the 'I' flag thinks of the range as a 13 bit signed interval,
+	while we want to match a 13 bit interval, sign extended to 32 bits,
+	but INTERPRETED AS UNSIGNED.  */
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
+	: "=r" ((USItype)(w1)), \
+	"=r" ((USItype)(w0)) \
+	: "r" ((USItype)(u)), \
+	"r" ((USItype)(v)))
+#define UMUL_TIME 5
+#ifndef SUPERSPARC		/* SuperSPARC's udiv only handles 53 bit dividends */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { \
+	USItype __q; \
+	__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+	: "=r" ((USItype)(__q)) \
+	: "r" ((USItype)(n1)), \
+	"r" ((USItype)(n0)), \
+	"r" ((USItype)(d))); \
+	(r) = (n0) - __q * (d); \
+	(q) = __q; \
+} while (0)
+#define UDIV_TIME 25
+#endif /* SUPERSPARC */
+#else /* ! __sparc_v8__ */
+#if defined(__sparclite__)
+/* This has hardware multiply but not divide.  It also has two additional
+	instructions scan (ffs from high bit) and divscc.  */
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
+	: "=r" ((USItype)(w1)), \
+	"=r" ((USItype)(w0)) \
+	: "r" ((USItype)(u)), \
+	"r" ((USItype)(v)))
+#define UMUL_TIME 5
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("! Inlined udiv_qrnnd\n" \
+	"wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n" \
+	"tst	%%g0\n" \
+	"divscc	%3,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%0\n" \
+	"rd	%%y,%1\n" \
+	"bl,a 1f\n" \
+	"add	%1,%4,%1\n" \
+	"1:	! End of inline udiv_qrnnd" \
+	: "=r" ((USItype)(q)), \
+	"=r" ((USItype)(r)) \
+	: "r" ((USItype)(n1)), \
+	"r" ((USItype)(n0)), \
+	"rI" ((USItype)(d)) \
+	: "%g1" __AND_CLOBBER_CC)
+#define UDIV_TIME 37
+#define count_leading_zeros(count, x) \
+	__asm__ ("scan %1,0,%0" \
+	: "=r" ((USItype)(x)) \
+	: "r" ((USItype)(count)))
+/* Early sparclites return 63 for an argument of 0, but they warn that future
+	implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
+	undefined.  */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+	/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
+#ifndef umul_ppmm
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("! Inlined umul_ppmm\n" \
+	"wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n" \
+	"sra	%3,31,%%g2	! Don't move this insn\n" \
+	"and	%2,%%g2,%%g2	! Don't move this insn\n" \
+	"andcc	%%g0,0,%%g1	! Don't move this insn\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,0,%%g1\n" \
+	"add	%%g1,%%g2,%0\n" \
+	"rd	%%y,%1" \
+	: "=r" ((USItype)(w1)), \
+	"=r" ((USItype)(w0)) \
+	: "%rI" ((USItype)(u)), \
+	"r" ((USItype)(v)) \
+	: "%g1", "%g2" __AND_CLOBBER_CC)
+#define UMUL_TIME 39		/* 39 instructions */
+#endif
+#ifndef udiv_qrnnd
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { USItype __r; \
+	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+	(r) = __r; \
+} while (0)
+	extern USItype __udiv_qrnnd();
+#define UDIV_TIME 140
+#endif /* LONGLONG_STANDALONE */
+#endif /* udiv_qrnnd */
+#endif /* __sparc__ */
+
+/***************************************
+	**************  VAX  ******************
+	***************************************/
+#if defined(__vax__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addl2 %5,%1\n" \
+	"adwc %3,%0" \
+	: "=g" ((USItype)(sh)), \
+	"=&g" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	"g" ((USItype)(bh)), \
+	"%1" ((USItype)(al)), \
+	"g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subl2 %5,%1\n" \
+	"sbwc %3,%0" \
+	: "=g" ((USItype)(sh)), \
+	"=&g" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	"g" ((USItype)(bh)), \
+	"1" ((USItype)(al)), \
+	"g" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+	union {UDItype __ll; \
+	struct {USItype __l, __h; } __i; \
+	} __xx; \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("emul %1,%2,$0,%0" \
+	: "=g" (__xx.__ll) \
+	: "g" (__m0), \
+	"g" (__m1)); \
+	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+	(xh) += ((((SItype) __m0 >> 31) & __m1) \
+	+ (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+do { \
+	union {DItype __ll; \
+	struct {SItype __l, __h; } __i; \
+	} __xx; \
+	__xx.__i.__h = n1; __xx.__i.__l = n0; \
+	__asm__ ("ediv %3,%2,%0,%1" \
+	: "=g" (q), "=g" (r) \
+	: "g" (__xx.__ll), "g" (d)); \
+} while (0)
+#endif /* __vax__ */
+
+/***************************************
+	**************  Z8000	****************
+	***************************************/
+#if defined(__z8000__) && W_TYPE_SIZE == 16
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add %H1,%H5\n\tadc  %H0,%H3" \
+	: "=r" ((unsigned int)(sh)), \
+	"=&r" ((unsigned int)(sl)) \
+	: "%0" ((unsigned int)(ah)), \
+	"r" ((unsigned int)(bh)), \
+	"%1" ((unsigned int)(al)), \
+	"rQR" ((unsigned int)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3" \
+	: "=r" ((unsigned int)(sh)), \
+	"=&r" ((unsigned int)(sl)) \
+	: "0" ((unsigned int)(ah)), \
+	"r" ((unsigned int)(bh)), \
+	"1" ((unsigned int)(al)), \
+	"rQR" ((unsigned int)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+	union {long int __ll; \
+	struct {unsigned int __h, __l; } __i; \
+	} __xx; \
+	unsigned int __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mult      %S0,%H3" \
+	: "=r" (__xx.__i.__h), \
+	"=r" (__xx.__i.__l) \
+	: "%1" (__m0), \
+	"rQR" (__m1)); \
+	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+	(xh) += ((((signed int) __m0 >> 15) & __m1) \
+	+ (((signed int) __m1 >> 15) & __m0)); \
+} while (0)
+#endif /* __z8000__ */
+
+#endif /* __GNUC__ */
+
+/***************************************
+	***********  Generic Versions	********
+	***************************************/
+#if !defined(umul_ppmm) && defined(__umulsidi3)
+#define umul_ppmm(ph, pl, m0, m1) \
+{ \
+	UDWtype __ll = __umulsidi3(m0, m1); \
+	ph = (UWtype) (__ll >> W_TYPE_SIZE); \
+	pl = (UWtype) __ll; \
+}
+#endif
+
+#if !defined(__umulsidi3)
+#define __umulsidi3(u, v) \
+	({UWtype __hi, __lo; \
+	umul_ppmm(__hi, __lo, u, v); \
+	((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
+#endif
+
+	/* If this machine has no inline assembler, use C macros.  */
+
+#if !defined(add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+	UWtype __x; \
+	__x = (al) + (bl); \
+	(sh) = (ah) + (bh) + (__x < (al)); \
+	(sl) = __x; \
+} while (0)
+#endif
+
+#if !defined(sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+	UWtype __x; \
+	__x = (al) - (bl); \
+	(sh) = (ah) - (bh) - (__x > (al)); \
+	(sl) = __x; \
+} while (0)
+#endif
+
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+do { \
+	UWtype __x0, __x1, __x2, __x3; \
+	UHWtype __ul, __vl, __uh, __vh; \
+	UWtype __u = (u), __v = (v); \
+	\
+	__ul = __ll_lowpart(__u); \
+	__uh = __ll_highpart(__u); \
+	__vl = __ll_lowpart(__v); \
+	__vh = __ll_highpart(__v); \
+	\
+	__x0 = (UWtype) __ul * __vl; \
+	__x1 = (UWtype) __ul * __vh; \
+	__x2 = (UWtype) __uh * __vl; \
+	__x3 = (UWtype) __uh * __vh; \
+	\
+	__x1 += __ll_highpart(__x0);/* this can't give carry */ \
+	__x1 += __x2;		/* but this indeed can */ \
+	if (__x1 < __x2)		/* did we get it? */ \
+	__x3 += __ll_B;		/* yes, add it in the proper pos. */ \
+	\
+	(w1) = __x3 + __ll_highpart(__x1); \
+	(w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \
+} while (0)
+#endif
+
+#if !defined(umul_ppmm)
+#define smul_ppmm(w1, w0, u, v) \
+do { \
+	UWtype __w1; \
+	UWtype __m0 = (u), __m1 = (v); \
+	umul_ppmm(__w1, w0, __m0, __m1); \
+	(w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
+	- (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
+} while (0)
+#endif
+
+	/* Define this unconditionally, so it can be used for debugging.  */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+do { \
+	UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
+	__d1 = __ll_highpart(d); \
+	__d0 = __ll_lowpart(d); \
+	\
+	__r1 = (n1) % __d1; \
+	__q1 = (n1) / __d1; \
+	__m = (UWtype) __q1 * __d0; \
+	__r1 = __r1 * __ll_B | __ll_highpart(n0); \
+	if (__r1 < __m) { \
+		__q1--, __r1 += (d); \
+		if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
+		if (__r1 < __m) \
+			__q1--, __r1 += (d); \
+	} \
+	__r1 -= __m; \
+	\
+	__r0 = __r1 % __d1; \
+	__q0 = __r1 / __d1; \
+	__m = (UWtype) __q0 * __d0; \
+	__r0 = __r0 * __ll_B | __ll_lowpart(n0); \
+	if (__r0 < __m) { \
+		__q0--, __r0 += (d); \
+		if (__r0 >= (d)) \
+			if (__r0 < __m) \
+				__q0--, __r0 += (d); \
+	} \
+	__r0 -= __m; \
+	\
+	(q) = (UWtype) __q1 * __ll_B | __q0; \
+	(r) = __r0; \
+} while (0)
+
+/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
+	__udiv_w_sdiv (defined in libgcc or elsewhere).  */
+#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+do { \
+	UWtype __r; \
+	(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
+	(r) = __r; \
+} while (0)
+#endif
+
+	/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
+#if !defined(udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+#undef count_leading_zeros
+#if !defined(count_leading_zeros)
+	extern
+#ifdef __STDC__
+			const
+#endif
+			unsigned char __clz_tab[];
+#define count_leading_zeros(count, x) \
+do { \
+	UWtype __xr = (x); \
+	UWtype __a; \
+	\
+	if (W_TYPE_SIZE <= 32) { \
+		__a = __xr < ((UWtype) 1 << 2*__BITS4) \
+		? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \
+		: (__xr < ((UWtype) 1 << 3*__BITS4) ?  2*__BITS4 : 3*__BITS4); \
+	} \
+	else { \
+		for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
+			if (((__xr >> __a) & 0xff) != 0) \
+				break; \
+	} \
+	\
+	(count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
+} while (0)
+	/* This version gives a well-defined value for zero. */
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif
+
+#if !defined(count_trailing_zeros)
+/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
+	defined in asm, but if it is not, the C version above is good enough.  */
+#define count_trailing_zeros(count, x) \
+do { \
+	UWtype __ctz_x = (x); \
+	UWtype __ctz_c; \
+	count_leading_zeros(__ctz_c, __ctz_x & -__ctz_x); \
+	(count) = W_TYPE_SIZE - 1 - __ctz_c; \
+} while (0)
+#endif
+
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
diff --git a/lib/mpi/mpi-inline.h b/lib/mpi/mpi-inline.h
new file mode 100644
index 000000000000..e2b39852b30a
--- /dev/null
+++ b/lib/mpi/mpi-inline.h
@@ -0,0 +1,122 @@
+/* mpi-inline.h  -  Internal to the Multi Precision Integers
+ *	Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#ifndef G10_MPI_INLINE_H
+#define G10_MPI_INLINE_H
+
+#ifndef G10_MPI_INLINE_DECL
+#define G10_MPI_INLINE_DECL  extern inline
+#endif
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+	      mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+	mpi_limb_t x;
+
+	x = *s1_ptr++;
+	s2_limb += x;
+	*res_ptr++ = s2_limb;
+	if (s2_limb < x) {	/* sum is less than the left operand: handle carry */
+		while (--s1_size) {
+			x = *s1_ptr++ + 1;	/* add carry */
+			*res_ptr++ = x;	/* and store */
+			if (x)	/* not 0 (no overflow): we can stop */
+				goto leave;
+		}
+		return 1;	/* return carry (size of s1 to small) */
+	}
+
+leave:
+	if (res_ptr != s1_ptr) {	/* not the same variable */
+		mpi_size_t i;	/* copy the rest */
+		for (i = 0; i < s1_size - 1; i++)
+			res_ptr[i] = s1_ptr[i];
+	}
+	return 0;		/* no carry */
+}
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+	    mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+	mpi_limb_t cy = 0;
+
+	if (s2_size)
+		cy = mpihelp_add_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+
+	if (s1_size - s2_size)
+		cy = mpihelp_add_1(res_ptr + s2_size, s1_ptr + s2_size,
+				   s1_size - s2_size, cy);
+	return cy;
+}
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+	      mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+	mpi_limb_t x;
+
+	x = *s1_ptr++;
+	s2_limb = x - s2_limb;
+	*res_ptr++ = s2_limb;
+	if (s2_limb > x) {
+		while (--s1_size) {
+			x = *s1_ptr++;
+			*res_ptr++ = x - 1;
+			if (x)
+				goto leave;
+		}
+		return 1;
+	}
+
+leave:
+	if (res_ptr != s1_ptr) {
+		mpi_size_t i;
+		for (i = 0; i < s1_size - 1; i++)
+			res_ptr[i] = s1_ptr[i];
+	}
+	return 0;
+}
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+	    mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+	mpi_limb_t cy = 0;
+
+	if (s2_size)
+		cy = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+
+	if (s1_size - s2_size)
+		cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size,
+				   s1_size - s2_size, cy);
+	return cy;
+}
+
+#endif /*G10_MPI_INLINE_H */
diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h
new file mode 100644
index 000000000000..77adcf6bc257
--- /dev/null
+++ b/lib/mpi/mpi-internal.h
@@ -0,0 +1,261 @@
+/* mpi-internal.h  -  Internal to the Multi Precision Integers
+ *	Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *	Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#ifndef G10_MPI_INTERNAL_H
+#define G10_MPI_INTERNAL_H
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/mpi.h>
+#include <linux/errno.h>
+
+#define log_debug printk
+#define log_bug printk
+
+#define assert(x) \
+	do { \
+		if (!x) \
+			log_bug("failed assertion\n"); \
+	} while (0);
+
+/* If KARATSUBA_THRESHOLD is not already defined, define it to a
+ * value which is good on most machines.  */
+
+/* tested 4, 16, 32 and 64, where 16 gave the best performance when
+ * checking a 768 and a 1024 bit ElGamal signature.
+ * (wk 22.12.97) */
+#ifndef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 16
+#endif
+
+/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
+#if KARATSUBA_THRESHOLD < 2
+#undef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 2
+#endif
+
+typedef mpi_limb_t *mpi_ptr_t;	/* pointer to a limb */
+typedef int mpi_size_t;		/* (must be a signed type) */
+
+#define ABS(x) (x >= 0 ? x : -x)
+#define MIN(l, o) ((l) < (o) ? (l) : (o))
+#define MAX(h, i) ((h) > (i) ? (h) : (i))
+
+static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
+{
+	if (a->alloced < b)
+		return mpi_resize(a, b);
+	return 0;
+}
+
+/* Copy N limbs from S to D.  */
+#define MPN_COPY(d, s, n) \
+	do {					\
+		mpi_size_t _i;			\
+		for (_i = 0; _i < (n); _i++)	\
+			(d)[_i] = (s)[_i];	\
+	} while (0)
+
+#define MPN_COPY_INCR(d, s, n) \
+	do {					\
+		mpi_size_t _i;			\
+		for (_i = 0; _i < (n); _i++)	\
+			(d)[_i] = (d)[_i];	\
+	} while (0)
+
+#define MPN_COPY_DECR(d, s, n) \
+	do {					\
+		mpi_size_t _i;			\
+		for (_i = (n)-1; _i >= 0; _i--) \
+			(d)[_i] = (s)[_i];	\
+	} while (0)
+
+/* Zero N limbs at D */
+#define MPN_ZERO(d, n) \
+	do {					\
+		int  _i;			\
+		for (_i = 0; _i < (n); _i++)	\
+			(d)[_i] = 0;		\
+	} while (0)
+
+#define MPN_NORMALIZE(d, n)  \
+	do {					\
+		while ((n) > 0) {		\
+			if ((d)[(n)-1])		\
+				break;		\
+			(n)--;			\
+		}				\
+	} while (0)
+
+#define MPN_NORMALIZE_NOT_ZERO(d, n) \
+	do {				\
+		for (;;) {		\
+			if ((d)[(n)-1])	\
+				break;	\
+			(n)--;		\
+		}			\
+	} while (0)
+
+#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
+	do {							\
+		if ((size) < KARATSUBA_THRESHOLD)		\
+			mul_n_basecase(prodp, up, vp, size);	\
+		else						\
+			mul_n(prodp, up, vp, size, tspace);	\
+	} while (0);
+
+/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
+ * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
+ * If this would yield overflow, DI should be the largest possible number
+ * (i.e., only ones).  For correct operation, the most significant bit of D
+ * has to be set.  Put the quotient in Q and the remainder in R.
+ */
+#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \
+	do {								\
+		mpi_limb_t _q, _ql, _r;					\
+		mpi_limb_t _xh, _xl;					\
+		umul_ppmm(_q, _ql, (nh), (di));				\
+		_q += (nh);	/* DI is 2**BITS_PER_MPI_LIMB too small */ \
+		umul_ppmm(_xh, _xl, _q, (d));				\
+		sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl);		\
+		if (_xh) {						\
+			sub_ddmmss(_xh, _r, _xh, _r, 0, (d));		\
+			_q++;						\
+			if (_xh) {					\
+				sub_ddmmss(_xh, _r, _xh, _r, 0, (d));	\
+				_q++;					\
+			}						\
+		}							\
+		if (_r >= (d)) {					\
+			_r -= (d);					\
+			_q++;						\
+		}							\
+		(r) = _r;						\
+		(q) = _q;						\
+	} while (0)
+
+/*-- mpiutil.c --*/
+mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs);
+void mpi_free_limb_space(mpi_ptr_t a);
+void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs);
+
+/*-- mpi-bit.c --*/
+void mpi_rshift_limbs(MPI a, unsigned int count);
+int mpi_lshift_limbs(MPI a, unsigned int count);
+
+/*-- mpihelp-add.c --*/
+mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_ptr_t s2_ptr, mpi_size_t size);
+mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+		       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
+
+/*-- mpihelp-sub.c --*/
+mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_ptr_t s2_ptr, mpi_size_t size);
+mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+		       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
+
+/*-- mpihelp-cmp.c --*/
+int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size);
+
+/*-- mpihelp-mul.c --*/
+
+struct karatsuba_ctx {
+	struct karatsuba_ctx *next;
+	mpi_ptr_t tspace;
+	mpi_size_t tspace_size;
+	mpi_ptr_t tp;
+	mpi_size_t tp_size;
+};
+
+void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx);
+
+mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			    mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			    mpi_size_t s1_size, mpi_limb_t s2_limb);
+int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size);
+int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+		mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result);
+void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size);
+void mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size,
+		mpi_ptr_t tspace);
+
+int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
+			       mpi_ptr_t up, mpi_size_t usize,
+			       mpi_ptr_t vp, mpi_size_t vsize,
+			       struct karatsuba_ctx *ctx);
+
+/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/
+mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_size_t s1_size, mpi_limb_t s2_limb);
+
+/*-- mpihelp-div.c --*/
+mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+			 mpi_limb_t divisor_limb);
+mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
+			  mpi_ptr_t np, mpi_size_t nsize,
+			  mpi_ptr_t dp, mpi_size_t dsize);
+mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr,
+			    mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+			    mpi_limb_t divisor_limb);
+
+/*-- mpihelp-shift.c --*/
+mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+			  unsigned cnt);
+mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+			  unsigned cnt);
+
+/* Define stuff for longlong.h.  */
+#define W_TYPE_SIZE BITS_PER_MPI_LIMB
+typedef mpi_limb_t UWtype;
+typedef unsigned int UHWtype;
+#if defined(__GNUC__)
+typedef unsigned int UQItype __attribute__ ((mode(QI)));
+typedef int SItype __attribute__ ((mode(SI)));
+typedef unsigned int USItype __attribute__ ((mode(SI)));
+typedef int DItype __attribute__ ((mode(DI)));
+typedef unsigned int UDItype __attribute__ ((mode(DI)));
+#else
+typedef unsigned char UQItype;
+typedef long SItype;
+typedef unsigned long USItype;
+#endif
+
+#ifdef __GNUC__
+#include "mpi-inline.h"
+#endif
+
+#endif /*G10_MPI_INTERNAL_H */
-- 
cgit v1.2.3


From 051dbb918c7fb7da8e64a2cd0d804ba73399709f Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Date: Fri, 14 Oct 2011 15:25:16 +0300
Subject: crypto: digital signature verification support

This patch implements RSA digital signature verification using GnuPG library.

The format of the signature and the public key is defined by their respective
headers. The signature header contains version information, algorithm,
and keyid, which was used to generate the signature.
The key header contains version and algorythim type.
The payload of the signature and the key are multi-precision integers.

The signing and key management utilities evm-utils provide functionality
to generate signatures and load keys into the kernel keyring.
When the key is added to the kernel keyring, the keyid defines the name
of the key.

Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
---
 Documentation/digsig.txt |  96 ++++++++++++++++
 include/linux/digsig.h   |  64 +++++++++++
 lib/Kconfig              |   8 ++
 lib/Makefile             |   1 +
 lib/digsig.c             | 284 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 453 insertions(+)
 create mode 100644 Documentation/digsig.txt
 create mode 100644 include/linux/digsig.h
 create mode 100644 lib/digsig.c

(limited to 'include/linux')

diff --git a/Documentation/digsig.txt b/Documentation/digsig.txt
new file mode 100644
index 000000000000..3f682889068b
--- /dev/null
+++ b/Documentation/digsig.txt
@@ -0,0 +1,96 @@
+Digital Signature Verification API
+
+CONTENTS
+
+1. Introduction
+2. API
+3. User-space utilities
+
+
+1. Introduction
+
+Digital signature verification API provides a method to verify digital signature.
+Currently digital signatures are used by the IMA/EVM integrity protection subsystem.
+
+Digital signature verification is implemented using cut-down kernel port of
+GnuPG multi-precision integers (MPI) library. The kernel port provides
+memory allocation errors handling, has been refactored according to kernel
+coding style, and checkpatch.pl reported errors and warnings have been fixed.
+
+Public key and signature consist of header and MPIs.
+
+struct pubkey_hdr {
+	uint8_t		version;	/* key format version */
+	time_t		timestamp;	/* key made, always 0 for now */
+	uint8_t		algo;
+	uint8_t		nmpi;
+	char		mpi[0];
+} __packed;
+
+struct signature_hdr {
+	uint8_t		version;	/* signature format version */
+	time_t		timestamp;	/* signature made */
+	uint8_t		algo;
+	uint8_t		hash;
+	uint8_t		keyid[8];
+	uint8_t		nmpi;
+	char		mpi[0];
+} __packed;
+
+keyid equals to SHA1[12-19] over the total key content.
+Signature header is used as an input to generate a signature.
+Such approach insures that key or signature header could not be changed.
+It protects timestamp from been changed and can be used for rollback
+protection.
+
+2. API
+
+API currently includes only 1 function:
+
+	digsig_verify() - digital signature verification with public key
+
+
+/**
+ * digsig_verify() - digital signature verification with public key
+ * @keyring:	keyring to search key in
+ * @sig:	digital signature
+ * @sigen:	length of the signature
+ * @data:	data
+ * @datalen:	length of the data
+ * @return:	0 on success, -EINVAL otherwise
+ *
+ * Verifies data integrity against digital signature.
+ * Currently only RSA is supported.
+ * Normally hash of the content is used as a data for this function.
+ *
+ */
+int digsig_verify(struct key *keyring, const char *sig, int siglen,
+						const char *data, int datalen);
+
+3. User-space utilities
+
+The signing and key management utilities evm-utils provide functionality
+to generate signatures, to load keys into the kernel keyring.
+Keys can be in PEM or converted to the kernel format.
+When the key is added to the kernel keyring, the keyid defines the name
+of the key: 5D2B05FC633EE3E8 in the example bellow.
+
+Here is example output of the keyctl utility.
+
+$ keyctl show
+Session Keyring
+       -3 --alswrv      0     0  keyring: _ses
+603976250 --alswrv      0    -1   \_ keyring: _uid.0
+817777377 --alswrv      0     0       \_ user: kmk
+891974900 --alswrv      0     0       \_ encrypted: evm-key
+170323636 --alswrv      0     0       \_ keyring: _module
+548221616 --alswrv      0     0       \_ keyring: _ima
+128198054 --alswrv      0     0       \_ keyring: _evm
+
+$ keyctl list 128198054
+1 key in keyring:
+620789745: --alswrv     0     0 user: 5D2B05FC633EE3E8
+
+
+Dmitry Kasatkin
+06.10.2011
diff --git a/include/linux/digsig.h b/include/linux/digsig.h
new file mode 100644
index 000000000000..efae755017d7
--- /dev/null
+++ b/include/linux/digsig.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2011 Nokia Corporation
+ * Copyright (C) 2011 Intel Corporation
+ *
+ * Author:
+ * Dmitry Kasatkin <dmitry.kasatkin@nokia.com>
+ *                 <dmitry.kasatkin@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ *
+ */
+
+#ifndef _DIGSIG_H
+#define _DIGSIG_H
+
+#include <linux/key.h>
+
+enum pubkey_algo {
+	PUBKEY_ALGO_RSA,
+	PUBKEY_ALGO_MAX,
+};
+
+enum digest_algo {
+	DIGEST_ALGO_SHA1,
+	DIGEST_ALGO_SHA256,
+	DIGEST_ALGO_MAX
+};
+
+struct pubkey_hdr {
+	uint8_t		version;	/* key format version */
+	time_t		timestamp;	/* key made, always 0 for now */
+	uint8_t		algo;
+	uint8_t		nmpi;
+	char		mpi[0];
+} __packed;
+
+struct signature_hdr {
+	uint8_t		version;	/* signature format version */
+	time_t		timestamp;	/* signature made */
+	uint8_t		algo;
+	uint8_t		hash;
+	uint8_t		keyid[8];
+	uint8_t		nmpi;
+	char		mpi[0];
+} __packed;
+
+#if defined(CONFIG_DIGSIG) || defined(CONFIG_DIGSIG_MODULE)
+
+int digsig_verify(struct key *keyring, const char *sig, int siglen,
+					const char *digest, int digestlen);
+
+#else
+
+static inline int digsig_verify(struct key *keyring, const char *sig,
+				int siglen, const char *digest, int digestlen)
+{
+	return -EOPNOTSUPP;
+}
+
+#endif /* CONFIG_DIGSIG */
+
+#endif /* _DIGSIG_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index d411de53be4f..c1a89185fe6b 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -293,4 +293,12 @@ config MPILIB_EXTRA
 	  This code in unnecessary for RSA digital signature verification,
 	  and can be compiled if needed.
 
+config DIGSIG
+	tristate "In-kernel signature checker"
+	depends on CRYPTO
+	select MPILIB
+	help
+	  Digital signature verification. Currently only RSA is supported.
+	  Implementation is done using GnuPG MPI library
+
 endmenu
diff --git a/lib/Makefile b/lib/Makefile
index 6ba8cbf4c72f..0f5cff267aff 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -116,6 +116,7 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
 obj-$(CONFIG_CORDIC) += cordic.o
 
 obj-$(CONFIG_MPILIB) += mpi/
+obj-$(CONFIG_DIGSIG) += digsig.o
 
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
diff --git a/lib/digsig.c b/lib/digsig.c
new file mode 100644
index 000000000000..fd2402f67f89
--- /dev/null
+++ b/lib/digsig.c
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) 2011 Nokia Corporation
+ * Copyright (C) 2011 Intel Corporation
+ *
+ * Author:
+ * Dmitry Kasatkin <dmitry.kasatkin@nokia.com>
+ *                 <dmitry.kasatkin@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ *
+ * File: sign.c
+ *	implements signature (RSA) verification
+ *	pkcs decoding is based on LibTomCrypt code
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/key.h>
+#include <linux/crypto.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <keys/user-type.h>
+#include <linux/mpi.h>
+#include <linux/digsig.h>
+
+static struct crypto_shash *shash;
+
+static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg,
+			unsigned long  msglen,
+			unsigned long  modulus_bitlen,
+			unsigned char *out,
+			unsigned long *outlen,
+			int *is_valid)
+{
+	unsigned long modulus_len, ps_len, i;
+	int result;
+
+	/* default to invalid packet */
+	*is_valid = 0;
+
+	modulus_len = (modulus_bitlen >> 3) + (modulus_bitlen & 7 ? 1 : 0);
+
+	/* test message size */
+	if ((msglen > modulus_len) || (modulus_len < 11))
+		return -EINVAL;
+
+	/* separate encoded message */
+	if ((msg[0] != 0x00) || (msg[1] != (unsigned char)1)) {
+		result = -EINVAL;
+		goto bail;
+	}
+
+	for (i = 2; i < modulus_len - 1; i++)
+		if (msg[i] != 0xFF)
+			break;
+
+	/* separator check */
+	if (msg[i] != 0) {
+		/* There was no octet with hexadecimal value 0x00
+		to separate ps from m. */
+		result = -EINVAL;
+		goto bail;
+	}
+
+	ps_len = i - 2;
+
+	if (*outlen < (msglen - (2 + ps_len + 1))) {
+		*outlen = msglen - (2 + ps_len + 1);
+		result = -EOVERFLOW;
+		goto bail;
+	}
+
+	*outlen = (msglen - (2 + ps_len + 1));
+	memcpy(out, &msg[2 + ps_len + 1], *outlen);
+
+	/* valid packet */
+	*is_valid = 1;
+	result    = 0;
+bail:
+	return result;
+}
+
+/*
+ * RSA Signature verification with public key
+ */
+static int digsig_verify_rsa(struct key *key,
+		    const char *sig, int siglen,
+		       const char *h, int hlen)
+{
+	int err = -EINVAL;
+	unsigned long len;
+	unsigned long mlen, mblen;
+	unsigned nret, l;
+	int valid, head, i;
+	unsigned char *out1 = NULL, *out2 = NULL;
+	MPI in = NULL, res = NULL, pkey[2];
+	uint8_t *p, *datap, *endp;
+	struct user_key_payload *ukp;
+	struct pubkey_hdr *pkh;
+
+	down_read(&key->sem);
+	ukp = key->payload.data;
+	pkh = (struct pubkey_hdr *)ukp->data;
+
+	if (pkh->version != 1)
+		goto err1;
+
+	if (pkh->algo != PUBKEY_ALGO_RSA)
+		goto err1;
+
+	if (pkh->nmpi != 2)
+		goto err1;
+
+	datap = pkh->mpi;
+	endp = datap + ukp->datalen;
+
+	for (i = 0; i < pkh->nmpi; i++) {
+		unsigned int remaining = endp - datap;
+		pkey[i] = mpi_read_from_buffer(datap, &remaining);
+		datap += remaining;
+	}
+
+	mblen = mpi_get_nbits(pkey[0]);
+	mlen = (mblen + 7)/8;
+
+	err = -ENOMEM;
+
+	out1 = kzalloc(mlen, GFP_KERNEL);
+	if (!out1)
+		goto err;
+
+	out2 = kzalloc(mlen, GFP_KERNEL);
+	if (!out2)
+		goto err;
+
+	nret = siglen;
+	in = mpi_read_from_buffer(sig, &nret);
+	if (!in)
+		goto err;
+
+	res = mpi_alloc(mpi_get_nlimbs(in) * 2);
+	if (!res)
+		goto err;
+
+	err = mpi_powm(res, in, pkey[1], pkey[0]);
+	if (err)
+		goto err;
+
+	if (mpi_get_nlimbs(res) * BYTES_PER_MPI_LIMB > mlen) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	p = mpi_get_buffer(res, &l, NULL);
+	if (!p) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	len = mlen;
+	head = len - l;
+	memset(out1, 0, head);
+	memcpy(out1 + head, p, l);
+
+	err = -EINVAL;
+	pkcs_1_v1_5_decode_emsa(out1, len, mblen, out2, &len, &valid);
+
+	if (valid && len == hlen)
+		err = memcmp(out2, h, hlen);
+
+err:
+	mpi_free(in);
+	mpi_free(res);
+	kfree(out1);
+	kfree(out2);
+	mpi_free(pkey[0]);
+	mpi_free(pkey[1]);
+err1:
+	up_read(&key->sem);
+
+	return err;
+}
+
+/**
+ * digsig_verify() - digital signature verification with public key
+ * @keyring:	keyring to search key in
+ * @sig:	digital signature
+ * @sigen:	length of the signature
+ * @data:	data
+ * @datalen:	length of the data
+ * @return:	0 on success, -EINVAL otherwise
+ *
+ * Verifies data integrity against digital signature.
+ * Currently only RSA is supported.
+ * Normally hash of the content is used as a data for this function.
+ *
+ */
+int digsig_verify(struct key *keyring, const char *sig, int siglen,
+						const char *data, int datalen)
+{
+	int err = -ENOMEM;
+	struct signature_hdr *sh = (struct signature_hdr *)sig;
+	struct shash_desc *desc = NULL;
+	unsigned char hash[SHA1_DIGEST_SIZE];
+	struct key *key;
+	char name[20];
+
+	if (siglen < sizeof(*sh) + 2)
+		return -EINVAL;
+
+	if (sh->algo != PUBKEY_ALGO_RSA)
+		return -ENOTSUPP;
+
+	sprintf(name, "%llX", __be64_to_cpup((uint64_t *)sh->keyid));
+
+	if (keyring) {
+		/* search in specific keyring */
+		key_ref_t kref;
+		kref = keyring_search(make_key_ref(keyring, 1UL),
+						&key_type_user, name);
+		if (IS_ERR(kref))
+			key = ERR_PTR(PTR_ERR(kref));
+		else
+			key = key_ref_to_ptr(kref);
+	} else {
+		key = request_key(&key_type_user, name, NULL);
+	}
+	if (IS_ERR(key)) {
+		pr_err("key not found, id: %s\n", name);
+		return PTR_ERR(key);
+	}
+
+	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(shash),
+		       GFP_KERNEL);
+	if (!desc)
+		goto err;
+
+	desc->tfm = shash;
+	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	crypto_shash_init(desc);
+	crypto_shash_update(desc, data, datalen);
+	crypto_shash_update(desc, sig, sizeof(*sh));
+	crypto_shash_final(desc, hash);
+
+	kfree(desc);
+
+	/* pass signature mpis address */
+	err = digsig_verify_rsa(key, sig + sizeof(*sh), siglen - sizeof(*sh),
+			     hash, sizeof(hash));
+
+err:
+	key_put(key);
+
+	return err ? -EINVAL : 0;
+}
+EXPORT_SYMBOL_GPL(digsig_verify);
+
+static int __init digsig_init(void)
+{
+	shash = crypto_alloc_shash("sha1", 0, 0);
+	if (IS_ERR(shash)) {
+		pr_err("shash allocation failed\n");
+		return  PTR_ERR(shash);
+	}
+
+	return 0;
+
+}
+
+static void __exit digsig_cleanup(void)
+{
+	crypto_free_shash(shash);
+}
+
+module_init(digsig_init);
+module_exit(digsig_cleanup);
+
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From c74d084f914e16e42730bcf625ab3f37a4cae8d4 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@googlemail.com>
Date: Sat, 15 Oct 2011 00:14:49 +0200
Subject: mac80211: handle HT PHY BSS membership selector value correctly

802.11n-2009 extends the supported rates element with a
magic value which can be used to prevent legacy stations
from joining the BSS.

However, this magic value is not a rate like the others
and the magic can simply be ignored/skipped at this late
stage.

Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>---
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h |  3 ++
 net/mac80211/mlme.c       | 90 ++++++++++++++++++++++++++---------------------
 2 files changed, 52 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 48363c3c40f8..9789aedb2453 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -770,6 +770,9 @@ struct ieee80211_mgmt {
 	} u;
 } __attribute__ ((packed));
 
+/* Supported Rates value encodings in 802.11n-2009 7.3.2.2 */
+#define BSS_MEMBERSHIP_SELECTOR_HT_PHY	127
+
 /* mgmt header + 1 byte category code */
 #define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u)
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d3b408cda08d..b25567a32f92 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1466,6 +1466,47 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
 	return RX_MGMT_CFG80211_DISASSOC;
 }
 
+static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
+				u8 *supp_rates, unsigned int supp_rates_len,
+				u32 *rates, u32 *basic_rates,
+				bool *have_higher_than_11mbit,
+				int *min_rate, int *min_rate_index)
+{
+	int i, j;
+
+	for (i = 0; i < supp_rates_len; i++) {
+		int rate = (supp_rates[i] & 0x7f) * 5;
+		bool is_basic = !!(supp_rates[i] & 0x80);
+
+		if (rate > 110)
+			*have_higher_than_11mbit = true;
+
+		/*
+		 * BSS_MEMBERSHIP_SELECTOR_HT_PHY is defined in 802.11n-2009
+		 * 7.3.2.2 as a magic value instead of a rate. Hence, skip it.
+		 *
+		 * Note: Even through the membership selector and the basic
+		 *	 rate flag share the same bit, they are not exactly
+		 *	 the same.
+		 */
+		if (!!(supp_rates[i] & 0x80) &&
+		    (supp_rates[i] & 0x7f) == BSS_MEMBERSHIP_SELECTOR_HT_PHY)
+			continue;
+
+		for (j = 0; j < sband->n_bitrates; j++) {
+			if (sband->bitrates[j].bitrate == rate) {
+				*rates |= BIT(j);
+				if (is_basic)
+					*basic_rates |= BIT(j);
+				if (rate < *min_rate) {
+					*min_rate = rate;
+					*min_rate_index = j;
+				}
+				break;
+			}
+		}
+	}
+}
 
 static bool ieee80211_assoc_success(struct ieee80211_work *wk,
 				    struct ieee80211_mgmt *mgmt, size_t len)
@@ -1482,7 +1523,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
 	struct ieee802_11_elems elems;
 	struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
 	u32 changed = 0;
-	int i, j, err;
+	int err;
 	bool have_higher_than_11mbit = false;
 	u16 ap_ht_cap_flags;
 	int min_rate = INT_MAX, min_rate_index = -1;
@@ -1540,47 +1581,14 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
 	basic_rates = 0;
 	sband = local->hw.wiphy->bands[wk->chan->band];
 
-	for (i = 0; i < elems.supp_rates_len; i++) {
-		int rate = (elems.supp_rates[i] & 0x7f) * 5;
-		bool is_basic = !!(elems.supp_rates[i] & 0x80);
-
-		if (rate > 110)
-			have_higher_than_11mbit = true;
+	ieee80211_get_rates(sband, elems.supp_rates, elems.supp_rates_len,
+			    &rates, &basic_rates, &have_higher_than_11mbit,
+			    &min_rate, &min_rate_index);
 
-		for (j = 0; j < sband->n_bitrates; j++) {
-			if (sband->bitrates[j].bitrate == rate) {
-				rates |= BIT(j);
-				if (is_basic)
-					basic_rates |= BIT(j);
-				if (rate < min_rate) {
-					min_rate = rate;
-					min_rate_index = j;
-				}
-				break;
-			}
-		}
-	}
-
-	for (i = 0; i < elems.ext_supp_rates_len; i++) {
-		int rate = (elems.ext_supp_rates[i] & 0x7f) * 5;
-		bool is_basic = !!(elems.ext_supp_rates[i] & 0x80);
-
-		if (rate > 110)
-			have_higher_than_11mbit = true;
-
-		for (j = 0; j < sband->n_bitrates; j++) {
-			if (sband->bitrates[j].bitrate == rate) {
-				rates |= BIT(j);
-				if (is_basic)
-					basic_rates |= BIT(j);
-				if (rate < min_rate) {
-					min_rate = rate;
-					min_rate_index = j;
-				}
-				break;
-			}
-		}
-	}
+	ieee80211_get_rates(sband, elems.ext_supp_rates,
+			    elems.ext_supp_rates_len, &rates, &basic_rates,
+			    &have_higher_than_11mbit,
+			    &min_rate, &min_rate_index);
 
 	/*
 	 * some buggy APs don't advertise basic_rates. use the lowest
-- 
cgit v1.2.3


From c2e889a7f7947bc346e0a341e793fd5cb471d884 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@qca.qualcomm.com>
Date: Wed, 2 Nov 2011 23:34:56 +0200
Subject: ieee80211: Define cipher suite selector for WPI-SMS4

This value is used for WPI-SMS4 in ISO/IEC JTC 1 N 9880.

Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 9789aedb2453..ffc073ab3ff8 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1555,6 +1555,8 @@ enum ieee80211_sa_query_action {
 #define WLAN_CIPHER_SUITE_WEP104	0x000FAC05
 #define WLAN_CIPHER_SUITE_AES_CMAC	0x000FAC06
 
+#define WLAN_CIPHER_SUITE_SMS4		0x00147201
+
 /* AKM suite selectors */
 #define WLAN_AKM_SUITE_8021X		0x000FAC01
 #define WLAN_AKM_SUITE_PSK		0x000FAC02
-- 
cgit v1.2.3


From 6e3e939f3b1bf8534b32ad09ff199d88800835a0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 9 Nov 2011 10:15:42 +0100
Subject: net: add wireless TX status socket option

The 802.1X EAPOL handshake hostapd does requires
knowing whether the frame was ack'ed by the peer.
Currently, we fudge this pretty badly by not even
transmitting the frame as a normal data frame but
injecting it with radiotap and getting the status
out of radiotap monitor as well. This is rather
complex, confuses users (mon.wlan0 presence) and
doesn't work with all hardware.

To get rid of that hack, introduce a real wifi TX
status option for data frame transmissions.

This works similar to the existing TX timestamping
in that it reflects the SKB back to the socket's
error queue with a SCM_WIFI_STATUS cmsg that has
an int indicating ACK status (0/1).

Since it is possible that at some point we will
want to have TX timestamping and wifi status in a
single errqueue SKB (there's little point in not
doing that), redefine SO_EE_ORIGIN_TIMESTAMPING
to SO_EE_ORIGIN_TXSTATUS which can collect more
than just the timestamp; keep the old constant
as an alias of course. Currently the internal APIs
don't make that possible, but it wouldn't be hard
to split them up in a way that makes it possible.

Thanks to Neil Horman for helping me figure out
the functions that add the control messages.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 arch/alpha/include/asm/socket.h   |  3 +++
 arch/arm/include/asm/socket.h     |  3 +++
 arch/avr32/include/asm/socket.h   |  3 +++
 arch/cris/include/asm/socket.h    |  3 +++
 arch/frv/include/asm/socket.h     |  3 +++
 arch/h8300/include/asm/socket.h   |  3 +++
 arch/ia64/include/asm/socket.h    |  3 +++
 arch/m32r/include/asm/socket.h    |  3 +++
 arch/m68k/include/asm/socket.h    |  3 +++
 arch/mips/include/asm/socket.h    |  3 +++
 arch/mn10300/include/asm/socket.h |  3 +++
 arch/parisc/include/asm/socket.h  |  3 +++
 arch/powerpc/include/asm/socket.h |  3 +++
 arch/s390/include/asm/socket.h    |  3 +++
 arch/sparc/include/asm/socket.h   |  3 +++
 arch/xtensa/include/asm/socket.h  |  3 +++
 include/asm-generic/socket.h      |  3 +++
 include/linux/errqueue.h          |  3 ++-
 include/linux/skbuff.h            | 19 +++++++++++++++++--
 include/net/sock.h                |  6 ++++++
 net/core/skbuff.c                 | 20 ++++++++++++++++++++
 net/core/sock.c                   |  9 +++++++++
 net/socket.c                      | 18 ++++++++++++++++++
 23 files changed, 123 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h
index 06edfefc3373..082355f159e6 100644
--- a/arch/alpha/include/asm/socket.h
+++ b/arch/alpha/include/asm/socket.h
@@ -69,6 +69,9 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h
index 90ffd04b8e74..dec6f9afb3cf 100644
--- a/arch/arm/include/asm/socket.h
+++ b/arch/arm/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h
index c8d1fae49476..247b88c760be 100644
--- a/arch/avr32/include/asm/socket.h
+++ b/arch/avr32/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* __ASM_AVR32_SOCKET_H */
diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h
index 1a4a61909ca8..e269264df7c4 100644
--- a/arch/cris/include/asm/socket.h
+++ b/arch/cris/include/asm/socket.h
@@ -64,6 +64,9 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_SOCKET_H */
 
 
diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h
index a6b26880c1ec..ce80fdadcce5 100644
--- a/arch/frv/include/asm/socket.h
+++ b/arch/frv/include/asm/socket.h
@@ -62,5 +62,8 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h
index 04c0f4596eb5..cf1daab6f27e 100644
--- a/arch/h8300/include/asm/socket.h
+++ b/arch/h8300/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h
index 51427eaa51ba..4b03664e3fb5 100644
--- a/arch/ia64/include/asm/socket.h
+++ b/arch/ia64/include/asm/socket.h
@@ -71,4 +71,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h
index 469787c30098..e8b8c5bb053c 100644
--- a/arch/m32r/include/asm/socket.h
+++ b/arch/m32r/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h
index 9bf49c87d954..d4708ce466e0 100644
--- a/arch/m68k/include/asm/socket.h
+++ b/arch/m68k/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h
index 9de5190f2487..ad5c0a7a02a7 100644
--- a/arch/mips/include/asm/socket.h
+++ b/arch/mips/include/asm/socket.h
@@ -82,6 +82,9 @@ To add: #define SO_REUSEPORT 0x0200	/* Allow local address and port reuse.  */
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #ifdef __KERNEL__
 
 /** sock_type - Socket types
diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h
index 4e60c4281288..876356d78522 100644
--- a/arch/mn10300/include/asm/socket.h
+++ b/arch/mn10300/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h
index 225b7d6a1a0a..d28c51b61067 100644
--- a/arch/parisc/include/asm/socket.h
+++ b/arch/parisc/include/asm/socket.h
@@ -61,6 +61,9 @@
 
 #define SO_RXQ_OVFL             0x4021
 
+#define SO_WIFI_STATUS		0x4022
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h
index 866f7606da68..2fc2af8fbf59 100644
--- a/arch/powerpc/include/asm/socket.h
+++ b/arch/powerpc/include/asm/socket.h
@@ -69,4 +69,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h
index fdff1e995c73..67b5c1b14b51 100644
--- a/arch/s390/include/asm/socket.h
+++ b/arch/s390/include/asm/socket.h
@@ -70,4 +70,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h
index 9d3fefcff2f5..8af1b64168b3 100644
--- a/arch/sparc/include/asm/socket.h
+++ b/arch/sparc/include/asm/socket.h
@@ -58,6 +58,9 @@
 
 #define SO_RXQ_OVFL             0x0024
 
+#define SO_WIFI_STATUS		0x0025
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h
index cbdf2ffaacff..bb06968be227 100644
--- a/arch/xtensa/include/asm/socket.h
+++ b/arch/xtensa/include/asm/socket.h
@@ -73,4 +73,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h
index 9a6115e7cf63..49c1704173e7 100644
--- a/include/asm-generic/socket.h
+++ b/include/asm-generic/socket.h
@@ -64,4 +64,7 @@
 #define SO_DOMAIN		39
 
 #define SO_RXQ_OVFL             40
+
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS	SO_WIFI_STATUS
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h
index 034072cea853..c9f522bd17e4 100644
--- a/include/linux/errqueue.h
+++ b/include/linux/errqueue.h
@@ -17,7 +17,8 @@ struct sock_extended_err {
 #define SO_EE_ORIGIN_LOCAL	1
 #define SO_EE_ORIGIN_ICMP	2
 #define SO_EE_ORIGIN_ICMP6	3
-#define SO_EE_ORIGIN_TIMESTAMPING 4
+#define SO_EE_ORIGIN_TXSTATUS	4
+#define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 
 #define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6a6b352326d7..ff7e1306a2d2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -218,6 +218,9 @@ enum {
 
 	/* device driver supports TX zero-copy buffers */
 	SKBTX_DEV_ZEROCOPY = 1 << 4,
+
+	/* generate wifi status information (where possible) */
+	SKBTX_WIFI_STATUS = 1 << 5,
 };
 
 /*
@@ -352,6 +355,8 @@ typedef unsigned char *sk_buff_data_t;
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
  *	@l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport
  *		ports.
+ *	@wifi_acked_valid: wifi_acked was set
+ *	@wifi_acked: whether frame was acked on wifi or not
  *	@dma_cookie: a cookie to one of several possible DMA operations
  *		done by skb DMA functions
  *	@secmark: security marking
@@ -445,10 +450,11 @@ struct sk_buff {
 #endif
 	__u8			ooo_okay:1;
 	__u8			l4_rxhash:1;
+	__u8			wifi_acked_valid:1;
+	__u8			wifi_acked:1;
+	/* 10/12 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
-	/* 0/13 bit hole */
-
 #ifdef CONFIG_NET_DMA
 	dma_cookie_t		dma_cookie;
 #endif
@@ -2263,6 +2269,15 @@ static inline void skb_tx_timestamp(struct sk_buff *skb)
 	sw_tx_timestamp(skb);
 }
 
+/**
+ * skb_complete_wifi_ack - deliver skb with wifi status
+ *
+ * @skb: the original outgoing packet
+ * @acked: ack status
+ *
+ */
+void skb_complete_wifi_ack(struct sk_buff *skb, bool acked);
+
 extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
 extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 5ac682f73d63..fa6f5381c5d6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -564,6 +564,7 @@ enum sock_flags {
 	SOCK_FASYNC, /* fasync() active */
 	SOCK_RXQ_OVFL,
 	SOCK_ZEROCOPY, /* buffers from userspace */
+	SOCK_WIFI_STATUS, /* push wifi status to userspace */
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
@@ -1714,6 +1715,8 @@ static inline int sock_intr_errno(long timeo)
 
 extern void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 	struct sk_buff *skb);
+extern void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
+	struct sk_buff *skb);
 
 static __inline__ void
 sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
@@ -1741,6 +1744,9 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 		__sock_recv_timestamp(msg, sk, skb);
 	else
 		sk->sk_stamp = kt;
+
+	if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid)
+		__sock_recv_wifi_status(msg, sk, skb);
 }
 
 extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ca4db40e75b8..2f6babd5a570 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3168,6 +3168,26 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 }
 EXPORT_SYMBOL_GPL(skb_tstamp_tx);
 
+void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
+{
+	struct sock *sk = skb->sk;
+	struct sock_exterr_skb *serr;
+	int err;
+
+	skb->wifi_acked_valid = 1;
+	skb->wifi_acked = acked;
+
+	serr = SKB_EXT_ERR(skb);
+	memset(serr, 0, sizeof(*serr));
+	serr->ee.ee_errno = ENOMSG;
+	serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
+
+	err = sock_queue_err_skb(sk, skb);
+	if (err)
+		kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
+
 
 /**
  * skb_partial_csum_set - set up and verify partial csum values for packet
diff --git a/net/core/sock.c b/net/core/sock.c
index 4ed7b1d12f5e..cbdf51c0d5ac 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -740,6 +740,11 @@ set_rcvbuf:
 	case SO_RXQ_OVFL:
 		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
 		break;
+
+	case SO_WIFI_STATUS:
+		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -961,6 +966,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
 		break;
 
+	case SO_WIFI_STATUS:
+		v.val = !!sock_flag(sk, SOCK_WIFI_STATUS);
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/socket.c b/net/socket.c
index 2877647f347b..425ef4270460 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -538,6 +538,8 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
 		*tx_flags |= SKBTX_HW_TSTAMP;
 	if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
 		*tx_flags |= SKBTX_SW_TSTAMP;
+	if (sock_flag(sk, SOCK_WIFI_STATUS))
+		*tx_flags |= SKBTX_WIFI_STATUS;
 	return 0;
 }
 EXPORT_SYMBOL(sock_tx_timestamp);
@@ -674,6 +676,22 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 }
 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 
+void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
+	struct sk_buff *skb)
+{
+	int ack;
+
+	if (!sock_flag(sk, SOCK_WIFI_STATUS))
+		return;
+	if (!skb->wifi_acked_valid)
+		return;
+
+	ack = skb->wifi_acked;
+
+	put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
+}
+EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
+
 static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
 				   struct sk_buff *skb)
 {
-- 
cgit v1.2.3


From 6cc00d545a21ed26696f3bda865ebf11eccbf2b5 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@cozybit.com>
Date: Thu, 3 Nov 2011 21:11:11 -0700
Subject: mac80211: QoS multicast frames have No Ack policy

Previously QoS multicast frames had the Normal Acknowledgment QoS
control bits set. This would cause broadcast frames to be discarded by
peers with which we have a BA session, since their sequence number would
fall outside the allowed range. Set No Ack QoS control bits on multicast
QoS frames and filter these in de-aggregation code.

Signed-off-by: Thomas Pedersen <thomas@cozybit.com>

v2: Use proper QoS Ack Policy ctl field mask (Christian)

v3: Clean up conditional (Johannes)
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 1 +
 net/mac80211/rx.c         | 9 ++++++++-
 net/mac80211/wme.c        | 3 ++-
 3 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index ffc073ab3ff8..66cedf6eb5c2 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -128,6 +128,7 @@
 #define IEEE80211_QOS_CTL_ACK_POLICY_NOACK	0x0020
 #define IEEE80211_QOS_CTL_ACK_POLICY_NO_EXPL	0x0040
 #define IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK	0x0060
+#define IEEE80211_QOS_CTL_ACK_POLICY_MASK	0x0060
 /* A-MSDU 802.11n */
 #define IEEE80211_QOS_CTL_A_MSDU_PRESENT	0x0080
 /* Mesh Control 802.11s */
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3173dcfc2136..72c1eb4eb451 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -747,7 +747,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx)
 	struct sta_info *sta = rx->sta;
 	struct tid_ampdu_rx *tid_agg_rx;
 	u16 sc;
-	int tid;
+	u8 tid, ack_policy;
 
 	if (!ieee80211_is_data_qos(hdr->frame_control))
 		goto dont_reorder;
@@ -760,6 +760,8 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx)
 	if (!sta)
 		goto dont_reorder;
 
+	ack_policy = *ieee80211_get_qos_ctl(hdr) &
+		     IEEE80211_QOS_CTL_ACK_POLICY_MASK;
 	tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
 
 	tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
@@ -770,6 +772,11 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx)
 	if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC)))
 		goto dont_reorder;
 
+	/* not part of a BA session */
+	if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK &&
+	    ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL)
+		goto dont_reorder;
+
 	/* new, potentially un-ordered, ampdu frame - process it */
 
 	/* reset session timer */
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index d0240bba45f3..d4f789a4e4f1 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -150,7 +150,8 @@ void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata,
 		/* preserve EOSP bit */
 		ack_policy = *p & IEEE80211_QOS_CTL_EOSP;
 
-		if (unlikely(sdata->local->wifi_wme_noack_test))
+		if (unlikely(sdata->local->wifi_wme_noack_test) ||
+		    is_multicast_ether_addr(hdr->addr1))
 			ack_policy |= IEEE80211_QOS_CTL_ACK_POLICY_NOACK;
 		/* qos header is 2 bytes */
 		*p++ = ack_policy | tid;
-- 
cgit v1.2.3


From 28946da763e8b8d8ffd01ab861b684a4afb4bc3b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 4 Nov 2011 11:18:12 +0100
Subject: nl80211: allow subscribing to unexpected class3 frames

To implement AP mode without monitor interfaces we
need to be able to send a deauth to stations that
send frames without being associated. Enable this
by adding a new nl80211 event for such frames that
an application can subscribe to.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 12 +++++++++
 include/net/cfg80211.h  | 17 +++++++++++++
 net/wireless/mlme.c     | 16 ++++++++++++
 net/wireless/nl80211.c  | 66 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.h  |  3 +++
 5 files changed, 114 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 8049bf77d799..9107adc73e0b 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -509,6 +509,16 @@
  * @NL80211_CMD_TDLS_OPER: Perform a high-level TDLS command (e.g. link setup).
  * @NL80211_CMD_TDLS_MGMT: Send a TDLS management frame.
  *
+ * @NL80211_CMD_UNEXPECTED_FRAME: Used by an application controlling an AP
+ *	(or GO) interface (i.e. hostapd) to ask for unexpected frames to
+ *	implement sending deauth to stations that send unexpected class 3
+ *	frames. Also used as the event sent by the kernel when such a frame
+ *	is received.
+ *	For the event, the %NL80211_ATTR_MAC attribute carries the TA and
+ *	other attributes like the interface index are present.
+ *	If used as the command it must have an interface index and you can
+ *	only unsubscribe from the event by closing the socket.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -638,6 +648,8 @@ enum nl80211_commands {
 	NL80211_CMD_TDLS_OPER,
 	NL80211_CMD_TDLS_MGMT,
 
+	NL80211_CMD_UNEXPECTED_FRAME,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0c71d4a30cd6..ef118e452589 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2183,6 +2183,8 @@ struct wireless_dev {
 
 	int beacon_interval;
 
+	u32 ap_unexpected_nlpid;
+
 #ifdef CONFIG_CFG80211_WEXT
 	/* wext data */
 	struct {
@@ -3193,6 +3195,21 @@ void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid,
 void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
 				     const u8 *bssid, bool preauth, gfp_t gfp);
 
+/**
+ * cfg80211_rx_spurious_frame - inform userspace about a spurious frame
+ * @dev: The device the frame matched to
+ * @addr: the transmitter address
+ * @gfp: context flags
+ *
+ * This function is used in AP mode (only!) to inform userspace that
+ * a spurious class 3 frame was received, to be able to deauth the
+ * sender.
+ * Returns %true if the frame was passed to userspace (or this failed
+ * for a reason other than not having a subscription.)
+ */
+bool cfg80211_rx_spurious_frame(struct net_device *dev,
+				const u8 *addr, gfp_t gfp);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 21fc9702f81c..f4d868b1e11c 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -879,6 +879,9 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid)
 	}
 
 	spin_unlock_bh(&wdev->mgmt_registrations_lock);
+
+	if (nlpid == wdev->ap_unexpected_nlpid)
+		wdev->ap_unexpected_nlpid = 0;
 }
 
 void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
@@ -1107,3 +1110,16 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
 	nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp);
 }
 EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify);
+
+bool cfg80211_rx_spurious_frame(struct net_device *dev,
+				const u8 *addr, gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
+		    wdev->iftype != NL80211_IFTYPE_P2P_GO))
+		return false;
+
+	return nl80211_unexpected_frame(dev, addr, gfp);
+}
+EXPORT_SYMBOL(cfg80211_rx_spurious_frame);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2bcaa579cebf..9910c3cb9a85 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5832,6 +5832,23 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
 	return err;
 }
 
+static int nl80211_register_unexpected_frame(struct sk_buff *skb,
+					     struct genl_info *info)
+{
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+	if (wdev->iftype != NL80211_IFTYPE_AP &&
+	    wdev->iftype != NL80211_IFTYPE_P2P_GO)
+		return -EINVAL;
+
+	if (wdev->ap_unexpected_nlpid)
+		return -EBUSY;
+
+	wdev->ap_unexpected_nlpid = info->snd_pid;
+	return 0;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -6387,6 +6404,14 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_UNEXPECTED_FRAME,
+		.doit = nl80211_register_unexpected_frame,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -7171,6 +7196,47 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
+bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+	u32 nlpid = ACCESS_ONCE(wdev->ap_unexpected_nlpid);
+
+	if (!nlpid)
+		return false;
+
+	msg = nlmsg_new(100, gfp);
+	if (!msg)
+		return true;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_UNEXPECTED_FRAME);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return true;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
+	NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, addr);
+
+	err = genlmsg_end(msg, hdr);
+	if (err < 0) {
+		nlmsg_free(msg);
+		return true;
+	}
+
+	genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid);
+	return true;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+	return true;
+}
+
 int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 		      struct net_device *netdev, u32 nlpid,
 		      int freq, const u8 *buf, size_t len, gfp_t gfp)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index f24a1fbeaf19..d94456e54f4e 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -117,4 +117,7 @@ void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
 				    struct net_device *netdev, int index,
 				    const u8 *bssid, bool preauth, gfp_t gfp);
 
+bool nl80211_unexpected_frame(struct net_device *dev,
+			      const u8 *addr, gfp_t gfp);
+
 #endif /* __NET_WIRELESS_NL80211_H */
-- 
cgit v1.2.3


From 562a74803f4881772ba2375ec4e5aa0ad90f4caa Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 7 Nov 2011 12:39:33 +0100
Subject: nl80211: advertise device AP SME

Add the ability to advertise that the device
contains the AP SME and what features it can
support. There are currently no features in
the bitmap -- probe response offload will be
advertised by a few patches Arik is working
on now (who took over from Guy Eilam) and a
device with AP SME will typically implement
and require response offload.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath6kl/init.c |  3 ++-
 include/linux/nl80211.h                | 15 +++++++++++++++
 include/net/cfg80211.h                 |  6 ++++++
 net/wireless/core.c                    |  4 ++++
 net/wireless/nl80211.c                 |  4 ++++
 5 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath/ath6kl/init.c b/drivers/net/wireless/ath/ath6kl/init.c
index c1d2366704b5..81e0031012ca 100644
--- a/drivers/net/wireless/ath/ath6kl/init.c
+++ b/drivers/net/wireless/ath/ath6kl/init.c
@@ -1548,7 +1548,8 @@ static int ath6kl_init(struct net_device *dev)
 	ar->conf_flags = ATH6KL_CONF_IGNORE_ERP_BARKER |
 			 ATH6KL_CONF_ENABLE_11N | ATH6KL_CONF_ENABLE_TX_BURST;
 
-	ar->wdev->wiphy->flags |= WIPHY_FLAG_SUPPORTS_FW_ROAM;
+	ar->wdev->wiphy->flags |= WIPHY_FLAG_SUPPORTS_FW_ROAM |
+				  WIPHY_FLAG_HAVE_AP_SME;
 
 	status = ath6kl_target_config_wlan_params(ar);
 	if (!status)
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 9107adc73e0b..ff39e4b234d4 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1121,6 +1121,11 @@ enum nl80211_commands {
  *	%NL80211_CMD_TDLS_MGMT. Otherwise %NL80211_CMD_TDLS_OPER should be
  *	used for asking the driver to perform a TDLS operation.
  *
+ * @NL80211_ATTR_DEVICE_AP_SME: This u32 attribute may be listed for devices
+ *	that have AP support to indicate that they have the AP SME integrated
+ *	with support for the features listed in this attribute, see
+ *	&enum nl80211_ap_sme_features.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1349,6 +1354,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_TDLS_SUPPORT,
 	NL80211_ATTR_TDLS_EXTERNAL_SETUP,
 
+	NL80211_ATTR_DEVICE_AP_SME,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -2662,4 +2669,12 @@ enum nl80211_tdls_operation {
 	NL80211_TDLS_DISABLE_LINK,
 };
 
+/*
+ * enum nl80211_ap_sme_features - device-integrated AP features
+ * Reserved for future use, no bits are defined in
+ * NL80211_ATTR_DEVICE_AP_SME yet.
+enum nl80211_ap_sme_features {
+};
+ */
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index ef118e452589..86d207da6cce 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1679,6 +1679,7 @@ struct cfg80211_ops {
  *	teardown packets should be sent through the @NL80211_CMD_TDLS_MGMT
  *	command. When this flag is not set, @NL80211_CMD_TDLS_OPER should be
  *	used for asking the driver/firmware to perform a TDLS operation.
+ * @WIPHY_FLAG_HAVE_AP_SME: device integrates AP SME
  */
 enum wiphy_flags {
 	WIPHY_FLAG_CUSTOM_REGULATORY		= BIT(0),
@@ -1697,6 +1698,7 @@ enum wiphy_flags {
 	WIPHY_FLAG_AP_UAPSD			= BIT(14),
 	WIPHY_FLAG_SUPPORTS_TDLS		= BIT(15),
 	WIPHY_FLAG_TDLS_EXTERNAL_SETUP		= BIT(16),
+	WIPHY_FLAG_HAVE_AP_SME			= BIT(17),
 };
 
 /**
@@ -1907,6 +1909,8 @@ struct wiphy_wowlan_support {
  *	may request, if implemented.
  *
  * @wowlan: WoWLAN support information
+ *
+ * @ap_sme_capa: AP SME capabilities, flags from &enum nl80211_ap_sme_features.
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -1930,6 +1934,8 @@ struct wiphy {
 
 	u32 flags;
 
+	u32 ap_sme_capa;
+
 	enum cfg80211_signal_type signal_type;
 
 	int bss_priv_size;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 220f3bd176f8..ccdfed897651 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -492,6 +492,10 @@ int wiphy_register(struct wiphy *wiphy)
 		    !(wiphy->wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY)))
 		return -EINVAL;
 
+	if (WARN_ON(wiphy->ap_sme_capa &&
+		    !(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME)))
+		return -EINVAL;
+
 	if (WARN_ON(wiphy->addresses && !wiphy->n_addresses))
 		return -EINVAL;
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9910c3cb9a85..2094c8468d78 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1007,6 +1007,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	if (nl80211_put_iface_combinations(&dev->wiphy, msg))
 		goto nla_put_failure;
 
+	if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME)
+		NLA_PUT_U32(msg, NL80211_ATTR_DEVICE_AP_SME,
+			    dev->wiphy.ap_sme_capa);
+
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
-- 
cgit v1.2.3


From 7f6cf311a594c1e7ca8120367dd1d4c685aabff1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 4 Nov 2011 11:18:15 +0100
Subject: nl80211: add API to probe a client

When the AP SME in hostapd is used it wants to
probe the clients when they have been idle for
some time. Add explicit API to support this.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  10 +++++
 include/net/cfg80211.h  |  17 ++++++++
 net/wireless/nl80211.c  | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index ff39e4b234d4..901a70d327d1 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -519,6 +519,14 @@
  *	If used as the command it must have an interface index and you can
  *	only unsubscribe from the event by closing the socket.
  *
+ * @NL80211_CMD_PROBE_CLIENT: Probe an associated station on an AP interface
+ *	by sending a null data frame to it and reporting when the frame is
+ *	acknowleged. This is used to allow timing out inactive clients. Uses
+ *	%NL80211_ATTR_IFINDEX and %NL80211_ATTR_MAC. The command returns a
+ *	direct reply with an %NL80211_ATTR_COOKIE that is later used to match
+ *	up the event with the request. The event includes the same data and
+ *	has %NL80211_ATTR_ACK set if the frame was ACKed.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -650,6 +658,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_UNEXPECTED_FRAME,
 
+	NL80211_CMD_PROBE_CLIENT,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 86d207da6cce..389e85e8c03d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1432,6 +1432,9 @@ struct cfg80211_gtk_rekey_data {
  *
  * @tdls_mgmt: Transmit a TDLS management frame.
  * @tdls_oper: Perform a high-level TDLS operation (e.g. TDLS link setup).
+ *
+ * @probe_client: probe an associated client, must return a cookie that it
+ *	later passes to cfg80211_probe_status().
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -1621,6 +1624,9 @@ struct cfg80211_ops {
 			     u16 status_code, const u8 *buf, size_t len);
 	int	(*tdls_oper)(struct wiphy *wiphy, struct net_device *dev,
 			     u8 *peer, enum nl80211_tdls_operation oper);
+
+	int	(*probe_client)(struct wiphy *wiphy, struct net_device *dev,
+				const u8 *peer, u64 *cookie);
 };
 
 /*
@@ -3216,6 +3222,17 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
 bool cfg80211_rx_spurious_frame(struct net_device *dev,
 				const u8 *addr, gfp_t gfp);
 
+/**
+ * cfg80211_probe_status - notify userspace about probe status
+ * @dev: the device the probe was sent on
+ * @addr: the address of the peer
+ * @cookie: the cookie filled in @probe_client previously
+ * @acked: indicates whether probe was acked or not
+ * @gfp: allocation flags
+ */
+void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
+			   u64 cookie, bool acked, gfp_t gfp);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2094c8468d78..a8eda12b46a8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -890,6 +890,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	}
 	if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
 		CMD(sched_scan_start, START_SCHED_SCAN);
+	CMD(probe_client, PROBE_CLIENT);
 
 #undef CMD
 
@@ -5853,6 +5854,59 @@ static int nl80211_register_unexpected_frame(struct sk_buff *skb,
 	return 0;
 }
 
+static int nl80211_probe_client(struct sk_buff *skb,
+				struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct sk_buff *msg;
+	void *hdr;
+	const u8 *addr;
+	u64 cookie;
+	int err;
+
+	if (wdev->iftype != NL80211_IFTYPE_AP &&
+	    wdev->iftype != NL80211_IFTYPE_P2P_GO)
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_MAC])
+		return -EINVAL;
+
+	if (!rdev->ops->probe_client)
+		return -EOPNOTSUPP;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+			     NL80211_CMD_PROBE_CLIENT);
+
+	if (IS_ERR(hdr)) {
+		err = PTR_ERR(hdr);
+		goto free_msg;
+	}
+
+	addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	err = rdev->ops->probe_client(&rdev->wiphy, dev, addr, &cookie);
+	if (err)
+		goto free_msg;
+
+	NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
+
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_reply(msg, info);
+
+ nla_put_failure:
+	err = -ENOBUFS;
+ free_msg:
+	nlmsg_free(msg);
+	return err;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -6416,6 +6470,14 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_PROBE_CLIENT,
+		.doit = nl80211_probe_client,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -7478,6 +7540,48 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
+void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
+			   u64 cookie, bool acked, gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PROBE_CLIENT);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
+	NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, addr);
+	NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
+	if (acked)
+		NLA_PUT_FLAG(msg, NL80211_ATTR_ACK);
+
+	err = genlmsg_end(msg, hdr);
+	if (err < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+				nl80211_mlme_mcgrp.id, gfp);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_probe_status);
+
 static int nl80211_netlink_notify(struct notifier_block * nb,
 				  unsigned long state,
 				  void *_notify)
-- 
cgit v1.2.3


From 5e760230e42cf759bd923457ca2753aacf2e656e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 4 Nov 2011 11:18:17 +0100
Subject: cfg80211: allow registering to beacons

Add the ability to register to received beacon frames
to allow implementing OLBC logic in userspace. The
registration is per wiphy since there's no point in
receiving the same frame multiple times.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  7 +++++
 include/net/cfg80211.h  | 20 ++++++++++++++
 net/wireless/core.h     |  2 ++
 net/wireless/nl80211.c  | 70 ++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 98 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 901a70d327d1..c29a284c27e6 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -527,6 +527,11 @@
  *	up the event with the request. The event includes the same data and
  *	has %NL80211_ATTR_ACK set if the frame was ACKed.
  *
+ * @NL80211_CMD_REGISTER_BEACONS: Register this socket to receive beacons from
+ *	other BSSes when any interfaces are in AP mode. This helps implement
+ *	OLBC handling in hostapd. Beacons are reported in %NL80211_CMD_FRAME
+ *	messages. Note that per PHY only one application may register.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -660,6 +665,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_PROBE_CLIENT,
 
+	NL80211_CMD_REGISTER_BEACONS,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 389e85e8c03d..d01307f54faa 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1686,6 +1686,9 @@ struct cfg80211_ops {
  *	command. When this flag is not set, @NL80211_CMD_TDLS_OPER should be
  *	used for asking the driver/firmware to perform a TDLS operation.
  * @WIPHY_FLAG_HAVE_AP_SME: device integrates AP SME
+ * @WIPHY_FLAG_REPORTS_OBSS: the device will report beacons from other BSSes
+ *	when there are virtual interfaces in AP mode by calling
+ *	cfg80211_report_obss_beacon().
  */
 enum wiphy_flags {
 	WIPHY_FLAG_CUSTOM_REGULATORY		= BIT(0),
@@ -1705,6 +1708,7 @@ enum wiphy_flags {
 	WIPHY_FLAG_SUPPORTS_TDLS		= BIT(15),
 	WIPHY_FLAG_TDLS_EXTERNAL_SETUP		= BIT(16),
 	WIPHY_FLAG_HAVE_AP_SME			= BIT(17),
+	WIPHY_FLAG_REPORTS_OBSS			= BIT(18),
 };
 
 /**
@@ -3233,6 +3237,22 @@ bool cfg80211_rx_spurious_frame(struct net_device *dev,
 void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
 			   u64 cookie, bool acked, gfp_t gfp);
 
+/**
+ * cfg80211_report_obss_beacon - report beacon from other APs
+ * @wiphy: The wiphy that received the beacon
+ * @frame: the frame
+ * @len: length of the frame
+ * @freq: frequency the frame was received on
+ * @gfp: allocation flags
+ *
+ * Use this function to report to userspace when a beacon was
+ * received. It is not useful to call this when there is no
+ * netdev that is in AP/GO mode.
+ */
+void cfg80211_report_obss_beacon(struct wiphy *wiphy,
+				 const u8 *frame, size_t len,
+				 int freq, gfp_t gfp);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/wireless/core.h b/net/wireless/core.h
index b9ec3061ed72..4c6ff4024356 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -54,6 +54,8 @@ struct cfg80211_registered_device {
 	int opencount; /* also protected by devlist_mtx */
 	wait_queue_head_t dev_wait;
 
+	u32 ap_beacons_nlpid;
+
 	/* BSSes/scanning */
 	spinlock_t bss_lock;
 	struct list_head bss_list;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a8eda12b46a8..68b6708b996f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -891,6 +891,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
 		CMD(sched_scan_start, START_SCHED_SCAN);
 	CMD(probe_client, PROBE_CLIENT);
+	if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
+		i++;
+		NLA_PUT_U32(msg, i, NL80211_CMD_REGISTER_BEACONS);
+	}
 
 #undef CMD
 
@@ -5907,6 +5911,21 @@ static int nl80211_probe_client(struct sk_buff *skb,
 	return err;
 }
 
+static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+
+	if (!(rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS))
+		return -EOPNOTSUPP;
+
+	if (rdev->ap_beacons_nlpid)
+		return -EBUSY;
+
+	rdev->ap_beacons_nlpid = info->snd_pid;
+
+	return 0;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -6478,6 +6497,14 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_REGISTER_BEACONS,
+		.doit = nl80211_register_beacons,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WIPHY |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -7582,6 +7609,44 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
 }
 EXPORT_SYMBOL(cfg80211_probe_status);
 
+void cfg80211_report_obss_beacon(struct wiphy *wiphy,
+				 const u8 *frame, size_t len,
+				 int freq, gfp_t gfp)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct sk_buff *msg;
+	void *hdr;
+	u32 nlpid = ACCESS_ONCE(rdev->ap_beacons_nlpid);
+
+	if (!nlpid)
+		return;
+
+	msg = nlmsg_new(len + 100, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	if (freq)
+		NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_FREQ, freq);
+	NLA_PUT(msg, NL80211_ATTR_FRAME, len, frame);
+
+	genlmsg_end(msg, hdr);
+
+	genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_report_obss_beacon);
+
 static int nl80211_netlink_notify(struct notifier_block * nb,
 				  unsigned long state,
 				  void *_notify)
@@ -7595,9 +7660,12 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 
 	rcu_read_lock();
 
-	list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list)
+	list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
 		list_for_each_entry_rcu(wdev, &rdev->netdev_list, list)
 			cfg80211_mlme_unregister_socket(wdev, notify->pid);
+		if (rdev->ap_beacons_nlpid == notify->pid)
+			rdev->ap_beacons_nlpid = 0;
+	}
 
 	rcu_read_unlock();
 
-- 
cgit v1.2.3


From b92ab5d86dafc2b3733c5fdd5def40c8fe7ea7c9 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 4 Nov 2011 11:18:19 +0100
Subject: cfg80211: add event for unexpected 4addr frames

The frames are used by AP/STA WDS mode, and hostapd
needs to know when such a frame was received to set
up the VLAN appropriately to allow using it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 10 +++++++++-
 include/net/cfg80211.h  | 16 ++++++++++++++++
 net/wireless/mlme.c     | 14 ++++++++++++++
 net/wireless/nl80211.c  | 19 +++++++++++++++++--
 net/wireless/nl80211.h  |  2 ++
 5 files changed, 58 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index c29a284c27e6..09474ab7de8c 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -517,7 +517,13 @@
  *	For the event, the %NL80211_ATTR_MAC attribute carries the TA and
  *	other attributes like the interface index are present.
  *	If used as the command it must have an interface index and you can
- *	only unsubscribe from the event by closing the socket.
+ *	only unsubscribe from the event by closing the socket. Subscription
+ *	is also for %NL80211_CMD_UNEXPECTED_4ADDR_FRAME events.
+ *
+ * @NL80211_CMD_UNEXPECTED_4ADDR_FRAME: Sent as an event indicating that the
+ *	associated station identified by %NL80211_ATTR_MAC sent a 4addr frame
+ *	and wasn't already in a 4-addr VLAN. The event will be sent similarly
+ *	to the %NL80211_CMD_UNEXPECTED_FRAME event, to the same listener.
  *
  * @NL80211_CMD_PROBE_CLIENT: Probe an associated station on an AP interface
  *	by sending a null data frame to it and reporting when the frame is
@@ -667,6 +673,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_REGISTER_BEACONS,
 
+	NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d01307f54faa..be3535f0895e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3226,6 +3226,22 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
 bool cfg80211_rx_spurious_frame(struct net_device *dev,
 				const u8 *addr, gfp_t gfp);
 
+/**
+ * cfg80211_rx_unexpected_4addr_frame - inform about unexpected WDS frame
+ * @dev: The device the frame matched to
+ * @addr: the transmitter address
+ * @gfp: context flags
+ *
+ * This function is used in AP mode (only!) to inform userspace that
+ * an associated station sent a 4addr frame but that wasn't expected.
+ * It is allowed and desirable to send this event only once for each
+ * station to avoid event flooding.
+ * Returns %true if the frame was passed to userspace (or this failed
+ * for a reason other than not having a subscription.)
+ */
+bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
+					const u8 *addr, gfp_t gfp);
+
 /**
  * cfg80211_probe_status - notify userspace about probe status
  * @dev: the device the probe was sent on
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index f4d868b1e11c..34891e08c54a 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -1123,3 +1123,17 @@ bool cfg80211_rx_spurious_frame(struct net_device *dev,
 	return nl80211_unexpected_frame(dev, addr, gfp);
 }
 EXPORT_SYMBOL(cfg80211_rx_spurious_frame);
+
+bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
+					const u8 *addr, gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
+		    wdev->iftype != NL80211_IFTYPE_P2P_GO &&
+		    wdev->iftype != NL80211_IFTYPE_AP_VLAN))
+		return false;
+
+	return nl80211_unexpected_4addr_frame(dev, addr, gfp);
+}
+EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 68b6708b996f..5b659068b020 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7289,7 +7289,8 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
-bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp)
+static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
+				       const u8 *addr, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
@@ -7305,7 +7306,7 @@ bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp)
 	if (!msg)
 		return true;
 
-	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_UNEXPECTED_FRAME);
+	hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
 	if (!hdr) {
 		nlmsg_free(msg);
 		return true;
@@ -7330,6 +7331,20 @@ bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp)
 	return true;
 }
 
+bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp)
+{
+	return __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME,
+					  addr, gfp);
+}
+
+bool nl80211_unexpected_4addr_frame(struct net_device *dev,
+				    const u8 *addr, gfp_t gfp)
+{
+	return __nl80211_unexpected_frame(dev,
+					  NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
+					  addr, gfp);
+}
+
 int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 		      struct net_device *netdev, u32 nlpid,
 		      int freq, const u8 *buf, size_t len, gfp_t gfp)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index d94456e54f4e..12bf4d185abe 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -119,5 +119,7 @@ void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
 
 bool nl80211_unexpected_frame(struct net_device *dev,
 			      const u8 *addr, gfp_t gfp);
+bool nl80211_unexpected_4addr_frame(struct net_device *dev,
+				    const u8 *addr, gfp_t gfp);
 
 #endif /* __NET_WIRELESS_NL80211_H */
-- 
cgit v1.2.3


From e247bd9068e3e86c3571147c128883596ace9d05 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 4 Nov 2011 11:18:21 +0100
Subject: cfg80211/mac80211: allow management TX to not wait for ACK

For probe responses it can be useful to not wait for ACK to
avoid retransmissions if the station that sent the probe is
already on the next channel, so allow userspace to request
not caring about the ACK with a new nl80211 flag.

Since mac80211 needs to be updated for the new function
prototype anyway implement it right away -- it's just a
few lines of code.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath6kl/cfg80211.c |  3 ++-
 include/linux/nl80211.h                    |  7 ++++++
 include/net/cfg80211.h                     |  2 +-
 net/mac80211/cfg.c                         | 11 ++++++---
 net/wireless/core.h                        |  2 +-
 net/wireless/mlme.c                        |  5 ++--
 net/wireless/nl80211.c                     | 39 ++++++++++++++++++------------
 7 files changed, 46 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index 3aff36bad5d3..daf444bf8d48 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -1732,7 +1732,8 @@ static int ath6kl_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 			  struct ieee80211_channel *chan, bool offchan,
 			  enum nl80211_channel_type channel_type,
 			  bool channel_type_valid, unsigned int wait,
-			  const u8 *buf, size_t len, bool no_cck, u64 *cookie)
+			  const u8 *buf, size_t len, bool no_cck,
+			  bool dont_wait_for_ack, u64 *cookie)
 {
 	struct ath6kl *ar = ath6kl_priv(dev);
 	u32 id;
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 09474ab7de8c..165e16fc7af1 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1151,6 +1151,11 @@ enum nl80211_commands {
  *	with support for the features listed in this attribute, see
  *	&enum nl80211_ap_sme_features.
  *
+ * @NL80211_ATTR_DONT_WAIT_FOR_ACK: Used with %NL80211_CMD_FRAME, this tells
+ *	the driver to not wait for an acknowledgement. Note that due to this,
+ *	it will also not give a status callback nor return a cookie. This is
+ *	mostly useful for probe responses to save airtime.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1381,6 +1386,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_DEVICE_AP_SME,
 
+	NL80211_ATTR_DONT_WAIT_FOR_ACK,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index be3535f0895e..00287bdef919 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1588,7 +1588,7 @@ struct cfg80211_ops {
 			  enum nl80211_channel_type channel_type,
 			  bool channel_type_valid, unsigned int wait,
 			  const u8 *buf, size_t len, bool no_cck,
-			  u64 *cookie);
+			  bool dont_wait_for_ack, u64 *cookie);
 	int	(*mgmt_tx_cancel_wait)(struct wiphy *wiphy,
 				       struct net_device *dev,
 				       u64 cookie);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e072fea69a30..ab3258ac0b2c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1936,7 +1936,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 			     enum nl80211_channel_type channel_type,
 			     bool channel_type_valid, unsigned int wait,
 			     const u8 *buf, size_t len, bool no_cck,
-			     u64 *cookie)
+			     bool dont_wait_for_ack, u64 *cookie)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
@@ -1944,10 +1944,15 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 	struct sta_info *sta;
 	struct ieee80211_work *wk;
 	const struct ieee80211_mgmt *mgmt = (void *)buf;
-	u32 flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX |
-		    IEEE80211_TX_CTL_REQ_TX_STATUS;
+	u32 flags;
 	bool is_offchan = false;
 
+	if (dont_wait_for_ack)
+		flags = IEEE80211_TX_CTL_NO_ACK;
+	else
+		flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX |
+			IEEE80211_TX_CTL_REQ_TX_STATUS;
+
 	/* Check that we are on the requested channel for transmission */
 	if (chan != local->tmp_channel &&
 	    chan != local->oper_channel)
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 4c6ff4024356..1c7d4df5418c 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -378,7 +378,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			  enum nl80211_channel_type channel_type,
 			  bool channel_type_valid, unsigned int wait,
 			  const u8 *buf, size_t len, bool no_cck,
-			  u64 *cookie);
+			  bool dont_wait_for_ack, u64 *cookie);
 
 /* SME */
 int __cfg80211_connect(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 34891e08c54a..6c1bafd508c8 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -904,7 +904,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			  enum nl80211_channel_type channel_type,
 			  bool channel_type_valid, unsigned int wait,
 			  const u8 *buf, size_t len, bool no_cck,
-			  u64 *cookie)
+			  bool dont_wait_for_ack, u64 *cookie)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	const struct ieee80211_mgmt *mgmt;
@@ -995,7 +995,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 	/* Transmit the Action frame as requested by user space */
 	return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, offchan,
 				  channel_type, channel_type_valid,
-				  wait, buf, len, no_cck, cookie);
+				  wait, buf, len, no_cck, dont_wait_for_ack,
+				  cookie);
 }
 
 bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5b659068b020..0ef09415c89a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -196,6 +196,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_TDLS_OPERATION] = { .type = NLA_U8 },
 	[NL80211_ATTR_TDLS_SUPPORT] = { .type = NLA_FLAG },
 	[NL80211_ATTR_TDLS_EXTERNAL_SETUP] = { .type = NLA_FLAG },
+	[NL80211_ATTR_DONT_WAIT_FOR_ACK] = { .type = NLA_FLAG },
 };
 
 /* policy for the key attributes */
@@ -5282,10 +5283,11 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	int err;
 	void *hdr;
 	u64 cookie;
-	struct sk_buff *msg;
+	struct sk_buff *msg = NULL;
 	unsigned int wait = 0;
-	bool offchan;
-	bool no_cck;
+	bool offchan, no_cck, dont_wait_for_ack;
+
+	dont_wait_for_ack = info->attrs[NL80211_ATTR_DONT_WAIT_FOR_ACK];
 
 	if (!info->attrs[NL80211_ATTR_FRAME] ||
 	    !info->attrs[NL80211_ATTR_WIPHY_FREQ])
@@ -5329,29 +5331,36 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	if (chan == NULL)
 		return -EINVAL;
 
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
+	if (!dont_wait_for_ack) {
+		msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+		if (!msg)
+			return -ENOMEM;
 
-	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
-			     NL80211_CMD_FRAME);
+		hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+				     NL80211_CMD_FRAME);
 
-	if (IS_ERR(hdr)) {
-		err = PTR_ERR(hdr);
-		goto free_msg;
+		if (IS_ERR(hdr)) {
+			err = PTR_ERR(hdr);
+			goto free_msg;
+		}
 	}
+
 	err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, offchan, channel_type,
 				    channel_type_valid, wait,
 				    nla_data(info->attrs[NL80211_ATTR_FRAME]),
 				    nla_len(info->attrs[NL80211_ATTR_FRAME]),
-				    no_cck, &cookie);
+				    no_cck, dont_wait_for_ack, &cookie);
 	if (err)
 		goto free_msg;
 
-	NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
+	if (msg) {
+		NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
 
-	genlmsg_end(msg, hdr);
-	return genlmsg_reply(msg, info);
+		genlmsg_end(msg, hdr);
+		return genlmsg_reply(msg, info);
+	}
+
+	return 0;
 
  nla_put_failure:
 	err = -ENOBUFS;
-- 
cgit v1.2.3


From 1f074bd8eb7a4a210a5119cd7220f89da6c7a2c3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sun, 6 Nov 2011 14:13:33 +0100
Subject: nl80211: advertise socket TX status capability

The new wifi socket TX capability should be
supported by wifi drivers, let them advertise
whether they do or not.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 18 ++++++++++++++++++
 include/net/cfg80211.h  |  3 ++-
 net/wireless/nl80211.c  |  2 ++
 3 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 165e16fc7af1..3152ddfb4294 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -695,6 +695,8 @@ enum nl80211_commands {
 #define NL80211_CMD_DISASSOCIATE NL80211_CMD_DISASSOCIATE
 #define NL80211_CMD_REG_BEACON_HINT NL80211_CMD_REG_BEACON_HINT
 
+#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS
+
 /* source-level API compatibility */
 #define NL80211_CMD_GET_MESH_PARAMS NL80211_CMD_GET_MESH_CONFIG
 #define NL80211_CMD_SET_MESH_PARAMS NL80211_CMD_SET_MESH_CONFIG
@@ -1156,6 +1158,9 @@ enum nl80211_commands {
  *	it will also not give a status callback nor return a cookie. This is
  *	mostly useful for probe responses to save airtime.
  *
+ * @NL80211_ATTR_FEATURE_FLAGS: This u32 attribute contains flags from
+ *	&enum nl80211_feature_flags and is advertised in wiphy information.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1388,6 +1393,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_DONT_WAIT_FOR_ACK,
 
+	NL80211_ATTR_FEATURE_FLAGS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1422,6 +1429,7 @@ enum nl80211_attrs {
 #define NL80211_ATTR_AKM_SUITES NL80211_ATTR_AKM_SUITES
 #define NL80211_ATTR_KEY NL80211_ATTR_KEY
 #define NL80211_ATTR_KEYS NL80211_ATTR_KEYS
+#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS
 
 #define NL80211_MAX_SUPP_RATES			32
 #define NL80211_MAX_SUPP_REG_RULES		32
@@ -2709,4 +2717,14 @@ enum nl80211_ap_sme_features {
 };
  */
 
+/**
+ * enum nl80211_feature_flags - device/driver features
+ * @NL80211_FEATURE_SK_TX_STATUS: This driver supports reflecting back
+ *	TX status to the socket error queue when requested with the
+ *	socket option.
+ */
+enum nl80211_feature_flags {
+	NL80211_FEATURE_SK_TX_STATUS	= 1 << 0,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 00287bdef919..e1ee1416631d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1881,6 +1881,7 @@ struct wiphy_wowlan_support {
  * @software_iftypes: bitmask of software interface types, these are not
  *	subject to any restrictions since they are purely managed in SW.
  * @flags: wiphy flags, see &enum wiphy_flags
+ * @features: features advertised to nl80211, see &enum nl80211_feature_flags.
  * @bss_priv_size: each BSS struct has private data allocated with it,
  *	this variable determines its size
  * @max_scan_ssids: maximum number of SSIDs the device can scan for in
@@ -1942,7 +1943,7 @@ struct wiphy {
 	/* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */
 	u16 interface_modes;
 
-	u32 flags;
+	u32 flags, features;
 
 	u32 ap_sme_capa;
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0ef09415c89a..864fcb6f217e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1017,6 +1017,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 		NLA_PUT_U32(msg, NL80211_ATTR_DEVICE_AP_SME,
 			    dev->wiphy.ap_sme_capa);
 
+	NLA_PUT_U32(msg, NL80211_ATTR_FEATURE_FLAGS, dev->wiphy.features);
+
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
-- 
cgit v1.2.3


From 5009065d38c95455bd2d27c2838313e3dd0c5bc7 Mon Sep 17 00:00:00 2001
From: Ohad Ben-Cohen <ohad@wizery.com>
Date: Thu, 10 Nov 2011 11:32:25 +0200
Subject: iommu/core: stop converting bytes to page order back and forth

Express sizes in bytes rather than in page order, to eliminate the
size->order->size conversions we have whenever the IOMMU API is calling
the low level drivers' map/unmap methods.

Adopt all existing drivers.

Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Cc: David Brown <davidb@codeaurora.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <Joerg.Roedel@amd.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Cc: KyongHo Cho <pullip.cho@samsung.com>
Cc: Hiroshi DOYU <hdoyu@nvidia.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu.c   | 13 +++++--------
 drivers/iommu/intel-iommu.c | 11 ++++-------
 drivers/iommu/iommu.c       |  8 +++++---
 drivers/iommu/msm_iommu.c   | 19 +++++++------------
 drivers/iommu/omap-iommu.c  | 14 +++++---------
 include/linux/iommu.h       |  6 +++---
 6 files changed, 29 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 4ee277a8521a..a3b7072e86e2 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2702,9 +2702,8 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 }
 
 static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
-			 phys_addr_t paddr, int gfp_order, int iommu_prot)
+			 phys_addr_t paddr, size_t page_size, int iommu_prot)
 {
-	unsigned long page_size = 0x1000UL << gfp_order;
 	struct protection_domain *domain = dom->priv;
 	int prot = 0;
 	int ret;
@@ -2721,13 +2720,11 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
 	return ret;
 }
 
-static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
-			   int gfp_order)
+static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
+			   size_t page_size)
 {
 	struct protection_domain *domain = dom->priv;
-	unsigned long page_size, unmap_size;
-
-	page_size  = 0x1000UL << gfp_order;
+	size_t unmap_size;
 
 	mutex_lock(&domain->api_lock);
 	unmap_size = iommu_unmap_page(domain, iova, page_size);
@@ -2735,7 +2732,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
 
 	domain_flush_tlb_pde(domain);
 
-	return get_order(unmap_size);
+	return unmap_size;
 }
 
 static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index c0c7820d4c46..2a165010a1c1 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3979,12 +3979,11 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
 
 static int intel_iommu_map(struct iommu_domain *domain,
 			   unsigned long iova, phys_addr_t hpa,
-			   int gfp_order, int iommu_prot)
+			   size_t size, int iommu_prot)
 {
 	struct dmar_domain *dmar_domain = domain->priv;
 	u64 max_addr;
 	int prot = 0;
-	size_t size;
 	int ret;
 
 	if (iommu_prot & IOMMU_READ)
@@ -3994,7 +3993,6 @@ static int intel_iommu_map(struct iommu_domain *domain,
 	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
 		prot |= DMA_PTE_SNP;
 
-	size     = PAGE_SIZE << gfp_order;
 	max_addr = iova + size;
 	if (dmar_domain->max_addr < max_addr) {
 		u64 end;
@@ -4017,11 +4015,10 @@ static int intel_iommu_map(struct iommu_domain *domain,
 	return ret;
 }
 
-static int intel_iommu_unmap(struct iommu_domain *domain,
-			     unsigned long iova, int gfp_order)
+static size_t intel_iommu_unmap(struct iommu_domain *domain,
+			     unsigned long iova, size_t size)
 {
 	struct dmar_domain *dmar_domain = domain->priv;
-	size_t size = PAGE_SIZE << gfp_order;
 	int order;
 
 	order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
@@ -4030,7 +4027,7 @@ static int intel_iommu_unmap(struct iommu_domain *domain,
 	if (dmar_domain->max_addr == iova + size)
 		dmar_domain->max_addr = iova;
 
-	return order;
+	return PAGE_SIZE << order;
 }
 
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 2fb2963df553..7a2953d8f12e 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -168,13 +168,13 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
 
 	BUG_ON(!IS_ALIGNED(iova | paddr, size));
 
-	return domain->ops->map(domain, iova, paddr, gfp_order, prot);
+	return domain->ops->map(domain, iova, paddr, size, prot);
 }
 EXPORT_SYMBOL_GPL(iommu_map);
 
 int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
 {
-	size_t size;
+	size_t size, unmapped;
 
 	if (unlikely(domain->ops->unmap == NULL))
 		return -ENODEV;
@@ -183,6 +183,8 @@ int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
 
 	BUG_ON(!IS_ALIGNED(iova, size));
 
-	return domain->ops->unmap(domain, iova, gfp_order);
+	unmapped = domain->ops->unmap(domain, iova, size);
+
+	return get_order(unmapped);
 }
 EXPORT_SYMBOL_GPL(iommu_unmap);
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 5865dd2e28f9..13718d958da8 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -352,7 +352,7 @@ fail:
 }
 
 static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
-			 phys_addr_t pa, int order, int prot)
+			 phys_addr_t pa, size_t len, int prot)
 {
 	struct msm_priv *priv;
 	unsigned long flags;
@@ -363,7 +363,6 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
 	unsigned long *sl_pte;
 	unsigned long sl_offset;
 	unsigned int pgprot;
-	size_t len = 0x1000UL << order;
 	int ret = 0, tex, sh;
 
 	spin_lock_irqsave(&msm_iommu_lock, flags);
@@ -463,8 +462,8 @@ fail:
 	return ret;
 }
 
-static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
-			    int order)
+static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
+			    size_t len)
 {
 	struct msm_priv *priv;
 	unsigned long flags;
@@ -474,7 +473,6 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
 	unsigned long *sl_table;
 	unsigned long *sl_pte;
 	unsigned long sl_offset;
-	size_t len = 0x1000UL << order;
 	int i, ret = 0;
 
 	spin_lock_irqsave(&msm_iommu_lock, flags);
@@ -544,15 +542,12 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
 
 	ret = __flush_iotlb(domain);
 
-	/*
-	 * the IOMMU API requires us to return the order of the unmapped
-	 * page (on success).
-	 */
-	if (!ret)
-		ret = order;
 fail:
 	spin_unlock_irqrestore(&msm_iommu_lock, flags);
-	return ret;
+
+	/* the IOMMU API requires us to return how many bytes were unmapped */
+	len = ret ? 0 : len;
+	return len;
 }
 
 static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 8f32b2bf7587..ad80b1d0d099 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1019,12 +1019,11 @@ static void iopte_cachep_ctor(void *iopte)
 }
 
 static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
-			 phys_addr_t pa, int order, int prot)
+			 phys_addr_t pa, size_t bytes, int prot)
 {
 	struct omap_iommu_domain *omap_domain = domain->priv;
 	struct omap_iommu *oiommu = omap_domain->iommu_dev;
 	struct device *dev = oiommu->dev;
-	size_t bytes = PAGE_SIZE << order;
 	struct iotlb_entry e;
 	int omap_pgsz;
 	u32 ret, flags;
@@ -1049,19 +1048,16 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
 	return ret;
 }
 
-static int omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
-			    int order)
+static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
+			    size_t size)
 {
 	struct omap_iommu_domain *omap_domain = domain->priv;
 	struct omap_iommu *oiommu = omap_domain->iommu_dev;
 	struct device *dev = oiommu->dev;
-	size_t unmap_size;
 
-	dev_dbg(dev, "unmapping da 0x%lx order %d\n", da, order);
+	dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size);
 
-	unmap_size = iopgtable_clear_entry(oiommu, da);
-
-	return unmap_size ? get_order(unmap_size) : -EINVAL;
+	return iopgtable_clear_entry(oiommu, da);
 }
 
 static int
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 432acc4c054d..d5ebf3f4dd53 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -54,9 +54,9 @@ struct iommu_ops {
 	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
 	void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
 	int (*map)(struct iommu_domain *domain, unsigned long iova,
-		   phys_addr_t paddr, int gfp_order, int prot);
-	int (*unmap)(struct iommu_domain *domain, unsigned long iova,
-		     int gfp_order);
+		   phys_addr_t paddr, size_t size, int prot);
+	size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
+		     size_t size);
 	phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
 				    unsigned long iova);
 	int (*domain_has_cap)(struct iommu_domain *domain,
-- 
cgit v1.2.3


From 7d3002cc8c160dbda0e6ab9cd66dc6eb401b8b70 Mon Sep 17 00:00:00 2001
From: Ohad Ben-Cohen <ohad@wizery.com>
Date: Thu, 10 Nov 2011 11:32:26 +0200
Subject: iommu/core: split mapping to page sizes as supported by the hardware

When mapping a memory region, split it to page sizes as supported
by the iommu hardware. Always prefer bigger pages, when possible,
in order to reduce the TLB pressure.

The logic to do that is now added to the IOMMU core, so neither the iommu
drivers themselves nor users of the IOMMU API have to duplicate it.

This allows a more lenient granularity of mappings; traditionally the
IOMMU API took 'order' (of a page) as a mapping size, and directly let
the low level iommu drivers handle the mapping, but now that the IOMMU
core can split arbitrary memory regions into pages, we can remove this
limitation, so users don't have to split those regions by themselves.

Currently the supported page sizes are advertised once and they then
remain static. That works well for OMAP and MSM but it would probably
not fly well with intel's hardware, where the page size capabilities
seem to have the potential to be different between several DMA
remapping devices.

register_iommu() currently sets a default pgsize behavior, so we can convert
the IOMMU drivers in subsequent patches. After all the drivers
are converted, the temporary default settings will be removed.

Mainline users of the IOMMU API (kvm and omap-iovmm) are adopted
to deal with bytes instead of page order.

Many thanks to Joerg Roedel <Joerg.Roedel@amd.com> for significant review!

Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Cc: David Brown <davidb@codeaurora.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <Joerg.Roedel@amd.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Cc: KyongHo Cho <pullip.cho@samsung.com>
Cc: Hiroshi DOYU <hdoyu@nvidia.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/iommu.c      | 131 ++++++++++++++++++++++++++++++++++++++++-----
 drivers/iommu/omap-iovmm.c |  17 +++---
 include/linux/iommu.h      |  20 +++++--
 virt/kvm/iommu.c           |   8 +--
 4 files changed, 144 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 7a2953d8f12e..b278458d5816 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -16,6 +16,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
+#define pr_fmt(fmt)    "%s: " fmt, __func__
+
 #include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/bug.h>
@@ -47,6 +49,16 @@ int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops)
 	if (bus->iommu_ops != NULL)
 		return -EBUSY;
 
+	/*
+	 * Set the default pgsize values, which retain the existing
+	 * IOMMU API behavior: drivers will be called to map
+	 * regions that are sized/aligned to order of 4KiB pages.
+	 *
+	 * This will be removed once all drivers are migrated.
+	 */
+	if (!ops->pgsize_bitmap)
+		ops->pgsize_bitmap = ~0xFFFUL;
+
 	bus->iommu_ops = ops;
 
 	/* Do IOMMU specific setup for this bus-type */
@@ -157,34 +169,125 @@ int iommu_domain_has_cap(struct iommu_domain *domain,
 EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
 
 int iommu_map(struct iommu_domain *domain, unsigned long iova,
-	      phys_addr_t paddr, int gfp_order, int prot)
+	      phys_addr_t paddr, size_t size, int prot)
 {
-	size_t size;
+	unsigned long orig_iova = iova;
+	unsigned int min_pagesz;
+	size_t orig_size = size;
+	int ret = 0;
 
 	if (unlikely(domain->ops->map == NULL))
 		return -ENODEV;
 
-	size         = PAGE_SIZE << gfp_order;
+	/* find out the minimum page size supported */
+	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+
+	/*
+	 * both the virtual address and the physical one, as well as
+	 * the size of the mapping, must be aligned (at least) to the
+	 * size of the smallest page supported by the hardware
+	 */
+	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+		pr_err("unaligned: iova 0x%lx pa 0x%lx size 0x%lx min_pagesz "
+			"0x%x\n", iova, (unsigned long)paddr,
+			(unsigned long)size, min_pagesz);
+		return -EINVAL;
+	}
+
+	pr_debug("map: iova 0x%lx pa 0x%lx size 0x%lx\n", iova,
+				(unsigned long)paddr, (unsigned long)size);
+
+	while (size) {
+		unsigned long pgsize, addr_merge = iova | paddr;
+		unsigned int pgsize_idx;
+
+		/* Max page size that still fits into 'size' */
+		pgsize_idx = __fls(size);
+
+		/* need to consider alignment requirements ? */
+		if (likely(addr_merge)) {
+			/* Max page size allowed by both iova and paddr */
+			unsigned int align_pgsize_idx = __ffs(addr_merge);
+
+			pgsize_idx = min(pgsize_idx, align_pgsize_idx);
+		}
+
+		/* build a mask of acceptable page sizes */
+		pgsize = (1UL << (pgsize_idx + 1)) - 1;
+
+		/* throw away page sizes not supported by the hardware */
+		pgsize &= domain->ops->pgsize_bitmap;
 
-	BUG_ON(!IS_ALIGNED(iova | paddr, size));
+		/* make sure we're still sane */
+		BUG_ON(!pgsize);
 
-	return domain->ops->map(domain, iova, paddr, size, prot);
+		/* pick the biggest page */
+		pgsize_idx = __fls(pgsize);
+		pgsize = 1UL << pgsize_idx;
+
+		pr_debug("mapping: iova 0x%lx pa 0x%lx pgsize %lu\n", iova,
+					(unsigned long)paddr, pgsize);
+
+		ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
+		if (ret)
+			break;
+
+		iova += pgsize;
+		paddr += pgsize;
+		size -= pgsize;
+	}
+
+	/* unroll mapping in case something went wrong */
+	if (ret)
+		iommu_unmap(domain, orig_iova, orig_size - size);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_map);
 
-int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
+size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
 {
-	size_t size, unmapped;
+	size_t unmapped_page, unmapped = 0;
+	unsigned int min_pagesz;
 
 	if (unlikely(domain->ops->unmap == NULL))
 		return -ENODEV;
 
-	size         = PAGE_SIZE << gfp_order;
-
-	BUG_ON(!IS_ALIGNED(iova, size));
-
-	unmapped = domain->ops->unmap(domain, iova, size);
-
-	return get_order(unmapped);
+	/* find out the minimum page size supported */
+	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+
+	/*
+	 * The virtual address, as well as the size of the mapping, must be
+	 * aligned (at least) to the size of the smallest page supported
+	 * by the hardware
+	 */
+	if (!IS_ALIGNED(iova | size, min_pagesz)) {
+		pr_err("unaligned: iova 0x%lx size 0x%lx min_pagesz 0x%x\n",
+					iova, (unsigned long)size, min_pagesz);
+		return -EINVAL;
+	}
+
+	pr_debug("unmap this: iova 0x%lx size 0x%lx\n", iova,
+							(unsigned long)size);
+
+	/*
+	 * Keep iterating until we either unmap 'size' bytes (or more)
+	 * or we hit an area that isn't mapped.
+	 */
+	while (unmapped < size) {
+		size_t left = size - unmapped;
+
+		unmapped_page = domain->ops->unmap(domain, iova, left);
+		if (!unmapped_page)
+			break;
+
+		pr_debug("unmapped: iova 0x%lx size %lx\n", iova,
+					(unsigned long)unmapped_page);
+
+		iova += unmapped_page;
+		unmapped += unmapped_page;
+	}
+
+	return unmapped;
 }
 EXPORT_SYMBOL_GPL(iommu_unmap);
diff --git a/drivers/iommu/omap-iovmm.c b/drivers/iommu/omap-iovmm.c
index e8fdb8830f69..0b7b14cb030b 100644
--- a/drivers/iommu/omap-iovmm.c
+++ b/drivers/iommu/omap-iovmm.c
@@ -409,7 +409,6 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
 	unsigned int i, j;
 	struct scatterlist *sg;
 	u32 da = new->da_start;
-	int order;
 
 	if (!domain || !sgt)
 		return -EINVAL;
@@ -428,12 +427,10 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
 		if (bytes_to_iopgsz(bytes) < 0)
 			goto err_out;
 
-		order = get_order(bytes);
-
 		pr_debug("%s: [%d] %08x %08x(%x)\n", __func__,
 			 i, da, pa, bytes);
 
-		err = iommu_map(domain, da, pa, order, flags);
+		err = iommu_map(domain, da, pa, bytes, flags);
 		if (err)
 			goto err_out;
 
@@ -448,10 +445,9 @@ err_out:
 		size_t bytes;
 
 		bytes = sg->length + sg->offset;
-		order = get_order(bytes);
 
 		/* ignore failures.. we're already handling one */
-		iommu_unmap(domain, da, order);
+		iommu_unmap(domain, da, bytes);
 
 		da += bytes;
 	}
@@ -466,7 +462,8 @@ static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
 	size_t total = area->da_end - area->da_start;
 	const struct sg_table *sgt = area->sgt;
 	struct scatterlist *sg;
-	int i, err;
+	int i;
+	size_t unmapped;
 
 	BUG_ON(!sgtable_ok(sgt));
 	BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE));
@@ -474,13 +471,11 @@ static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
 	start = area->da_start;
 	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
 		size_t bytes;
-		int order;
 
 		bytes = sg->length + sg->offset;
-		order = get_order(bytes);
 
-		err = iommu_unmap(domain, start, order);
-		if (err < 0)
+		unmapped = iommu_unmap(domain, start, bytes);
+		if (unmapped < bytes)
 			break;
 
 		dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n",
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index d5ebf3f4dd53..cc26f89c4ee6 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -48,6 +48,19 @@ struct iommu_domain {
 
 #ifdef CONFIG_IOMMU_API
 
+/**
+ * struct iommu_ops - iommu ops and capabilities
+ * @domain_init: init iommu domain
+ * @domain_destroy: destroy iommu domain
+ * @attach_dev: attach device to an iommu domain
+ * @detach_dev: detach device from an iommu domain
+ * @map: map a physically contiguous memory region to an iommu domain
+ * @unmap: unmap a physically contiguous memory region from an iommu domain
+ * @iova_to_phys: translate iova to physical address
+ * @domain_has_cap: domain capabilities query
+ * @commit: commit iommu domain
+ * @pgsize_bitmap: bitmap of supported page sizes
+ */
 struct iommu_ops {
 	int (*domain_init)(struct iommu_domain *domain);
 	void (*domain_destroy)(struct iommu_domain *domain);
@@ -61,6 +74,7 @@ struct iommu_ops {
 				    unsigned long iova);
 	int (*domain_has_cap)(struct iommu_domain *domain,
 			      unsigned long cap);
+	unsigned long pgsize_bitmap;
 };
 
 extern int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops);
@@ -72,9 +86,9 @@ extern int iommu_attach_device(struct iommu_domain *domain,
 extern void iommu_detach_device(struct iommu_domain *domain,
 				struct device *dev);
 extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
-		     phys_addr_t paddr, int gfp_order, int prot);
-extern int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-		       int gfp_order);
+		     phys_addr_t paddr, size_t size, int prot);
+extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+		       size_t size);
 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
 				      unsigned long iova);
 extern int iommu_domain_has_cap(struct iommu_domain *domain,
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index a195c07fa829..304d7e5717e9 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -113,7 +113,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 
 		/* Map into IO address space */
 		r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
-			      get_order(page_size), flags);
+			      page_size, flags);
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%llx\n", pfn);
@@ -292,15 +292,15 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 
 	while (gfn < end_gfn) {
 		unsigned long unmap_pages;
-		int order;
+		size_t size;
 
 		/* Get physical address */
 		phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
 		pfn  = phys >> PAGE_SHIFT;
 
 		/* Unmap address from IO address space */
-		order       = iommu_unmap(domain, gfn_to_gpa(gfn), 0);
-		unmap_pages = 1ULL << order;
+		size       = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
+		unmap_pages = 1ULL << get_order(size);
 
 		/* Unpin all pages we just unmapped to not leak any memory */
 		kvm_unpin_pages(kvm, pfn, unmap_pages);
-- 
cgit v1.2.3


From d65670a78cdbfae94f20a9e05ec705871d7cdf2b Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 31 Oct 2011 17:06:35 -0400
Subject: clocksource: Avoid selecting mult values that might overflow when
 adjusted

For some frequencies, the clocks_calc_mult_shift() function will
unfortunately select mult values very close to 0xffffffff.  This
has the potential to overflow when NTP adjusts the clock, adding
to the mult value.

This patch adds a clocksource.maxadj value, which provides
an approximation of an 11% adjustment(NTP limits adjustments to
500ppm and the tick adjustment is limited to 10%), which could
be made to the clocksource.mult value. This is then used to both
check that the current mult value won't overflow/underflow, as
well as warning us if the timekeeping_adjust() code pushes over
that 11% boundary.

v2: Fix max_adjustment calculation, and improve WARN_ONCE
messages.

v3: Don't warn before maxadj has actually been set

CC: Yong Zhang <yong.zhang0@gmail.com>
CC: David Daney <ddaney.cavm@gmail.com>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Chen Jie <chenj@lemote.com>
CC: zhangfx <zhangfx@lemote.com>
CC: stable@kernel.org
Reported-by: Chen Jie <chenj@lemote.com>
Reported-by: zhangfx <zhangfx@lemote.com>
Tested-by: Yong Zhang <yong.zhang0@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/clocksource.h |  3 ++-
 kernel/time/clocksource.c   | 58 +++++++++++++++++++++++++++++++++++++--------
 kernel/time/timekeeping.c   |  7 ++++++
 3 files changed, 57 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 139c4db55f17..c86c940d1de3 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -156,6 +156,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
  * @mult:		cycle to nanosecond multiplier
  * @shift:		cycle to nanosecond divisor (power of two)
  * @max_idle_ns:	max idle time permitted by the clocksource (nsecs)
+ * @maxadj		maximum adjustment value to mult (~11%)
  * @flags:		flags describing special properties
  * @archdata:		arch-specific data
  * @suspend:		suspend function for the clocksource, if necessary
@@ -172,7 +173,7 @@ struct clocksource {
 	u32 mult;
 	u32 shift;
 	u64 max_idle_ns;
-
+	u32 maxadj;
 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
 	struct arch_clocksource_data archdata;
 #endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cf52fda2e096..cfc65e1eb9fb 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -491,6 +491,22 @@ void clocksource_touch_watchdog(void)
 	clocksource_resume_watchdog();
 }
 
+/**
+ * clocksource_max_adjustment- Returns max adjustment amount
+ * @cs:         Pointer to clocksource
+ *
+ */
+static u32 clocksource_max_adjustment(struct clocksource *cs)
+{
+	u64 ret;
+	/*
+	 * We won't try to correct for more then 11% adjustments (110,000 ppm),
+	 */
+	ret = (u64)cs->mult * 11;
+	do_div(ret,100);
+	return (u32)ret;
+}
+
 /**
  * clocksource_max_deferment - Returns max time the clocksource can be deferred
  * @cs:         Pointer to clocksource
@@ -503,25 +519,28 @@ static u64 clocksource_max_deferment(struct clocksource *cs)
 	/*
 	 * Calculate the maximum number of cycles that we can pass to the
 	 * cyc2ns function without overflowing a 64-bit signed result. The
-	 * maximum number of cycles is equal to ULLONG_MAX/cs->mult which
-	 * is equivalent to the below.
-	 * max_cycles < (2^63)/cs->mult
-	 * max_cycles < 2^(log2((2^63)/cs->mult))
-	 * max_cycles < 2^(log2(2^63) - log2(cs->mult))
-	 * max_cycles < 2^(63 - log2(cs->mult))
-	 * max_cycles < 1 << (63 - log2(cs->mult))
+	 * maximum number of cycles is equal to ULLONG_MAX/(cs->mult+cs->maxadj)
+	 * which is equivalent to the below.
+	 * max_cycles < (2^63)/(cs->mult + cs->maxadj)
+	 * max_cycles < 2^(log2((2^63)/(cs->mult + cs->maxadj)))
+	 * max_cycles < 2^(log2(2^63) - log2(cs->mult + cs->maxadj))
+	 * max_cycles < 2^(63 - log2(cs->mult + cs->maxadj))
+	 * max_cycles < 1 << (63 - log2(cs->mult + cs->maxadj))
 	 * Please note that we add 1 to the result of the log2 to account for
 	 * any rounding errors, ensure the above inequality is satisfied and
 	 * no overflow will occur.
 	 */
-	max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
+	max_cycles = 1ULL << (63 - (ilog2(cs->mult + cs->maxadj) + 1));
 
 	/*
 	 * The actual maximum number of cycles we can defer the clocksource is
 	 * determined by the minimum of max_cycles and cs->mask.
+	 * Note: Here we subtract the maxadj to make sure we don't sleep for
+	 * too long if there's a large negative adjustment.
 	 */
 	max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
-	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
+	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult - cs->maxadj,
+					cs->shift);
 
 	/*
 	 * To ensure that the clocksource does not wrap whilst we are idle,
@@ -640,7 +659,6 @@ static void clocksource_enqueue(struct clocksource *cs)
 void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
 	u64 sec;
-
 	/*
 	 * Calc the maximum number of seconds which we can run before
 	 * wrapping around. For clocksources which have a mask > 32bit
@@ -661,6 +679,20 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
 
 	clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
 			       NSEC_PER_SEC / scale, sec * scale);
+
+	/*
+	 * for clocksources that have large mults, to avoid overflow.
+	 * Since mult may be adjusted by ntp, add an safety extra margin
+	 *
+	 */
+	cs->maxadj = clocksource_max_adjustment(cs);
+	while ((cs->mult + cs->maxadj < cs->mult)
+		|| (cs->mult - cs->maxadj > cs->mult)) {
+		cs->mult >>= 1;
+		cs->shift--;
+		cs->maxadj = clocksource_max_adjustment(cs);
+	}
+
 	cs->max_idle_ns = clocksource_max_deferment(cs);
 }
 EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
@@ -701,6 +733,12 @@ EXPORT_SYMBOL_GPL(__clocksource_register_scale);
  */
 int clocksource_register(struct clocksource *cs)
 {
+	/* calculate max adjustment for given mult/shift */
+	cs->maxadj = clocksource_max_adjustment(cs);
+	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
+		"Clocksource %s might overflow on 11%% adjustment\n",
+		cs->name);
+
 	/* calculate max idle time permitted for this clocksource */
 	cs->max_idle_ns = clocksource_max_deferment(cs);
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2b021b0e8507..e65ff3171102 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -820,6 +820,13 @@ static void timekeeping_adjust(s64 offset)
 	} else
 		return;
 
+	WARN_ONCE(timekeeper.clock->maxadj &&
+			(timekeeper.mult + adj > timekeeper.clock->mult +
+						timekeeper.clock->maxadj),
+			"Adjusting %s more then 11%% (%ld vs %ld)\n",
+			timekeeper.clock->name, (long)timekeeper.mult + adj,
+			(long)timekeeper.clock->mult +
+				timekeeper.clock->maxadj);
 	timekeeper.mult += adj;
 	timekeeper.xtime_interval += interval;
 	timekeeper.xtime_nsec -= offset;
-- 
cgit v1.2.3


From 39ce61a846c8e1fa00cb57ad5af021542e6e8403 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 11 Nov 2011 12:46:23 -0200
Subject: [media] dvb: Allow select between DVB-C Annex A and Annex C

DVB-C, as defined by ITU-T J.83 has 3 annexes. The differences between
Annex A and Annex C is that Annex C uses a subset of the modulation
types, and uses a different rolloff factor. A different rolloff means
that the bandwidth required is slicely different, and may affect the
saw filter configuration at the tuners. Also, some demods have different
configurations, depending on using Annex A or Annex C.

So, allow userspace to specify it, by changing the rolloff factor.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/dvb/dvbproperty.xml | 4 ++++
 drivers/media/dvb/dvb-core/dvb_frontend.c       | 2 ++
 include/linux/dvb/frontend.h                    | 2 ++
 3 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml
index 3bc8a61efe30..6ac803959a47 100644
--- a/Documentation/DocBook/media/dvb/dvbproperty.xml
+++ b/Documentation/DocBook/media/dvb/dvbproperty.xml
@@ -311,6 +311,8 @@ typedef enum fe_rolloff {
 	ROLLOFF_20,
 	ROLLOFF_25,
 	ROLLOFF_AUTO,
+	ROLLOFF_15, /* DVB-C Annex A */
+	ROLLOFF_13, /* DVB-C Annex C */
 } fe_rolloff_t;
 		</programlisting>
 		</section>
@@ -778,8 +780,10 @@ typedef enum fe_hierarchy {
 			<listitem><para><link linkend="DTV-MODULATION"><constant>DTV_MODULATION</constant></link></para></listitem>
 			<listitem><para><link linkend="DTV-INVERSION"><constant>DTV_INVERSION</constant></link></para></listitem>
 			<listitem><para><link linkend="DTV-SYMBOL-RATE"><constant>DTV_SYMBOL_RATE</constant></link></para></listitem>
+			<listitem><para><link linkend="DTV-ROLLOFF"><constant>DTV_ROLLOFF</constant></link></para></listitem>
 			<listitem><para><link linkend="DTV-INNER-FEC"><constant>DTV_INNER_FEC</constant></link></para></listitem>
 		</itemizedlist>
+		<para>The Rolloff of 0.15 (ROLLOFF_15) is assumed, as ITU-T J.83 Annex A is more common. For Annex C, rolloff should be 0.13 (ROLLOFF_13). All other values are invalid.</para>
 	</section>
 	<section id="dvbc-annex-b-params">
 		<title>DVB-C Annex B delivery system</title>
diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c
index 2c0acdb4d811..c849455458ea 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.c
@@ -876,6 +876,7 @@ static int dvb_frontend_clear_cache(struct dvb_frontend *fe)
 	c->symbol_rate = QAM_AUTO;
 	c->code_rate_HP = FEC_AUTO;
 	c->code_rate_LP = FEC_AUTO;
+	c->rolloff = ROLLOFF_AUTO;
 
 	c->isdbt_partial_reception = -1;
 	c->isdbt_sb_mode = -1;
@@ -1030,6 +1031,7 @@ static void dtv_property_cache_init(struct dvb_frontend *fe,
 		break;
 	case FE_QAM:
 		c->delivery_system = SYS_DVBC_ANNEX_AC;
+		c->rolloff = ROLLOFF_15; /* implied for Annex A */
 		break;
 	case FE_OFDM:
 		c->delivery_system = SYS_DVBT;
diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index 1b1094c35e4f..d9251df867b5 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -329,6 +329,8 @@ typedef enum fe_rolloff {
 	ROLLOFF_20,
 	ROLLOFF_25,
 	ROLLOFF_AUTO,
+	ROLLOFF_15,	/* DVB-C Annex A */
+	ROLLOFF_13,	/* DVB-C Annex C */
 } fe_rolloff_t;
 
 typedef enum fe_delivery_system {
-- 
cgit v1.2.3


From 5a6b5e02d673486c96003d4a6e3e2510f4c59f92 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Sat, 3 Sep 2011 01:36:08 +0000
Subject: fbdev: remove display subsystem

This four year old subsystem does not have a single in-tree user
not even in staging and as far as I know also none out-of-tree.
I think that justifies removing it which cleans the config up.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Acked-by: James Simmons <jsimmons@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/video/Kconfig                 |   1 -
 drivers/video/Makefile                |   2 +-
 drivers/video/display/Kconfig         |  24 ----
 drivers/video/display/Makefile        |   6 -
 drivers/video/display/display-sysfs.c | 219 ----------------------------------
 include/linux/display.h               |  61 ----------
 6 files changed, 1 insertion(+), 312 deletions(-)
 delete mode 100644 drivers/video/display/Kconfig
 delete mode 100644 drivers/video/display/Makefile
 delete mode 100644 drivers/video/display/display-sysfs.c
 delete mode 100644 include/linux/display.h

(limited to 'include/linux')

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 8165c5577d71..1b5b98f29482 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -2413,7 +2413,6 @@ source "drivers/video/omap/Kconfig"
 source "drivers/video/omap2/Kconfig"
 
 source "drivers/video/backlight/Kconfig"
-source "drivers/video/display/Kconfig"
 
 if VT
 	source "drivers/video/console/Kconfig"
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 9b9d8fff7732..142606814d98 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -13,7 +13,7 @@ fb-objs                           := $(fb-y)
 
 obj-$(CONFIG_VT)		  += console/
 obj-$(CONFIG_LOGO)		  += logo/
-obj-y				  += backlight/ display/
+obj-y				  += backlight/
 
 obj-$(CONFIG_FB_CFB_FILLRECT)  += cfbfillrect.o
 obj-$(CONFIG_FB_CFB_COPYAREA)  += cfbcopyarea.o
diff --git a/drivers/video/display/Kconfig b/drivers/video/display/Kconfig
deleted file mode 100644
index f99af931d4f8..000000000000
--- a/drivers/video/display/Kconfig
+++ /dev/null
@@ -1,24 +0,0 @@
-#
-# Display drivers configuration
-#
-
-menu "Display device support"
-
-config DISPLAY_SUPPORT
-	tristate "Display panel/monitor support"
-	---help---
-	  This framework adds support for low-level control of a display.
-	  This includes support for power.
-
-	  Enable this to be able to choose the drivers for controlling the
-	  physical display panel/monitor on some platforms. This not only
-	  covers LCD displays for PDAs but also other types of displays
-	  such as CRT, TVout etc.
-
-	  To have support for your specific display panel you will have to
-	  select the proper drivers which depend on this option.
-
-comment "Display hardware drivers"
-	depends on DISPLAY_SUPPORT
-
-endmenu
diff --git a/drivers/video/display/Makefile b/drivers/video/display/Makefile
deleted file mode 100644
index c0ea832bf171..000000000000
--- a/drivers/video/display/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# Display drivers
-
-display-objs				:= display-sysfs.o
-
-obj-$(CONFIG_DISPLAY_SUPPORT)		+= display.o
-
diff --git a/drivers/video/display/display-sysfs.c b/drivers/video/display/display-sysfs.c
deleted file mode 100644
index 0c647d7af0ee..000000000000
--- a/drivers/video/display/display-sysfs.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- *  display-sysfs.c - Display output driver sysfs interface
- *
- *  Copyright (C) 2007 James Simmons <jsimmons@infradead.org>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or (at
- *  your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-#include <linux/module.h>
-#include <linux/display.h>
-#include <linux/ctype.h>
-#include <linux/idr.h>
-#include <linux/err.h>
-#include <linux/kdev_t.h>
-#include <linux/slab.h>
-
-static ssize_t display_show_name(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct display_device *dsp = dev_get_drvdata(dev);
-	return snprintf(buf, PAGE_SIZE, "%s\n", dsp->name);
-}
-
-static ssize_t display_show_type(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct display_device *dsp = dev_get_drvdata(dev);
-	return snprintf(buf, PAGE_SIZE, "%s\n", dsp->type);
-}
-
-static ssize_t display_show_contrast(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct display_device *dsp = dev_get_drvdata(dev);
-	ssize_t rc = -ENXIO;
-
-	mutex_lock(&dsp->lock);
-	if (likely(dsp->driver) && dsp->driver->get_contrast)
-		rc = sprintf(buf, "%d\n", dsp->driver->get_contrast(dsp));
-	mutex_unlock(&dsp->lock);
-	return rc;
-}
-
-static ssize_t display_store_contrast(struct device *dev,
-					struct device_attribute *attr,
-					const char *buf, size_t count)
-{
-	struct display_device *dsp = dev_get_drvdata(dev);
-	ssize_t ret = -EINVAL, size;
-	int contrast;
-	char *endp;
-
-	contrast = simple_strtoul(buf, &endp, 0);
-	size = endp - buf;
-
-	if (isspace(*endp))
-		size++;
-
-	if (size != count)
-		return ret;
-
-	mutex_lock(&dsp->lock);
-	if (likely(dsp->driver && dsp->driver->set_contrast)) {
-		pr_debug("display: set contrast to %d\n", contrast);
-		dsp->driver->set_contrast(dsp, contrast);
-		ret = count;
-	}
-	mutex_unlock(&dsp->lock);
-	return ret;
-}
-
-static ssize_t display_show_max_contrast(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	struct display_device *dsp = dev_get_drvdata(dev);
-	ssize_t rc = -ENXIO;
-
-	mutex_lock(&dsp->lock);
-	if (likely(dsp->driver))
-		rc = sprintf(buf, "%d\n", dsp->driver->max_contrast);
-	mutex_unlock(&dsp->lock);
-	return rc;
-}
-
-static struct device_attribute display_attrs[] = {
-	__ATTR(name, S_IRUGO, display_show_name, NULL),
-	__ATTR(type, S_IRUGO, display_show_type, NULL),
-	__ATTR(contrast, S_IRUGO | S_IWUSR, display_show_contrast, display_store_contrast),
-	__ATTR(max_contrast, S_IRUGO, display_show_max_contrast, NULL),
-};
-
-static int display_suspend(struct device *dev, pm_message_t state)
-{
-	struct display_device *dsp = dev_get_drvdata(dev);
-
-	mutex_lock(&dsp->lock);
-	if (likely(dsp->driver->suspend))
-		dsp->driver->suspend(dsp, state);
-	mutex_unlock(&dsp->lock);
-	return 0;
-};
-
-static int display_resume(struct device *dev)
-{
-	struct display_device *dsp = dev_get_drvdata(dev);
-
-	mutex_lock(&dsp->lock);
-	if (likely(dsp->driver->resume))
-		dsp->driver->resume(dsp);
-	mutex_unlock(&dsp->lock);
-	return 0;
-};
-
-static struct mutex allocated_dsp_lock;
-static DEFINE_IDR(allocated_dsp);
-static struct class *display_class;
-
-struct display_device *display_device_register(struct display_driver *driver,
-						struct device *parent, void *devdata)
-{
-	struct display_device *new_dev = NULL;
-	int ret = -EINVAL;
-
-	if (unlikely(!driver))
-		return ERR_PTR(ret);
-
-	mutex_lock(&allocated_dsp_lock);
-	ret = idr_pre_get(&allocated_dsp, GFP_KERNEL);
-	mutex_unlock(&allocated_dsp_lock);
-	if (!ret)
-		return ERR_PTR(ret);
-
-	new_dev = kzalloc(sizeof(struct display_device), GFP_KERNEL);
-	if (likely(new_dev) && unlikely(driver->probe(new_dev, devdata))) {
-		// Reserve the index for this display
-		mutex_lock(&allocated_dsp_lock);
-		ret = idr_get_new(&allocated_dsp, new_dev, &new_dev->idx);
-		mutex_unlock(&allocated_dsp_lock);
-
-		if (!ret) {
-			new_dev->dev = device_create(display_class, parent,
-						     MKDEV(0, 0), new_dev,
-						     "display%d", new_dev->idx);
-			if (!IS_ERR(new_dev->dev)) {
-				new_dev->parent = parent;
-				new_dev->driver = driver;
-				mutex_init(&new_dev->lock);
-				return new_dev;
-			}
-			mutex_lock(&allocated_dsp_lock);
-			idr_remove(&allocated_dsp, new_dev->idx);
-			mutex_unlock(&allocated_dsp_lock);
-			ret = -EINVAL;
-		}
-	}
-	kfree(new_dev);
-	return ERR_PTR(ret);
-}
-EXPORT_SYMBOL(display_device_register);
-
-void display_device_unregister(struct display_device *ddev)
-{
-	if (!ddev)
-		return;
-	// Free device
-	mutex_lock(&ddev->lock);
-	device_unregister(ddev->dev);
-	mutex_unlock(&ddev->lock);
-	// Mark device index as available
-	mutex_lock(&allocated_dsp_lock);
-	idr_remove(&allocated_dsp, ddev->idx);
-	mutex_unlock(&allocated_dsp_lock);
-	kfree(ddev);
-}
-EXPORT_SYMBOL(display_device_unregister);
-
-static int __init display_class_init(void)
-{
-	display_class = class_create(THIS_MODULE, "display");
-	if (IS_ERR(display_class)) {
-		printk(KERN_ERR "Failed to create display class\n");
-		display_class = NULL;
-		return -EINVAL;
-	}
-	display_class->dev_attrs = display_attrs;
-	display_class->suspend = display_suspend;
-	display_class->resume = display_resume;
-	mutex_init(&allocated_dsp_lock);
-	return 0;
-}
-
-static void __exit display_class_exit(void)
-{
-	class_destroy(display_class);
-}
-
-module_init(display_class_init);
-module_exit(display_class_exit);
-
-MODULE_DESCRIPTION("Display Hardware handling");
-MODULE_AUTHOR("James Simmons <jsimmons@infradead.org>");
-MODULE_LICENSE("GPL");
-
diff --git a/include/linux/display.h b/include/linux/display.h
deleted file mode 100644
index 3bf70d639728..000000000000
--- a/include/linux/display.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *  Copyright (C) 2006 James Simmons <jsimmons@infradead.org>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or (at
- *  your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#ifndef _LINUX_DISPLAY_H
-#define _LINUX_DISPLAY_H
-
-#include <linux/device.h>
-
-struct display_device;
-
-/* This structure defines all the properties of a Display. */
-struct display_driver {
-	int  (*set_contrast)(struct display_device *, unsigned int);
-	int  (*get_contrast)(struct display_device *);
-	void (*suspend)(struct display_device *, pm_message_t state);
-	void (*resume)(struct display_device *);
-	int  (*probe)(struct display_device *, void *);
-	int  (*remove)(struct display_device *);
-	int  max_contrast;
-};
-
-struct display_device {
-	struct module *owner;			/* Owner module */
-	struct display_driver *driver;
-	struct device *parent;			/* This is the parent */
-	struct device *dev;			/* This is this display device */
-	struct mutex lock;
-	void *priv_data;
-	char type[16];
-	char *name;
-	int idx;
-};
-
-extern struct display_device *display_device_register(struct display_driver *driver,
-					struct device *dev, void *devdata);
-extern void display_device_unregister(struct display_device *dev);
-
-extern int probe_edid(struct display_device *dev, void *devdata);
-
-#define to_display_device(obj) container_of(obj, struct display_device, class_dev)
-
-#endif
-- 
cgit v1.2.3


From 38eb6863ed21de9beab792f66cd282c21e0dc10b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 20 Oct 2011 13:42:22 +0200
Subject: zorro: Rename Picasso IV Z2 "MEM" to "RAM" for consistency

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
---
 include/linux/zorro_ids.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/zorro_ids.h b/include/linux/zorro_ids.h
index 7e749088910d..74bc53bcfdcf 100644
--- a/include/linux/zorro_ids.h
+++ b/include/linux/zorro_ids.h
@@ -360,8 +360,8 @@
 #define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_II_II_PLUS_RAM	ZORRO_ID(VILLAGE_TRONIC, 0x0B, 0)
 #define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_II_II_PLUS_REG	ZORRO_ID(VILLAGE_TRONIC, 0x0C, 0)
 #define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_II_II_PLUS_SEGMENTED_MODE	ZORRO_ID(VILLAGE_TRONIC, 0x0D, 0)
-#define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_IV_Z2_MEM1		ZORRO_ID(VILLAGE_TRONIC, 0x15, 0)
-#define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_IV_Z2_MEM2		ZORRO_ID(VILLAGE_TRONIC, 0x16, 0)
+#define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_IV_Z2_RAM1		ZORRO_ID(VILLAGE_TRONIC, 0x15, 0)
+#define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_IV_Z2_RAM2		ZORRO_ID(VILLAGE_TRONIC, 0x16, 0)
 #define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_IV_Z2_REG		ZORRO_ID(VILLAGE_TRONIC, 0x17, 0)
 #define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_IV_Z3		ZORRO_ID(VILLAGE_TRONIC, 0x18, 0)
 #define  ZORRO_PROD_VILLAGE_TRONIC_ARIADNE			ZORRO_ID(VILLAGE_TRONIC, 0xC9, 0)
-- 
cgit v1.2.3


From 87bbbe22f84b91d0bcd3a7fc638e4f5e8224cc4e Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Thu, 10 Nov 2011 11:28:55 +0200
Subject: nl80211: Add probe response offload attribute

Notify user-space about probe-response offloading support in the driver.

A wiphy flag is used to indicate support and a bitmap of protocols
determines which protocols are supported.

Signed-off-by: Guy Eilam <guy@wizery.com>
Signed-off-by: Arik Nemtsov <arik@wizery.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 28 ++++++++++++++++++++++++++++
 include/net/cfg80211.h  | 10 ++++++++++
 net/wireless/nl80211.c  |  4 ++++
 3 files changed, 42 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 3152ddfb4294..be92333cf8fe 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1160,6 +1160,11 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_FEATURE_FLAGS: This u32 attribute contains flags from
  *	&enum nl80211_feature_flags and is advertised in wiphy information.
+ * @NL80211_ATTR_PROBE_RESP_OFFLOAD: Indicates that the HW responds to probe
+ *
+ *	requests while operating in AP-mode.
+ *	This attribute holds a bitmap of the supported protocols for
+ *	offloading (see &enum nl80211_probe_resp_offload_support_attr).
  *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -1395,6 +1400,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_FEATURE_FLAGS,
 
+	NL80211_ATTR_PROBE_RESP_OFFLOAD,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -2727,4 +2734,25 @@ enum nl80211_feature_flags {
 	NL80211_FEATURE_SK_TX_STATUS	= 1 << 0,
 };
 
+/**
+ * enum nl80211_probe_resp_offload_support_attr - optional supported
+ *	protocols for probe-response offloading by the driver/FW.
+ *	To be used with the %NL80211_ATTR_PROBE_RESP_OFFLOAD attribute.
+ *	Each enum value represents a bit in the bitmap of supported
+ *	protocols. Typically a subset of probe-requests belonging to a
+ *	supported protocol will be excluded from offload and uploaded
+ *	to the host.
+ *
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS: Support for WPS ver. 1
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2: Support for WPS ver. 2
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P: Support for P2P
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U: Support for 802.11u
+ */
+enum nl80211_probe_resp_offload_support_attr {
+	NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS =	1<<0,
+	NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2 =	1<<1,
+	NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P =	1<<2,
+	NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U =	1<<3,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 50e3608f5656..093f538f65d6 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1694,6 +1694,8 @@ struct cfg80211_ops {
  * @WIPHY_FLAG_REPORTS_OBSS: the device will report beacons from other BSSes
  *	when there are virtual interfaces in AP mode by calling
  *	cfg80211_report_obss_beacon().
+ * @WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD: When operating as an AP, the device
+ *	responds to probe-requests in hardware.
  */
 enum wiphy_flags {
 	WIPHY_FLAG_CUSTOM_REGULATORY		= BIT(0),
@@ -1714,6 +1716,7 @@ enum wiphy_flags {
 	WIPHY_FLAG_TDLS_EXTERNAL_SETUP		= BIT(16),
 	WIPHY_FLAG_HAVE_AP_SME			= BIT(17),
 	WIPHY_FLAG_REPORTS_OBSS			= BIT(18),
+	WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD	= BIT(19),
 };
 
 /**
@@ -1982,6 +1985,13 @@ struct wiphy {
 	u32 available_antennas_tx;
 	u32 available_antennas_rx;
 
+	/*
+	 * Bitmap of supported protocols for probe response offloading
+	 * see &enum nl80211_probe_resp_offload_support_attr. Only valid
+	 * when the wiphy flag @WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD is set.
+	 */
+	u32 probe_resp_offload;
+
 	/* If multiple wiphys are registered and you're handed e.g.
 	 * a regular netdev with assigned ieee80211_ptr, you won't
 	 * know whether it points to a wiphy your driver has registered
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 258fb881c8e3..f395a06c114a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -759,6 +759,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
 		    dev->wiphy.available_antennas_rx);
 
+	if (dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD)
+		NLA_PUT_U32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD,
+			    dev->wiphy.probe_resp_offload);
+
 	if ((dev->wiphy.available_antennas_tx ||
 	     dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) {
 		u32 tx_ant = 0, rx_ant = 0;
-- 
cgit v1.2.3


From 00f740e1a3b7abb51980371ee8fa113df22ae0b8 Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Thu, 10 Nov 2011 11:28:56 +0200
Subject: nl80211: Pass probe response data to drivers

Pass probe-response data from usermode via beacon parameters.

Signed-off-by: Guy Eilam <guy@wizery.com>
Signed-off-by: Arik Nemtsov <arik@wizery.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 6 ++++++
 include/net/cfg80211.h  | 4 ++++
 net/wireless/nl80211.c  | 9 +++++++++
 3 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index be92333cf8fe..f9261c253735 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1166,6 +1166,10 @@ enum nl80211_commands {
  *	This attribute holds a bitmap of the supported protocols for
  *	offloading (see &enum nl80211_probe_resp_offload_support_attr).
  *
+ * @NL80211_ATTR_PROBE_RESP: Probe Response template data. Contains the entire
+ *	probe-response frame. The DA field in the 802.11 header is zero-ed out,
+ *	to be filled by the FW.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1402,6 +1406,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_PROBE_RESP_OFFLOAD,
 
+	NL80211_ATTR_PROBE_RESP,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 093f538f65d6..8d7ba0961d3e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -391,6 +391,8 @@ struct cfg80211_crypto_settings {
  * @assocresp_ies: extra information element(s) to add into (Re)Association
  *	Response frames or %NULL
  * @assocresp_ies_len: length of assocresp_ies in octets
+ * @probe_resp_len: length of probe response template (@probe_resp)
+ * @probe_resp: probe response template (AP mode only)
  */
 struct beacon_parameters {
 	u8 *head, *tail;
@@ -408,6 +410,8 @@ struct beacon_parameters {
 	size_t proberesp_ies_len;
 	const u8 *assocresp_ies;
 	size_t assocresp_ies_len;
+	int probe_resp_len;
+	u8 *probe_resp;
 };
 
 /**
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index f395a06c114a..6bc7c4b32fa5 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -197,6 +197,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_TDLS_SUPPORT] = { .type = NLA_FLAG },
 	[NL80211_ATTR_TDLS_EXTERNAL_SETUP] = { .type = NLA_FLAG },
 	[NL80211_ATTR_DONT_WAIT_FOR_ACK] = { .type = NLA_FLAG },
+	[NL80211_ATTR_PROBE_RESP] = { .type = NLA_BINARY,
+				      .len = IEEE80211_MAX_DATA_LEN },
 };
 
 /* policy for the key attributes */
@@ -2171,6 +2173,13 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
 			nla_len(info->attrs[NL80211_ATTR_IE_ASSOC_RESP]);
 	}
 
+	if (info->attrs[NL80211_ATTR_PROBE_RESP]) {
+		params.probe_resp =
+			nla_data(info->attrs[NL80211_ATTR_PROBE_RESP]);
+		params.probe_resp_len =
+			nla_len(info->attrs[NL80211_ATTR_PROBE_RESP]);
+	}
+
 	err = call(&rdev->wiphy, dev, &params);
 	if (!err && params.interval)
 		wdev->beacon_interval = params.interval;
-- 
cgit v1.2.3


From 3d249d4ca7d0ed6629a135ea1ea21c72286c0d80 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Fri, 11 Nov 2011 22:16:48 +0000
Subject: net: introduce ethernet teaming device

This patch introduces new network device called team. It supposes to be
very fast, simple, userspace-driven alternative to existing bonding
driver.

Userspace library called libteam with couple of demo apps is available
here:
https://github.com/jpirko/libteam
Note it's still in its dipers atm.

team<->libteam use generic netlink for communication. That and rtnl
suppose to be the only way to configure team device, no sysfs etc.

Python binding of libteam was recently introduced.
Daemon providing arpmon/miimon active-backup functionality will be
introduced shortly. All what's necessary is already implemented in
kernel team driver.

v7->v8:
	- check ndo_ndo_vlan_rx_[add/kill]_vid functions before calling
	  them.
	- use dev_kfree_skb_any() instead of dev_kfree_skb()

v6->v7:
	- transmit and receive functions are not checked in hot paths.
	  That also resolves memory leak on transmit when no port is
	  present

v5->v6:
	- changed couple of _rcu calls to non _rcu ones in non-readers

v4->v5:
	- team_change_mtu() uses team->lock while travesing though port
	  list
	- mac address changes are moved completely to jurisdiction of
	  userspace daemon. This way the daemon can do FOM1, FOM2 and
	  possibly other weird things with mac addresses.
	  Only round-robin mode sets up all ports to bond's address then
	  enslaved.
	- Extended Kconfig text

v3->v4:
	- remove redundant synchronize_rcu from __team_change_mode()
	- revert "set and clear of mode_ops happens per pointer, not per
	  byte"
	- extend comment of function __team_change_mode()

v2->v3:
	- team_change_mtu() uses rcu version of list traversal to unwind
	- set and clear of mode_ops happens per pointer, not per byte
	- port hashlist changed to be embedded into team structure
	- error branch in team_port_enter() does cleanup now
	- fixed rtln->rtnl

v1->v2:
	- modes are made as modules. Makes team more modular and
	  extendable.
	- several commenters' nitpicks found on v1 were fixed
	- several other bugs were fixed.
	- note I ignored Eric's comment about roundrobin port selector
	  as Eric's way may be easily implemented as another mode (mode
	  "random") in future.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/team.txt         |    2 +
 MAINTAINERS                               |    7 +
 drivers/net/Kconfig                       |    2 +
 drivers/net/Makefile                      |    1 +
 drivers/net/team/Kconfig                  |   43 +
 drivers/net/team/Makefile                 |    7 +
 drivers/net/team/team.c                   | 1583 +++++++++++++++++++++++++++++
 drivers/net/team/team_mode_activebackup.c |  137 +++
 drivers/net/team/team_mode_roundrobin.c   |  107 ++
 include/linux/Kbuild                      |    1 +
 include/linux/if.h                        |    1 +
 include/linux/if_team.h                   |  242 +++++
 12 files changed, 2133 insertions(+)
 create mode 100644 Documentation/networking/team.txt
 create mode 100644 drivers/net/team/Kconfig
 create mode 100644 drivers/net/team/Makefile
 create mode 100644 drivers/net/team/team.c
 create mode 100644 drivers/net/team/team_mode_activebackup.c
 create mode 100644 drivers/net/team/team_mode_roundrobin.c
 create mode 100644 include/linux/if_team.h

(limited to 'include/linux')

diff --git a/Documentation/networking/team.txt b/Documentation/networking/team.txt
new file mode 100644
index 000000000000..5a013686b9ea
--- /dev/null
+++ b/Documentation/networking/team.txt
@@ -0,0 +1,2 @@
+Team devices are driven from userspace via libteam library which is here:
+	https://github.com/jpirko/libteam
diff --git a/MAINTAINERS b/MAINTAINERS
index 4808256446f2..8d941692c394 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6484,6 +6484,13 @@ W:	http://tcp-lp-mod.sourceforge.net/
 S:	Maintained
 F:	net/ipv4/tcp_lp.c
 
+TEAM DRIVER
+M:	Jiri Pirko <jpirko@redhat.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	drivers/net/team/
+F:	include/linux/if_team.h
+
 TEGRA SUPPORT
 M:	Colin Cross <ccross@android.com>
 M:	Olof Johansson <olof@lixom.net>
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 583f66cd5bbd..b3020bea39e4 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -125,6 +125,8 @@ config IFB
 	  'ifb1' etc.
 	  Look at the iproute2 documentation directory for usage etc
 
+source "drivers/net/team/Kconfig"
+
 config MACVLAN
 	tristate "MAC-VLAN support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index fa877cd2b139..4e4ebfe1aa53 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_NET) += Space.o loopback.o
 obj-$(CONFIG_NETCONSOLE) += netconsole.o
 obj-$(CONFIG_PHYLIB) += phy/
 obj-$(CONFIG_RIONET) += rionet.o
+obj-$(CONFIG_NET_TEAM) += team/
 obj-$(CONFIG_TUN) += tun.o
 obj-$(CONFIG_VETH) += veth.o
 obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
diff --git a/drivers/net/team/Kconfig b/drivers/net/team/Kconfig
new file mode 100644
index 000000000000..248a144033ca
--- /dev/null
+++ b/drivers/net/team/Kconfig
@@ -0,0 +1,43 @@
+menuconfig NET_TEAM
+	tristate "Ethernet team driver support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	---help---
+	  This allows one to create virtual interfaces that teams together
+	  multiple ethernet devices.
+
+	  Team devices can be added using the "ip" command from the
+	  iproute2 package:
+
+	  "ip link add link [ address MAC ] [ NAME ] type team"
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called team.
+
+if NET_TEAM
+
+config NET_TEAM_MODE_ROUNDROBIN
+	tristate "Round-robin mode support"
+	depends on NET_TEAM
+	---help---
+	  Basic mode where port used for transmitting packets is selected in
+	  round-robin fashion using packet counter.
+
+	  All added ports are setup to have bond's mac address.
+
+	  To compile this team mode as a module, choose M here: the module
+	  will be called team_mode_roundrobin.
+
+config NET_TEAM_MODE_ACTIVEBACKUP
+	tristate "Active-backup mode support"
+	depends on NET_TEAM
+	---help---
+	  Only one port is active at a time and the rest of ports are used
+	  for backup.
+
+	  Mac addresses of ports are not modified. Userspace is responsible
+	  to do so.
+
+	  To compile this team mode as a module, choose M here: the module
+	  will be called team_mode_activebackup.
+
+endif # NET_TEAM
diff --git a/drivers/net/team/Makefile b/drivers/net/team/Makefile
new file mode 100644
index 000000000000..85f2028a87af
--- /dev/null
+++ b/drivers/net/team/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the network team driver
+#
+
+obj-$(CONFIG_NET_TEAM) += team.o
+obj-$(CONFIG_NET_TEAM_MODE_ROUNDROBIN) += team_mode_roundrobin.o
+obj-$(CONFIG_NET_TEAM_MODE_ACTIVEBACKUP) += team_mode_activebackup.o
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
new file mode 100644
index 000000000000..60672bb09960
--- /dev/null
+++ b/drivers/net/team/team.c
@@ -0,0 +1,1583 @@
+/*
+ * net/drivers/team/team.c - Network team device driver
+ * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
+#include <linux/errno.h>
+#include <linux/ctype.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/socket.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <net/rtnetlink.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <linux/if_team.h>
+
+#define DRV_NAME "team"
+
+
+/**********
+ * Helpers
+ **********/
+
+#define team_port_exists(dev) (dev->priv_flags & IFF_TEAM_PORT)
+
+static struct team_port *team_port_get_rcu(const struct net_device *dev)
+{
+	struct team_port *port = rcu_dereference(dev->rx_handler_data);
+
+	return team_port_exists(dev) ? port : NULL;
+}
+
+static struct team_port *team_port_get_rtnl(const struct net_device *dev)
+{
+	struct team_port *port = rtnl_dereference(dev->rx_handler_data);
+
+	return team_port_exists(dev) ? port : NULL;
+}
+
+/*
+ * Since the ability to change mac address for open port device is tested in
+ * team_port_add, this function can be called without control of return value
+ */
+static int __set_port_mac(struct net_device *port_dev,
+			  const unsigned char *dev_addr)
+{
+	struct sockaddr addr;
+
+	memcpy(addr.sa_data, dev_addr, ETH_ALEN);
+	addr.sa_family = ARPHRD_ETHER;
+	return dev_set_mac_address(port_dev, &addr);
+}
+
+int team_port_set_orig_mac(struct team_port *port)
+{
+	return __set_port_mac(port->dev, port->orig.dev_addr);
+}
+
+int team_port_set_team_mac(struct team_port *port)
+{
+	return __set_port_mac(port->dev, port->team->dev->dev_addr);
+}
+EXPORT_SYMBOL(team_port_set_team_mac);
+
+
+/*******************
+ * Options handling
+ *******************/
+
+void team_options_register(struct team *team, struct team_option *option,
+			   size_t option_count)
+{
+	int i;
+
+	for (i = 0; i < option_count; i++, option++)
+		list_add_tail(&option->list, &team->option_list);
+}
+EXPORT_SYMBOL(team_options_register);
+
+static void __team_options_change_check(struct team *team,
+					struct team_option *changed_option);
+
+static void __team_options_unregister(struct team *team,
+				      struct team_option *option,
+				      size_t option_count)
+{
+	int i;
+
+	for (i = 0; i < option_count; i++, option++)
+		list_del(&option->list);
+}
+
+void team_options_unregister(struct team *team, struct team_option *option,
+			     size_t option_count)
+{
+	__team_options_unregister(team, option, option_count);
+	__team_options_change_check(team, NULL);
+}
+EXPORT_SYMBOL(team_options_unregister);
+
+static int team_option_get(struct team *team, struct team_option *option,
+			   void *arg)
+{
+	return option->getter(team, arg);
+}
+
+static int team_option_set(struct team *team, struct team_option *option,
+			   void *arg)
+{
+	int err;
+
+	err = option->setter(team, arg);
+	if (err)
+		return err;
+
+	__team_options_change_check(team, option);
+	return err;
+}
+
+/****************
+ * Mode handling
+ ****************/
+
+static LIST_HEAD(mode_list);
+static DEFINE_SPINLOCK(mode_list_lock);
+
+static struct team_mode *__find_mode(const char *kind)
+{
+	struct team_mode *mode;
+
+	list_for_each_entry(mode, &mode_list, list) {
+		if (strcmp(mode->kind, kind) == 0)
+			return mode;
+	}
+	return NULL;
+}
+
+static bool is_good_mode_name(const char *name)
+{
+	while (*name != '\0') {
+		if (!isalpha(*name) && !isdigit(*name) && *name != '_')
+			return false;
+		name++;
+	}
+	return true;
+}
+
+int team_mode_register(struct team_mode *mode)
+{
+	int err = 0;
+
+	if (!is_good_mode_name(mode->kind) ||
+	    mode->priv_size > TEAM_MODE_PRIV_SIZE)
+		return -EINVAL;
+	spin_lock(&mode_list_lock);
+	if (__find_mode(mode->kind)) {
+		err = -EEXIST;
+		goto unlock;
+	}
+	list_add_tail(&mode->list, &mode_list);
+unlock:
+	spin_unlock(&mode_list_lock);
+	return err;
+}
+EXPORT_SYMBOL(team_mode_register);
+
+int team_mode_unregister(struct team_mode *mode)
+{
+	spin_lock(&mode_list_lock);
+	list_del_init(&mode->list);
+	spin_unlock(&mode_list_lock);
+	return 0;
+}
+EXPORT_SYMBOL(team_mode_unregister);
+
+static struct team_mode *team_mode_get(const char *kind)
+{
+	struct team_mode *mode;
+
+	spin_lock(&mode_list_lock);
+	mode = __find_mode(kind);
+	if (!mode) {
+		spin_unlock(&mode_list_lock);
+		request_module("team-mode-%s", kind);
+		spin_lock(&mode_list_lock);
+		mode = __find_mode(kind);
+	}
+	if (mode)
+		if (!try_module_get(mode->owner))
+			mode = NULL;
+
+	spin_unlock(&mode_list_lock);
+	return mode;
+}
+
+static void team_mode_put(const struct team_mode *mode)
+{
+	module_put(mode->owner);
+}
+
+static bool team_dummy_transmit(struct team *team, struct sk_buff *skb)
+{
+	dev_kfree_skb_any(skb);
+	return false;
+}
+
+rx_handler_result_t team_dummy_receive(struct team *team,
+				       struct team_port *port,
+				       struct sk_buff *skb)
+{
+	return RX_HANDLER_ANOTHER;
+}
+
+static void team_adjust_ops(struct team *team)
+{
+	/*
+	 * To avoid checks in rx/tx skb paths, ensure here that non-null and
+	 * correct ops are always set.
+	 */
+
+	if (list_empty(&team->port_list) ||
+	    !team->mode || !team->mode->ops->transmit)
+		team->ops.transmit = team_dummy_transmit;
+	else
+		team->ops.transmit = team->mode->ops->transmit;
+
+	if (list_empty(&team->port_list) ||
+	    !team->mode || !team->mode->ops->receive)
+		team->ops.receive = team_dummy_receive;
+	else
+		team->ops.receive = team->mode->ops->receive;
+}
+
+/*
+ * We can benefit from the fact that it's ensured no port is present
+ * at the time of mode change. Therefore no packets are in fly so there's no
+ * need to set mode operations in any special way.
+ */
+static int __team_change_mode(struct team *team,
+			      const struct team_mode *new_mode)
+{
+	/* Check if mode was previously set and do cleanup if so */
+	if (team->mode) {
+		void (*exit_op)(struct team *team) = team->ops.exit;
+
+		/* Clear ops area so no callback is called any longer */
+		memset(&team->ops, 0, sizeof(struct team_mode_ops));
+		team_adjust_ops(team);
+
+		if (exit_op)
+			exit_op(team);
+		team_mode_put(team->mode);
+		team->mode = NULL;
+		/* zero private data area */
+		memset(&team->mode_priv, 0,
+		       sizeof(struct team) - offsetof(struct team, mode_priv));
+	}
+
+	if (!new_mode)
+		return 0;
+
+	if (new_mode->ops->init) {
+		int err;
+
+		err = new_mode->ops->init(team);
+		if (err)
+			return err;
+	}
+
+	team->mode = new_mode;
+	memcpy(&team->ops, new_mode->ops, sizeof(struct team_mode_ops));
+	team_adjust_ops(team);
+
+	return 0;
+}
+
+static int team_change_mode(struct team *team, const char *kind)
+{
+	struct team_mode *new_mode;
+	struct net_device *dev = team->dev;
+	int err;
+
+	if (!list_empty(&team->port_list)) {
+		netdev_err(dev, "No ports can be present during mode change\n");
+		return -EBUSY;
+	}
+
+	if (team->mode && strcmp(team->mode->kind, kind) == 0) {
+		netdev_err(dev, "Unable to change to the same mode the team is in\n");
+		return -EINVAL;
+	}
+
+	new_mode = team_mode_get(kind);
+	if (!new_mode) {
+		netdev_err(dev, "Mode \"%s\" not found\n", kind);
+		return -EINVAL;
+	}
+
+	err = __team_change_mode(team, new_mode);
+	if (err) {
+		netdev_err(dev, "Failed to change to mode \"%s\"\n", kind);
+		team_mode_put(new_mode);
+		return err;
+	}
+
+	netdev_info(dev, "Mode changed to \"%s\"\n", kind);
+	return 0;
+}
+
+
+/************************
+ * Rx path frame handler
+ ************************/
+
+/* note: already called with rcu_read_lock */
+static rx_handler_result_t team_handle_frame(struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	struct team_port *port;
+	struct team *team;
+	rx_handler_result_t res;
+
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb)
+		return RX_HANDLER_CONSUMED;
+
+	*pskb = skb;
+
+	port = team_port_get_rcu(skb->dev);
+	team = port->team;
+
+	res = team->ops.receive(team, port, skb);
+	if (res == RX_HANDLER_ANOTHER) {
+		struct team_pcpu_stats *pcpu_stats;
+
+		pcpu_stats = this_cpu_ptr(team->pcpu_stats);
+		u64_stats_update_begin(&pcpu_stats->syncp);
+		pcpu_stats->rx_packets++;
+		pcpu_stats->rx_bytes += skb->len;
+		if (skb->pkt_type == PACKET_MULTICAST)
+			pcpu_stats->rx_multicast++;
+		u64_stats_update_end(&pcpu_stats->syncp);
+
+		skb->dev = team->dev;
+	} else {
+		this_cpu_inc(team->pcpu_stats->rx_dropped);
+	}
+
+	return res;
+}
+
+
+/****************
+ * Port handling
+ ****************/
+
+static bool team_port_find(const struct team *team,
+			   const struct team_port *port)
+{
+	struct team_port *cur;
+
+	list_for_each_entry(cur, &team->port_list, list)
+		if (cur == port)
+			return true;
+	return false;
+}
+
+/*
+ * Add/delete port to the team port list. Write guarded by rtnl_lock.
+ * Takes care of correct port->index setup (might be racy).
+ */
+static void team_port_list_add_port(struct team *team,
+				    struct team_port *port)
+{
+	port->index = team->port_count++;
+	hlist_add_head_rcu(&port->hlist,
+			   team_port_index_hash(team, port->index));
+	list_add_tail_rcu(&port->list, &team->port_list);
+}
+
+static void __reconstruct_port_hlist(struct team *team, int rm_index)
+{
+	int i;
+	struct team_port *port;
+
+	for (i = rm_index + 1; i < team->port_count; i++) {
+		port = team_get_port_by_index(team, i);
+		hlist_del_rcu(&port->hlist);
+		port->index--;
+		hlist_add_head_rcu(&port->hlist,
+				   team_port_index_hash(team, port->index));
+	}
+}
+
+static void team_port_list_del_port(struct team *team,
+				   struct team_port *port)
+{
+	int rm_index = port->index;
+
+	hlist_del_rcu(&port->hlist);
+	list_del_rcu(&port->list);
+	__reconstruct_port_hlist(team, rm_index);
+	team->port_count--;
+}
+
+#define TEAM_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \
+			    NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
+			    NETIF_F_HIGHDMA | NETIF_F_LRO)
+
+static void __team_compute_features(struct team *team)
+{
+	struct team_port *port;
+	u32 vlan_features = TEAM_VLAN_FEATURES;
+	unsigned short max_hard_header_len = ETH_HLEN;
+
+	list_for_each_entry(port, &team->port_list, list) {
+		vlan_features = netdev_increment_features(vlan_features,
+					port->dev->vlan_features,
+					TEAM_VLAN_FEATURES);
+
+		if (port->dev->hard_header_len > max_hard_header_len)
+			max_hard_header_len = port->dev->hard_header_len;
+	}
+
+	team->dev->vlan_features = vlan_features;
+	team->dev->hard_header_len = max_hard_header_len;
+
+	netdev_change_features(team->dev);
+}
+
+static void team_compute_features(struct team *team)
+{
+	spin_lock(&team->lock);
+	__team_compute_features(team);
+	spin_unlock(&team->lock);
+}
+
+static int team_port_enter(struct team *team, struct team_port *port)
+{
+	int err = 0;
+
+	dev_hold(team->dev);
+	port->dev->priv_flags |= IFF_TEAM_PORT;
+	if (team->ops.port_enter) {
+		err = team->ops.port_enter(team, port);
+		if (err) {
+			netdev_err(team->dev, "Device %s failed to enter team mode\n",
+				   port->dev->name);
+			goto err_port_enter;
+		}
+	}
+
+	return 0;
+
+err_port_enter:
+	port->dev->priv_flags &= ~IFF_TEAM_PORT;
+	dev_put(team->dev);
+
+	return err;
+}
+
+static void team_port_leave(struct team *team, struct team_port *port)
+{
+	if (team->ops.port_leave)
+		team->ops.port_leave(team, port);
+	port->dev->priv_flags &= ~IFF_TEAM_PORT;
+	dev_put(team->dev);
+}
+
+static void __team_port_change_check(struct team_port *port, bool linkup);
+
+static int team_port_add(struct team *team, struct net_device *port_dev)
+{
+	struct net_device *dev = team->dev;
+	struct team_port *port;
+	char *portname = port_dev->name;
+	int err;
+
+	if (port_dev->flags & IFF_LOOPBACK ||
+	    port_dev->type != ARPHRD_ETHER) {
+		netdev_err(dev, "Device %s is of an unsupported type\n",
+			   portname);
+		return -EINVAL;
+	}
+
+	if (team_port_exists(port_dev)) {
+		netdev_err(dev, "Device %s is already a port "
+				"of a team device\n", portname);
+		return -EBUSY;
+	}
+
+	if (port_dev->flags & IFF_UP) {
+		netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
+			   portname);
+		return -EBUSY;
+	}
+
+	port = kzalloc(sizeof(struct team_port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	port->dev = port_dev;
+	port->team = team;
+
+	port->orig.mtu = port_dev->mtu;
+	err = dev_set_mtu(port_dev, dev->mtu);
+	if (err) {
+		netdev_dbg(dev, "Error %d calling dev_set_mtu\n", err);
+		goto err_set_mtu;
+	}
+
+	memcpy(port->orig.dev_addr, port_dev->dev_addr, ETH_ALEN);
+
+	err = team_port_enter(team, port);
+	if (err) {
+		netdev_err(dev, "Device %s failed to enter team mode\n",
+			   portname);
+		goto err_port_enter;
+	}
+
+	err = dev_open(port_dev);
+	if (err) {
+		netdev_dbg(dev, "Device %s opening failed\n",
+			   portname);
+		goto err_dev_open;
+	}
+
+	err = netdev_set_master(port_dev, dev);
+	if (err) {
+		netdev_err(dev, "Device %s failed to set master\n", portname);
+		goto err_set_master;
+	}
+
+	err = netdev_rx_handler_register(port_dev, team_handle_frame,
+					 port);
+	if (err) {
+		netdev_err(dev, "Device %s failed to register rx_handler\n",
+			   portname);
+		goto err_handler_register;
+	}
+
+	team_port_list_add_port(team, port);
+	team_adjust_ops(team);
+	__team_compute_features(team);
+	__team_port_change_check(port, !!netif_carrier_ok(port_dev));
+
+	netdev_info(dev, "Port device %s added\n", portname);
+
+	return 0;
+
+err_handler_register:
+	netdev_set_master(port_dev, NULL);
+
+err_set_master:
+	dev_close(port_dev);
+
+err_dev_open:
+	team_port_leave(team, port);
+	team_port_set_orig_mac(port);
+
+err_port_enter:
+	dev_set_mtu(port_dev, port->orig.mtu);
+
+err_set_mtu:
+	kfree(port);
+
+	return err;
+}
+
+static int team_port_del(struct team *team, struct net_device *port_dev)
+{
+	struct net_device *dev = team->dev;
+	struct team_port *port;
+	char *portname = port_dev->name;
+
+	port = team_port_get_rtnl(port_dev);
+	if (!port || !team_port_find(team, port)) {
+		netdev_err(dev, "Device %s does not act as a port of this team\n",
+			   portname);
+		return -ENOENT;
+	}
+
+	__team_port_change_check(port, false);
+	team_port_list_del_port(team, port);
+	team_adjust_ops(team);
+	netdev_rx_handler_unregister(port_dev);
+	netdev_set_master(port_dev, NULL);
+	dev_close(port_dev);
+	team_port_leave(team, port);
+	team_port_set_orig_mac(port);
+	dev_set_mtu(port_dev, port->orig.mtu);
+	synchronize_rcu();
+	kfree(port);
+	netdev_info(dev, "Port device %s removed\n", portname);
+	__team_compute_features(team);
+
+	return 0;
+}
+
+
+/*****************
+ * Net device ops
+ *****************/
+
+static const char team_no_mode_kind[] = "*NOMODE*";
+
+static int team_mode_option_get(struct team *team, void *arg)
+{
+	const char **str = arg;
+
+	*str = team->mode ? team->mode->kind : team_no_mode_kind;
+	return 0;
+}
+
+static int team_mode_option_set(struct team *team, void *arg)
+{
+	const char **str = arg;
+
+	return team_change_mode(team, *str);
+}
+
+static struct team_option team_options[] = {
+	{
+		.name = "mode",
+		.type = TEAM_OPTION_TYPE_STRING,
+		.getter = team_mode_option_get,
+		.setter = team_mode_option_set,
+	},
+};
+
+static int team_init(struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+	int i;
+
+	team->dev = dev;
+	spin_lock_init(&team->lock);
+
+	team->pcpu_stats = alloc_percpu(struct team_pcpu_stats);
+	if (!team->pcpu_stats)
+		return -ENOMEM;
+
+	for (i = 0; i < TEAM_PORT_HASHENTRIES; i++)
+		INIT_HLIST_HEAD(&team->port_hlist[i]);
+	INIT_LIST_HEAD(&team->port_list);
+
+	team_adjust_ops(team);
+
+	INIT_LIST_HEAD(&team->option_list);
+	team_options_register(team, team_options, ARRAY_SIZE(team_options));
+	netif_carrier_off(dev);
+
+	return 0;
+}
+
+static void team_uninit(struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+	struct team_port *tmp;
+
+	spin_lock(&team->lock);
+	list_for_each_entry_safe(port, tmp, &team->port_list, list)
+		team_port_del(team, port->dev);
+
+	__team_change_mode(team, NULL); /* cleanup */
+	__team_options_unregister(team, team_options, ARRAY_SIZE(team_options));
+	spin_unlock(&team->lock);
+}
+
+static void team_destructor(struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+
+	free_percpu(team->pcpu_stats);
+	free_netdev(dev);
+}
+
+static int team_open(struct net_device *dev)
+{
+	netif_carrier_on(dev);
+	return 0;
+}
+
+static int team_close(struct net_device *dev)
+{
+	netif_carrier_off(dev);
+	return 0;
+}
+
+/*
+ * note: already called with rcu_read_lock
+ */
+static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+	bool tx_success = false;
+	unsigned int len = skb->len;
+
+	tx_success = team->ops.transmit(team, skb);
+	if (tx_success) {
+		struct team_pcpu_stats *pcpu_stats;
+
+		pcpu_stats = this_cpu_ptr(team->pcpu_stats);
+		u64_stats_update_begin(&pcpu_stats->syncp);
+		pcpu_stats->tx_packets++;
+		pcpu_stats->tx_bytes += len;
+		u64_stats_update_end(&pcpu_stats->syncp);
+	} else {
+		this_cpu_inc(team->pcpu_stats->tx_dropped);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+static void team_change_rx_flags(struct net_device *dev, int change)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+	int inc;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		if (change & IFF_PROMISC) {
+			inc = dev->flags & IFF_PROMISC ? 1 : -1;
+			dev_set_promiscuity(port->dev, inc);
+		}
+		if (change & IFF_ALLMULTI) {
+			inc = dev->flags & IFF_ALLMULTI ? 1 : -1;
+			dev_set_allmulti(port->dev, inc);
+		}
+	}
+	rcu_read_unlock();
+}
+
+static void team_set_rx_mode(struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		dev_uc_sync(port->dev, dev);
+		dev_mc_sync(port->dev, dev);
+	}
+	rcu_read_unlock();
+}
+
+static int team_set_mac_address(struct net_device *dev, void *p)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+	struct sockaddr *addr = p;
+
+	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &team->port_list, list)
+		if (team->ops.port_change_mac)
+			team->ops.port_change_mac(team, port);
+	rcu_read_unlock();
+	return 0;
+}
+
+static int team_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+	int err;
+
+	/*
+	 * Alhough this is reader, it's guarded by team lock. It's not possible
+	 * to traverse list in reverse under rcu_read_lock
+	 */
+	spin_lock(&team->lock);
+	list_for_each_entry(port, &team->port_list, list) {
+		err = dev_set_mtu(port->dev, new_mtu);
+		if (err) {
+			netdev_err(dev, "Device %s failed to change mtu",
+				   port->dev->name);
+			goto unwind;
+		}
+	}
+	spin_unlock(&team->lock);
+
+	dev->mtu = new_mtu;
+
+	return 0;
+
+unwind:
+	list_for_each_entry_continue_reverse(port, &team->port_list, list)
+		dev_set_mtu(port->dev, dev->mtu);
+	spin_unlock(&team->lock);
+
+	return err;
+}
+
+static struct rtnl_link_stats64 *
+team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_pcpu_stats *p;
+	u64 rx_packets, rx_bytes, rx_multicast, tx_packets, tx_bytes;
+	u32 rx_dropped = 0, tx_dropped = 0;
+	unsigned int start;
+	int i;
+
+	for_each_possible_cpu(i) {
+		p = per_cpu_ptr(team->pcpu_stats, i);
+		do {
+			start = u64_stats_fetch_begin_bh(&p->syncp);
+			rx_packets	= p->rx_packets;
+			rx_bytes	= p->rx_bytes;
+			rx_multicast	= p->rx_multicast;
+			tx_packets	= p->tx_packets;
+			tx_bytes	= p->tx_bytes;
+		} while (u64_stats_fetch_retry_bh(&p->syncp, start));
+
+		stats->rx_packets	+= rx_packets;
+		stats->rx_bytes		+= rx_bytes;
+		stats->multicast	+= rx_multicast;
+		stats->tx_packets	+= tx_packets;
+		stats->tx_bytes		+= tx_bytes;
+		/*
+		 * rx_dropped & tx_dropped are u32, updated
+		 * without syncp protection.
+		 */
+		rx_dropped	+= p->rx_dropped;
+		tx_dropped	+= p->tx_dropped;
+	}
+	stats->rx_dropped	= rx_dropped;
+	stats->tx_dropped	= tx_dropped;
+	return stats;
+}
+
+static void team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		const struct net_device_ops *ops = port->dev->netdev_ops;
+
+		if (ops->ndo_vlan_rx_add_vid)
+			ops->ndo_vlan_rx_add_vid(port->dev, vid);
+	}
+	rcu_read_unlock();
+}
+
+static void team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		const struct net_device_ops *ops = port->dev->netdev_ops;
+
+		if (ops->ndo_vlan_rx_kill_vid)
+			ops->ndo_vlan_rx_kill_vid(port->dev, vid);
+	}
+	rcu_read_unlock();
+}
+
+static int team_add_slave(struct net_device *dev, struct net_device *port_dev)
+{
+	struct team *team = netdev_priv(dev);
+	int err;
+
+	spin_lock(&team->lock);
+	err = team_port_add(team, port_dev);
+	spin_unlock(&team->lock);
+	return err;
+}
+
+static int team_del_slave(struct net_device *dev, struct net_device *port_dev)
+{
+	struct team *team = netdev_priv(dev);
+	int err;
+
+	spin_lock(&team->lock);
+	err = team_port_del(team, port_dev);
+	spin_unlock(&team->lock);
+	return err;
+}
+
+static const struct net_device_ops team_netdev_ops = {
+	.ndo_init		= team_init,
+	.ndo_uninit		= team_uninit,
+	.ndo_open		= team_open,
+	.ndo_stop		= team_close,
+	.ndo_start_xmit		= team_xmit,
+	.ndo_change_rx_flags	= team_change_rx_flags,
+	.ndo_set_rx_mode	= team_set_rx_mode,
+	.ndo_set_mac_address	= team_set_mac_address,
+	.ndo_change_mtu		= team_change_mtu,
+	.ndo_get_stats64	= team_get_stats64,
+	.ndo_vlan_rx_add_vid	= team_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= team_vlan_rx_kill_vid,
+	.ndo_add_slave		= team_add_slave,
+	.ndo_del_slave		= team_del_slave,
+};
+
+
+/***********************
+ * rt netlink interface
+ ***********************/
+
+static void team_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+
+	dev->netdev_ops = &team_netdev_ops;
+	dev->destructor	= team_destructor;
+	dev->tx_queue_len = 0;
+	dev->flags |= IFF_MULTICAST;
+	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
+
+	/*
+	 * Indicate we support unicast address filtering. That way core won't
+	 * bring us to promisc mode in case a unicast addr is added.
+	 * Let this up to underlay drivers.
+	 */
+	dev->priv_flags |= IFF_UNICAST_FLT;
+
+	dev->features |= NETIF_F_LLTX;
+	dev->features |= NETIF_F_GRO;
+	dev->hw_features = NETIF_F_HW_VLAN_TX |
+			   NETIF_F_HW_VLAN_RX |
+			   NETIF_F_HW_VLAN_FILTER;
+
+	dev->features |= dev->hw_features;
+}
+
+static int team_newlink(struct net *src_net, struct net_device *dev,
+			struct nlattr *tb[], struct nlattr *data[])
+{
+	int err;
+
+	if (tb[IFLA_ADDRESS] == NULL)
+		random_ether_addr(dev->dev_addr);
+
+	err = register_netdevice(dev);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int team_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+			return -EINVAL;
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+			return -EADDRNOTAVAIL;
+	}
+	return 0;
+}
+
+static struct rtnl_link_ops team_link_ops __read_mostly = {
+	.kind		= DRV_NAME,
+	.priv_size	= sizeof(struct team),
+	.setup		= team_setup,
+	.newlink	= team_newlink,
+	.validate	= team_validate,
+};
+
+
+/***********************************
+ * Generic netlink custom interface
+ ***********************************/
+
+static struct genl_family team_nl_family = {
+	.id		= GENL_ID_GENERATE,
+	.name		= TEAM_GENL_NAME,
+	.version	= TEAM_GENL_VERSION,
+	.maxattr	= TEAM_ATTR_MAX,
+	.netnsok	= true,
+};
+
+static const struct nla_policy team_nl_policy[TEAM_ATTR_MAX + 1] = {
+	[TEAM_ATTR_UNSPEC]			= { .type = NLA_UNSPEC, },
+	[TEAM_ATTR_TEAM_IFINDEX]		= { .type = NLA_U32 },
+	[TEAM_ATTR_LIST_OPTION]			= { .type = NLA_NESTED },
+	[TEAM_ATTR_LIST_PORT]			= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = {
+	[TEAM_ATTR_OPTION_UNSPEC]		= { .type = NLA_UNSPEC, },
+	[TEAM_ATTR_OPTION_NAME] = {
+		.type = NLA_STRING,
+		.len = TEAM_STRING_MAX_LEN,
+	},
+	[TEAM_ATTR_OPTION_CHANGED]		= { .type = NLA_FLAG },
+	[TEAM_ATTR_OPTION_TYPE]			= { .type = NLA_U8 },
+	[TEAM_ATTR_OPTION_DATA] = {
+		.type = NLA_BINARY,
+		.len = TEAM_STRING_MAX_LEN,
+	},
+};
+
+static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
+			  &team_nl_family, 0, TEAM_CMD_NOOP);
+	if (IS_ERR(hdr)) {
+		err = PTR_ERR(hdr);
+		goto err_msg_put;
+	}
+
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
+
+err_msg_put:
+	nlmsg_free(msg);
+
+	return err;
+}
+
+/*
+ * Netlink cmd functions should be locked by following two functions.
+ * To ensure team_uninit would not be called in between, hold rcu_read_lock
+ * all the time.
+ */
+static struct team *team_nl_team_get(struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	int ifindex;
+	struct net_device *dev;
+	struct team *team;
+
+	if (!info->attrs[TEAM_ATTR_TEAM_IFINDEX])
+		return NULL;
+
+	ifindex = nla_get_u32(info->attrs[TEAM_ATTR_TEAM_IFINDEX]);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifindex);
+	if (!dev || dev->netdev_ops != &team_netdev_ops) {
+		rcu_read_unlock();
+		return NULL;
+	}
+
+	team = netdev_priv(dev);
+	spin_lock(&team->lock);
+	return team;
+}
+
+static void team_nl_team_put(struct team *team)
+{
+	spin_unlock(&team->lock);
+	rcu_read_unlock();
+}
+
+static int team_nl_send_generic(struct genl_info *info, struct team *team,
+				int (*fill_func)(struct sk_buff *skb,
+						 struct genl_info *info,
+						 int flags, struct team *team))
+{
+	struct sk_buff *skb;
+	int err;
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	err = fill_func(skb, info, NLM_F_ACK, team);
+	if (err < 0)
+		goto err_fill;
+
+	err = genlmsg_unicast(genl_info_net(info), skb, info->snd_pid);
+	return err;
+
+err_fill:
+	nlmsg_free(skb);
+	return err;
+}
+
+static int team_nl_fill_options_get_changed(struct sk_buff *skb,
+					    u32 pid, u32 seq, int flags,
+					    struct team *team,
+					    struct team_option *changed_option)
+{
+	struct nlattr *option_list;
+	void *hdr;
+	struct team_option *option;
+
+	hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags,
+			  TEAM_CMD_OPTIONS_GET);
+	if (IS_ERR(hdr))
+		return PTR_ERR(hdr);
+
+	NLA_PUT_U32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex);
+	option_list = nla_nest_start(skb, TEAM_ATTR_LIST_OPTION);
+	if (!option_list)
+		return -EMSGSIZE;
+
+	list_for_each_entry(option, &team->option_list, list) {
+		struct nlattr *option_item;
+		long arg;
+
+		option_item = nla_nest_start(skb, TEAM_ATTR_ITEM_OPTION);
+		if (!option_item)
+			goto nla_put_failure;
+		NLA_PUT_STRING(skb, TEAM_ATTR_OPTION_NAME, option->name);
+		if (option == changed_option)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_OPTION_CHANGED);
+		switch (option->type) {
+		case TEAM_OPTION_TYPE_U32:
+			NLA_PUT_U8(skb, TEAM_ATTR_OPTION_TYPE, NLA_U32);
+			team_option_get(team, option, &arg);
+			NLA_PUT_U32(skb, TEAM_ATTR_OPTION_DATA, arg);
+			break;
+		case TEAM_OPTION_TYPE_STRING:
+			NLA_PUT_U8(skb, TEAM_ATTR_OPTION_TYPE, NLA_STRING);
+			team_option_get(team, option, &arg);
+			NLA_PUT_STRING(skb, TEAM_ATTR_OPTION_DATA,
+				       (char *) arg);
+			break;
+		default:
+			BUG();
+		}
+		nla_nest_end(skb, option_item);
+	}
+
+	nla_nest_end(skb, option_list);
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int team_nl_fill_options_get(struct sk_buff *skb,
+				    struct genl_info *info, int flags,
+				    struct team *team)
+{
+	return team_nl_fill_options_get_changed(skb, info->snd_pid,
+						info->snd_seq, NLM_F_ACK,
+						team, NULL);
+}
+
+static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct team *team;
+	int err;
+
+	team = team_nl_team_get(info);
+	if (!team)
+		return -EINVAL;
+
+	err = team_nl_send_generic(info, team, team_nl_fill_options_get);
+
+	team_nl_team_put(team);
+
+	return err;
+}
+
+static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct team *team;
+	int err = 0;
+	int i;
+	struct nlattr *nl_option;
+
+	team = team_nl_team_get(info);
+	if (!team)
+		return -EINVAL;
+
+	err = -EINVAL;
+	if (!info->attrs[TEAM_ATTR_LIST_OPTION]) {
+		err = -EINVAL;
+		goto team_put;
+	}
+
+	nla_for_each_nested(nl_option, info->attrs[TEAM_ATTR_LIST_OPTION], i) {
+		struct nlattr *mode_attrs[TEAM_ATTR_OPTION_MAX + 1];
+		enum team_option_type opt_type;
+		struct team_option *option;
+		char *opt_name;
+		bool opt_found = false;
+
+		if (nla_type(nl_option) != TEAM_ATTR_ITEM_OPTION) {
+			err = -EINVAL;
+			goto team_put;
+		}
+		err = nla_parse_nested(mode_attrs, TEAM_ATTR_OPTION_MAX,
+				       nl_option, team_nl_option_policy);
+		if (err)
+			goto team_put;
+		if (!mode_attrs[TEAM_ATTR_OPTION_NAME] ||
+		    !mode_attrs[TEAM_ATTR_OPTION_TYPE] ||
+		    !mode_attrs[TEAM_ATTR_OPTION_DATA]) {
+			err = -EINVAL;
+			goto team_put;
+		}
+		switch (nla_get_u8(mode_attrs[TEAM_ATTR_OPTION_TYPE])) {
+		case NLA_U32:
+			opt_type = TEAM_OPTION_TYPE_U32;
+			break;
+		case NLA_STRING:
+			opt_type = TEAM_OPTION_TYPE_STRING;
+			break;
+		default:
+			goto team_put;
+		}
+
+		opt_name = nla_data(mode_attrs[TEAM_ATTR_OPTION_NAME]);
+		list_for_each_entry(option, &team->option_list, list) {
+			long arg;
+			struct nlattr *opt_data_attr;
+
+			if (option->type != opt_type ||
+			    strcmp(option->name, opt_name))
+				continue;
+			opt_found = true;
+			opt_data_attr = mode_attrs[TEAM_ATTR_OPTION_DATA];
+			switch (opt_type) {
+			case TEAM_OPTION_TYPE_U32:
+				arg = nla_get_u32(opt_data_attr);
+				break;
+			case TEAM_OPTION_TYPE_STRING:
+				arg = (long) nla_data(opt_data_attr);
+				break;
+			default:
+				BUG();
+			}
+			err = team_option_set(team, option, &arg);
+			if (err)
+				goto team_put;
+		}
+		if (!opt_found) {
+			err = -ENOENT;
+			goto team_put;
+		}
+	}
+
+team_put:
+	team_nl_team_put(team);
+
+	return err;
+}
+
+static int team_nl_fill_port_list_get_changed(struct sk_buff *skb,
+					      u32 pid, u32 seq, int flags,
+					      struct team *team,
+					      struct team_port *changed_port)
+{
+	struct nlattr *port_list;
+	void *hdr;
+	struct team_port *port;
+
+	hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags,
+			  TEAM_CMD_PORT_LIST_GET);
+	if (IS_ERR(hdr))
+		return PTR_ERR(hdr);
+
+	NLA_PUT_U32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex);
+	port_list = nla_nest_start(skb, TEAM_ATTR_LIST_PORT);
+	if (!port_list)
+		return -EMSGSIZE;
+
+	list_for_each_entry(port, &team->port_list, list) {
+		struct nlattr *port_item;
+
+		port_item = nla_nest_start(skb, TEAM_ATTR_ITEM_PORT);
+		if (!port_item)
+			goto nla_put_failure;
+		NLA_PUT_U32(skb, TEAM_ATTR_PORT_IFINDEX, port->dev->ifindex);
+		if (port == changed_port)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_CHANGED);
+		if (port->linkup)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_LINKUP);
+		NLA_PUT_U32(skb, TEAM_ATTR_PORT_SPEED, port->speed);
+		NLA_PUT_U8(skb, TEAM_ATTR_PORT_DUPLEX, port->duplex);
+		nla_nest_end(skb, port_item);
+	}
+
+	nla_nest_end(skb, port_list);
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int team_nl_fill_port_list_get(struct sk_buff *skb,
+				      struct genl_info *info, int flags,
+				      struct team *team)
+{
+	return team_nl_fill_port_list_get_changed(skb, info->snd_pid,
+						  info->snd_seq, NLM_F_ACK,
+						  team, NULL);
+}
+
+static int team_nl_cmd_port_list_get(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct team *team;
+	int err;
+
+	team = team_nl_team_get(info);
+	if (!team)
+		return -EINVAL;
+
+	err = team_nl_send_generic(info, team, team_nl_fill_port_list_get);
+
+	team_nl_team_put(team);
+
+	return err;
+}
+
+static struct genl_ops team_nl_ops[] = {
+	{
+		.cmd = TEAM_CMD_NOOP,
+		.doit = team_nl_cmd_noop,
+		.policy = team_nl_policy,
+	},
+	{
+		.cmd = TEAM_CMD_OPTIONS_SET,
+		.doit = team_nl_cmd_options_set,
+		.policy = team_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = TEAM_CMD_OPTIONS_GET,
+		.doit = team_nl_cmd_options_get,
+		.policy = team_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = TEAM_CMD_PORT_LIST_GET,
+		.doit = team_nl_cmd_port_list_get,
+		.policy = team_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+};
+
+static struct genl_multicast_group team_change_event_mcgrp = {
+	.name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME,
+};
+
+static int team_nl_send_event_options_get(struct team *team,
+					  struct team_option *changed_option)
+{
+	struct sk_buff *skb;
+	int err;
+	struct net *net = dev_net(team->dev);
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	err = team_nl_fill_options_get_changed(skb, 0, 0, 0, team,
+					       changed_option);
+	if (err < 0)
+		goto err_fill;
+
+	err = genlmsg_multicast_netns(net, skb, 0, team_change_event_mcgrp.id,
+				      GFP_KERNEL);
+	return err;
+
+err_fill:
+	nlmsg_free(skb);
+	return err;
+}
+
+static int team_nl_send_event_port_list_get(struct team_port *port)
+{
+	struct sk_buff *skb;
+	int err;
+	struct net *net = dev_net(port->team->dev);
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	err = team_nl_fill_port_list_get_changed(skb, 0, 0, 0,
+						 port->team, port);
+	if (err < 0)
+		goto err_fill;
+
+	err = genlmsg_multicast_netns(net, skb, 0, team_change_event_mcgrp.id,
+				      GFP_KERNEL);
+	return err;
+
+err_fill:
+	nlmsg_free(skb);
+	return err;
+}
+
+static int team_nl_init(void)
+{
+	int err;
+
+	err = genl_register_family_with_ops(&team_nl_family, team_nl_ops,
+					    ARRAY_SIZE(team_nl_ops));
+	if (err)
+		return err;
+
+	err = genl_register_mc_group(&team_nl_family, &team_change_event_mcgrp);
+	if (err)
+		goto err_change_event_grp_reg;
+
+	return 0;
+
+err_change_event_grp_reg:
+	genl_unregister_family(&team_nl_family);
+
+	return err;
+}
+
+static void team_nl_fini(void)
+{
+	genl_unregister_family(&team_nl_family);
+}
+
+
+/******************
+ * Change checkers
+ ******************/
+
+static void __team_options_change_check(struct team *team,
+					struct team_option *changed_option)
+{
+	int err;
+
+	err = team_nl_send_event_options_get(team, changed_option);
+	if (err)
+		netdev_warn(team->dev, "Failed to send options change via netlink\n");
+}
+
+/* rtnl lock is held */
+static void __team_port_change_check(struct team_port *port, bool linkup)
+{
+	int err;
+
+	if (port->linkup == linkup)
+		return;
+
+	port->linkup = linkup;
+	if (linkup) {
+		struct ethtool_cmd ecmd;
+
+		err = __ethtool_get_settings(port->dev, &ecmd);
+		if (!err) {
+			port->speed = ethtool_cmd_speed(&ecmd);
+			port->duplex = ecmd.duplex;
+			goto send_event;
+		}
+	}
+	port->speed = 0;
+	port->duplex = 0;
+
+send_event:
+	err = team_nl_send_event_port_list_get(port);
+	if (err)
+		netdev_warn(port->team->dev, "Failed to send port change of device %s via netlink\n",
+			    port->dev->name);
+
+}
+
+static void team_port_change_check(struct team_port *port, bool linkup)
+{
+	struct team *team = port->team;
+
+	spin_lock(&team->lock);
+	__team_port_change_check(port, linkup);
+	spin_unlock(&team->lock);
+}
+
+/************************************
+ * Net device notifier event handler
+ ************************************/
+
+static int team_device_event(struct notifier_block *unused,
+			     unsigned long event, void *ptr)
+{
+	struct net_device *dev = (struct net_device *) ptr;
+	struct team_port *port;
+
+	port = team_port_get_rtnl(dev);
+	if (!port)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_UP:
+		if (netif_carrier_ok(dev))
+			team_port_change_check(port, true);
+	case NETDEV_DOWN:
+		team_port_change_check(port, false);
+	case NETDEV_CHANGE:
+		if (netif_running(port->dev))
+			team_port_change_check(port,
+					       !!netif_carrier_ok(port->dev));
+		break;
+	case NETDEV_UNREGISTER:
+		team_del_slave(port->team->dev, dev);
+		break;
+	case NETDEV_FEAT_CHANGE:
+		team_compute_features(port->team);
+		break;
+	case NETDEV_CHANGEMTU:
+		/* Forbid to change mtu of underlaying device */
+		return NOTIFY_BAD;
+	case NETDEV_PRE_TYPE_CHANGE:
+		/* Forbid to change type of underlaying device */
+		return NOTIFY_BAD;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block team_notifier_block __read_mostly = {
+	.notifier_call = team_device_event,
+};
+
+
+/***********************
+ * Module init and exit
+ ***********************/
+
+static int __init team_module_init(void)
+{
+	int err;
+
+	register_netdevice_notifier(&team_notifier_block);
+
+	err = rtnl_link_register(&team_link_ops);
+	if (err)
+		goto err_rtnl_reg;
+
+	err = team_nl_init();
+	if (err)
+		goto err_nl_init;
+
+	return 0;
+
+err_nl_init:
+	rtnl_link_unregister(&team_link_ops);
+
+err_rtnl_reg:
+	unregister_netdevice_notifier(&team_notifier_block);
+
+	return err;
+}
+
+static void __exit team_module_exit(void)
+{
+	team_nl_fini();
+	rtnl_link_unregister(&team_link_ops);
+	unregister_netdevice_notifier(&team_notifier_block);
+}
+
+module_init(team_module_init);
+module_exit(team_module_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
+MODULE_DESCRIPTION("Ethernet team device driver");
+MODULE_ALIAS_RTNL_LINK(DRV_NAME);
diff --git a/drivers/net/team/team_mode_activebackup.c b/drivers/net/team/team_mode_activebackup.c
new file mode 100644
index 000000000000..6fe920c440b3
--- /dev/null
+++ b/drivers/net/team/team_mode_activebackup.c
@@ -0,0 +1,137 @@
+/*
+ * net/drivers/team/team_mode_activebackup.c - Active-backup mode for team
+ * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <net/rtnetlink.h>
+#include <linux/if_team.h>
+
+struct ab_priv {
+	struct team_port __rcu *active_port;
+};
+
+static struct ab_priv *ab_priv(struct team *team)
+{
+	return (struct ab_priv *) &team->mode_priv;
+}
+
+static rx_handler_result_t ab_receive(struct team *team, struct team_port *port,
+				      struct sk_buff *skb) {
+	struct team_port *active_port;
+
+	active_port = rcu_dereference(ab_priv(team)->active_port);
+	if (active_port != port)
+		return RX_HANDLER_EXACT;
+	return RX_HANDLER_ANOTHER;
+}
+
+static bool ab_transmit(struct team *team, struct sk_buff *skb)
+{
+	struct team_port *active_port;
+
+	active_port = rcu_dereference(ab_priv(team)->active_port);
+	if (unlikely(!active_port))
+		goto drop;
+	skb->dev = active_port->dev;
+	if (dev_queue_xmit(skb))
+		return false;
+	return true;
+
+drop:
+	dev_kfree_skb_any(skb);
+	return false;
+}
+
+static void ab_port_leave(struct team *team, struct team_port *port)
+{
+	if (ab_priv(team)->active_port == port)
+		rcu_assign_pointer(ab_priv(team)->active_port, NULL);
+}
+
+static int ab_active_port_get(struct team *team, void *arg)
+{
+	u32 *ifindex = arg;
+
+	*ifindex = 0;
+	if (ab_priv(team)->active_port)
+		*ifindex = ab_priv(team)->active_port->dev->ifindex;
+	return 0;
+}
+
+static int ab_active_port_set(struct team *team, void *arg)
+{
+	u32 *ifindex = arg;
+	struct team_port *port;
+
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		if (port->dev->ifindex == *ifindex) {
+			rcu_assign_pointer(ab_priv(team)->active_port, port);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static struct team_option ab_options[] = {
+	{
+		.name = "activeport",
+		.type = TEAM_OPTION_TYPE_U32,
+		.getter = ab_active_port_get,
+		.setter = ab_active_port_set,
+	},
+};
+
+int ab_init(struct team *team)
+{
+	team_options_register(team, ab_options, ARRAY_SIZE(ab_options));
+	return 0;
+}
+
+void ab_exit(struct team *team)
+{
+	team_options_unregister(team, ab_options, ARRAY_SIZE(ab_options));
+}
+
+static const struct team_mode_ops ab_mode_ops = {
+	.init			= ab_init,
+	.exit			= ab_exit,
+	.receive		= ab_receive,
+	.transmit		= ab_transmit,
+	.port_leave		= ab_port_leave,
+};
+
+static struct team_mode ab_mode = {
+	.kind		= "activebackup",
+	.owner		= THIS_MODULE,
+	.priv_size	= sizeof(struct ab_priv),
+	.ops		= &ab_mode_ops,
+};
+
+static int __init ab_init_module(void)
+{
+	return team_mode_register(&ab_mode);
+}
+
+static void __exit ab_cleanup_module(void)
+{
+	team_mode_unregister(&ab_mode);
+}
+
+module_init(ab_init_module);
+module_exit(ab_cleanup_module);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
+MODULE_DESCRIPTION("Active-backup mode for team");
+MODULE_ALIAS("team-mode-activebackup");
diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c
new file mode 100644
index 000000000000..a0e8f806331a
--- /dev/null
+++ b/drivers/net/team/team_mode_roundrobin.c
@@ -0,0 +1,107 @@
+/*
+ * net/drivers/team/team_mode_roundrobin.c - Round-robin mode for team
+ * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/if_team.h>
+
+struct rr_priv {
+	unsigned int sent_packets;
+};
+
+static struct rr_priv *rr_priv(struct team *team)
+{
+	return (struct rr_priv *) &team->mode_priv;
+}
+
+static struct team_port *__get_first_port_up(struct team *team,
+					     struct team_port *port)
+{
+	struct team_port *cur;
+
+	if (port->linkup)
+		return port;
+	cur = port;
+	list_for_each_entry_continue_rcu(cur, &team->port_list, list)
+		if (cur->linkup)
+			return cur;
+	list_for_each_entry_rcu(cur, &team->port_list, list) {
+		if (cur == port)
+			break;
+		if (cur->linkup)
+			return cur;
+	}
+	return NULL;
+}
+
+static bool rr_transmit(struct team *team, struct sk_buff *skb)
+{
+	struct team_port *port;
+	int port_index;
+
+	port_index = rr_priv(team)->sent_packets++ % team->port_count;
+	port = team_get_port_by_index_rcu(team, port_index);
+	port = __get_first_port_up(team, port);
+	if (unlikely(!port))
+		goto drop;
+	skb->dev = port->dev;
+	if (dev_queue_xmit(skb))
+		return false;
+	return true;
+
+drop:
+	dev_kfree_skb_any(skb);
+	return false;
+}
+
+static int rr_port_enter(struct team *team, struct team_port *port)
+{
+	return team_port_set_team_mac(port);
+}
+
+static void rr_port_change_mac(struct team *team, struct team_port *port)
+{
+	team_port_set_team_mac(port);
+}
+
+static const struct team_mode_ops rr_mode_ops = {
+	.transmit		= rr_transmit,
+	.port_enter		= rr_port_enter,
+	.port_change_mac	= rr_port_change_mac,
+};
+
+static struct team_mode rr_mode = {
+	.kind		= "roundrobin",
+	.owner		= THIS_MODULE,
+	.priv_size	= sizeof(struct rr_priv),
+	.ops		= &rr_mode_ops,
+};
+
+static int __init rr_init_module(void)
+{
+	return team_mode_register(&rr_mode);
+}
+
+static void __exit rr_cleanup_module(void)
+{
+	team_mode_unregister(&rr_mode);
+}
+
+module_init(rr_init_module);
+module_exit(rr_cleanup_module);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
+MODULE_DESCRIPTION("Round-robin mode for team");
+MODULE_ALIAS("team-mode-roundrobin");
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 619b5657af77..0b091b32267d 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -185,6 +185,7 @@ header-y += if_pppol2tp.h
 header-y += if_pppox.h
 header-y += if_slip.h
 header-y += if_strip.h
+header-y += if_team.h
 header-y += if_tr.h
 header-y += if_tun.h
 header-y += if_tunnel.h
diff --git a/include/linux/if.h b/include/linux/if.h
index db20bd4fd16b..06b6ef60c821 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -79,6 +79,7 @@
 #define IFF_TX_SKB_SHARING	0x10000	/* The interface supports sharing
 					 * skbs on transmit */
 #define IFF_UNICAST_FLT	0x20000		/* Supports unicast filtering	*/
+#define IFF_TEAM_PORT	0x40000		/* device used as team port */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
new file mode 100644
index 000000000000..14f6388f5460
--- /dev/null
+++ b/include/linux/if_team.h
@@ -0,0 +1,242 @@
+/*
+ * include/linux/if_team.h - Network team device driver header
+ * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _LINUX_IF_TEAM_H_
+#define _LINUX_IF_TEAM_H_
+
+#ifdef __KERNEL__
+
+struct team_pcpu_stats {
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			rx_multicast;
+	u64			tx_packets;
+	u64			tx_bytes;
+	struct u64_stats_sync	syncp;
+	u32			rx_dropped;
+	u32			tx_dropped;
+};
+
+struct team;
+
+struct team_port {
+	struct net_device *dev;
+	struct hlist_node hlist; /* node in hash list */
+	struct list_head list; /* node in ordinary list */
+	struct team *team;
+	int index;
+
+	/*
+	 * A place for storing original values of the device before it
+	 * become a port.
+	 */
+	struct {
+		unsigned char dev_addr[MAX_ADDR_LEN];
+		unsigned int mtu;
+	} orig;
+
+	bool linkup;
+	u32 speed;
+	u8 duplex;
+
+	struct rcu_head rcu;
+};
+
+struct team_mode_ops {
+	int (*init)(struct team *team);
+	void (*exit)(struct team *team);
+	rx_handler_result_t (*receive)(struct team *team,
+				       struct team_port *port,
+				       struct sk_buff *skb);
+	bool (*transmit)(struct team *team, struct sk_buff *skb);
+	int (*port_enter)(struct team *team, struct team_port *port);
+	void (*port_leave)(struct team *team, struct team_port *port);
+	void (*port_change_mac)(struct team *team, struct team_port *port);
+};
+
+enum team_option_type {
+	TEAM_OPTION_TYPE_U32,
+	TEAM_OPTION_TYPE_STRING,
+};
+
+struct team_option {
+	struct list_head list;
+	const char *name;
+	enum team_option_type type;
+	int (*getter)(struct team *team, void *arg);
+	int (*setter)(struct team *team, void *arg);
+};
+
+struct team_mode {
+	struct list_head list;
+	const char *kind;
+	struct module *owner;
+	size_t priv_size;
+	const struct team_mode_ops *ops;
+};
+
+#define TEAM_PORT_HASHBITS 4
+#define TEAM_PORT_HASHENTRIES (1 << TEAM_PORT_HASHBITS)
+
+#define TEAM_MODE_PRIV_LONGS 4
+#define TEAM_MODE_PRIV_SIZE (sizeof(long) * TEAM_MODE_PRIV_LONGS)
+
+struct team {
+	struct net_device *dev; /* associated netdevice */
+	struct team_pcpu_stats __percpu *pcpu_stats;
+
+	spinlock_t lock; /* used for overall locking, e.g. port lists write */
+
+	/*
+	 * port lists with port count
+	 */
+	int port_count;
+	struct hlist_head port_hlist[TEAM_PORT_HASHENTRIES];
+	struct list_head port_list;
+
+	struct list_head option_list;
+
+	const struct team_mode *mode;
+	struct team_mode_ops ops;
+	long mode_priv[TEAM_MODE_PRIV_LONGS];
+};
+
+static inline struct hlist_head *team_port_index_hash(struct team *team,
+						      int port_index)
+{
+	return &team->port_hlist[port_index & (TEAM_PORT_HASHENTRIES - 1)];
+}
+
+static inline struct team_port *team_get_port_by_index(struct team *team,
+						       int port_index)
+{
+	struct hlist_node *p;
+	struct team_port *port;
+	struct hlist_head *head = team_port_index_hash(team, port_index);
+
+	hlist_for_each_entry(port, p, head, hlist)
+		if (port->index == port_index)
+			return port;
+	return NULL;
+}
+static inline struct team_port *team_get_port_by_index_rcu(struct team *team,
+							   int port_index)
+{
+	struct hlist_node *p;
+	struct team_port *port;
+	struct hlist_head *head = team_port_index_hash(team, port_index);
+
+	hlist_for_each_entry_rcu(port, p, head, hlist)
+		if (port->index == port_index)
+			return port;
+	return NULL;
+}
+
+extern int team_port_set_team_mac(struct team_port *port);
+extern void team_options_register(struct team *team,
+				  struct team_option *option,
+				  size_t option_count);
+extern void team_options_unregister(struct team *team,
+				    struct team_option *option,
+				    size_t option_count);
+extern int team_mode_register(struct team_mode *mode);
+extern int team_mode_unregister(struct team_mode *mode);
+
+#endif /* __KERNEL__ */
+
+#define TEAM_STRING_MAX_LEN 32
+
+/**********************************
+ * NETLINK_GENERIC netlink family.
+ **********************************/
+
+enum {
+	TEAM_CMD_NOOP,
+	TEAM_CMD_OPTIONS_SET,
+	TEAM_CMD_OPTIONS_GET,
+	TEAM_CMD_PORT_LIST_GET,
+
+	__TEAM_CMD_MAX,
+	TEAM_CMD_MAX = (__TEAM_CMD_MAX - 1),
+};
+
+enum {
+	TEAM_ATTR_UNSPEC,
+	TEAM_ATTR_TEAM_IFINDEX,		/* u32 */
+	TEAM_ATTR_LIST_OPTION,		/* nest */
+	TEAM_ATTR_LIST_PORT,		/* nest */
+
+	__TEAM_ATTR_MAX,
+	TEAM_ATTR_MAX = __TEAM_ATTR_MAX - 1,
+};
+
+/* Nested layout of get/set msg:
+ *
+ *	[TEAM_ATTR_LIST_OPTION]
+ *		[TEAM_ATTR_ITEM_OPTION]
+ *			[TEAM_ATTR_OPTION_*], ...
+ *		[TEAM_ATTR_ITEM_OPTION]
+ *			[TEAM_ATTR_OPTION_*], ...
+ *		...
+ *	[TEAM_ATTR_LIST_PORT]
+ *		[TEAM_ATTR_ITEM_PORT]
+ *			[TEAM_ATTR_PORT_*], ...
+ *		[TEAM_ATTR_ITEM_PORT]
+ *			[TEAM_ATTR_PORT_*], ...
+ *		...
+ */
+
+enum {
+	TEAM_ATTR_ITEM_OPTION_UNSPEC,
+	TEAM_ATTR_ITEM_OPTION,		/* nest */
+
+	__TEAM_ATTR_ITEM_OPTION_MAX,
+	TEAM_ATTR_ITEM_OPTION_MAX = __TEAM_ATTR_ITEM_OPTION_MAX - 1,
+};
+
+enum {
+	TEAM_ATTR_OPTION_UNSPEC,
+	TEAM_ATTR_OPTION_NAME,		/* string */
+	TEAM_ATTR_OPTION_CHANGED,	/* flag */
+	TEAM_ATTR_OPTION_TYPE,		/* u8 */
+	TEAM_ATTR_OPTION_DATA,		/* dynamic */
+
+	__TEAM_ATTR_OPTION_MAX,
+	TEAM_ATTR_OPTION_MAX = __TEAM_ATTR_OPTION_MAX - 1,
+};
+
+enum {
+	TEAM_ATTR_ITEM_PORT_UNSPEC,
+	TEAM_ATTR_ITEM_PORT,		/* nest */
+
+	__TEAM_ATTR_ITEM_PORT_MAX,
+	TEAM_ATTR_ITEM_PORT_MAX = __TEAM_ATTR_ITEM_PORT_MAX - 1,
+};
+
+enum {
+	TEAM_ATTR_PORT_UNSPEC,
+	TEAM_ATTR_PORT_IFINDEX,		/* u32 */
+	TEAM_ATTR_PORT_CHANGED,		/* flag */
+	TEAM_ATTR_PORT_LINKUP,		/* flag */
+	TEAM_ATTR_PORT_SPEED,		/* u32 */
+	TEAM_ATTR_PORT_DUPLEX,		/* u8 */
+
+	__TEAM_ATTR_PORT_MAX,
+	TEAM_ATTR_PORT_MAX = __TEAM_ATTR_PORT_MAX - 1,
+};
+
+/*
+ * NETLINK_GENERIC related info
+ */
+#define TEAM_GENL_NAME "team"
+#define TEAM_GENL_VERSION 0x1
+#define TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME "change_event"
+
+#endif /* _LINUX_IF_TEAM_H_ */
-- 
cgit v1.2.3


From 8b5c171bb3dc0686b2647a84e990199c5faa9ef8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 9 Nov 2011 12:07:14 +0000
Subject: neigh: new unresolved queue limits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Le mercredi 09 novembre 2011 à 16:21 -0500, David Miller a écrit :
> From: David Miller <davem@davemloft.net>
> Date: Wed, 09 Nov 2011 16:16:44 -0500 (EST)
>
> > From: Eric Dumazet <eric.dumazet@gmail.com>
> > Date: Wed, 09 Nov 2011 12:14:09 +0100
> >
> >> unres_qlen is the number of frames we are able to queue per unresolved
> >> neighbour. Its default value (3) was never changed and is responsible
> >> for strange drops, especially if IP fragments are used, or multiple
> >> sessions start in parallel. Even a single tcp flow can hit this limit.
> >  ...
> >
> > Ok, I've applied this, let's see what happens :-)
>
> Early answer, build fails.
>
> Please test build this patch with DECNET enabled and resubmit.  The
> decnet neigh layer still refers to the removed ->queue_len member.
>
> Thanks.

Ouch, this was fixed on one machine yesterday, but not the other one I
used this morning, sorry.

[PATCH V5 net-next] neigh: new unresolved queue limits

unres_qlen is the number of frames we are able to queue per unresolved
neighbour. Its default value (3) was never changed and is responsible
for strange drops, especially if IP fragments are used, or multiple
sessions start in parallel. Even a single tcp flow can hit this limit.

$ arp -d 192.168.20.108 ; ping -c 2 -s 8000 192.168.20.108
PING 192.168.20.108 (192.168.20.108) 8000(8028) bytes of data.
8008 bytes from 192.168.20.108: icmp_seq=2 ttl=64 time=0.322 ms

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  10 ++
 include/linux/neighbour.h              |   1 +
 include/net/neighbour.h                |   3 +-
 net/atm/clip.c                         |   2 +-
 net/core/neighbour.c                   | 162 ++++++++++++++++++++++-----------
 net/decnet/dn_neigh.c                  |   2 +-
 net/ipv4/arp.c                         |   2 +-
 net/ipv6/ndisc.c                       |   2 +-
 8 files changed, 128 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index f049a1ca186f..b8867061fce4 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -31,6 +31,16 @@ neigh/default/gc_thresh3 - INTEGER
 	when using large numbers of interfaces and when communicating
 	with large numbers of directly-connected peers.
 
+neigh/default/unres_qlen_bytes - INTEGER
+	The maximum number of bytes which may be used by packets
+	queued for each	unresolved address by other network layers.
+	(added in linux 3.3)
+
+neigh/default/unres_qlen - INTEGER
+	The maximum number of packets which may be queued for each
+	unresolved address by other network layers.
+	(deprecated in linux 3.3) : use unres_qlen_bytes instead.
+
 mtu_expires - INTEGER
 	Time, in seconds, that cached PMTU information is kept.
 
diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h
index a7003b7a695d..b188f68a08c9 100644
--- a/include/linux/neighbour.h
+++ b/include/linux/neighbour.h
@@ -116,6 +116,7 @@ enum {
 	NDTPA_PROXY_DELAY,		/* u64, msecs */
 	NDTPA_PROXY_QLEN,		/* u32 */
 	NDTPA_LOCKTIME,			/* u64, msecs */
+	NDTPA_QUEUE_LENBYTES,		/* u32 */
 	__NDTPA_MAX
 };
 #define NDTPA_MAX (__NDTPA_MAX - 1)
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 2720884287c3..7ae5acff96e9 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -59,7 +59,7 @@ struct neigh_parms {
 	int	reachable_time;
 	int	delay_probe_time;
 
-	int	queue_len;
+	int	queue_len_bytes;
 	int	ucast_probes;
 	int	app_probes;
 	int	mcast_probes;
@@ -99,6 +99,7 @@ struct neighbour {
 	rwlock_t		lock;
 	atomic_t		refcnt;
 	struct sk_buff_head	arp_queue;
+	unsigned int		arp_queue_len_bytes;
 	struct timer_list	timer;
 	unsigned long		used;
 	atomic_t		probes;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 852394072fa1..32c41b8a803e 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -329,7 +329,7 @@ static struct neigh_table clip_tbl = {
 		.gc_staletime 		= 60 * HZ,
 		.reachable_time 	= 30 * HZ,
 		.delay_probe_time 	= 5 * HZ,
-		.queue_len 		= 3,
+		.queue_len_bytes 	= 64 * 1024,
 		.ucast_probes 		= 3,
 		.mcast_probes 		= 3,
 		.anycast_delay 		= 1 * HZ,
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 039d51e6c284..2684794458ca 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -238,6 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
 				   it to safe state.
 				 */
 				skb_queue_purge(&n->arp_queue);
+				n->arp_queue_len_bytes = 0;
 				n->output = neigh_blackhole;
 				if (n->nud_state & NUD_VALID)
 					n->nud_state = NUD_NOARP;
@@ -702,6 +703,7 @@ void neigh_destroy(struct neighbour *neigh)
 		printk(KERN_WARNING "Impossible event.\n");
 
 	skb_queue_purge(&neigh->arp_queue);
+	neigh->arp_queue_len_bytes = 0;
 
 	dev_put(neigh->dev);
 	neigh_parms_put(neigh->parms);
@@ -842,6 +844,7 @@ static void neigh_invalidate(struct neighbour *neigh)
 		write_lock(&neigh->lock);
 	}
 	skb_queue_purge(&neigh->arp_queue);
+	neigh->arp_queue_len_bytes = 0;
 }
 
 static void neigh_probe(struct neighbour *neigh)
@@ -980,15 +983,20 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 
 	if (neigh->nud_state == NUD_INCOMPLETE) {
 		if (skb) {
-			if (skb_queue_len(&neigh->arp_queue) >=
-			    neigh->parms->queue_len) {
+			while (neigh->arp_queue_len_bytes + skb->truesize >
+			       neigh->parms->queue_len_bytes) {
 				struct sk_buff *buff;
+
 				buff = __skb_dequeue(&neigh->arp_queue);
+				if (!buff)
+					break;
+				neigh->arp_queue_len_bytes -= buff->truesize;
 				kfree_skb(buff);
 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
 			}
 			skb_dst_force(skb);
 			__skb_queue_tail(&neigh->arp_queue, skb);
+			neigh->arp_queue_len_bytes += skb->truesize;
 		}
 		rc = 1;
 	}
@@ -1175,6 +1183,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 			write_lock_bh(&neigh->lock);
 		}
 		skb_queue_purge(&neigh->arp_queue);
+		neigh->arp_queue_len_bytes = 0;
 	}
 out:
 	if (update_isrouter) {
@@ -1747,7 +1756,11 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
 
 	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
-	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
+	NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes);
+	/* approximative value for deprecated QUEUE_LEN (in packets) */
+	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN,
+		    DIV_ROUND_UP(parms->queue_len_bytes,
+				 SKB_TRUESIZE(ETH_FRAME_LEN)));
 	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
 	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
 	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
@@ -1974,7 +1987,11 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 			switch (i) {
 			case NDTPA_QUEUE_LEN:
-				p->queue_len = nla_get_u32(tbp[i]);
+				p->queue_len_bytes = nla_get_u32(tbp[i]) *
+						     SKB_TRUESIZE(ETH_FRAME_LEN);
+				break;
+			case NDTPA_QUEUE_LENBYTES:
+				p->queue_len_bytes = nla_get_u32(tbp[i]);
 				break;
 			case NDTPA_PROXY_QLEN:
 				p->proxy_qlen = nla_get_u32(tbp[i]);
@@ -2635,117 +2652,158 @@ EXPORT_SYMBOL(neigh_app_ns);
 
 #ifdef CONFIG_SYSCTL
 
-#define NEIGH_VARS_MAX 19
+static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
+			   size_t *lenp, loff_t *ppos)
+{
+	int size, ret;
+	ctl_table tmp = *ctl;
+
+	tmp.data = &size;
+	size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
+	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret)
+		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
+	return ret;
+}
+
+enum {
+	NEIGH_VAR_MCAST_PROBE,
+	NEIGH_VAR_UCAST_PROBE,
+	NEIGH_VAR_APP_PROBE,
+	NEIGH_VAR_RETRANS_TIME,
+	NEIGH_VAR_BASE_REACHABLE_TIME,
+	NEIGH_VAR_DELAY_PROBE_TIME,
+	NEIGH_VAR_GC_STALETIME,
+	NEIGH_VAR_QUEUE_LEN,
+	NEIGH_VAR_QUEUE_LEN_BYTES,
+	NEIGH_VAR_PROXY_QLEN,
+	NEIGH_VAR_ANYCAST_DELAY,
+	NEIGH_VAR_PROXY_DELAY,
+	NEIGH_VAR_LOCKTIME,
+	NEIGH_VAR_RETRANS_TIME_MS,
+	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
+	NEIGH_VAR_GC_INTERVAL,
+	NEIGH_VAR_GC_THRESH1,
+	NEIGH_VAR_GC_THRESH2,
+	NEIGH_VAR_GC_THRESH3,
+	NEIGH_VAR_MAX
+};
 
 static struct neigh_sysctl_table {
 	struct ctl_table_header *sysctl_header;
-	struct ctl_table neigh_vars[NEIGH_VARS_MAX];
+	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
 	char *dev_name;
 } neigh_sysctl_template __read_mostly = {
 	.neigh_vars = {
-		{
+		[NEIGH_VAR_MCAST_PROBE] = {
 			.procname	= "mcast_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-		{
+		[NEIGH_VAR_UCAST_PROBE] = {
 			.procname	= "ucast_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-		{
+		[NEIGH_VAR_APP_PROBE] = {
 			.procname	= "app_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-		{
+		[NEIGH_VAR_RETRANS_TIME] = {
 			.procname	= "retrans_time",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_userhz_jiffies,
 		},
-		{
+		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
 			.procname	= "base_reachable_time",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_jiffies,
 		},
-		{
+		[NEIGH_VAR_DELAY_PROBE_TIME] = {
 			.procname	= "delay_first_probe_time",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_jiffies,
 		},
-		{
+		[NEIGH_VAR_GC_STALETIME] = {
 			.procname	= "gc_stale_time",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_jiffies,
 		},
-		{
+		[NEIGH_VAR_QUEUE_LEN] = {
 			.procname	= "unres_qlen",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
+			.proc_handler	= proc_unres_qlen,
+		},
+		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
+			.procname	= "unres_qlen_bytes",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-		{
+		[NEIGH_VAR_PROXY_QLEN] = {
 			.procname	= "proxy_qlen",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-		{
+		[NEIGH_VAR_ANYCAST_DELAY] = {
 			.procname	= "anycast_delay",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_userhz_jiffies,
 		},
-		{
+		[NEIGH_VAR_PROXY_DELAY] = {
 			.procname	= "proxy_delay",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_userhz_jiffies,
 		},
-		{
+		[NEIGH_VAR_LOCKTIME] = {
 			.procname	= "locktime",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_userhz_jiffies,
 		},
-		{
+		[NEIGH_VAR_RETRANS_TIME_MS] = {
 			.procname	= "retrans_time_ms",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_ms_jiffies,
 		},
-		{
+		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
 			.procname	= "base_reachable_time_ms",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_ms_jiffies,
 		},
-		{
+		[NEIGH_VAR_GC_INTERVAL] = {
 			.procname	= "gc_interval",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_jiffies,
 		},
-		{
+		[NEIGH_VAR_GC_THRESH1] = {
 			.procname	= "gc_thresh1",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-		{
+		[NEIGH_VAR_GC_THRESH2] = {
 			.procname	= "gc_thresh2",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-		{
+		[NEIGH_VAR_GC_THRESH3] = {
 			.procname	= "gc_thresh3",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
@@ -2778,47 +2836,49 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 	if (!t)
 		goto err;
 
-	t->neigh_vars[0].data  = &p->mcast_probes;
-	t->neigh_vars[1].data  = &p->ucast_probes;
-	t->neigh_vars[2].data  = &p->app_probes;
-	t->neigh_vars[3].data  = &p->retrans_time;
-	t->neigh_vars[4].data  = &p->base_reachable_time;
-	t->neigh_vars[5].data  = &p->delay_probe_time;
-	t->neigh_vars[6].data  = &p->gc_staletime;
-	t->neigh_vars[7].data  = &p->queue_len;
-	t->neigh_vars[8].data  = &p->proxy_qlen;
-	t->neigh_vars[9].data  = &p->anycast_delay;
-	t->neigh_vars[10].data = &p->proxy_delay;
-	t->neigh_vars[11].data = &p->locktime;
-	t->neigh_vars[12].data  = &p->retrans_time;
-	t->neigh_vars[13].data  = &p->base_reachable_time;
+	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
+	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
+	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
+	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
+	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
+	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
+	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
+	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
+	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
+	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
+	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
+	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
+	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
+	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
+	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
 
 	if (dev) {
 		dev_name_source = dev->name;
 		/* Terminate the table early */
-		memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
+		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
+		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
 	} else {
 		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
-		t->neigh_vars[14].data = (int *)(p + 1);
-		t->neigh_vars[15].data = (int *)(p + 1) + 1;
-		t->neigh_vars[16].data = (int *)(p + 1) + 2;
-		t->neigh_vars[17].data = (int *)(p + 1) + 3;
+		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
+		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
+		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
+		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
 	}
 
 
 	if (handler) {
 		/* RetransTime */
-		t->neigh_vars[3].proc_handler = handler;
-		t->neigh_vars[3].extra1 = dev;
+		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
+		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
 		/* ReachableTime */
-		t->neigh_vars[4].proc_handler = handler;
-		t->neigh_vars[4].extra1 = dev;
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
 		/* RetransTime (in milliseconds)*/
-		t->neigh_vars[12].proc_handler = handler;
-		t->neigh_vars[12].extra1 = dev;
+		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
+		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
 		/* ReachableTime (in milliseconds) */
-		t->neigh_vars[13].proc_handler = handler;
-		t->neigh_vars[13].extra1 = dev;
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
 	}
 
 	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 7f0eb087dc11..3532ac64c82d 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -107,7 +107,7 @@ struct neigh_table dn_neigh_table = {
 		.gc_staletime =	60 * HZ,
 		.reachable_time =		30 * HZ,
 		.delay_probe_time =	5 * HZ,
-		.queue_len =		3,
+		.queue_len_bytes =	64*1024,
 		.ucast_probes =	0,
 		.app_probes =		0,
 		.mcast_probes =	0,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 96a164aa1367..d732827b32b9 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -177,7 +177,7 @@ struct neigh_table arp_tbl = {
 		.gc_staletime		= 60 * HZ,
 		.reachable_time		= 30 * HZ,
 		.delay_probe_time	= 5 * HZ,
-		.queue_len		= 3,
+		.queue_len_bytes	= 64*1024,
 		.ucast_probes		= 3,
 		.mcast_probes		= 3,
 		.anycast_delay		= 1 * HZ,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 44e5b7f2a6c1..4a2098222625 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -141,7 +141,7 @@ struct neigh_table nd_tbl = {
 		.gc_staletime		= 60 * HZ,
 		.reachable_time		= ND_REACHABLE_TIME,
 		.delay_probe_time	= 5 * HZ,
-		.queue_len		= 3,
+		.queue_len_bytes	= 64*1024,
 		.ucast_probes		= 3,
 		.mcast_probes		= 3,
 		.anycast_delay		= 1 * HZ,
-- 
cgit v1.2.3


From f1c6f1a7eed963ed233ba4c8b6fa8addb86c6ddc Mon Sep 17 00:00:00 2001
From: Carsten Emde <C.Emde@osadl.org>
Date: Wed, 26 Oct 2011 23:14:16 +0200
Subject: sched: Set the command name of the idle tasks in SMP kernels

In UP systems, the idle task is initialized using the init_task
structure from which the command name is taken (currently "swapper").

In SMP systems, one idle task per CPU is forked by the worker thread
from which the task structure is copied. The command name is, therefore,
"kworker/0:0" or "kworker/0:1", if not updated. Since such update was
lacking, all idle tasks in SMP systems were incorrectly named. This
longtime bug was not discovered immediately, because there is no /proc/0
entry - the bug only becomes apparent when tracing is enabled.

This patch sets the command name of the idle tasks in SMP systems to the
name that is used in the INIT_TASK structure suffixed by a slash and the
number of the CPU.

Signed-off-by: Carsten Emde <C.Emde@osadl.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20111026211708.768925506@osadl.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/init_task.h | 4 +++-
 kernel/sched.c            | 4 ++++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 08ffab01e76c..b6e5b8b000e0 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -126,6 +126,8 @@ extern struct cred init_cred;
 # define INIT_PERF_EVENTS(tsk)
 #endif
 
+#define INIT_TASK_COMM "swapper"
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -162,7 +164,7 @@ extern struct cred init_cred;
 	.group_leader	= &tsk,						\
 	RCU_INIT_POINTER(.real_cred, &init_cred),			\
 	RCU_INIT_POINTER(.cred, &init_cred),				\
-	.comm		= "swapper",					\
+	.comm		= INIT_TASK_COMM,				\
 	.thread		= INIT_THREAD,					\
 	.fs		= &init_fs,					\
 	.files		= &init_files,					\
diff --git a/kernel/sched.c b/kernel/sched.c
index 3d2c436959a1..d6b149ccf925 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
 #include <linux/slab.h>
+#include <linux/init_task.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -6112,6 +6113,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 */
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
+#if defined(CONFIG_SMP)
+	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
+#endif
 }
 
 /*
-- 
cgit v1.2.3


From 94d24fc47219219b5aa23b45956cc37ee5aa5b01 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 7 Jun 2011 11:17:30 +0200
Subject: printk, lockdep: Disable lock debugging on zap_locks()

zap_locks() is used by printk() in a last ditch effort to get data
out, clearly we cannot trust lock state after this so make it disable
lock debugging.

Also don't treat printk recursion through lockdep as a normal
recursion bug but try hard to get the lockdep splat out.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-kqxwmo4xz37e1s8w0xopvr0q@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/lockdep.h | 4 ++++
 kernel/printk.c         | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index b6a56e37284c..d36619ead3ba 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -343,6 +343,8 @@ extern void lockdep_trace_alloc(gfp_t mask);
 
 #define lockdep_assert_held(l)	WARN_ON(debug_locks && !lockdep_is_held(l))
 
+#define lockdep_recursing(tsk)	((tsk)->lockdep_recursion)
+
 #else /* !LOCKDEP */
 
 static inline void lockdep_off(void)
@@ -392,6 +394,8 @@ struct lock_class_key { };
 
 #define lockdep_assert_held(l)			do { } while (0)
 
+#define lockdep_recursing(tsk)			(0)
+
 #endif /* !LOCKDEP */
 
 #ifdef CONFIG_LOCK_STAT
diff --git a/kernel/printk.c b/kernel/printk.c
index 1455a0d4eedd..6d087944e72a 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -688,6 +688,7 @@ static void zap_locks(void)
 
 	oops_timestamp = jiffies;
 
+	debug_locks_off();
 	/* If a crash is occurring, make sure we can't deadlock */
 	raw_spin_lock_init(&logbuf_lock);
 	/* And make sure that we print immediately */
@@ -856,7 +857,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 		 * recursion and return - but flag the recursion so that
 		 * it can be printed at the next appropriate moment:
 		 */
-		if (!oops_in_progress) {
+		if (!oops_in_progress && !lockdep_recursing(current)) {
 			recursion_bug = 1;
 			goto out_restore_irqs;
 		}
-- 
cgit v1.2.3


From b2b5ce9d1ccf1c45f8ac68e5d901112ab76ba199 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 14 Nov 2011 06:03:34 +0000
Subject: net: introduce build_skb()

One of the thing we discussed during netdev 2011 conference was the idea
to change some network drivers to allocate/populate their skb at RX
completion time, right before feeding the skb to network stack.

In old days, we allocated skbs when populating the RX ring.

This means bringing into cpu cache sk_buff and skb_shared_info cache
lines (since we clear/initialize them), then 'queue' skb->data to NIC.

By the time NIC fills a frame in skb->data buffer and host can process
it, cpu probably threw away the cache lines from its caches, because lot
of things happened between the allocation and final use.

So the deal would be to allocate only the data buffer for the NIC to
populate its RX ring buffer. And use build_skb() at RX completion to
attach a data buffer (now filled with an ethernet frame) to a new skb,
initialize the skb_shared_info portion, and give the hot skb to network
stack.

build_skb() is the function to allocate an skb, caller providing the
data buffer that should be attached to it. Drivers are expected to call
skb_reserve() right after build_skb() to adjust skb->data to the
Ethernet frame (usually skipping NET_SKB_PAD and NET_IP_ALIGN, but some
drivers might add a hardware provided alignment)

Data provided to build_skb() MUST have been allocated by a prior
kmalloc() call, with enough room to add SKB_DATA_ALIGN(sizeof(struct
skb_shared_info)) bytes at the end of the data without corrupting
incoming frame.

data = kmalloc(NET_SKB_PAD + NET_IP_ALIGN + 1536 +
               SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
	       GFP_ATOMIC);
...
skb = build_skb(data);
if (!skb) {
	recycle_data(data);
} else {
	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
	...
}

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Eilon Greenstein <eilong@broadcom.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
CC: Tom Herbert <therbert@google.com>
CC: Jamal Hadi Salim <hadi@mojatatu.com>
CC: Stephen Hemminger <shemminger@vyatta.com>
CC: Thomas Graf <tgraf@infradead.org>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  1 +
 net/core/skbuff.c      | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fe864885c1ed..abad8a0941e8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -540,6 +540,7 @@ extern void consume_skb(struct sk_buff *skb);
 extern void	       __kfree_skb(struct sk_buff *skb);
 extern struct sk_buff *__alloc_skb(unsigned int size,
 				   gfp_t priority, int fclone, int node);
+extern struct sk_buff *build_skb(void *data);
 static inline struct sk_buff *alloc_skb(unsigned int size,
 					gfp_t priority)
 {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 18a3cebb753d..8d2c5b32f172 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -244,6 +244,55 @@ nodata:
 }
 EXPORT_SYMBOL(__alloc_skb);
 
+/**
+ * build_skb - build a network buffer
+ * @data: data buffer provided by caller
+ *
+ * Allocate a new &sk_buff. Caller provides space holding head and
+ * skb_shared_info. @data must have been allocated by kmalloc()
+ * The return is the new skb buffer.
+ * On a failure the return is %NULL, and @data is not freed.
+ * Notes :
+ *  Before IO, driver allocates only data buffer where NIC put incoming frame
+ *  Driver should add room at head (NET_SKB_PAD) and
+ *  MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
+ *  After IO, driver calls build_skb(), to allocate sk_buff and populate it
+ *  before giving packet to stack.
+ *  RX rings only contains data buffers, not full skbs.
+ */
+struct sk_buff *build_skb(void *data)
+{
+	struct skb_shared_info *shinfo;
+	struct sk_buff *skb;
+	unsigned int size;
+
+	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+	if (!skb)
+		return NULL;
+
+	size = ksize(data) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+	memset(skb, 0, offsetof(struct sk_buff, tail));
+	skb->truesize = SKB_TRUESIZE(size);
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb_reset_tail_pointer(skb);
+	skb->end = skb->tail + size;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	skb->mac_header = ~0U;
+#endif
+
+	/* make sure we initialize shinfo sequentially */
+	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+	atomic_set(&shinfo->dataref, 1);
+	kmemcheck_annotate_variable(shinfo->destructor_arg);
+
+	return skb;
+}
+EXPORT_SYMBOL(build_skb);
+
 /**
  *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
  *	@dev: network device to receive on
-- 
cgit v1.2.3


From 968b822c0023861ef6e4e15bb68582b36e89ad29 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 3 Nov 2011 12:03:38 -0400
Subject: USB: Remove the SAW_IRQ hcd flag

The HCD_FLAG_SAW_IRQ flag was introduced in order to catch IRQ routing
errors: If an URB was unlinked and the host controller hadn't gotten
any IRQs, it seemed likely that the IRQs were directed to the wrong
vector.

This warning hasn't come up in many years, as far as I know; interrupt
routing now seems to be well under control.  Therefore there's no
reason to keep the flag around any more.  This patch (as1495) finally
removes it.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/c67x00/c67x00-hcd.c |  1 -
 drivers/usb/core/hcd-pci.c      |  4 ----
 drivers/usb/core/hcd.c          | 24 +++---------------------
 drivers/usb/host/hwa-hc.c       |  1 -
 drivers/usb/host/xhci-ring.c    | 12 +-----------
 drivers/usb/musb/musb_core.c    |  1 -
 include/linux/usb/hcd.h         |  2 --
 7 files changed, 4 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/c67x00/c67x00-hcd.c b/drivers/usb/c67x00/c67x00-hcd.c
index d3e1356d091e..75e47b860a53 100644
--- a/drivers/usb/c67x00/c67x00-hcd.c
+++ b/drivers/usb/c67x00/c67x00-hcd.c
@@ -271,7 +271,6 @@ static void c67x00_hcd_irq(struct c67x00_sie *sie, u16 int_status, u16 msg)
 	if (int_status & SOFEOP_FLG(sie->sie_num)) {
 		c67x00_ll_usb_clear_status(sie, SOF_EOP_IRQ_FLG);
 		c67x00_sched_kick(c67x00);
-		set_bit(HCD_FLAG_SAW_IRQ, &hcd->flags);
 	}
 }
 
diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index a004db35f6d0..d136b8f4c8a7 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -453,10 +453,6 @@ static int resume_common(struct device *dev, int event)
 
 	pci_set_master(pci_dev);
 
-	clear_bit(HCD_FLAG_SAW_IRQ, &hcd->flags);
-	if (hcd->shared_hcd)
-		clear_bit(HCD_FLAG_SAW_IRQ, &hcd->shared_hcd->flags);
-
 	if (hcd->driver->pci_resume && !HCD_DEAD(hcd)) {
 		if (event != PM_EVENT_AUTO_RESUME)
 			wait_for_companions(pci_dev, hcd);
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 13222d352a61..43a89e4ba928 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1168,20 +1168,6 @@ int usb_hcd_check_unlink_urb(struct usb_hcd *hcd, struct urb *urb,
 	if (urb->unlinked)
 		return -EBUSY;
 	urb->unlinked = status;
-
-	/* IRQ setup can easily be broken so that USB controllers
-	 * never get completion IRQs ... maybe even the ones we need to
-	 * finish unlinking the initial failed usb_set_address()
-	 * or device descriptor fetch.
-	 */
-	if (!HCD_SAW_IRQ(hcd) && !is_root_hub(urb->dev)) {
-		dev_warn(hcd->self.controller, "Unlink after no-IRQ?  "
-			"Controller is probably using the wrong IRQ.\n");
-		set_bit(HCD_FLAG_SAW_IRQ, &hcd->flags);
-		if (hcd->shared_hcd)
-			set_bit(HCD_FLAG_SAW_IRQ, &hcd->shared_hcd->flags);
-	}
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(usb_hcd_check_unlink_urb);
@@ -2148,16 +2134,12 @@ irqreturn_t usb_hcd_irq (int irq, void *__hcd)
 	 */
 	local_irq_save(flags);
 
-	if (unlikely(HCD_DEAD(hcd) || !HCD_HW_ACCESSIBLE(hcd))) {
+	if (unlikely(HCD_DEAD(hcd) || !HCD_HW_ACCESSIBLE(hcd)))
 		rc = IRQ_NONE;
-	} else if (hcd->driver->irq(hcd) == IRQ_NONE) {
+	else if (hcd->driver->irq(hcd) == IRQ_NONE)
 		rc = IRQ_NONE;
-	} else {
-		set_bit(HCD_FLAG_SAW_IRQ, &hcd->flags);
-		if (hcd->shared_hcd)
-			set_bit(HCD_FLAG_SAW_IRQ, &hcd->shared_hcd->flags);
+	else
 		rc = IRQ_HANDLED;
-	}
 
 	local_irq_restore(flags);
 	return rc;
diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c
index 9bfac657572e..565d79f06e6f 100644
--- a/drivers/usb/host/hwa-hc.c
+++ b/drivers/usb/host/hwa-hc.c
@@ -776,7 +776,6 @@ static int hwahc_probe(struct usb_interface *usb_iface,
 		goto error_alloc;
 	}
 	usb_hcd->wireless = 1;
-	set_bit(HCD_FLAG_SAW_IRQ, &usb_hcd->flags);
 	wusbhc = usb_hcd_to_wusbhc(usb_hcd);
 	hwahc = container_of(wusbhc, struct hwahc, wusbhc);
 	hwahc_init(hwahc);
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 940321b3ec68..2f8c17381c6c 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2389,17 +2389,7 @@ hw_died:
 
 irqreturn_t xhci_msi_irq(int irq, struct usb_hcd *hcd)
 {
-	irqreturn_t ret;
-	struct xhci_hcd *xhci;
-
-	xhci = hcd_to_xhci(hcd);
-	set_bit(HCD_FLAG_SAW_IRQ, &hcd->flags);
-	if (xhci->shared_hcd)
-		set_bit(HCD_FLAG_SAW_IRQ, &xhci->shared_hcd->flags);
-
-	ret = xhci_irq(hcd);
-
-	return ret;
+	return xhci_irq(hcd);
 }
 
 /****		Endpoint Ring Operations	****/
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 20a28731c338..12044c473c38 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -661,7 +661,6 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb,
 
 		handled = IRQ_HANDLED;
 		musb->is_active = 1;
-		set_bit(HCD_FLAG_SAW_IRQ, &hcd->flags);
 
 		musb->ep0_stage = MUSB_EP0_START;
 
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 03354d557b79..b2f62f3a32af 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -99,7 +99,6 @@ struct usb_hcd {
 	 */
 	unsigned long		flags;
 #define HCD_FLAG_HW_ACCESSIBLE		0	/* at full power */
-#define HCD_FLAG_SAW_IRQ		1
 #define HCD_FLAG_POLL_RH		2	/* poll for rh status? */
 #define HCD_FLAG_POLL_PENDING		3	/* status has changed? */
 #define HCD_FLAG_WAKEUP_PENDING		4	/* root hub is resuming? */
@@ -110,7 +109,6 @@ struct usb_hcd {
 	 * be slightly faster than test_bit().
 	 */
 #define HCD_HW_ACCESSIBLE(hcd)	((hcd)->flags & (1U << HCD_FLAG_HW_ACCESSIBLE))
-#define HCD_SAW_IRQ(hcd)	((hcd)->flags & (1U << HCD_FLAG_SAW_IRQ))
 #define HCD_POLL_RH(hcd)	((hcd)->flags & (1U << HCD_FLAG_POLL_RH))
 #define HCD_POLL_PENDING(hcd)	((hcd)->flags & (1U << HCD_FLAG_POLL_PENDING))
 #define HCD_WAKEUP_PENDING(hcd)	((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING))
-- 
cgit v1.2.3


From 157d2644cb0c1e71a18baaffca56d2b1d0ebf10f Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Mon, 17 Oct 2011 20:37:52 +0800
Subject: ARM: pxa: change gpio to platform device

Remove most gpio macros and change gpio driver to platform driver.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
---
 arch/arm/Kconfig                           |   2 +
 arch/arm/mach-mmp/aspenite.c               |   1 +
 arch/arm/mach-mmp/avengers_lite.c          |   1 +
 arch/arm/mach-mmp/brownstone.c             |   1 +
 arch/arm/mach-mmp/flint.c                  |   1 +
 arch/arm/mach-mmp/gplugd.c                 |   1 +
 arch/arm/mach-mmp/include/mach/gpio.h      |   3 +-
 arch/arm/mach-mmp/include/mach/mmp2.h      |   2 +
 arch/arm/mach-mmp/include/mach/pxa168.h    |   2 +
 arch/arm/mach-mmp/include/mach/pxa910.h    |   2 +
 arch/arm/mach-mmp/mmp2.c                   |  28 ++++--
 arch/arm/mach-mmp/pxa168.c                 |  29 +++++--
 arch/arm/mach-mmp/pxa910.c                 |  29 +++++--
 arch/arm/mach-mmp/tavorevb.c               |   1 +
 arch/arm/mach-mmp/teton_bga.c              |   1 +
 arch/arm/mach-mmp/ttc_dkb.c                |   1 +
 arch/arm/mach-pxa/devices.c                |  30 +++++++
 arch/arm/mach-pxa/devices.h                |   1 +
 arch/arm/mach-pxa/include/mach/gpio-pxa.h  | 131 ----------------------------
 arch/arm/mach-pxa/include/mach/gpio.h      |   5 +-
 arch/arm/mach-pxa/include/mach/idp.h       |   2 -
 arch/arm/mach-pxa/include/mach/littleton.h |   2 -
 arch/arm/mach-pxa/irq.c                    |   3 +-
 arch/arm/mach-pxa/mfp-pxa2xx.c             |   6 +-
 arch/arm/mach-pxa/pxa25x.c                 |   3 +-
 arch/arm/mach-pxa/pxa27x.c                 |   3 +-
 arch/arm/mach-pxa/pxa3xx.c                 |   6 +-
 arch/arm/mach-pxa/pxa95x.c                 |   3 +-
 arch/arm/plat-pxa/include/plat/gpio-pxa.h  |  45 ----------
 drivers/gpio/Kconfig                       |   6 ++
 drivers/gpio/Makefile                      |   2 +-
 drivers/gpio/gpio-pxa.c                    | 135 ++++++++++++++++++++++++++---
 include/linux/gpio-pxa.h                   |  16 ++++
 33 files changed, 271 insertions(+), 233 deletions(-)
 delete mode 100644 arch/arm/mach-pxa/include/mach/gpio-pxa.h
 delete mode 100644 arch/arm/plat-pxa/include/plat/gpio-pxa.h
 create mode 100644 include/linux/gpio-pxa.h

(limited to 'include/linux')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 44789eff983f..57e16d4e14dc 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -591,6 +591,7 @@ config ARCH_MMP
 	select ARCH_REQUIRE_GPIOLIB
 	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
+	select GPIO_PXA
 	select HAVE_SCHED_CLOCK
 	select TICK_ONESHOT
 	select PLAT_PXA
@@ -673,6 +674,7 @@ config ARCH_PXA
 	select CLKSRC_MMIO
 	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_CLOCKEVENTS
+	select GPIO_PXA
 	select HAVE_SCHED_CLOCK
 	select TICK_ONESHOT
 	select PLAT_PXA
diff --git a/arch/arm/mach-mmp/aspenite.c b/arch/arm/mach-mmp/aspenite.c
index fb7dfc157a5c..edcbadad31c1 100644
--- a/arch/arm/mach-mmp/aspenite.c
+++ b/arch/arm/mach-mmp/aspenite.c
@@ -231,6 +231,7 @@ static void __init common_init(void)
 	pxa168_add_nand(&aspenite_nand_info);
 	pxa168_add_fb(&aspenite_lcd_info);
 	pxa168_add_keypad(&aspenite_keypad_info);
+	platform_device_register(&pxa168_device_gpio);
 
 	/* off-chip devices */
 	platform_device_register(&smc91x_device);
diff --git a/arch/arm/mach-mmp/avengers_lite.c b/arch/arm/mach-mmp/avengers_lite.c
index 39f0878d64a0..c5d53e0742e9 100644
--- a/arch/arm/mach-mmp/avengers_lite.c
+++ b/arch/arm/mach-mmp/avengers_lite.c
@@ -38,6 +38,7 @@ static void __init avengers_lite_init(void)
 
 	/* on-chip devices */
 	pxa168_add_uart(2);
+	platform_device_register(&pxa168_device_gpio);
 }
 
 MACHINE_START(AVENGERS_LITE, "PXA168 Avengers lite Development Platform")
diff --git a/arch/arm/mach-mmp/brownstone.c b/arch/arm/mach-mmp/brownstone.c
index 983cfb15fbde..eb07565a06a3 100644
--- a/arch/arm/mach-mmp/brownstone.c
+++ b/arch/arm/mach-mmp/brownstone.c
@@ -202,6 +202,7 @@ static void __init brownstone_init(void)
 	/* on-chip devices */
 	mmp2_add_uart(1);
 	mmp2_add_uart(3);
+	platform_device_register(&mmp2_device_gpio);
 	mmp2_add_twsi(1, NULL, ARRAY_AND_SIZE(brownstone_twsi1_info));
 	mmp2_add_sdhost(0, &mmp2_sdh_platdata_mmc0); /* SD/MMC */
 	mmp2_add_sdhost(2, &mmp2_sdh_platdata_mmc2); /* eMMC */
diff --git a/arch/arm/mach-mmp/flint.c b/arch/arm/mach-mmp/flint.c
index a64c172082c4..c1f0aa88dd8a 100644
--- a/arch/arm/mach-mmp/flint.c
+++ b/arch/arm/mach-mmp/flint.c
@@ -110,6 +110,7 @@ static void __init flint_init(void)
 	/* on-chip devices */
 	mmp2_add_uart(1);
 	mmp2_add_uart(2);
+	platform_device_register(&mmp2_device_gpio);
 
 	/* off-chip devices */
 	platform_device_register(&smc91x_device);
diff --git a/arch/arm/mach-mmp/gplugd.c b/arch/arm/mach-mmp/gplugd.c
index 69156568bc41..933420a7c3ba 100644
--- a/arch/arm/mach-mmp/gplugd.c
+++ b/arch/arm/mach-mmp/gplugd.c
@@ -184,6 +184,7 @@ static void __init gplugd_init(void)
 	pxa168_add_uart(3);
 	pxa168_add_ssp(0);
 	pxa168_add_twsi(0, NULL, ARRAY_AND_SIZE(gplugd_i2c_board_info));
+	platform_device_register(&pxa168_device_gpio);
 
 	pxa168_add_eth(&gplugd_eth_platform_data);
 }
diff --git a/arch/arm/mach-mmp/include/mach/gpio.h b/arch/arm/mach-mmp/include/mach/gpio.h
index 904466d7eb95..13219ebf5128 100644
--- a/arch/arm/mach-mmp/include/mach/gpio.h
+++ b/arch/arm/mach-mmp/include/mach/gpio.h
@@ -3,7 +3,6 @@
 
 #include <asm-generic/gpio.h>
 
-#define __gpio_is_inverted(gpio)	(0)
-#define __gpio_is_occupied(gpio)	(0)
+#include <mach/cputype.h>
 
 #endif /* __ASM_MACH_GPIO_H */
diff --git a/arch/arm/mach-mmp/include/mach/mmp2.h b/arch/arm/mach-mmp/include/mach/mmp2.h
index 2f7b2d3c2b18..cba22fed2265 100644
--- a/arch/arm/mach-mmp/include/mach/mmp2.h
+++ b/arch/arm/mach-mmp/include/mach/mmp2.h
@@ -32,6 +32,8 @@ extern struct pxa_device_desc mmp2_device_sdh3;
 extern struct pxa_device_desc mmp2_device_asram;
 extern struct pxa_device_desc mmp2_device_isram;
 
+extern struct platform_device mmp2_device_gpio;
+
 static inline int mmp2_add_uart(int id)
 {
 	struct pxa_device_desc *d = NULL;
diff --git a/arch/arm/mach-mmp/include/mach/pxa168.h b/arch/arm/mach-mmp/include/mach/pxa168.h
index 7fb568d2845b..f9286089da3a 100644
--- a/arch/arm/mach-mmp/include/mach/pxa168.h
+++ b/arch/arm/mach-mmp/include/mach/pxa168.h
@@ -42,6 +42,8 @@ struct pxa168_usb_pdata {
 /* pdata can be NULL */
 int __init pxa168_add_usb_host(struct pxa168_usb_pdata *pdata);
 
+extern struct platform_device pxa168_device_gpio;
+
 static inline int pxa168_add_uart(int id)
 {
 	struct pxa_device_desc *d = NULL;
diff --git a/arch/arm/mach-mmp/include/mach/pxa910.h b/arch/arm/mach-mmp/include/mach/pxa910.h
index 91be75591398..4de13abef7bb 100644
--- a/arch/arm/mach-mmp/include/mach/pxa910.h
+++ b/arch/arm/mach-mmp/include/mach/pxa910.h
@@ -21,6 +21,8 @@ extern struct pxa_device_desc pxa910_device_pwm3;
 extern struct pxa_device_desc pxa910_device_pwm4;
 extern struct pxa_device_desc pxa910_device_nand;
 
+extern struct platform_device pxa910_device_gpio;
+
 static inline int pxa910_add_uart(int id)
 {
 	struct pxa_device_desc *d = NULL;
diff --git a/arch/arm/mach-mmp/mmp2.c b/arch/arm/mach-mmp/mmp2.c
index 5dd1d4a6aeb9..1ed222d3e22b 100644
--- a/arch/arm/mach-mmp/mmp2.c
+++ b/arch/arm/mach-mmp/mmp2.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/platform_device.h>
 
 #include <asm/hardware/cache-tauros2.h>
 
@@ -24,7 +25,6 @@
 #include <mach/irqs.h>
 #include <mach/dma.h>
 #include <mach/mfp.h>
-#include <mach/gpio-pxa.h>
 #include <mach/devices.h>
 #include <mach/mmp2.h>
 
@@ -33,8 +33,6 @@
 
 #define MFPR_VIRT_BASE	(APB_VIRT_BASE + 0x1e000)
 
-#define APMASK(i)	(GPIO_REGS_VIRT + BANK_OFF(i) + 0x9c)
-
 static struct mfp_addr_map mmp2_addr_map[] __initdata = {
 
 	MFP_ADDR_X(GPIO0, GPIO58, 0x54),
@@ -101,12 +99,6 @@ static void __init mmp2_init_gpio(void)
 
 	/* enable GPIO clock */
 	__raw_writel(APBC_APBCLK | APBC_FNCLK, APBC_MMP2_GPIO);
-
-	/* unmask GPIO edge detection for all 6 banks -- APMASKx */
-	for (i = 0; i < 6; i++)
-		__raw_writel(0xffffffff, APMASK(i));
-
-	pxa_init_gpio(IRQ_MMP2_GPIO, 0, 167, NULL);
 }
 
 void __init mmp2_init_irq(void)
@@ -230,3 +222,21 @@ MMP2_DEVICE(asram, "asram", -1, NONE, 0xe0000000, 0x4000);
 /* 0xd1000000 ~ 0xd101ffff is reserved for secure processor */
 MMP2_DEVICE(isram, "isram", -1, NONE, 0xd1020000, 0x18000);
 
+struct resource mmp2_resource_gpio[] = {
+	{
+		.start	= 0xd4019000,
+		.end	= 0xd4019fff,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= IRQ_MMP2_GPIO,
+		.end	= IRQ_MMP2_GPIO,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+struct platform_device mmp2_device_gpio = {
+	.name		= "pxa-gpio",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(mmp2_resource_gpio),
+	.resource	= mmp2_resource_gpio,
+};
diff --git a/arch/arm/mach-mmp/pxa168.c b/arch/arm/mach-mmp/pxa168.c
index 76ca15c00e45..fefdfe59c07c 100644
--- a/arch/arm/mach-mmp/pxa168.c
+++ b/arch/arm/mach-mmp/pxa168.c
@@ -13,6 +13,7 @@
 #include <linux/list.h>
 #include <linux/io.h>
 #include <linux/clk.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach/time.h>
 #include <mach/addr-map.h>
@@ -20,7 +21,6 @@
 #include <mach/regs-apbc.h>
 #include <mach/regs-apmu.h>
 #include <mach/irqs.h>
-#include <mach/gpio-pxa.h>
 #include <mach/dma.h>
 #include <mach/devices.h>
 #include <mach/mfp.h>
@@ -43,20 +43,12 @@ static struct mfp_addr_map pxa168_mfp_addr_map[] __initdata =
 	MFP_ADDR_END,
 };
 
-#define APMASK(i)	(GPIO_REGS_VIRT + BANK_OFF(i) + 0x09c)
-
 static void __init pxa168_init_gpio(void)
 {
 	int i;
 
 	/* enable GPIO clock */
 	__raw_writel(APBC_APBCLK | APBC_FNCLK, APBC_PXA168_GPIO);
-
-	/* unmask GPIO edge detection for all 4 banks - APMASKx */
-	for (i = 0; i < 4; i++)
-		__raw_writel(0xffffffff, APMASK(i));
-
-	pxa_init_gpio(IRQ_PXA168_GPIOX, 0, 127, NULL);
 }
 
 void __init pxa168_init_irq(void)
@@ -174,6 +166,25 @@ PXA168_DEVICE(fb, "pxa168-fb", -1, LCD, 0xd420b000, 0x1c8);
 PXA168_DEVICE(keypad, "pxa27x-keypad", -1, KEYPAD, 0xd4012000, 0x4c);
 PXA168_DEVICE(eth, "pxa168-eth", -1, MFU, 0xc0800000, 0x0fff);
 
+struct resource pxa168_resource_gpio[] = {
+	{
+		.start	= 0xd4019000,
+		.end	= 0xd4019fff,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= IRQ_PXA168_GPIOX,
+		.end	= IRQ_PXA168_GPIOX,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+struct platform_device pxa168_device_gpio = {
+	.name		= "pxa-gpio",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(pxa168_resource_gpio),
+	.resource	= pxa168_resource_gpio,
+};
+
 struct resource pxa168_usb_host_resources[] = {
 	/* USB Host conroller register base */
 	[0] = {
diff --git a/arch/arm/mach-mmp/pxa910.c b/arch/arm/mach-mmp/pxa910.c
index 4ebbfbba39fc..7b992ced095c 100644
--- a/arch/arm/mach-mmp/pxa910.c
+++ b/arch/arm/mach-mmp/pxa910.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/io.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach/time.h>
 #include <mach/addr-map.h>
@@ -19,7 +20,6 @@
 #include <mach/regs-apmu.h>
 #include <mach/cputype.h>
 #include <mach/irqs.h>
-#include <mach/gpio-pxa.h>
 #include <mach/dma.h>
 #include <mach/mfp.h>
 #include <mach/devices.h>
@@ -77,20 +77,12 @@ static struct mfp_addr_map pxa910_mfp_addr_map[] __initdata =
 	MFP_ADDR_END,
 };
 
-#define APMASK(i)	(GPIO_REGS_VIRT + BANK_OFF(i) + 0x09c)
-
 static void __init pxa910_init_gpio(void)
 {
 	int i;
 
 	/* enable GPIO clock */
 	__raw_writel(APBC_APBCLK | APBC_FNCLK, APBC_PXA910_GPIO);
-
-	/* unmask GPIO edge detection for all 4 banks - APMASKx */
-	for (i = 0; i < 4; i++)
-		__raw_writel(0xffffffff, APMASK(i));
-
-	pxa_init_gpio(IRQ_PXA910_AP_GPIO, 0, 127, NULL);
 }
 
 void __init pxa910_init_irq(void)
@@ -179,3 +171,22 @@ PXA910_DEVICE(pwm2, "pxa910-pwm", 1, NONE, 0xd401a400, 0x10);
 PXA910_DEVICE(pwm3, "pxa910-pwm", 2, NONE, 0xd401a800, 0x10);
 PXA910_DEVICE(pwm4, "pxa910-pwm", 3, NONE, 0xd401ac00, 0x10);
 PXA910_DEVICE(nand, "pxa3xx-nand", -1, NAND, 0xd4283000, 0x80, 97, 99);
+
+struct resource pxa910_resource_gpio[] = {
+	{
+		.start	= 0xd4019000,
+		.end	= 0xd4019fff,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= IRQ_PXA910_AP_GPIO,
+		.end	= IRQ_PXA910_AP_GPIO,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+struct platform_device pxa910_device_gpio = {
+	.name		= "pxa-gpio",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(pxa910_resource_gpio),
+	.resource	= pxa910_resource_gpio,
+};
diff --git a/arch/arm/mach-mmp/tavorevb.c b/arch/arm/mach-mmp/tavorevb.c
index 331f5f358b59..bb2ddb72bca2 100644
--- a/arch/arm/mach-mmp/tavorevb.c
+++ b/arch/arm/mach-mmp/tavorevb.c
@@ -94,6 +94,7 @@ static void __init tavorevb_init(void)
 
 	/* on-chip devices */
 	pxa910_add_uart(1);
+	platform_device_register(&pxa910_device_gpio);
 
 	/* off-chip devices */
 	platform_device_register(&smc91x_device);
diff --git a/arch/arm/mach-mmp/teton_bga.c b/arch/arm/mach-mmp/teton_bga.c
index 825a01cdcccd..703de85b571c 100644
--- a/arch/arm/mach-mmp/teton_bga.c
+++ b/arch/arm/mach-mmp/teton_bga.c
@@ -78,6 +78,7 @@ static void __init teton_bga_init(void)
 	pxa168_add_uart(1);
 	pxa168_add_keypad(&teton_bga_keypad_info);
 	pxa168_add_twsi(0, NULL, ARRAY_AND_SIZE(teton_bga_i2c_info));
+	platform_device_register(&pxa168_device_gpio);
 }
 
 MACHINE_START(TETON_BGA, "PXA168-based Teton BGA Development Platform")
diff --git a/arch/arm/mach-mmp/ttc_dkb.c b/arch/arm/mach-mmp/ttc_dkb.c
index fac0d5d9d464..a80ed262df1c 100644
--- a/arch/arm/mach-mmp/ttc_dkb.c
+++ b/arch/arm/mach-mmp/ttc_dkb.c
@@ -123,6 +123,7 @@ static struct platform_device ttc_dkb_device_onenand = {
 };
 
 static struct platform_device *ttc_dkb_devices[] = {
+	&pxa910_device_gpio,
 	&ttc_dkb_device_onenand,
 };
 
diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c
index 2e0425404de5..5bc13121eac5 100644
--- a/arch/arm/mach-pxa/devices.c
+++ b/arch/arm/mach-pxa/devices.c
@@ -1051,6 +1051,36 @@ struct platform_device pxa3xx_device_ssp4 = {
 };
 #endif /* CONFIG_PXA3xx || CONFIG_PXA95x */
 
+struct resource pxa_resource_gpio[] = {
+	{
+		.start	= 0x40e00000,
+		.end	= 0x40e0ffff,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= IRQ_GPIO0,
+		.end	= IRQ_GPIO0,
+		.name	= "gpio0",
+		.flags	= IORESOURCE_IRQ,
+	}, {
+		.start	= IRQ_GPIO1,
+		.end	= IRQ_GPIO1,
+		.name	= "gpio1",
+		.flags	= IORESOURCE_IRQ,
+	}, {
+		.start	= IRQ_GPIO_2_x,
+		.end	= IRQ_GPIO_2_x,
+		.name	= "gpio_mux",
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+struct platform_device pxa_device_gpio = {
+	.name		= "pxa-gpio",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(pxa_resource_gpio),
+	.resource	= pxa_resource_gpio,
+};
+
 /* pxa2xx-spi platform-device ID equals respective SSP platform-device ID + 1.
  * See comment in arch/arm/mach-pxa/ssp.c::ssp_probe() */
 void __init pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_master *info)
diff --git a/arch/arm/mach-pxa/devices.h b/arch/arm/mach-pxa/devices.h
index 2fd5a8b35757..1475db107254 100644
--- a/arch/arm/mach-pxa/devices.h
+++ b/arch/arm/mach-pxa/devices.h
@@ -16,6 +16,7 @@ extern struct platform_device pxa_device_ficp;
 extern struct platform_device sa1100_device_rtc;
 extern struct platform_device pxa_device_rtc;
 extern struct platform_device pxa_device_ac97;
+extern struct platform_device pxa_device_gpio;
 
 extern struct platform_device pxa27x_device_i2c_power;
 extern struct platform_device pxa27x_device_ohci;
diff --git a/arch/arm/mach-pxa/include/mach/gpio-pxa.h b/arch/arm/mach-pxa/include/mach/gpio-pxa.h
deleted file mode 100644
index 134b3bc332cd..000000000000
--- a/arch/arm/mach-pxa/include/mach/gpio-pxa.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Written by Philipp Zabel <philipp.zabel@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-#ifndef __MACH_PXA_GPIO_PXA_H
-#define __MACH_PXA_GPIO_PXA_H
-
-#include <mach/irqs.h>
-#include <mach/hardware.h>
-
-#define GPIO_REGS_VIRT	io_p2v(0x40E00000)
-
-#define BANK_OFF(n)	(((n) < 3) ? (n) << 2 : 0x100 + (((n) - 3) << 2))
-#define GPIO_REG(x)	(*(volatile u32 *)(GPIO_REGS_VIRT + (x)))
-
-/* GPIO Pin Level Registers */
-#define GPLR0		GPIO_REG(BANK_OFF(0) + 0x00)
-#define GPLR1		GPIO_REG(BANK_OFF(1) + 0x00)
-#define GPLR2		GPIO_REG(BANK_OFF(2) + 0x00)
-#define GPLR3		GPIO_REG(BANK_OFF(3) + 0x00)
-
-/* GPIO Pin Direction Registers */
-#define GPDR0		GPIO_REG(BANK_OFF(0) + 0x0c)
-#define GPDR1		GPIO_REG(BANK_OFF(1) + 0x0c)
-#define GPDR2		GPIO_REG(BANK_OFF(2) + 0x0c)
-#define GPDR3		GPIO_REG(BANK_OFF(3) + 0x0c)
-
-/* GPIO Pin Output Set Registers */
-#define GPSR0		GPIO_REG(BANK_OFF(0) + 0x18)
-#define GPSR1		GPIO_REG(BANK_OFF(1) + 0x18)
-#define GPSR2		GPIO_REG(BANK_OFF(2) + 0x18)
-#define GPSR3		GPIO_REG(BANK_OFF(3) + 0x18)
-
-/* GPIO Pin Output Clear Registers */
-#define GPCR0		GPIO_REG(BANK_OFF(0) + 0x24)
-#define GPCR1		GPIO_REG(BANK_OFF(1) + 0x24)
-#define GPCR2		GPIO_REG(BANK_OFF(2) + 0x24)
-#define GPCR3		GPIO_REG(BANK_OFF(3) + 0x24)
-
-/* GPIO Rising Edge Detect Registers */
-#define GRER0		GPIO_REG(BANK_OFF(0) + 0x30)
-#define GRER1		GPIO_REG(BANK_OFF(1) + 0x30)
-#define GRER2		GPIO_REG(BANK_OFF(2) + 0x30)
-#define GRER3		GPIO_REG(BANK_OFF(3) + 0x30)
-
-/* GPIO Falling Edge Detect Registers */
-#define GFER0		GPIO_REG(BANK_OFF(0) + 0x3c)
-#define GFER1		GPIO_REG(BANK_OFF(1) + 0x3c)
-#define GFER2		GPIO_REG(BANK_OFF(2) + 0x3c)
-#define GFER3		GPIO_REG(BANK_OFF(3) + 0x3c)
-
-/* GPIO Edge Detect Status Registers */
-#define GEDR0		GPIO_REG(BANK_OFF(0) + 0x48)
-#define GEDR1		GPIO_REG(BANK_OFF(1) + 0x48)
-#define GEDR2		GPIO_REG(BANK_OFF(2) + 0x48)
-#define GEDR3		GPIO_REG(BANK_OFF(3) + 0x48)
-
-/* GPIO Alternate Function Select Registers */
-#define GAFR0_L		GPIO_REG(0x0054)
-#define GAFR0_U		GPIO_REG(0x0058)
-#define GAFR1_L		GPIO_REG(0x005C)
-#define GAFR1_U		GPIO_REG(0x0060)
-#define GAFR2_L		GPIO_REG(0x0064)
-#define GAFR2_U		GPIO_REG(0x0068)
-#define GAFR3_L		GPIO_REG(0x006C)
-#define GAFR3_U		GPIO_REG(0x0070)
-
-/* More handy macros.  The argument is a literal GPIO number. */
-
-#define GPIO_bit(x)	(1 << ((x) & 0x1f))
-
-#define GPLR(x)		GPIO_REG(BANK_OFF((x) >> 5) + 0x00)
-#define GPDR(x)		GPIO_REG(BANK_OFF((x) >> 5) + 0x0c)
-#define GPSR(x)		GPIO_REG(BANK_OFF((x) >> 5) + 0x18)
-#define GPCR(x)		GPIO_REG(BANK_OFF((x) >> 5) + 0x24)
-#define GRER(x)		GPIO_REG(BANK_OFF((x) >> 5) + 0x30)
-#define GFER(x)		GPIO_REG(BANK_OFF((x) >> 5) + 0x3c)
-#define GEDR(x)		GPIO_REG(BANK_OFF((x) >> 5) + 0x48)
-#define GAFR(x)		GPIO_REG(0x54 + (((x) & 0x70) >> 2))
-
-
-#define gpio_to_bank(gpio)	((gpio) >> 5)
-
-#ifdef CONFIG_CPU_PXA26x
-/* GPIO86/87/88/89 on PXA26x have their direction bits in GPDR2 inverted,
- * as well as their Alternate Function value being '1' for GPIO in GAFRx.
- */
-static inline int __gpio_is_inverted(unsigned gpio)
-{
-	return cpu_is_pxa25x() && gpio > 85;
-}
-#else
-static inline int __gpio_is_inverted(unsigned gpio) { return 0; }
-#endif
-
-/*
- * On PXA25x and PXA27x, GAFRx and GPDRx together decide the alternate
- * function of a GPIO, and GPDRx cannot be altered once configured. It
- * is attributed as "occupied" here (I know this terminology isn't
- * accurate, you are welcome to propose a better one :-)
- */
-static inline int __gpio_is_occupied(unsigned gpio)
-{
-	if (cpu_is_pxa27x() || cpu_is_pxa25x()) {
-		int af = (GAFR(gpio) >> ((gpio & 0xf) * 2)) & 0x3;
-		int dir = GPDR(gpio) & GPIO_bit(gpio);
-
-		if (__gpio_is_inverted(gpio))
-			return af != 1 || dir == 0;
-		else
-			return af != 0 || dir != 0;
-	} else
-		return GPDR(gpio) & GPIO_bit(gpio);
-}
-
-#include <plat/gpio-pxa.h>
-#endif /* __MACH_PXA_GPIO_PXA_H */
diff --git a/arch/arm/mach-pxa/include/mach/gpio.h b/arch/arm/mach-pxa/include/mach/gpio.h
index 561cdbfd7ccf..0248e433bc98 100644
--- a/arch/arm/mach-pxa/include/mach/gpio.h
+++ b/arch/arm/mach-pxa/include/mach/gpio.h
@@ -25,7 +25,8 @@
 #define __ASM_ARCH_PXA_GPIO_H
 
 #include <asm-generic/gpio.h>
-/* The defines for the driver are needed for the accelerated accessors */
-#include "gpio-pxa.h"
+
+#include <mach/irqs.h>
+#include <mach/hardware.h>
 
 #endif
diff --git a/arch/arm/mach-pxa/include/mach/idp.h b/arch/arm/mach-pxa/include/mach/idp.h
index a7f912f5ea2f..22a96f87232b 100644
--- a/arch/arm/mach-pxa/include/mach/idp.h
+++ b/arch/arm/mach-pxa/include/mach/idp.h
@@ -131,8 +131,6 @@
 #define PCC_VS2		(1 << 1)
 #define PCC_VS1		(1 << 0)
 
-#define PCC_DETECT(x)	(GPLR(7 + (x)) & GPIO_bit(7 + (x)))
-
 /* A listing of interrupts used by external hardware devices */
 
 #define TOUCH_PANEL_IRQ			PXA_GPIO_TO_IRQ(5)
diff --git a/arch/arm/mach-pxa/include/mach/littleton.h b/arch/arm/mach-pxa/include/mach/littleton.h
index e20ac1b64b00..8066be54e9f5 100644
--- a/arch/arm/mach-pxa/include/mach/littleton.h
+++ b/arch/arm/mach-pxa/include/mach/littleton.h
@@ -1,8 +1,6 @@
 #ifndef __ASM_ARCH_LITTLETON_H
 #define __ASM_ARCH_LITTLETON_H
 
-#include <mach/gpio-pxa.h>
-
 #define LITTLETON_ETH_PHYS	0x30000000
 
 #define LITTLETON_GPIO_LCD_CS	(17)
diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c
index 36c538f48fa6..5dae15ea6718 100644
--- a/arch/arm/mach-pxa/irq.c
+++ b/arch/arm/mach-pxa/irq.c
@@ -22,7 +22,6 @@
 
 #include <mach/hardware.h>
 #include <mach/irqs.h>
-#include <mach/gpio-pxa.h>
 
 #include "generic.h"
 
@@ -122,7 +121,7 @@ asmlinkage void __exception_irq_entry ichp_handle_irq(struct pt_regs *regs)
 	} while (1);
 }
 
-void __init pxa_init_irq(int irq_nr, set_wake_t fn)
+void __init pxa_init_irq(int irq_nr, int (*fn)(struct irq_data *, unsigned int))
 {
 	int irq, i, n;
 
diff --git a/arch/arm/mach-pxa/mfp-pxa2xx.c b/arch/arm/mach-pxa/mfp-pxa2xx.c
index 43a5f6861ca3..f14775536b83 100644
--- a/arch/arm/mach-pxa/mfp-pxa2xx.c
+++ b/arch/arm/mach-pxa/mfp-pxa2xx.c
@@ -13,6 +13,7 @@
  *  published by the Free Software Foundation.
  */
 #include <linux/gpio.h>
+#include <linux/gpio-pxa.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -20,7 +21,6 @@
 
 #include <mach/pxa2xx-regs.h>
 #include <mach/mfp-pxa2xx.h>
-#include <mach/gpio-pxa.h>
 
 #include "generic.h"
 
@@ -29,6 +29,10 @@
 #define GAFR_L(x)	__GAFR(0, x)
 #define GAFR_U(x)	__GAFR(1, x)
 
+#define BANK_OFF(n)	(((n) < 3) ? (n) << 2 : 0x100 + (((n) - 3) << 2))
+#define GPLR(x)		__REG2(0x40E00000, BANK_OFF((x) >> 5))
+#define GPDR(x)		__REG2(0x40E00000, BANK_OFF((x) >> 5) + 0x0c)
+
 #define PWER_WE35	(1 << 24)
 
 struct gpio_desc {
diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c
index 0f38cfce5c35..91e4f6c03766 100644
--- a/arch/arm/mach-pxa/pxa25x.c
+++ b/arch/arm/mach-pxa/pxa25x.c
@@ -17,6 +17,7 @@
  * need be.
  */
 #include <linux/gpio.h>
+#include <linux/gpio-pxa.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -312,14 +313,12 @@ set_pwer:
 void __init pxa25x_init_irq(void)
 {
 	pxa_init_irq(32, pxa25x_set_wake);
-	pxa_init_gpio(IRQ_GPIO_2_x, 2, 84, pxa25x_set_wake);
 }
 
 #ifdef CONFIG_CPU_PXA26x
 void __init pxa26x_init_irq(void)
 {
 	pxa_init_irq(32, pxa25x_set_wake);
-	pxa_init_gpio(IRQ_GPIO_2_x, 2, 89, pxa25x_set_wake);
 }
 #endif
 
diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c
index 44563a0997bd..aed6cbcf3866 100644
--- a/arch/arm/mach-pxa/pxa27x.c
+++ b/arch/arm/mach-pxa/pxa27x.c
@@ -12,6 +12,7 @@
  * published by the Free Software Foundation.
  */
 #include <linux/gpio.h>
+#include <linux/gpio-pxa.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -386,7 +387,6 @@ static int pxa27x_set_wake(struct irq_data *d, unsigned int on)
 void __init pxa27x_init_irq(void)
 {
 	pxa_init_irq(34, pxa27x_set_wake);
-	pxa_init_gpio(IRQ_GPIO_2_x, 2, 120, pxa27x_set_wake);
 }
 
 static struct map_desc pxa27x_io_desc[] __initdata = {
@@ -422,6 +422,7 @@ void __init pxa27x_set_i2c_power_info(struct i2c_pxa_platform_data *info)
 }
 
 static struct platform_device *devices[] __initdata = {
+	&pxa_device_gpio,
 	&pxa27x_device_udc,
 	&pxa_device_pmu,
 	&pxa_device_i2s,
diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c
index 0737c59b88ae..06cfe348c709 100644
--- a/arch/arm/mach-pxa/pxa3xx.c
+++ b/arch/arm/mach-pxa/pxa3xx.c
@@ -25,7 +25,6 @@
 #include <asm/mach/map.h>
 #include <asm/suspend.h>
 #include <mach/hardware.h>
-#include <mach/gpio-pxa.h>
 #include <mach/pxa3xx-regs.h>
 #include <mach/reset.h>
 #include <mach/ohci.h>
@@ -365,7 +364,8 @@ static struct irq_chip pxa_ext_wakeup_chip = {
 	.irq_set_type	= pxa_set_ext_wakeup_type,
 };
 
-static void __init pxa_init_ext_wakeup_irq(set_wake_t fn)
+static void __init pxa_init_ext_wakeup_irq(int (*fn)(struct irq_data *,
+					   unsigned int))
 {
 	int irq;
 
@@ -388,7 +388,6 @@ void __init pxa3xx_init_irq(void)
 
 	pxa_init_irq(56, pxa3xx_set_wake);
 	pxa_init_ext_wakeup_irq(pxa3xx_set_wake);
-	pxa_init_gpio(IRQ_GPIO_2_x, 2, 127, NULL);
 }
 
 static struct map_desc pxa3xx_io_desc[] __initdata = {
@@ -417,6 +416,7 @@ void __init pxa3xx_set_i2c_power_info(struct i2c_pxa_platform_data *info)
 }
 
 static struct platform_device *devices[] __initdata = {
+	&pxa_device_gpio,
 	&pxa27x_device_udc,
 	&pxa_device_pmu,
 	&pxa_device_i2s,
diff --git a/arch/arm/mach-pxa/pxa95x.c b/arch/arm/mach-pxa/pxa95x.c
index 51371b39d2a3..0961093dd033 100644
--- a/arch/arm/mach-pxa/pxa95x.c
+++ b/arch/arm/mach-pxa/pxa95x.c
@@ -20,7 +20,6 @@
 #include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
-#include <mach/gpio-pxa.h>
 #include <mach/pxa3xx-regs.h>
 #include <mach/pxa930.h>
 #include <mach/reset.h>
@@ -235,7 +234,6 @@ static struct clk_lookup pxa95x_clkregs[] = {
 void __init pxa95x_init_irq(void)
 {
 	pxa_init_irq(96, NULL);
-	pxa_init_gpio(IRQ_GPIO_2_x, 2, 127, NULL);
 }
 
 /*
@@ -248,6 +246,7 @@ void __init pxa95x_set_i2c_power_info(struct i2c_pxa_platform_data *info)
 }
 
 static struct platform_device *devices[] __initdata = {
+	&pxa_device_gpio,
 	&sa1100_device_rtc,
 	&pxa_device_rtc,
 	&pxa27x_device_ssp1,
diff --git a/arch/arm/plat-pxa/include/plat/gpio-pxa.h b/arch/arm/plat-pxa/include/plat/gpio-pxa.h
deleted file mode 100644
index 15bf9be051e8..000000000000
--- a/arch/arm/plat-pxa/include/plat/gpio-pxa.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef __PLAT_PXA_GPIO_H
-#define __PLAT_PXA_GPIO_H
-
-struct irq_data;
-
-/*
- * We handle the GPIOs by banks, each bank covers up to 32 GPIOs with
- * one set of registers. The register offsets are organized below:
- *
- *           GPLR    GPDR    GPSR    GPCR    GRER    GFER    GEDR
- * BANK 0 - 0x0000  0x000C  0x0018  0x0024  0x0030  0x003C  0x0048
- * BANK 1 - 0x0004  0x0010  0x001C  0x0028  0x0034  0x0040  0x004C
- * BANK 2 - 0x0008  0x0014  0x0020  0x002C  0x0038  0x0044  0x0050
- *
- * BANK 3 - 0x0100  0x010C  0x0118  0x0124  0x0130  0x013C  0x0148
- * BANK 4 - 0x0104  0x0110  0x011C  0x0128  0x0134  0x0140  0x014C
- * BANK 5 - 0x0108  0x0114  0x0120  0x012C  0x0138  0x0144  0x0150
- *
- * NOTE:
- *   BANK 3 is only available on PXA27x and later processors.
- *   BANK 4 and 5 are only available on PXA935
- */
-
-#define GPIO_BANK(n)	(GPIO_REGS_VIRT + BANK_OFF(n))
-
-#define GPLR_OFFSET	0x00
-#define GPDR_OFFSET	0x0C
-#define GPSR_OFFSET	0x18
-#define GPCR_OFFSET	0x24
-#define GRER_OFFSET	0x30
-#define GFER_OFFSET	0x3C
-#define GEDR_OFFSET	0x48
-
-/* NOTE: some PXAs have fewer on-chip GPIOs (like PXA255, with 85).
- * Those cases currently cause holes in the GPIO number space, the
- * actual number of the last GPIO is recorded by 'pxa_last_gpio'.
- */
-extern int pxa_last_gpio;
-
-typedef int (*set_wake_t)(struct irq_data *d, unsigned int on);
-
-extern void pxa_init_gpio(int mux_irq, int start, int end, set_wake_t fn);
-extern int pxa_irq_to_gpio(int irq);
-
-#endif /* __PLAT_PXA_GPIO_H */
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 8482a23887dc..aa0b94ff36d0 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -141,6 +141,12 @@ config GPIO_PL061
 	help
 	  Say yes here to support the PrimeCell PL061 GPIO device
 
+config GPIO_PXA
+	bool "PXA GPIO support"
+	depends on ARCH_PXA || ARCH_MMP
+	help
+	  Say yes here to support the PXA GPIO device
+
 config GPIO_XILINX
 	bool "Xilinx GPIO support"
 	depends on PPC_OF || MICROBLAZE
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index dbcb0bcfd8da..5b2b9e26f49c 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_GPIO_PCA953X)	+= gpio-pca953x.o
 obj-$(CONFIG_GPIO_PCF857X)	+= gpio-pcf857x.o
 obj-$(CONFIG_GPIO_PCH)		+= gpio-pch.o
 obj-$(CONFIG_GPIO_PL061)	+= gpio-pl061.o
-obj-$(CONFIG_PLAT_PXA)		+= gpio-pxa.o
+obj-$(CONFIG_GPIO_PXA)		+= gpio-pxa.o
 obj-$(CONFIG_GPIO_RDC321X)	+= gpio-rdc321x.o
 obj-$(CONFIG_PLAT_SAMSUNG)	+= gpio-samsung.o
 obj-$(CONFIG_ARCH_SA1100)	+= gpio-sa1100.o
diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c
index 31d2da4100cd..079f97fde2c7 100644
--- a/drivers/gpio/gpio-pxa.c
+++ b/drivers/gpio/gpio-pxa.c
@@ -12,13 +12,42 @@
  *  published by the Free Software Foundation.
  */
 #include <linux/gpio.h>
+#include <linux/gpio-pxa.h>
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/io.h>
+#include <linux/platform_device.h>
 #include <linux/syscore_ops.h>
 #include <linux/slab.h>
 
-#include <mach/gpio-pxa.h>
+/*
+ * We handle the GPIOs by banks, each bank covers up to 32 GPIOs with
+ * one set of registers. The register offsets are organized below:
+ *
+ *           GPLR    GPDR    GPSR    GPCR    GRER    GFER    GEDR
+ * BANK 0 - 0x0000  0x000C  0x0018  0x0024  0x0030  0x003C  0x0048
+ * BANK 1 - 0x0004  0x0010  0x001C  0x0028  0x0034  0x0040  0x004C
+ * BANK 2 - 0x0008  0x0014  0x0020  0x002C  0x0038  0x0044  0x0050
+ *
+ * BANK 3 - 0x0100  0x010C  0x0118  0x0124  0x0130  0x013C  0x0148
+ * BANK 4 - 0x0104  0x0110  0x011C  0x0128  0x0134  0x0140  0x014C
+ * BANK 5 - 0x0108  0x0114  0x0120  0x012C  0x0138  0x0144  0x0150
+ *
+ * NOTE:
+ *   BANK 3 is only available on PXA27x and later processors.
+ *   BANK 4 and 5 are only available on PXA935
+ */
+
+#define GPLR_OFFSET	0x00
+#define GPDR_OFFSET	0x0C
+#define GPSR_OFFSET	0x18
+#define GPCR_OFFSET	0x24
+#define GRER_OFFSET	0x30
+#define GFER_OFFSET	0x3C
+#define GEDR_OFFSET	0x48
+#define GAFR_OFFSET	0x54
+
+#define BANK_OFF(n)	(((n) < 3) ? (n) << 2 : 0x100 + (((n) - 3) << 2))
 
 int pxa_last_gpio;
 
@@ -52,6 +81,7 @@ enum {
 static DEFINE_SPINLOCK(gpio_lock);
 static struct pxa_gpio_chip *pxa_gpio_chips;
 static int gpio_type;
+static void __iomem *gpio_reg_base;
 
 #define for_each_gpio_chip(i, c)			\
 	for (i = 0, c = &pxa_gpio_chips[0]; i <= pxa_last_gpio; i += 32, c++)
@@ -76,6 +106,53 @@ static inline int gpio_is_mmp_type(int type)
 	return (type & MMP_GPIO) != 0;
 }
 
+/* GPIO86/87/88/89 on PXA26x have their direction bits in PXA_GPDR(2 inverted,
+ * as well as their Alternate Function value being '1' for GPIO in GAFRx.
+ */
+static inline int __gpio_is_inverted(int gpio)
+{
+	if ((gpio_type == PXA26X_GPIO) && (gpio > 85))
+		return 1;
+	return 0;
+}
+
+/*
+ * On PXA25x and PXA27x, GAFRx and GPDRx together decide the alternate
+ * function of a GPIO, and GPDRx cannot be altered once configured. It
+ * is attributed as "occupied" here (I know this terminology isn't
+ * accurate, you are welcome to propose a better one :-)
+ */
+static inline int __gpio_is_occupied(unsigned gpio)
+{
+	struct pxa_gpio_chip *pxachip;
+	void __iomem *base;
+	unsigned long gafr = 0, gpdr = 0;
+	int ret, af = 0, dir = 0;
+
+	pxachip = gpio_to_pxachip(gpio);
+	base = gpio_chip_base(&pxachip->chip);
+	gpdr = readl_relaxed(base + GPDR_OFFSET);
+
+	switch (gpio_type) {
+	case PXA25X_GPIO:
+	case PXA26X_GPIO:
+	case PXA27X_GPIO:
+		gafr = readl_relaxed(base + GAFR_OFFSET);
+		af = (gafr >> ((gpio & 0xf) * 2)) & 0x3;
+		dir = gpdr & GPIO_bit(gpio);
+
+		if (__gpio_is_inverted(gpio))
+			ret = (af != 1) || (dir == 0);
+		else
+			ret = (af != 0) || (dir != 0);
+		break;
+	default:
+		ret = gpdr & GPIO_bit(gpio);
+		break;
+	}
+	return ret;
+}
+
 #ifdef CONFIG_ARCH_PXA
 static inline int __pxa_gpio_to_irq(int gpio)
 {
@@ -187,7 +264,7 @@ static void pxa_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
 				(value ? GPSR_OFFSET : GPCR_OFFSET));
 }
 
-static int __init pxa_init_gpio_chip(int gpio_end)
+static int __devinit pxa_init_gpio_chip(int gpio_end)
 {
 	int i, gpio, nbanks = gpio_to_bank(gpio_end) + 1;
 	struct pxa_gpio_chip *chips;
@@ -202,7 +279,7 @@ static int __init pxa_init_gpio_chip(int gpio_end)
 		struct gpio_chip *c = &chips[i].chip;
 
 		sprintf(chips[i].label, "gpio-%d", i);
-		chips[i].regbase = GPIO_BANK(i);
+		chips[i].regbase = gpio_reg_base + BANK_OFF(i);
 
 		c->base  = gpio;
 		c->label = chips[i].label;
@@ -384,17 +461,35 @@ static int pxa_gpio_nums(void)
 	return count;
 }
 
-void __init pxa_init_gpio(int mux_irq, int start, int end, set_wake_t fn)
+static int __devinit pxa_gpio_probe(struct platform_device *pdev)
 {
 	struct pxa_gpio_chip *c;
+	struct resource *res;
 	int gpio, irq;
+	int irq0 = 0, irq1 = 0, irq_mux, gpio_offset = 0;
 
 	pxa_last_gpio = pxa_gpio_nums();
 	if (!pxa_last_gpio)
-		return;
+		return -EINVAL;
+
+	irq0 = platform_get_irq_byname(pdev, "gpio0");
+	irq1 = platform_get_irq_byname(pdev, "gpio1");
+	irq_mux = platform_get_irq_byname(pdev, "gpio_mux");
+	if ((irq0 > 0 && irq1 <= 0) || (irq0 <= 0 && irq1 > 0)
+		|| (irq_mux <= 0))
+		return -EINVAL;
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+	gpio_reg_base = ioremap(res->start, resource_size(res));
+	if (!gpio_reg_base)
+		return -EINVAL;
+
+	if (irq0 > 0)
+		gpio_offset = 2;
 
 	/* Initialize GPIO chips */
-	pxa_init_gpio_chip(end);
+	pxa_init_gpio_chip(pxa_last_gpio);
 
 	/* clear all GPIO edge detects */
 	for_each_gpio_chip(gpio, c) {
@@ -417,16 +512,29 @@ void __init pxa_init_gpio(int mux_irq, int start, int end, set_wake_t fn)
 	irq_set_chained_handler(IRQ_GPIO1, pxa_gpio_demux_handler);
 #endif
 
-	for (irq  = gpio_to_irq(start); irq <= gpio_to_irq(end); irq++) {
+	for (irq  = gpio_to_irq(gpio_offset);
+		irq <= gpio_to_irq(pxa_last_gpio); irq++) {
 		irq_set_chip_and_handler(irq, &pxa_muxed_gpio_chip,
 					 handle_edge_irq);
 		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
 	}
 
-	/* Install handler for GPIO>=2 edge detect interrupts */
-	irq_set_chained_handler(mux_irq, pxa_gpio_demux_handler);
-	pxa_muxed_gpio_chip.irq_set_wake = fn;
+	irq_set_chained_handler(irq_mux, pxa_gpio_demux_handler);
+	return 0;
+}
+
+static struct platform_driver pxa_gpio_driver = {
+	.probe		= pxa_gpio_probe,
+	.driver		= {
+		.name	= "pxa-gpio",
+	},
+};
+
+static int __init pxa_gpio_init(void)
+{
+	return platform_driver_register(&pxa_gpio_driver);
 }
+postcore_initcall(pxa_gpio_init);
 
 #ifdef CONFIG_PM
 static int pxa_gpio_suspend(void)
@@ -470,3 +578,10 @@ struct syscore_ops pxa_gpio_syscore_ops = {
 	.suspend	= pxa_gpio_suspend,
 	.resume		= pxa_gpio_resume,
 };
+
+static int __init pxa_gpio_sysinit(void)
+{
+	register_syscore_ops(&pxa_gpio_syscore_ops);
+	return 0;
+}
+postcore_initcall(pxa_gpio_sysinit);
diff --git a/include/linux/gpio-pxa.h b/include/linux/gpio-pxa.h
new file mode 100644
index 000000000000..05071ee34c3f
--- /dev/null
+++ b/include/linux/gpio-pxa.h
@@ -0,0 +1,16 @@
+#ifndef __GPIO_PXA_H
+#define __GPIO_PXA_H
+
+#define GPIO_bit(x)	(1 << ((x) & 0x1f))
+
+#define gpio_to_bank(gpio)	((gpio) >> 5)
+
+/* NOTE: some PXAs have fewer on-chip GPIOs (like PXA255, with 85).
+ * Those cases currently cause holes in the GPIO number space, the
+ * actual number of the last GPIO is recorded by 'pxa_last_gpio'.
+ */
+extern int pxa_last_gpio;
+
+extern int pxa_irq_to_gpio(int irq);
+
+#endif /* __GPIO_PXA_H */
-- 
cgit v1.2.3


From 1460432cb513f0c16136ed132c20ecfbf8ccf942 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 21 Oct 2011 15:56:05 -0400
Subject: iommu: Add iommu_device_group callback and iommu_group sysfs entry

An IOMMU group is a set of devices for which the IOMMU cannot
distinguish transactions.  For PCI devices, a group often occurs
when a PCI bridge is involved.  Transactions from any device
behind the bridge appear to be sourced from the bridge itself.
We leave it to the IOMMU driver to define the grouping restraints
for their platform.

Using this new interface, the group for a device can be retrieved
using the iommu_device_group() callback.  Users will compare the
value returned against the value returned for other devices to
determine whether they are part of the same group.  Devices with
no group are not translated by the IOMMU.  There should be no
expectations about the group numbers as they may be arbitrarily
assigned by the IOMMU driver and may not be persistent across boots.

We also provide a sysfs interface to the group numbers here so
that userspace can understand IOMMU dependencies between devices
for managing safe, userspace drivers.

[Some code changes by Joerg Roedel <joerg.roedel@amd.com>]

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/iommu.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/iommu.h |  7 ++++++
 2 files changed, 67 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 2fb2963df553..9c35be4b333f 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -25,8 +25,59 @@
 #include <linux/errno.h>
 #include <linux/iommu.h>
 
+static ssize_t show_iommu_group(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	unsigned int groupid;
+
+	if (iommu_device_group(dev, &groupid))
+		return 0;
+
+	return sprintf(buf, "%u", groupid);
+}
+static DEVICE_ATTR(iommu_group, S_IRUGO, show_iommu_group, NULL);
+
+static int add_iommu_group(struct device *dev, void *data)
+{
+	unsigned int groupid;
+
+	if (iommu_device_group(dev, &groupid) == 0)
+		return device_create_file(dev, &dev_attr_iommu_group);
+
+	return 0;
+}
+
+static int remove_iommu_group(struct device *dev)
+{
+	unsigned int groupid;
+
+	if (iommu_device_group(dev, &groupid) == 0)
+		device_remove_file(dev, &dev_attr_iommu_group);
+
+	return 0;
+}
+
+static int iommu_device_notifier(struct notifier_block *nb,
+				 unsigned long action, void *data)
+{
+	struct device *dev = data;
+
+	if (action == BUS_NOTIFY_ADD_DEVICE)
+		return add_iommu_group(dev, NULL);
+	else if (action == BUS_NOTIFY_DEL_DEVICE)
+		return remove_iommu_group(dev);
+
+	return 0;
+}
+
+static struct notifier_block iommu_device_nb = {
+	.notifier_call = iommu_device_notifier,
+};
+
 static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops)
 {
+	bus_register_notifier(bus, &iommu_device_nb);
+	bus_for_each_dev(bus, NULL, NULL, add_iommu_group);
 }
 
 /**
@@ -186,3 +237,12 @@ int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
 	return domain->ops->unmap(domain, iova, gfp_order);
 }
 EXPORT_SYMBOL_GPL(iommu_unmap);
+
+int iommu_device_group(struct device *dev, unsigned int *groupid)
+{
+	if (iommu_present(dev->bus) && dev->bus->iommu_ops->device_group)
+		return dev->bus->iommu_ops->device_group(dev, groupid);
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(iommu_device_group);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 432acc4c054d..93617e7779a1 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -61,6 +61,7 @@ struct iommu_ops {
 				    unsigned long iova);
 	int (*domain_has_cap)(struct iommu_domain *domain,
 			      unsigned long cap);
+	int (*device_group)(struct device *dev, unsigned int *groupid);
 };
 
 extern int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops);
@@ -81,6 +82,7 @@ extern int iommu_domain_has_cap(struct iommu_domain *domain,
 				unsigned long cap);
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
 					iommu_fault_handler_t handler);
+extern int iommu_device_group(struct device *dev, unsigned int *groupid);
 
 /**
  * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
@@ -179,6 +181,11 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain,
 {
 }
 
+static inline int iommu_device_group(struct device *dev, unsigned int *groupid);
+{
+	return -ENODEV;
+}
+
 #endif /* CONFIG_IOMMU_API */
 
 #endif /* __LINUX_IOMMU_H */
-- 
cgit v1.2.3


From 95bdaf71ccf2cb4bba0c9a3d2baea0e7916f466b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 15 Nov 2011 12:48:29 +0100
Subject: iommu: Fix compile error with !IOMMU_API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/iommu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 93617e7779a1..0f318fd549be 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -181,7 +181,7 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain,
 {
 }
 
-static inline int iommu_device_group(struct device *dev, unsigned int *groupid);
+static inline int iommu_device_group(struct device *dev, unsigned int *groupid)
 {
 	return -ENODEV;
 }
-- 
cgit v1.2.3


From 4556143cab73e013d0c3fa00f0f4f4373882399e Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Sun, 6 Nov 2011 19:06:25 +0100
Subject: USB: serial: remove write_urb_busy field from usb_serial_port

Remove no longer used write_urb_busy field from struct usb_serial_port.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/serial.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index b29f70b2ecae..8ccd405e9005 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -62,7 +62,6 @@ enum port_dev_state {
  * @bulk_out_size: the size of the bulk_out_buffer, in bytes.
  * @write_urb: pointer to the bulk out struct urb for this port.
  * @write_fifo: kfifo used to buffer outgoing data
- * @write_urb_busy: port`s writing status
  * @bulk_out_buffers: pointers to the bulk out buffers for this port
  * @write_urbs: pointers to the bulk out urbs for this port
  * @write_urbs_free: status bitmap the for bulk out urbs
@@ -103,7 +102,6 @@ struct usb_serial_port {
 	int			bulk_out_size;
 	struct urb		*write_urb;
 	struct kfifo		write_fifo;
-	int			write_urb_busy;
 
 	unsigned char		*bulk_out_buffers[2];
 	struct urb		*write_urbs[2];
-- 
cgit v1.2.3


From d83b405383c965498923f3561c3321e2b5df5727 Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Sun, 6 Nov 2011 19:06:37 +0100
Subject: USB: serial: add support for multiple read urbs

Add support for multiple read urbs to generic read implementation.

Use a static array of two read urbs for now which is enough to get a
50% throughput increase in one test setup.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/generic.c    | 83 ++++++++++++++++++++++++++++++-----------
 drivers/usb/serial/usb-serial.c | 50 +++++++++++++++----------
 include/linux/usb/serial.h      |  9 ++++-
 3 files changed, 101 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index e4db5ad2bc55..f7403576f99f 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -1,7 +1,7 @@
 /*
  * USB Serial Converter Generic functions
  *
- * Copyright (C) 2010 Johan Hovold (jhovold@gmail.com)
+ * Copyright (C) 2010 - 2011 Johan Hovold (jhovold@gmail.com)
  * Copyright (C) 1999 - 2002 Greg Kroah-Hartman (greg@kroah.com)
  *
  *	This program is free software; you can redistribute it and/or
@@ -132,7 +132,7 @@ int usb_serial_generic_open(struct tty_struct *tty, struct usb_serial_port *port
 
 	/* if we have a bulk endpoint, start reading from it */
 	if (port->bulk_in_size)
-		result = usb_serial_generic_submit_read_urb(port, GFP_KERNEL);
+		result = usb_serial_generic_submit_read_urbs(port, GFP_KERNEL);
 
 	return result;
 }
@@ -157,8 +157,10 @@ static void generic_cleanup(struct usb_serial_port *port)
 			kfifo_reset_out(&port->write_fifo);
 			spin_unlock_irqrestore(&port->lock, flags);
 		}
-		if (port->bulk_in_size)
-			usb_kill_urb(port->read_urb);
+		if (port->bulk_in_size) {
+			for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i)
+				usb_kill_urb(port->read_urbs[i]);
+		}
 	}
 }
 
@@ -308,19 +310,52 @@ int usb_serial_generic_chars_in_buffer(struct tty_struct *tty)
 	return chars;
 }
 
-int usb_serial_generic_submit_read_urb(struct usb_serial_port *port,
+static int usb_serial_generic_submit_read_urb(struct usb_serial_port *port,
+						int index, gfp_t mem_flags)
+{
+	int res;
+
+	if (!test_and_clear_bit(index, &port->read_urbs_free))
+		return 0;
+
+	dbg("%s - port %d, urb %d\n", __func__, port->number, index);
+
+	res = usb_submit_urb(port->read_urbs[index], mem_flags);
+	if (res) {
+		if (res != -EPERM) {
+			dev_err(&port->dev,
+					"%s - usb_submit_urb failed: %d\n",
+					__func__, res);
+		}
+		set_bit(index, &port->read_urbs_free);
+		return res;
+	}
+
+	return 0;
+}
+
+int usb_serial_generic_submit_read_urbs(struct usb_serial_port *port,
 					gfp_t mem_flags)
 {
-	int result;
+	int res;
+	int i;
 
-	result = usb_submit_urb(port->read_urb, mem_flags);
-	if (result && result != -EPERM) {
-		dev_err(&port->dev, "%s - error submitting urb: %d\n",
-							__func__, result);
+	dbg("%s - port %d", __func__, port->number);
+
+	for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) {
+		res = usb_serial_generic_submit_read_urb(port, i, mem_flags);
+		if (res)
+			goto err;
 	}
-	return result;
+
+	return 0;
+err:
+	for (; i >= 0; --i)
+		usb_kill_urb(port->read_urbs[i]);
+
+	return res;
 }
-EXPORT_SYMBOL_GPL(usb_serial_generic_submit_read_urb);
+EXPORT_SYMBOL_GPL(usb_serial_generic_submit_read_urbs);
 
 void usb_serial_generic_process_read_urb(struct urb *urb)
 {
@@ -356,14 +391,19 @@ void usb_serial_generic_read_bulk_callback(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	unsigned char *data = urb->transfer_buffer;
-	int status = urb->status;
 	unsigned long flags;
+	int i;
 
-	dbg("%s - port %d", __func__, port->number);
+	for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) {
+		if (urb == port->read_urbs[i])
+			break;
+	}
+	set_bit(i, &port->read_urbs_free);
 
-	if (unlikely(status != 0)) {
-		dbg("%s - nonzero read bulk status received: %d",
-		    __func__, status);
+	dbg("%s - port %d, urb %d, len %d\n", __func__, port->number, i,
+							urb->actual_length);
+	if (urb->status) {
+		dbg("%s - non-zero urb status: %d\n", __func__, urb->status);
 		return;
 	}
 
@@ -376,7 +416,7 @@ void usb_serial_generic_read_bulk_callback(struct urb *urb)
 	port->throttled = port->throttle_req;
 	if (!port->throttled) {
 		spin_unlock_irqrestore(&port->lock, flags);
-		usb_serial_generic_submit_read_urb(port, GFP_ATOMIC);
+		usb_serial_generic_submit_read_urb(port, i, GFP_ATOMIC);
 	} else
 		spin_unlock_irqrestore(&port->lock, flags);
 }
@@ -443,7 +483,7 @@ void usb_serial_generic_unthrottle(struct tty_struct *tty)
 	spin_unlock_irq(&port->lock);
 
 	if (was_throttled)
-		usb_serial_generic_submit_read_urb(port, GFP_KERNEL);
+		usb_serial_generic_submit_read_urbs(port, GFP_KERNEL);
 }
 EXPORT_SYMBOL_GPL(usb_serial_generic_unthrottle);
 
@@ -509,8 +549,9 @@ int usb_serial_generic_resume(struct usb_serial *serial)
 		if (!test_bit(ASYNCB_INITIALIZED, &port->port.flags))
 			continue;
 
-		if (port->read_urb) {
-			r = usb_submit_urb(port->read_urb, GFP_NOIO);
+		if (port->bulk_in_size) {
+			r = usb_serial_generic_submit_read_urbs(port,
+								GFP_NOIO);
 			if (r < 0)
 				c++;
 		}
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index bfbe6e5431d3..8d5190f3d8eb 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -562,7 +562,8 @@ static void kill_traffic(struct usb_serial_port *port)
 {
 	int i;
 
-	usb_kill_urb(port->read_urb);
+	for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i)
+		usb_kill_urb(port->read_urbs[i]);
 	for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i)
 		usb_kill_urb(port->write_urbs[i]);
 	/*
@@ -594,15 +595,17 @@ static void port_release(struct device *dev)
 	kill_traffic(port);
 	cancel_work_sync(&port->work);
 
-	usb_free_urb(port->read_urb);
 	usb_free_urb(port->interrupt_in_urb);
 	usb_free_urb(port->interrupt_out_urb);
+	for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) {
+		usb_free_urb(port->read_urbs[i]);
+		kfree(port->bulk_in_buffers[i]);
+	}
 	for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) {
 		usb_free_urb(port->write_urbs[i]);
 		kfree(port->bulk_out_buffers[i]);
 	}
 	kfifo_free(&port->write_fifo);
-	kfree(port->bulk_in_buffer);
 	kfree(port->interrupt_in_buffer);
 	kfree(port->interrupt_out_buffer);
 	kfree(port);
@@ -721,6 +724,7 @@ int usb_serial_probe(struct usb_interface *interface,
 	unsigned int minor;
 	int buffer_size;
 	int i;
+	int j;
 	int num_interrupt_in = 0;
 	int num_interrupt_out = 0;
 	int num_bulk_in = 0;
@@ -903,31 +907,39 @@ int usb_serial_probe(struct usb_interface *interface,
 	for (i = 0; i < num_bulk_in; ++i) {
 		endpoint = bulk_in_endpoint[i];
 		port = serial->port[i];
-		port->read_urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (!port->read_urb) {
-			dev_err(&interface->dev, "No free urbs available\n");
-			goto probe_error;
-		}
 		buffer_size = max_t(int, serial->type->bulk_in_size,
 				usb_endpoint_maxp(endpoint));
 		port->bulk_in_size = buffer_size;
 		port->bulk_in_endpointAddress = endpoint->bEndpointAddress;
-		port->bulk_in_buffer = kmalloc(buffer_size, GFP_KERNEL);
-		if (!port->bulk_in_buffer) {
-			dev_err(&interface->dev,
+
+		for (j = 0; j < ARRAY_SIZE(port->read_urbs); ++j) {
+			set_bit(j, &port->read_urbs_free);
+			port->read_urbs[j] = usb_alloc_urb(0, GFP_KERNEL);
+			if (!port->read_urbs[j]) {
+				dev_err(&interface->dev,
+						"No free urbs available\n");
+				goto probe_error;
+			}
+			port->bulk_in_buffers[j] = kmalloc(buffer_size,
+								GFP_KERNEL);
+			if (!port->bulk_in_buffers[j]) {
+				dev_err(&interface->dev,
 					"Couldn't allocate bulk_in_buffer\n");
-			goto probe_error;
-		}
-		usb_fill_bulk_urb(port->read_urb, dev,
-				usb_rcvbulkpipe(dev,
+				goto probe_error;
+			}
+			usb_fill_bulk_urb(port->read_urbs[j], dev,
+					usb_rcvbulkpipe(dev,
 						endpoint->bEndpointAddress),
-				port->bulk_in_buffer, buffer_size,
-				serial->type->read_bulk_callback, port);
+					port->bulk_in_buffers[j], buffer_size,
+					serial->type->read_bulk_callback,
+					port);
+		}
+
+		port->read_urb = port->read_urbs[0];
+		port->bulk_in_buffer = port->bulk_in_buffers[0];
 	}
 
 	for (i = 0; i < num_bulk_out; ++i) {
-		int j;
-
 		endpoint = bulk_out_endpoint[i];
 		port = serial->port[i];
 		if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL))
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 8ccd405e9005..4267a9c717ba 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -58,6 +58,9 @@ enum port_dev_state {
  * @read_urb: pointer to the bulk in struct urb for this port.
  * @bulk_in_endpointAddress: endpoint address for the bulk in pipe for this
  *	port.
+ * @bulk_in_buffers: pointers to the bulk in buffers for this port
+ * @read_urbs: pointers to the bulk in urbs for this port
+ * @read_urbs_free: status bitmap the for bulk in urbs
  * @bulk_out_buffer: pointer to the bulk out buffer for this port.
  * @bulk_out_size: the size of the bulk_out_buffer, in bytes.
  * @write_urb: pointer to the bulk out struct urb for this port.
@@ -98,6 +101,10 @@ struct usb_serial_port {
 	struct urb		*read_urb;
 	__u8			bulk_in_endpointAddress;
 
+	unsigned char		*bulk_in_buffers[2];
+	struct urb		*read_urbs[2];
+	unsigned long		read_urbs_free;
+
 	unsigned char		*bulk_out_buffer;
 	int			bulk_out_size;
 	struct urb		*write_urb;
@@ -338,7 +345,7 @@ extern void usb_serial_generic_disconnect(struct usb_serial *serial);
 extern void usb_serial_generic_release(struct usb_serial *serial);
 extern int usb_serial_generic_register(int debug);
 extern void usb_serial_generic_deregister(void);
-extern int usb_serial_generic_submit_read_urb(struct usb_serial_port *port,
+extern int usb_serial_generic_submit_read_urbs(struct usb_serial_port *port,
 						 gfp_t mem_flags);
 extern void usb_serial_generic_process_read_urb(struct urb *urb);
 extern int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port,
-- 
cgit v1.2.3


From 2c4d6bf295ae10ffcd84f0df6cb642598eb66603 Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Thu, 10 Nov 2011 14:58:26 +0100
Subject: USB: move usb_translate_errors to linux/usb.h

Move usb_translate_errors from usb core to linux/usb.h as it is meant to
be accessed from drivers.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/usb.h | 14 --------------
 include/linux/usb.h    | 13 +++++++++++++
 2 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index 3888778582c4..45e8479c377d 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -132,20 +132,6 @@ static inline int is_usb_device_driver(struct device_driver *drv)
 			for_devices;
 }
 
-/* translate USB error codes to codes user space understands */
-static inline int usb_translate_errors(int error_code)
-{
-	switch (error_code) {
-	case 0:
-	case -ENOMEM:
-	case -ENODEV:
-		return error_code;
-	default:
-		return -EIO;
-	}
-}
-
-
 /* for labeling diagnostics */
 extern const char *usbcore_name;
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index d3d0c1374334..2f05a7fa1ecf 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1598,6 +1598,19 @@ usb_maxpacket(struct usb_device *udev, int pipe, int is_out)
 
 /* ----------------------------------------------------------------------- */
 
+/* translate USB error codes to codes user space understands */
+static inline int usb_translate_errors(int error_code)
+{
+	switch (error_code) {
+	case 0:
+	case -ENOMEM:
+	case -ENODEV:
+		return error_code;
+	default:
+		return -EIO;
+	}
+}
+
 /* Events from the usb core */
 #define USB_DEVICE_ADD		0x0001
 #define USB_DEVICE_REMOVE	0x0002
-- 
cgit v1.2.3


From 64882709ef07f3eae29c7afc5aa8b84d12733a72 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@st.com>
Date: Tue, 15 Nov 2011 11:54:15 +0000
Subject: mdio-gpio: Add reset functionality to mdio-gpio driver(v2).

This patch adds phy reset functionality to mdio-gpio driver. Now
mdio_gpio_platform_data has new member as function pointer which can be
filled at the bsp level for a callback from phy infrastructure. Also the
mdio-bitbang driver fills-in the reset function of mii_bus structure.

Without this patch the bsp level code has to takecare of the reseting
PHY's on the bus, which become bit hacky for every bsp and
phy-infrastructure is ignored aswell.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-bitbang.c | 9 +++++++++
 drivers/net/phy/mdio-gpio.c    | 1 +
 include/linux/mdio-bitbang.h   | 2 ++
 include/linux/mdio-gpio.h      | 2 ++
 4 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/mdio-bitbang.c b/drivers/net/phy/mdio-bitbang.c
index 65391891d8c4..daec9b05d168 100644
--- a/drivers/net/phy/mdio-bitbang.c
+++ b/drivers/net/phy/mdio-bitbang.c
@@ -202,6 +202,14 @@ static int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val)
 	return 0;
 }
 
+static int mdiobb_reset(struct mii_bus *bus)
+{
+	struct mdiobb_ctrl *ctrl = bus->priv;
+	if (ctrl->reset)
+		ctrl->reset(bus);
+	return 0;
+}
+
 struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl)
 {
 	struct mii_bus *bus;
@@ -214,6 +222,7 @@ struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl)
 
 	bus->read = mdiobb_read;
 	bus->write = mdiobb_write;
+	bus->reset = mdiobb_reset;
 	bus->priv = ctrl;
 
 	return bus;
diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index 2843c90f712f..89c5a3eccc12 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c
@@ -95,6 +95,7 @@ static struct mii_bus * __devinit mdio_gpio_bus_init(struct device *dev,
 		goto out;
 
 	bitbang->ctrl.ops = &mdio_gpio_ops;
+	bitbang->ctrl.reset = pdata->reset;
 	bitbang->mdc = pdata->mdc;
 	bitbang->mdio = pdata->mdio;
 
diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h
index 0fe00cd4c93c..76f52bbbb2f4 100644
--- a/include/linux/mdio-bitbang.h
+++ b/include/linux/mdio-bitbang.h
@@ -32,6 +32,8 @@ struct mdiobb_ops {
 
 struct mdiobb_ctrl {
 	const struct mdiobb_ops *ops;
+	/* reset callback */
+	int (*reset)(struct mii_bus *bus);
 };
 
 /* The returned bus is not yet registered with the phy layer. */
diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h
index e9d3fdfe41d7..7c9fe3c2be73 100644
--- a/include/linux/mdio-gpio.h
+++ b/include/linux/mdio-gpio.h
@@ -20,6 +20,8 @@ struct mdio_gpio_platform_data {
 
 	unsigned int phy_mask;
 	int irqs[PHY_MAX_ADDR];
+	/* reset callback */
+	int (*reset)(struct mii_bus *bus);
 };
 
 #endif /* __LINUX_MDIO_GPIO_H */
-- 
cgit v1.2.3


From 027d7dacf73273dbe07a75b2ef5579616f17272c Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Wed, 9 Nov 2011 21:33:43 +0100
Subject: TTY: serial, cleanup include file

There are some functions (uart_handle_dcd_change, _handle_cts_change,
_insert_char) which are big enough to not be inlined. So move them
from .h to .c. We need to export them so that modules can actually use
them.

They will be even bigger when we introduce tty refcounting to them.

While at it, cleanup the "Proud member of Uglyhacks'R'US". It means,
define uart_handle_sysrq_char only when SUPPORT_SYSRQ is set.
Otherwise define it as a macro. This is needed for some arm driver
where the second parameter is undefined if expanded.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/serial_core.c | 82 +++++++++++++++++++++++++++++++++
 include/linux/serial_core.h      | 99 +++++-----------------------------------
 2 files changed, 94 insertions(+), 87 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 0406d7ff505e..8d825a36c842 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -22,6 +22,7 @@
  */
 #include <linux/module.h>
 #include <linux/tty.h>
+#include <linux/tty_flip.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/console.h>
@@ -2467,6 +2468,87 @@ int uart_match_port(struct uart_port *port1, struct uart_port *port2)
 }
 EXPORT_SYMBOL(uart_match_port);
 
+/**
+ *	uart_handle_dcd_change - handle a change of carrier detect state
+ *	@uport: uart_port structure for the open port
+ *	@status: new carrier detect status, nonzero if active
+ */
+void uart_handle_dcd_change(struct uart_port *uport, unsigned int status)
+{
+	struct uart_state *state = uport->state;
+	struct tty_port *port = &state->port;
+	struct tty_ldisc *ld = tty_ldisc_ref(port->tty);
+	struct pps_event_time ts;
+
+	if (ld && ld->ops->dcd_change)
+		pps_get_ts(&ts);
+
+	uport->icount.dcd++;
+#ifdef CONFIG_HARD_PPS
+	if ((uport->flags & UPF_HARDPPS_CD) && status)
+		hardpps();
+#endif
+
+	if (port->flags & ASYNC_CHECK_CD) {
+		if (status)
+			wake_up_interruptible(&port->open_wait);
+		else if (port->tty)
+			tty_hangup(port->tty);
+	}
+
+	if (ld && ld->ops->dcd_change)
+		ld->ops->dcd_change(port->tty, status, &ts);
+	if (ld)
+		tty_ldisc_deref(ld);
+}
+EXPORT_SYMBOL_GPL(uart_handle_dcd_change);
+
+/**
+ *	uart_handle_cts_change - handle a change of clear-to-send state
+ *	@uport: uart_port structure for the open port
+ *	@status: new clear to send status, nonzero if active
+ */
+void uart_handle_cts_change(struct uart_port *uport, unsigned int status)
+{
+	struct tty_port *port = &uport->state->port;
+	struct tty_struct *tty = port->tty;
+
+	uport->icount.cts++;
+
+	if (port->flags & ASYNC_CTS_FLOW) {
+		if (tty->hw_stopped) {
+			if (status) {
+				tty->hw_stopped = 0;
+				uport->ops->start_tx(uport);
+				uart_write_wakeup(uport);
+			}
+		} else {
+			if (!status) {
+				tty->hw_stopped = 1;
+				uport->ops->stop_tx(uport);
+			}
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(uart_handle_cts_change);
+
+void uart_insert_char(struct uart_port *port, unsigned int status,
+		 unsigned int overrun, unsigned int ch, unsigned int flag)
+{
+	struct tty_struct *tty = port->state->port.tty;
+
+	if ((status & port->ignore_status_mask & ~overrun) == 0)
+		tty_insert_flip_char(tty, ch, flag);
+
+	/*
+	 * Overrun is special.  Since it's reported immediately,
+	 * it doesn't affect the current character.
+	 */
+	if (status & ~port->ignore_status_mask & overrun)
+		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+}
+EXPORT_SYMBOL_GPL(uart_insert_char);
+
 EXPORT_SYMBOL(uart_write_wakeup);
 EXPORT_SYMBOL(uart_register_driver);
 EXPORT_SYMBOL(uart_unregister_driver);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index eadf33d0abba..945e02cae614 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -483,10 +483,19 @@ static inline int uart_tx_stopped(struct uart_port *port)
 /*
  * The following are helper functions for the low level drivers.
  */
+
+extern void uart_handle_dcd_change(struct uart_port *uport,
+		unsigned int status);
+extern void uart_handle_cts_change(struct uart_port *uport,
+		unsigned int status);
+
+extern void uart_insert_char(struct uart_port *port, unsigned int status,
+		 unsigned int overrun, unsigned int ch, unsigned int flag);
+
+#ifdef SUPPORT_SYSRQ
 static inline int
 uart_handle_sysrq_char(struct uart_port *port, unsigned int ch)
 {
-#ifdef SUPPORT_SYSRQ
 	if (port->sysrq) {
 		if (ch && time_before(jiffies, port->sysrq)) {
 			handle_sysrq(ch);
@@ -495,11 +504,10 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch)
 		}
 		port->sysrq = 0;
 	}
-#endif
 	return 0;
 }
-#ifndef SUPPORT_SYSRQ
-#define uart_handle_sysrq_char(port,ch) uart_handle_sysrq_char(port, 0)
+#else
+#define uart_handle_sysrq_char(port,ch) ({ (void)port; 0; })
 #endif
 
 /*
@@ -522,89 +530,6 @@ static inline int uart_handle_break(struct uart_port *port)
 	return 0;
 }
 
-/**
- *	uart_handle_dcd_change - handle a change of carrier detect state
- *	@uport: uart_port structure for the open port
- *	@status: new carrier detect status, nonzero if active
- */
-static inline void
-uart_handle_dcd_change(struct uart_port *uport, unsigned int status)
-{
-	struct uart_state *state = uport->state;
-	struct tty_port *port = &state->port;
-	struct tty_ldisc *ld = tty_ldisc_ref(port->tty);
-	struct pps_event_time ts;
-
-	if (ld && ld->ops->dcd_change)
-		pps_get_ts(&ts);
-
-	uport->icount.dcd++;
-#ifdef CONFIG_HARD_PPS
-	if ((uport->flags & UPF_HARDPPS_CD) && status)
-		hardpps();
-#endif
-
-	if (port->flags & ASYNC_CHECK_CD) {
-		if (status)
-			wake_up_interruptible(&port->open_wait);
-		else if (port->tty)
-			tty_hangup(port->tty);
-	}
-
-	if (ld && ld->ops->dcd_change)
-		ld->ops->dcd_change(port->tty, status, &ts);
-	if (ld)
-		tty_ldisc_deref(ld);
-}
-
-/**
- *	uart_handle_cts_change - handle a change of clear-to-send state
- *	@uport: uart_port structure for the open port
- *	@status: new clear to send status, nonzero if active
- */
-static inline void
-uart_handle_cts_change(struct uart_port *uport, unsigned int status)
-{
-	struct tty_port *port = &uport->state->port;
-	struct tty_struct *tty = port->tty;
-
-	uport->icount.cts++;
-
-	if (port->flags & ASYNC_CTS_FLOW) {
-		if (tty->hw_stopped) {
-			if (status) {
-				tty->hw_stopped = 0;
-				uport->ops->start_tx(uport);
-				uart_write_wakeup(uport);
-			}
-		} else {
-			if (!status) {
-				tty->hw_stopped = 1;
-				uport->ops->stop_tx(uport);
-			}
-		}
-	}
-}
-
-#include <linux/tty_flip.h>
-
-static inline void
-uart_insert_char(struct uart_port *port, unsigned int status,
-		 unsigned int overrun, unsigned int ch, unsigned int flag)
-{
-	struct tty_struct *tty = port->state->port.tty;
-
-	if ((status & port->ignore_status_mask & ~overrun) == 0)
-		tty_insert_flip_char(tty, ch, flag);
-
-	/*
-	 * Overrun is special.  Since it's reported immediately,
-	 * it doesn't affect the current character.
-	 */
-	if (status & ~port->ignore_status_mask & overrun)
-		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
-}
-
 /*
  *	UART_ENABLE_MS - determine if port should enable modem status irqs
  */
-- 
cgit v1.2.3


From 72f8c0bfa0de64c68ee59f40eb9b2683bffffbb0 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Tue, 25 Oct 2011 15:16:47 +0200
Subject: lib: devres: add convenience function to remap a resource

Almost every platform_driver does the three steps get_resource,
request_mem_region, ioremap. This does not only lead to a lot of code
duplication, but also a huge number of similar error strings and
inconsistent error codes on failure. So, introduce a helper function
which simplifies remapping a resource and make it hard to do something
wrong and add documentation for it.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/driver-model/devres.txt |  1 +
 include/linux/device.h                |  3 +++
 lib/devres.c                          | 51 +++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index d79aead9418b..10c64c8a13d4 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -262,6 +262,7 @@ IOMAP
   devm_ioremap()
   devm_ioremap_nocache()
   devm_iounmap()
+  devm_request_and_ioremap() : checks resource, requests region, ioremaps
   pcim_iomap()
   pcim_iounmap()
   pcim_iomap_table()	: array of mapped addresses indexed by BAR
diff --git a/include/linux/device.h b/include/linux/device.h
index ffbcf95cd97d..c6335982774c 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -490,6 +490,9 @@ extern int devres_release_group(struct device *dev, void *id);
 extern void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp);
 extern void devm_kfree(struct device *dev, void *p);
 
+void __iomem *devm_request_and_ioremap(struct device *dev,
+			struct resource *res);
+
 struct device_dma_parameters {
 	/*
 	 * a low level driver may set these to teach IOMMU code about
diff --git a/lib/devres.c b/lib/devres.c
index 78777aea5b34..4fbc09e6e9e6 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -85,6 +85,57 @@ void devm_iounmap(struct device *dev, void __iomem *addr)
 }
 EXPORT_SYMBOL(devm_iounmap);
 
+/**
+ * devm_request_and_ioremap() - Check, request region, and ioremap resource
+ * @dev: Generic device to handle the resource for
+ * @res: resource to be handled
+ *
+ * Takes all necessary steps to ioremap a mem resource. Uses managed device, so
+ * everything is undone on driver detach. Checks arguments, so you can feed
+ * it the result from e.g. platform_get_resource() directly. Returns the
+ * remapped pointer or NULL on error. Usage example:
+ *
+ *	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ *	base = devm_request_and_ioremap(&pdev->dev, res);
+ *	if (!base)
+ *		return -EADDRNOTAVAIL;
+ */
+void __iomem *devm_request_and_ioremap(struct device *dev,
+			struct resource *res)
+{
+	resource_size_t size;
+	const char *name;
+	void __iomem *dest_ptr;
+
+	BUG_ON(!dev);
+
+	if (!res || resource_type(res) != IORESOURCE_MEM) {
+		dev_err(dev, "invalid resource\n");
+		return NULL;
+	}
+
+	size = resource_size(res);
+	name = res->name ?: dev_name(dev);
+
+	if (!devm_request_mem_region(dev, res->start, size, name)) {
+		dev_err(dev, "can't request region for resource %pR\n", res);
+		return NULL;
+	}
+
+	if (res->flags & IORESOURCE_CACHEABLE)
+		dest_ptr = devm_ioremap(dev, res->start, size);
+	else
+		dest_ptr = devm_ioremap_nocache(dev, res->start, size);
+
+	if (!dest_ptr) {
+		dev_err(dev, "ioremap failed for resource %pR\n", res);
+		devm_release_mem_region(dev, res->start, size);
+	}
+
+	return dest_ptr;
+}
+EXPORT_SYMBOL(devm_request_and_ioremap);
+
 #ifdef CONFIG_HAS_IOPORT
 /*
  * Generic iomap devres
-- 
cgit v1.2.3


From 1933ca8771585d43d3d2099c0c9ba7ca6b96e303 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 15 Nov 2011 15:11:44 -0800
Subject: include/linux/security.h: fix security_inode_init_security() arg

Make the security_inode_init_security() initxattrs arg const, to match the
non-stubbed version of that function.

Cc: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 19d8e04e1688..051d4e96cb1f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2044,7 +2044,7 @@ static inline void security_inode_free(struct inode *inode)
 static inline int security_inode_init_security(struct inode *inode,
 						struct inode *dir,
 						const struct qstr *qstr,
-						initxattrs initxattrs,
+						const initxattrs initxattrs,
 						void *fs_data)
 {
 	return 0;
-- 
cgit v1.2.3


From 720e4616e8fd85284ef1addd8b8d93d8415e8dbc Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Wed, 16 Nov 2011 16:28:17 +0100
Subject: regmap: Make reg_config reg_defaults const

The reg_defaults field usually points to a static per driver array, which should
not be modified. Make requirement this explicit by making reg_defaults const.
To allow this the regcache_init code needs some minor changes. Previoulsy the
reg_config was not available in regcache_init and regmap->reg_defaults was used
to pass the default register set to regcache_init. Now that the reg_config is
available we can work on it directly.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/regcache.c | 5 ++---
 include/linux/regmap.h         | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index 0ad6cfb2c8cc..d687df6ebdb0 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -108,7 +108,6 @@ int regcache_init(struct regmap *map, const struct regmap_config *config)
 		return -EINVAL;
 	}
 
-	map->reg_defaults = config->reg_defaults;
 	map->num_reg_defaults = config->num_reg_defaults;
 	map->num_reg_defaults_raw = config->num_reg_defaults_raw;
 	map->reg_defaults_raw = config->reg_defaults_raw;
@@ -127,10 +126,10 @@ int regcache_init(struct regmap *map, const struct regmap_config *config)
 	 * won't vanish from under us.  We'll need to make
 	 * a copy of it.
 	 */
-	if (map->reg_defaults) {
+	if (config->reg_defaults) {
 		if (!map->num_reg_defaults)
 			return -EINVAL;
-		tmp_buf = kmemdup(map->reg_defaults, map->num_reg_defaults *
+		tmp_buf = kmemdup(config->reg_defaults, map->num_reg_defaults *
 				  sizeof(struct reg_default), GFP_KERNEL);
 		if (!tmp_buf)
 			return -ENOMEM;
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 1e4ec2b6c2ea..458f15f4c37c 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -83,7 +83,7 @@ struct regmap_config {
 	bool (*precious_reg)(struct device *dev, unsigned int reg);
 
 	unsigned int max_register;
-	struct reg_default *reg_defaults;
+	const struct reg_default *reg_defaults;
 	unsigned int num_reg_defaults;
 	enum regcache_type cache_type;
 	const void *reg_defaults_raw;
-- 
cgit v1.2.3


From 66846048f55c6c05a4c46c2daabb773173f8f28d Mon Sep 17 00:00:00 2001
From: Rick Jones <rick.jones2@hp.com>
Date: Mon, 14 Nov 2011 14:17:08 +0000
Subject: enable virtio_net to return bus_info in ethtool -i consistent with
 emulated NICs

Add a new .bus_name to virtio_config_ops then modify virtio_net to
call through to it in an ethtool .get_drvinfo routine to report
bus_info in ethtool -i output which is consistent with other
emulated NICs and the output of lspci.

Signed-off-by: Rick Jones <rick.jones2@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/lguest/lguest_device.c |  6 ++++++
 drivers/net/virtio_net.c       | 15 +++++++++++++++
 drivers/s390/kvm/kvm_virtio.c  |  6 ++++++
 drivers/virtio/virtio_mmio.c   |  6 ++++++
 drivers/virtio/virtio_pci.c    |  8 ++++++++
 include/linux/virtio_config.h  | 14 ++++++++++++++
 6 files changed, 55 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 0dc30ffde5ad..595d73197016 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -381,6 +381,11 @@ error:
 	return PTR_ERR(vqs[i]);
 }
 
+static const char *lg_bus_name(struct virtio_device *vdev)
+{
+	return "";
+}
+
 /* The ops structure which hooks everything together. */
 static struct virtio_config_ops lguest_config_ops = {
 	.get_features = lg_get_features,
@@ -392,6 +397,7 @@ static struct virtio_config_ops lguest_config_ops = {
 	.reset = lg_reset,
 	.find_vqs = lg_find_vqs,
 	.del_vqs = lg_del_vqs,
+	.bus_name = lg_bus_name,
 };
 
 /*
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6ee8410443c4..4dc9d842a7a3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -39,6 +39,7 @@ module_param(gso, bool, 0444);
 #define GOOD_COPY_LEN	128
 
 #define VIRTNET_SEND_COMMAND_SG_MAX    2
+#define VIRTNET_DRIVER_VERSION "1.0.0"
 
 struct virtnet_stats {
 	struct u64_stats_sync syncp;
@@ -889,7 +890,21 @@ static void virtnet_get_ringparam(struct net_device *dev,
 
 }
 
+
+static void virtnet_get_drvinfo(struct net_device *dev,
+				struct ethtool_drvinfo *info)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct virtio_device *vdev = vi->vdev;
+
+	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+	strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
+	strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
+
+}
+
 static const struct ethtool_ops virtnet_ethtool_ops = {
+	.get_drvinfo = virtnet_get_drvinfo,
 	.get_link = ethtool_op_get_link,
 	.get_ringparam = virtnet_get_ringparam,
 };
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 94f49ffa70ba..8af868bab20b 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -263,6 +263,11 @@ error:
 	return PTR_ERR(vqs[i]);
 }
 
+static const char *kvm_bus_name(struct virtio_device *vdev)
+{
+	return "";
+}
+
 /*
  * The config ops structure as defined by virtio config
  */
@@ -276,6 +281,7 @@ static struct virtio_config_ops kvm_vq_configspace_ops = {
 	.reset = kvm_reset,
 	.find_vqs = kvm_find_vqs,
 	.del_vqs = kvm_del_vqs,
+	.bus_name = kvm_bus_name,
 };
 
 /*
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index acc5e43c373e..2f57380d7ed4 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -361,7 +361,12 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 	return 0;
 }
 
+static const char *vm_bus_name(struct virtio_device *vdev)
+{
+	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
 
+	return vm_dev->pdev->name;
+}
 
 static struct virtio_config_ops virtio_mmio_config_ops = {
 	.get		= vm_get,
@@ -373,6 +378,7 @@ static struct virtio_config_ops virtio_mmio_config_ops = {
 	.del_vqs	= vm_del_vqs,
 	.get_features	= vm_get_features,
 	.finalize_features = vm_finalize_features,
+	.bus_name	= vm_bus_name,
 };
 
 
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 79a31e5b4b68..764ec05ea3e8 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -580,6 +580,13 @@ static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 				  false, false);
 }
 
+static const char *vp_bus_name(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+	return pci_name(vp_dev->pci_dev);
+}
+
 static struct virtio_config_ops virtio_pci_config_ops = {
 	.get		= vp_get,
 	.set		= vp_set,
@@ -590,6 +597,7 @@ static struct virtio_config_ops virtio_pci_config_ops = {
 	.del_vqs	= vp_del_vqs,
 	.get_features	= vp_get_features,
 	.finalize_features = vp_finalize_features,
+	.bus_name	= vp_bus_name,
 };
 
 static void virtio_pci_release_dev(struct device *_d)
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index add4790b21fe..63f98d0a8efa 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -100,6 +100,10 @@
  *	vdev: the virtio_device
  *	This gives the final feature bits for the device: it can change
  *	the dev->feature bits if it wants.
+ * @bus_name: return the bus name associated with the device
+ *	vdev: the virtio_device
+ *      This returns a pointer to the bus name a la pci_name from which
+ *      the caller can then copy.
  */
 typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops {
@@ -117,6 +121,7 @@ struct virtio_config_ops {
 	void (*del_vqs)(struct virtio_device *);
 	u32 (*get_features)(struct virtio_device *vdev);
 	void (*finalize_features)(struct virtio_device *vdev);
+	const char *(*bus_name)(struct virtio_device *vdev);
 };
 
 /* If driver didn't advertise the feature, it will never appear. */
@@ -182,5 +187,14 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
 		return ERR_PTR(err);
 	return vq;
 }
+
+static inline
+const char *virtio_bus_name(struct virtio_device *vdev)
+{
+	if (!vdev->config->bus_name)
+		return "virtio";
+	return vdev->config->bus_name(vdev);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_VIRTIO_CONFIG_H */
-- 
cgit v1.2.3


From 7845bc3964756240863ae453ffe4f7ee27ddc954 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 16 Nov 2011 11:15:54 +0000
Subject: KEYS: Give key types their own lockdep class for key->sem

Give keys their own lockdep class to differentiate them from each other in case
a key of one type has to refer to a key of another type.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/key-type.h | 1 +
 security/keys/key.c      | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 9efd081bb31e..39e3c082c49d 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -92,6 +92,7 @@ struct key_type {
 
 	/* internal fields */
 	struct list_head	link;		/* link in types list */
+	struct lock_class_key	lock_class;	/* key->sem lock class */
 };
 
 extern struct key_type key_type_keyring;
diff --git a/security/keys/key.c b/security/keys/key.c
index 4414abddcb5b..4f64c7267afb 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -291,6 +291,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 
 	atomic_set(&key->usage, 1);
 	init_rwsem(&key->sem);
+	lockdep_set_class(&key->sem, &type->lock_class);
 	key->type = type;
 	key->user = user;
 	key->quotalen = quotalen;
@@ -946,6 +947,8 @@ int register_key_type(struct key_type *ktype)
 	struct key_type *p;
 	int ret;
 
+	memset(&ktype->lock_class, 0, sizeof(ktype->lock_class));
+
 	ret = -EEXIST;
 	down_write(&key_types_sem);
 
-- 
cgit v1.2.3


From bc5787c6125cc2c868eaace46c46ce6e83dcfcb6 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 15 Nov 2011 15:29:55 +0000
Subject: net: remove legacy ethtool ops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As all drivers are converted, we may now remove discrete offload setting
callback handling.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Acked-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h   |  53 ------
 include/linux/netdevice.h |  16 --
 net/8021q/vlan_dev.c      |   6 +-
 net/core/dev.c            |  12 +-
 net/core/ethtool.c        | 418 +++-------------------------------------------
 5 files changed, 26 insertions(+), 479 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index de33de1e2052..20db5b275c3f 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -724,9 +724,6 @@ enum ethtool_sfeatures_retval_bits {
 
 #include <linux/rculist.h>
 
-/* needed by dev_disable_lro() */
-extern int __ethtool_set_flags(struct net_device *dev, u32 flags);
-
 extern int __ethtool_get_settings(struct net_device *dev,
 				  struct ethtool_cmd *cmd);
 
@@ -750,19 +747,6 @@ struct net_device;
 
 /* Some generic methods drivers may use in their ethtool_ops */
 u32 ethtool_op_get_link(struct net_device *dev);
-u32 ethtool_op_get_tx_csum(struct net_device *dev);
-int ethtool_op_set_tx_csum(struct net_device *dev, u32 data);
-int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data);
-int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data);
-u32 ethtool_op_get_sg(struct net_device *dev);
-int ethtool_op_set_sg(struct net_device *dev, u32 data);
-u32 ethtool_op_get_tso(struct net_device *dev);
-int ethtool_op_set_tso(struct net_device *dev, u32 data);
-u32 ethtool_op_get_ufo(struct net_device *dev);
-int ethtool_op_set_ufo(struct net_device *dev, u32 data);
-u32 ethtool_op_get_flags(struct net_device *dev);
-int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported);
-bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported);
 
 /**
  * struct ethtool_ops - optional netdev operations
@@ -807,22 +791,6 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported);
  * @get_pauseparam: Report pause parameters
  * @set_pauseparam: Set pause parameters.  Returns a negative error code
  *	or zero.
- * @get_rx_csum: Deprecated in favour of the netdev feature %NETIF_F_RXCSUM.
- *	Report whether receive checksums are turned on or off.
- * @set_rx_csum: Deprecated in favour of generic netdev features.  Turn
- *	receive checksum on or off.  Returns a negative error code or zero.
- * @get_tx_csum: Deprecated as redundant. Report whether transmit checksums
- *	are turned on or off.
- * @set_tx_csum: Deprecated in favour of generic netdev features.  Turn
- *	transmit checksums on or off.  Returns a negative error code or zero.
- * @get_sg: Deprecated as redundant.  Report whether scatter-gather is
- *	enabled.  
- * @set_sg: Deprecated in favour of generic netdev features.  Turn
- *	scatter-gather on or off. Returns a negative error code or zero.
- * @get_tso: Deprecated as redundant.  Report whether TCP segmentation
- *	offload is enabled.
- * @set_tso: Deprecated in favour of generic netdev features.  Turn TCP
- *	segmentation offload on or off.  Returns a negative error code or zero.
  * @self_test: Run specified self-tests
  * @get_strings: Return a set of strings that describe the requested objects
  * @set_phys_id: Identify the physical devices, e.g. by flashing an LED
@@ -844,15 +812,6 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported);
  *	negative error code or zero.
  * @complete: Function to be called after any other operation except
  *	@begin.  Will be called even if the other operation failed.
- * @get_ufo: Deprecated as redundant.  Report whether UDP fragmentation
- *	offload is enabled.
- * @set_ufo: Deprecated in favour of generic netdev features.  Turn UDP
- *	fragmentation offload on or off.  Returns a negative error code or zero.
- * @get_flags: Deprecated as redundant.  Report features included in
- *	&enum ethtool_flags that are enabled.  
- * @set_flags: Deprecated in favour of generic netdev features.  Turn
- *	features included in &enum ethtool_flags on or off.  Returns a
- *	negative error code or zero.
  * @get_priv_flags: Report driver-specific feature flags.
  * @set_priv_flags: Set driver-specific feature flags.  Returns a negative
  *	error code or zero.
@@ -917,14 +876,6 @@ struct ethtool_ops {
 				  struct ethtool_pauseparam*);
 	int	(*set_pauseparam)(struct net_device *,
 				  struct ethtool_pauseparam*);
-	u32	(*get_rx_csum)(struct net_device *);
-	int	(*set_rx_csum)(struct net_device *, u32);
-	u32	(*get_tx_csum)(struct net_device *);
-	int	(*set_tx_csum)(struct net_device *, u32);
-	u32	(*get_sg)(struct net_device *);
-	int	(*set_sg)(struct net_device *, u32);
-	u32	(*get_tso)(struct net_device *);
-	int	(*set_tso)(struct net_device *, u32);
 	void	(*self_test)(struct net_device *, struct ethtool_test *, u64 *);
 	void	(*get_strings)(struct net_device *, u32 stringset, u8 *);
 	int	(*set_phys_id)(struct net_device *, enum ethtool_phys_id_state);
@@ -932,10 +883,6 @@ struct ethtool_ops {
 				     struct ethtool_stats *, u64 *);
 	int	(*begin)(struct net_device *);
 	void	(*complete)(struct net_device *);
-	u32	(*get_ufo)(struct net_device *);
-	int	(*set_ufo)(struct net_device *, u32);
-	u32	(*get_flags)(struct net_device *);
-	int	(*set_flags)(struct net_device *, u32);
 	u32	(*get_priv_flags)(struct net_device *);
 	int	(*set_priv_flags)(struct net_device *, u32);
 	int	(*get_sset_count)(struct net_device *, int);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cbeb5867cff7..e34717a792b4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2592,22 +2592,6 @@ static inline int netif_is_bond_slave(struct net_device *dev)
 
 extern struct pernet_operations __net_initdata loopback_net_ops;
 
-static inline u32 dev_ethtool_get_rx_csum(struct net_device *dev)
-{
-	if (dev->features & NETIF_F_RXCSUM)
-		return 1;
-	if (!dev->ethtool_ops || !dev->ethtool_ops->get_rx_csum)
-		return 0;
-	return dev->ethtool_ops->get_rx_csum(dev);
-}
-
-static inline u32 dev_ethtool_get_flags(struct net_device *dev)
-{
-	if (!dev->ethtool_ops || !dev->ethtool_ops->get_flags)
-		return 0;
-	return dev->ethtool_ops->get_flags(dev);
-}
-
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* netdev_printk helpers, similar to dev_printk */
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index bc2528624583..6a4e0cb897b7 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -596,13 +596,11 @@ static u32 vlan_dev_fix_features(struct net_device *dev, u32 features)
 	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
 	u32 old_features = features;
 
-	features &= real_dev->features;
 	features &= real_dev->vlan_features;
+	features |= NETIF_F_RXCSUM;
+	features &= real_dev->features;
 
 	features |= old_features & NETIF_F_SOFT_FEATURES;
-
-	if (dev_ethtool_get_rx_csum(real_dev))
-		features |= NETIF_F_RXCSUM;
 	features |= NETIF_F_LLTX;
 
 	return features;
diff --git a/net/core/dev.c b/net/core/dev.c
index 51f89cd0a3f4..185e246d61fd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1321,8 +1321,6 @@ EXPORT_SYMBOL(dev_close);
  */
 void dev_disable_lro(struct net_device *dev)
 {
-	u32 flags;
-
 	/*
 	 * If we're trying to disable lro on a vlan device
 	 * use the underlying physical device instead
@@ -1330,15 +1328,9 @@ void dev_disable_lro(struct net_device *dev)
 	if (is_vlan_dev(dev))
 		dev = vlan_dev_real_dev(dev);
 
-	if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
-		flags = dev->ethtool_ops->get_flags(dev);
-	else
-		flags = ethtool_op_get_flags(dev);
-
-	if (!(flags & ETH_FLAG_LRO))
-		return;
+	dev->wanted_features &= ~NETIF_F_LRO;
+	netdev_update_features(dev);
 
-	__ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
 	if (unlikely(dev->features & NETIF_F_LRO))
 		netdev_WARN(dev, "failed to disable LRO!\n");
 }
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f44481707124..db8a77bb557b 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -36,236 +36,10 @@ u32 ethtool_op_get_link(struct net_device *dev)
 }
 EXPORT_SYMBOL(ethtool_op_get_link);
 
-u32 ethtool_op_get_tx_csum(struct net_device *dev)
-{
-	return (dev->features & NETIF_F_ALL_CSUM) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_tx_csum);
-
-int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_IP_CSUM;
-	else
-		dev->features &= ~NETIF_F_IP_CSUM;
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_csum);
-
-int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_HW_CSUM;
-	else
-		dev->features &= ~NETIF_F_HW_CSUM;
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
-
-int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-	else
-		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
-
-u32 ethtool_op_get_sg(struct net_device *dev)
-{
-	return (dev->features & NETIF_F_SG) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_sg);
-
-int ethtool_op_set_sg(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_SG;
-	else
-		dev->features &= ~NETIF_F_SG;
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_sg);
-
-u32 ethtool_op_get_tso(struct net_device *dev)
-{
-	return (dev->features & NETIF_F_TSO) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_tso);
-
-int ethtool_op_set_tso(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_TSO;
-	else
-		dev->features &= ~NETIF_F_TSO;
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tso);
-
-u32 ethtool_op_get_ufo(struct net_device *dev)
-{
-	return (dev->features & NETIF_F_UFO) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_ufo);
-
-int ethtool_op_set_ufo(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_UFO;
-	else
-		dev->features &= ~NETIF_F_UFO;
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_ufo);
-
-/* the following list of flags are the same as their associated
- * NETIF_F_xxx values in include/linux/netdevice.h
- */
-static const u32 flags_dup_features =
-	(ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE |
-	 ETH_FLAG_RXHASH);
-
-u32 ethtool_op_get_flags(struct net_device *dev)
-{
-	/* in the future, this function will probably contain additional
-	 * handling for flags which are not so easily handled
-	 * by a simple masking operation
-	 */
-
-	return dev->features & flags_dup_features;
-}
-EXPORT_SYMBOL(ethtool_op_get_flags);
-
-/* Check if device can enable (or disable) particular feature coded in "data"
- * argument. Flags "supported" describe features that can be toggled by device.
- * If feature can not be toggled, it state (enabled or disabled) must match
- * hardcoded device features state, otherwise flags are marked as invalid.
- */
-bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported)
-{
-	u32 features = dev->features & flags_dup_features;
-	/* "data" can contain only flags_dup_features bits,
-	 * see __ethtool_set_flags */
-
-	return (features & ~supported) != (data & ~supported);
-}
-EXPORT_SYMBOL(ethtool_invalid_flags);
-
-int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
-{
-	if (ethtool_invalid_flags(dev, data, supported))
-		return -EINVAL;
-
-	dev->features = ((dev->features & ~flags_dup_features) |
-			 (data & flags_dup_features));
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_flags);
-
 /* Handlers for each ethtool command */
 
 #define ETHTOOL_DEV_FEATURE_WORDS	1
 
-static void ethtool_get_features_compat(struct net_device *dev,
-	struct ethtool_get_features_block *features)
-{
-	if (!dev->ethtool_ops)
-		return;
-
-	/* getting RX checksum */
-	if (dev->ethtool_ops->get_rx_csum)
-		if (dev->ethtool_ops->get_rx_csum(dev))
-			features[0].active |= NETIF_F_RXCSUM;
-
-	/* mark legacy-changeable features */
-	if (dev->ethtool_ops->set_sg)
-		features[0].available |= NETIF_F_SG;
-	if (dev->ethtool_ops->set_tx_csum)
-		features[0].available |= NETIF_F_ALL_CSUM;
-	if (dev->ethtool_ops->set_tso)
-		features[0].available |= NETIF_F_ALL_TSO;
-	if (dev->ethtool_ops->set_rx_csum)
-		features[0].available |= NETIF_F_RXCSUM;
-	if (dev->ethtool_ops->set_flags)
-		features[0].available |= flags_dup_features;
-}
-
-static int ethtool_set_feature_compat(struct net_device *dev,
-	int (*legacy_set)(struct net_device *, u32),
-	struct ethtool_set_features_block *features, u32 mask)
-{
-	u32 do_set;
-
-	if (!legacy_set)
-		return 0;
-
-	if (!(features[0].valid & mask))
-		return 0;
-
-	features[0].valid &= ~mask;
-
-	do_set = !!(features[0].requested & mask);
-
-	if (legacy_set(dev, do_set) < 0)
-		netdev_info(dev,
-			"Legacy feature change (%s) failed for 0x%08x\n",
-			do_set ? "set" : "clear", mask);
-
-	return 1;
-}
-
-static int ethtool_set_flags_compat(struct net_device *dev,
-	int (*legacy_set)(struct net_device *, u32),
-	struct ethtool_set_features_block *features, u32 mask)
-{
-	u32 value;
-
-	if (!legacy_set)
-		return 0;
-
-	if (!(features[0].valid & mask))
-		return 0;
-
-	value = dev->features & ~features[0].valid;
-	value |= features[0].requested;
-
-	features[0].valid &= ~mask;
-
-	if (legacy_set(dev, value & mask) < 0)
-		netdev_info(dev, "Legacy flags change failed\n");
-
-	return 1;
-}
-
-static int ethtool_set_features_compat(struct net_device *dev,
-	struct ethtool_set_features_block *features)
-{
-	int compat;
-
-	if (!dev->ethtool_ops)
-		return 0;
-
-	compat  = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg,
-		features, NETIF_F_SG);
-	compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum,
-		features, NETIF_F_ALL_CSUM);
-	compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso,
-		features, NETIF_F_ALL_TSO);
-	compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum,
-		features, NETIF_F_RXCSUM);
-	compat |= ethtool_set_flags_compat(dev, dev->ethtool_ops->set_flags,
-		features, flags_dup_features);
-
-	return compat;
-}
-
 static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_gfeatures cmd = {
@@ -283,8 +57,6 @@ static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
 	u32 __user *sizeaddr;
 	u32 copy_size;
 
-	ethtool_get_features_compat(dev, features);
-
 	sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size);
 	if (get_user(copy_size, sizeaddr))
 		return -EFAULT;
@@ -320,9 +92,6 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
 	if (features[0].valid & ~NETIF_F_ETHTOOL_BITS)
 		return -EINVAL;
 
-	if (ethtool_set_features_compat(dev, features))
-		ret |= ETHTOOL_F_COMPAT;
-
 	if (features[0].valid & ~dev->hw_features) {
 		features[0].valid &= dev->hw_features;
 		ret |= ETHTOOL_F_UNSUPPORTED;
@@ -433,34 +202,6 @@ static u32 ethtool_get_feature_mask(u32 eth_cmd)
 	}
 }
 
-static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd)
-{
-	const struct ethtool_ops *ops = dev->ethtool_ops;
-
-	if (!ops)
-		return NULL;
-
-	switch (ethcmd) {
-	case ETHTOOL_GTXCSUM:
-		return ops->get_tx_csum;
-	case ETHTOOL_GRXCSUM:
-		return ops->get_rx_csum;
-	case ETHTOOL_SSG:
-		return ops->get_sg;
-	case ETHTOOL_STSO:
-		return ops->get_tso;
-	case ETHTOOL_SUFO:
-		return ops->get_ufo;
-	default:
-		return NULL;
-	}
-}
-
-static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev)
-{
-	return !!(dev->features & NETIF_F_ALL_CSUM);
-}
-
 static int ethtool_get_one_feature(struct net_device *dev,
 	char __user *useraddr, u32 ethcmd)
 {
@@ -470,31 +211,11 @@ static int ethtool_get_one_feature(struct net_device *dev,
 		.data = !!(dev->features & mask),
 	};
 
-	/* compatibility with discrete get_ ops */
-	if (!(dev->hw_features & mask)) {
-		u32 (*actor)(struct net_device *);
-
-		actor = __ethtool_get_one_feature_actor(dev, ethcmd);
-
-		/* bug compatibility with old get_rx_csum */
-		if (ethcmd == ETHTOOL_GRXCSUM && !actor)
-			actor = __ethtool_get_rx_csum_oldbug;
-
-		if (actor)
-			edata.data = actor(dev);
-	}
-
 	if (copy_to_user(useraddr, &edata, sizeof(edata)))
 		return -EFAULT;
 	return 0;
 }
 
-static int __ethtool_set_tx_csum(struct net_device *dev, u32 data);
-static int __ethtool_set_rx_csum(struct net_device *dev, u32 data);
-static int __ethtool_set_sg(struct net_device *dev, u32 data);
-static int __ethtool_set_tso(struct net_device *dev, u32 data);
-static int __ethtool_set_ufo(struct net_device *dev, u32 data);
-
 static int ethtool_set_one_feature(struct net_device *dev,
 	void __user *useraddr, u32 ethcmd)
 {
@@ -506,56 +227,38 @@ static int ethtool_set_one_feature(struct net_device *dev,
 
 	mask = ethtool_get_feature_mask(ethcmd);
 	mask &= dev->hw_features;
-	if (mask) {
-		if (edata.data)
-			dev->wanted_features |= mask;
-		else
-			dev->wanted_features &= ~mask;
+	if (!mask)
+		return -EOPNOTSUPP;
 
-		__netdev_update_features(dev);
-		return 0;
-	}
+	if (edata.data)
+		dev->wanted_features |= mask;
+	else
+		dev->wanted_features &= ~mask;
 
-	/* Driver is not converted to ndo_fix_features or does not
-	 * support changing this offload. In the latter case it won't
-	 * have corresponding ethtool_ops field set.
-	 *
-	 * Following part is to be removed after all drivers advertise
-	 * their changeable features in netdev->hw_features and stop
-	 * using discrete offload setting ops.
-	 */
+	__netdev_update_features(dev);
 
-	switch (ethcmd) {
-	case ETHTOOL_STXCSUM:
-		return __ethtool_set_tx_csum(dev, edata.data);
-	case ETHTOOL_SRXCSUM:
-		return __ethtool_set_rx_csum(dev, edata.data);
-	case ETHTOOL_SSG:
-		return __ethtool_set_sg(dev, edata.data);
-	case ETHTOOL_STSO:
-		return __ethtool_set_tso(dev, edata.data);
-	case ETHTOOL_SUFO:
-		return __ethtool_set_ufo(dev, edata.data);
-	default:
-		return -EOPNOTSUPP;
-	}
+	return 0;
+}
+
+/* the following list of flags are the same as their associated
+ * NETIF_F_xxx values in include/linux/netdevice.h
+ */
+static const u32 flags_dup_features =
+	(ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE |
+	 ETH_FLAG_RXHASH);
+
+static u32 __ethtool_get_flags(struct net_device *dev)
+{
+	return dev->features & flags_dup_features;
 }
 
-int __ethtool_set_flags(struct net_device *dev, u32 data)
+static int __ethtool_set_flags(struct net_device *dev, u32 data)
 {
 	u32 changed;
 
 	if (data & ~flags_dup_features)
 		return -EINVAL;
 
-	/* legacy set_flags() op */
-	if (dev->ethtool_ops->set_flags) {
-		if (unlikely(dev->hw_features & flags_dup_features))
-			netdev_warn(dev,
-				"driver BUG: mixed hw_features and set_flags()\n");
-		return dev->ethtool_ops->set_flags(dev, data);
-	}
-
 	/* allow changing only bits set in hw_features */
 	changed = (data ^ dev->features) & flags_dup_features;
 	if (changed & ~dev->hw_features)
@@ -1231,81 +934,6 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
 	return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
 }
 
-static int __ethtool_set_sg(struct net_device *dev, u32 data)
-{
-	int err;
-
-	if (!dev->ethtool_ops->set_sg)
-		return -EOPNOTSUPP;
-
-	if (data && !(dev->features & NETIF_F_ALL_CSUM))
-		return -EINVAL;
-
-	if (!data && dev->ethtool_ops->set_tso) {
-		err = dev->ethtool_ops->set_tso(dev, 0);
-		if (err)
-			return err;
-	}
-
-	if (!data && dev->ethtool_ops->set_ufo) {
-		err = dev->ethtool_ops->set_ufo(dev, 0);
-		if (err)
-			return err;
-	}
-	return dev->ethtool_ops->set_sg(dev, data);
-}
-
-static int __ethtool_set_tx_csum(struct net_device *dev, u32 data)
-{
-	int err;
-
-	if (!dev->ethtool_ops->set_tx_csum)
-		return -EOPNOTSUPP;
-
-	if (!data && dev->ethtool_ops->set_sg) {
-		err = __ethtool_set_sg(dev, 0);
-		if (err)
-			return err;
-	}
-
-	return dev->ethtool_ops->set_tx_csum(dev, data);
-}
-
-static int __ethtool_set_rx_csum(struct net_device *dev, u32 data)
-{
-	if (!dev->ethtool_ops->set_rx_csum)
-		return -EOPNOTSUPP;
-
-	if (!data)
-		dev->features &= ~NETIF_F_GRO;
-
-	return dev->ethtool_ops->set_rx_csum(dev, data);
-}
-
-static int __ethtool_set_tso(struct net_device *dev, u32 data)
-{
-	if (!dev->ethtool_ops->set_tso)
-		return -EOPNOTSUPP;
-
-	if (data && !(dev->features & NETIF_F_SG))
-		return -EINVAL;
-
-	return dev->ethtool_ops->set_tso(dev, data);
-}
-
-static int __ethtool_set_ufo(struct net_device *dev, u32 data)
-{
-	if (!dev->ethtool_ops->set_ufo)
-		return -EOPNOTSUPP;
-	if (data && !(dev->features & NETIF_F_SG))
-		return -EINVAL;
-	if (data && !((dev->features & NETIF_F_GEN_CSUM) ||
-		(dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
-			== (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
-		return -EINVAL;
-	return dev->ethtool_ops->set_ufo(dev, data);
-}
-
 static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_test test;
@@ -1771,9 +1399,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		break;
 	case ETHTOOL_GFLAGS:
 		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       (dev->ethtool_ops->get_flags ?
-					dev->ethtool_ops->get_flags :
-					ethtool_op_get_flags));
+					__ethtool_get_flags);
 		break;
 	case ETHTOOL_SFLAGS:
 		rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags);
-- 
cgit v1.2.3


From a59e2ecb859f2ab03bb2e230709f8039472ad2c3 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 15 Nov 2011 15:29:55 +0000
Subject: net: split netdev features to separate header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move features definitions to separate header so that linux/skbuff.h won't
need to include linux/netdevice.h after netdev_features_t is introduced.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Acked-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h | 90 +++++++++++++++++++++++++++++++++++++++++
 include/linux/netdevice.h       | 80 +-----------------------------------
 2 files changed, 92 insertions(+), 78 deletions(-)
 create mode 100644 include/linux/netdev_features.h

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
new file mode 100644
index 000000000000..32640edf4d78
--- /dev/null
+++ b/include/linux/netdev_features.h
@@ -0,0 +1,90 @@
+/*
+ * Network device features.
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_NETDEV_FEATURES_H
+#define _LINUX_NETDEV_FEATURES_H
+
+/* Net device feature bits; if you change something,
+ * also update netdev_features_strings[] in ethtool.c */
+
+#define NETIF_F_SG		1	/* Scatter/gather IO. */
+#define NETIF_F_IP_CSUM		2	/* Can checksum TCP/UDP over IPv4. */
+#define NETIF_F_NO_CSUM		4	/* Does not require checksum. F.e. loopack. */
+#define NETIF_F_HW_CSUM		8	/* Can checksum all the packets. */
+#define NETIF_F_IPV6_CSUM	16	/* Can checksum TCP/UDP over IPV6 */
+#define NETIF_F_HIGHDMA		32	/* Can DMA to high memory. */
+#define NETIF_F_FRAGLIST	64	/* Scatter/gather IO. */
+#define NETIF_F_HW_VLAN_TX	128	/* Transmit VLAN hw acceleration */
+#define NETIF_F_HW_VLAN_RX	256	/* Receive VLAN hw acceleration */
+#define NETIF_F_HW_VLAN_FILTER	512	/* Receive filtering on VLAN */
+#define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
+#define NETIF_F_GSO		2048	/* Enable software GSO. */
+#define NETIF_F_LLTX		4096	/* LockLess TX - deprecated. Please */
+					/* do not use LLTX in new drivers */
+#define NETIF_F_NETNS_LOCAL	8192	/* Does not change network namespaces */
+#define NETIF_F_GRO		16384	/* Generic receive offload */
+#define NETIF_F_LRO		32768	/* large receive offload */
+
+/* the GSO_MASK reserves bits 16 through 23 */
+#define NETIF_F_FCOE_CRC	(1 << 24) /* FCoE CRC32 */
+#define NETIF_F_SCTP_CSUM	(1 << 25) /* SCTP checksum offload */
+#define NETIF_F_FCOE_MTU	(1 << 26) /* Supports max FCoE MTU, 2158 bytes*/
+#define NETIF_F_NTUPLE		(1 << 27) /* N-tuple filters supported */
+#define NETIF_F_RXHASH		(1 << 28) /* Receive hashing offload */
+#define NETIF_F_RXCSUM		(1 << 29) /* Receive checksumming offload */
+#define NETIF_F_NOCACHE_COPY	(1 << 30) /* Use no-cache copyfromuser */
+#define NETIF_F_LOOPBACK	(1 << 31) /* Enable loopback */
+
+/* Segmentation offload features */
+#define NETIF_F_GSO_SHIFT	16
+#define NETIF_F_GSO_MASK	0x00ff0000
+#define NETIF_F_TSO		(SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
+#define NETIF_F_UFO		(SKB_GSO_UDP << NETIF_F_GSO_SHIFT)
+#define NETIF_F_GSO_ROBUST	(SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
+#define NETIF_F_TSO_ECN		(SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT)
+#define NETIF_F_TSO6		(SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT)
+#define NETIF_F_FSO		(SKB_GSO_FCOE << NETIF_F_GSO_SHIFT)
+
+/* Features valid for ethtool to change */
+/* = all defined minus driver/device-class-related */
+#define NETIF_F_NEVER_CHANGE	(NETIF_F_VLAN_CHALLENGED | \
+				 NETIF_F_LLTX | NETIF_F_NETNS_LOCAL)
+#define NETIF_F_ETHTOOL_BITS	(0xff3fffff & ~NETIF_F_NEVER_CHANGE)
+
+/* List of features with software fallbacks. */
+#define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | \
+				 NETIF_F_TSO6 | NETIF_F_UFO)
+
+#define NETIF_F_GEN_CSUM	(NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)
+#define NETIF_F_V4_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)
+#define NETIF_F_V6_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
+#define NETIF_F_ALL_CSUM	(NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
+
+#define NETIF_F_ALL_TSO 	(NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
+
+#define NETIF_F_ALL_FCOE	(NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \
+				 NETIF_F_FSO)
+
+/*
+ * If one device supports one of these features, then enable them
+ * for all in netdev_increment_features.
+ */
+#define NETIF_F_ONE_FOR_ALL	(NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \
+				 NETIF_F_SG | NETIF_F_HIGHDMA |		\
+				 NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED)
+/*
+ * If one device doesn't support one of these features, then disable it
+ * for all in netdev_increment_features.
+ */
+#define NETIF_F_ALL_FOR_ALL	(NETIF_F_NOCACHE_COPY | NETIF_F_FSO)
+
+/* changeable features with no special hardware requirements */
+#define NETIF_F_SOFT_FEATURES	(NETIF_F_GSO | NETIF_F_GRO)
+
+#endif	/* _LINUX_NETDEV_FEATURES_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e34717a792b4..9cf6e90b171d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -51,6 +51,8 @@
 #include <net/dcbnl.h>
 #endif
 
+#include <linux/netdev_features.h>
+
 struct vlan_group;
 struct netpoll_info;
 struct phy_device;
@@ -1005,84 +1007,6 @@ struct net_device {
 	/* mask of features inheritable by VLAN devices */
 	u32			vlan_features;
 
-	/* Net device feature bits; if you change something,
-	 * also update netdev_features_strings[] in ethtool.c */
-
-#define NETIF_F_SG		1	/* Scatter/gather IO. */
-#define NETIF_F_IP_CSUM		2	/* Can checksum TCP/UDP over IPv4. */
-#define NETIF_F_NO_CSUM		4	/* Does not require checksum. F.e. loopack. */
-#define NETIF_F_HW_CSUM		8	/* Can checksum all the packets. */
-#define NETIF_F_IPV6_CSUM	16	/* Can checksum TCP/UDP over IPV6 */
-#define NETIF_F_HIGHDMA		32	/* Can DMA to high memory. */
-#define NETIF_F_FRAGLIST	64	/* Scatter/gather IO. */
-#define NETIF_F_HW_VLAN_TX	128	/* Transmit VLAN hw acceleration */
-#define NETIF_F_HW_VLAN_RX	256	/* Receive VLAN hw acceleration */
-#define NETIF_F_HW_VLAN_FILTER	512	/* Receive filtering on VLAN */
-#define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
-#define NETIF_F_GSO		2048	/* Enable software GSO. */
-#define NETIF_F_LLTX		4096	/* LockLess TX - deprecated. Please */
-					/* do not use LLTX in new drivers */
-#define NETIF_F_NETNS_LOCAL	8192	/* Does not change network namespaces */
-#define NETIF_F_GRO		16384	/* Generic receive offload */
-#define NETIF_F_LRO		32768	/* large receive offload */
-
-/* the GSO_MASK reserves bits 16 through 23 */
-#define NETIF_F_FCOE_CRC	(1 << 24) /* FCoE CRC32 */
-#define NETIF_F_SCTP_CSUM	(1 << 25) /* SCTP checksum offload */
-#define NETIF_F_FCOE_MTU	(1 << 26) /* Supports max FCoE MTU, 2158 bytes*/
-#define NETIF_F_NTUPLE		(1 << 27) /* N-tuple filters supported */
-#define NETIF_F_RXHASH		(1 << 28) /* Receive hashing offload */
-#define NETIF_F_RXCSUM		(1 << 29) /* Receive checksumming offload */
-#define NETIF_F_NOCACHE_COPY	(1 << 30) /* Use no-cache copyfromuser */
-#define NETIF_F_LOOPBACK	(1 << 31) /* Enable loopback */
-
-	/* Segmentation offload features */
-#define NETIF_F_GSO_SHIFT	16
-#define NETIF_F_GSO_MASK	0x00ff0000
-#define NETIF_F_TSO		(SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
-#define NETIF_F_UFO		(SKB_GSO_UDP << NETIF_F_GSO_SHIFT)
-#define NETIF_F_GSO_ROBUST	(SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
-#define NETIF_F_TSO_ECN		(SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT)
-#define NETIF_F_TSO6		(SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT)
-#define NETIF_F_FSO		(SKB_GSO_FCOE << NETIF_F_GSO_SHIFT)
-
-	/* Features valid for ethtool to change */
-	/* = all defined minus driver/device-class-related */
-#define NETIF_F_NEVER_CHANGE	(NETIF_F_VLAN_CHALLENGED | \
-				  NETIF_F_LLTX | NETIF_F_NETNS_LOCAL)
-#define NETIF_F_ETHTOOL_BITS	(0xff3fffff & ~NETIF_F_NEVER_CHANGE)
-
-	/* List of features with software fallbacks. */
-#define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | \
-				 NETIF_F_TSO6 | NETIF_F_UFO)
-
-
-#define NETIF_F_GEN_CSUM	(NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
-#define NETIF_F_V4_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)
-#define NETIF_F_V6_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
-#define NETIF_F_ALL_CSUM	(NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
-
-#define NETIF_F_ALL_TSO 	(NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
-
-#define NETIF_F_ALL_FCOE	(NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \
-				 NETIF_F_FSO)
-
-	/*
-	 * If one device supports one of these features, then enable them
-	 * for all in netdev_increment_features.
-	 */
-#define NETIF_F_ONE_FOR_ALL	(NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \
-				 NETIF_F_SG | NETIF_F_HIGHDMA |		\
-				 NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED)
-	/*
-	 * If one device doesn't support one of these features, then disable it
-	 * for all in netdev_increment_features.
-	 */
-#define NETIF_F_ALL_FOR_ALL	(NETIF_F_NOCACHE_COPY | NETIF_F_FSO)
-
-	/* changeable features with no special hardware requirements */
-#define NETIF_F_SOFT_FEATURES	(NETIF_F_GSO | NETIF_F_GRO)
-
 	/* Interface index. Unique device identifier	*/
 	int			ifindex;
 	int			iflink;
-- 
cgit v1.2.3


From c8f44affb7244f2ac3e703cab13d55ede27621bb Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 15 Nov 2011 15:29:55 +0000
Subject: net: introduce and use netdev_features_t for device features sets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:	add couple missing conversions in drivers
	split unexporting netdev_fix_features()
	implemented %pNF
	convert sock::sk_route_(no?)caps

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c                    |  9 ++---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c    | 13 ++++---
 drivers/net/ethernet/atheros/atl1e/atl1e_main.c    | 13 ++++---
 drivers/net/ethernet/atheros/atlx/atl2.c           | 13 ++++---
 drivers/net/ethernet/atheros/atlx/atlx.c           | 13 ++++---
 drivers/net/ethernet/broadcom/bnx2.c               |  6 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |  5 +--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h    |  5 +--
 drivers/net/ethernet/broadcom/tg3.c                | 11 +++---
 drivers/net/ethernet/chelsio/cxgb/cxgb2.c          |  7 ++--
 drivers/net/ethernet/chelsio/cxgb/sge.c            |  2 +-
 drivers/net/ethernet/chelsio/cxgb/sge.h            |  2 +-
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c    |  9 ++---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    | 12 ++++---
 .../net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c    |  8 +++--
 drivers/net/ethernet/davicom/dm9000.c              |  5 +--
 drivers/net/ethernet/freescale/gianfar.c           |  2 +-
 drivers/net/ethernet/freescale/gianfar.h           |  4 +--
 drivers/net/ethernet/freescale/gianfar_ethtool.c   |  4 +--
 drivers/net/ethernet/ibm/ibmveth.c                 |  6 ++--
 drivers/net/ethernet/intel/e1000/e1000_main.c      | 14 +++++---
 drivers/net/ethernet/intel/e1000e/netdev.c         |  5 +--
 drivers/net/ethernet/intel/igb/igb_main.c          | 12 ++++---
 drivers/net/ethernet/intel/igbvf/netdev.c          |  3 +-
 drivers/net/ethernet/intel/ixgb/ixgb_main.c        |  8 ++---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c      |  6 ++--
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c  |  3 +-
 drivers/net/ethernet/jme.c                         |  8 ++---
 drivers/net/ethernet/marvell/mv643xx_eth.c         |  4 +--
 drivers/net/ethernet/marvell/sky2.c                | 13 +++----
 drivers/net/ethernet/micrel/ksz884x.c              |  3 +-
 drivers/net/ethernet/myricom/myri10ge/myri10ge.c   |  5 +--
 drivers/net/ethernet/neterion/s2io.c               |  4 +--
 drivers/net/ethernet/neterion/vxge/vxge-main.c     |  9 ++---
 drivers/net/ethernet/nvidia/forcedeth.c            | 11 +++---
 .../net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c   |  5 +--
 .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |  6 ++--
 drivers/net/ethernet/qlogic/qlcnic/qlcnic.h        |  5 +--
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c     |  9 ++---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c   |  2 +-
 drivers/net/ethernet/qlogic/qlge/qlge_main.c       | 10 +++---
 drivers/net/ethernet/realtek/8139cp.c              |  2 +-
 drivers/net/ethernet/realtek/r8169.c               |  6 ++--
 drivers/net/ethernet/sfc/efx.c                     |  2 +-
 drivers/net/ethernet/sfc/net_driver.h              |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  3 +-
 drivers/net/tun.c                                  |  7 ++--
 drivers/net/usb/smsc75xx.c                         |  3 +-
 drivers/net/usb/smsc95xx.c                         |  3 +-
 drivers/net/vmxnet3/vmxnet3_ethtool.c              |  4 +--
 drivers/net/vmxnet3/vmxnet3_int.h                  |  2 +-
 drivers/net/xen-netback/interface.c                |  3 +-
 drivers/net/xen-netfront.c                         |  8 +++--
 drivers/s390/net/qeth_l3_main.c                    |  6 ++--
 include/linux/netdev_features.h                    |  4 +++
 include/linux/netdevice.h                          | 41 ++++++++++++----------
 include/linux/skbuff.h                             |  4 ++-
 include/net/protocol.h                             |  4 +--
 include/net/sock.h                                 |  6 ++--
 include/net/tcp.h                                  |  3 +-
 include/net/udp.h                                  |  3 +-
 lib/vsprintf.c                                     | 19 ++++++++++
 net/8021q/vlan_dev.c                               |  3 +-
 net/bridge/br_device.c                             |  3 +-
 net/bridge/br_if.c                                 |  5 +--
 net/bridge/br_private.h                            |  3 +-
 net/core/dev.c                                     | 38 +++++++++++---------
 net/core/ethtool.c                                 |  9 +++--
 net/core/skbuff.c                                  |  2 +-
 net/ipv4/af_inet.c                                 |  3 +-
 net/ipv4/tcp.c                                     |  3 +-
 net/ipv4/udp.c                                     |  3 +-
 net/ipv6/af_inet6.c                                |  3 +-
 net/ipv6/udp.c                                     |  3 +-
 74 files changed, 305 insertions(+), 202 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b0c577256487..ac5337a04639 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1325,11 +1325,12 @@ static int bond_sethwaddr(struct net_device *bond_dev,
 	return 0;
 }
 
-static u32 bond_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t bond_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct slave *slave;
 	struct bonding *bond = netdev_priv(dev);
-	u32 mask;
+	netdev_features_t mask;
 	int i;
 
 	read_lock(&bond->lock);
@@ -1363,7 +1364,7 @@ static void bond_compute_features(struct bonding *bond)
 {
 	struct slave *slave;
 	struct net_device *bond_dev = bond->dev;
-	u32 vlan_features = BOND_VLAN_FEATURES;
+	netdev_features_t vlan_features = BOND_VLAN_FEATURES;
 	unsigned short max_hard_header_len = ETH_HLEN;
 	int i;
 
@@ -1897,7 +1898,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct slave *slave, *oldcurrent;
 	struct sockaddr addr;
-	u32 old_features = bond_dev->features;
+	netdev_features_t old_features = bond_dev->features;
 
 	/* slave is not a slave or master is not master of this slave */
 	if (!(slave_dev->flags & IFF_SLAVE) ||
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 02c7ed8d9eca..b8591246eb4c 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -411,7 +411,7 @@ static void atl1c_set_multi(struct net_device *netdev)
 	}
 }
 
-static void __atl1c_vlan_mode(u32 features, u32 *mac_ctrl_data)
+static void __atl1c_vlan_mode(netdev_features_t features, u32 *mac_ctrl_data)
 {
 	if (features & NETIF_F_HW_VLAN_RX) {
 		/* enable VLAN tag insert/strip */
@@ -422,7 +422,8 @@ static void __atl1c_vlan_mode(u32 features, u32 *mac_ctrl_data)
 	}
 }
 
-static void atl1c_vlan_mode(struct net_device *netdev, u32 features)
+static void atl1c_vlan_mode(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
 	struct pci_dev *pdev = adapter->pdev;
@@ -482,7 +483,8 @@ static void atl1c_set_rxbufsize(struct atl1c_adapter *adapter,
 		roundup(mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN, 8) : AT_RX_BUF_SIZE;
 }
 
-static u32 atl1c_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t atl1c_fix_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -499,9 +501,10 @@ static u32 atl1c_fix_features(struct net_device *netdev, u32 features)
 	return features;
 }
 
-static int atl1c_set_features(struct net_device *netdev, u32 features)
+static int atl1c_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
-	u32 changed = netdev->features ^ features;
+	netdev_features_t changed = netdev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		atl1c_vlan_mode(netdev, features);
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
index 95483bcac1d0..c915c0873810 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -313,7 +313,7 @@ static void atl1e_set_multi(struct net_device *netdev)
 	}
 }
 
-static void __atl1e_vlan_mode(u32 features, u32 *mac_ctrl_data)
+static void __atl1e_vlan_mode(netdev_features_t features, u32 *mac_ctrl_data)
 {
 	if (features & NETIF_F_HW_VLAN_RX) {
 		/* enable VLAN tag insert/strip */
@@ -324,7 +324,8 @@ static void __atl1e_vlan_mode(u32 features, u32 *mac_ctrl_data)
 	}
 }
 
-static void atl1e_vlan_mode(struct net_device *netdev, u32 features)
+static void atl1e_vlan_mode(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct atl1e_adapter *adapter = netdev_priv(netdev);
 	u32 mac_ctrl_data = 0;
@@ -370,7 +371,8 @@ static int atl1e_set_mac_addr(struct net_device *netdev, void *p)
 	return 0;
 }
 
-static u32 atl1e_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t atl1e_fix_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -384,9 +386,10 @@ static u32 atl1e_fix_features(struct net_device *netdev, u32 features)
 	return features;
 }
 
-static int atl1e_set_features(struct net_device *netdev, u32 features)
+static int atl1e_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
-	u32 changed = netdev->features ^ features;
+	netdev_features_t changed = netdev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		atl1e_vlan_mode(netdev, features);
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index db3f43046d32..071f4c858969 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -361,7 +361,7 @@ static inline void atl2_irq_disable(struct atl2_adapter *adapter)
     synchronize_irq(adapter->pdev->irq);
 }
 
-static void __atl2_vlan_mode(u32 features, u32 *ctrl)
+static void __atl2_vlan_mode(netdev_features_t features, u32 *ctrl)
 {
 	if (features & NETIF_F_HW_VLAN_RX) {
 		/* enable VLAN tag insert/strip */
@@ -372,7 +372,8 @@ static void __atl2_vlan_mode(u32 features, u32 *ctrl)
 	}
 }
 
-static void atl2_vlan_mode(struct net_device *netdev, u32 features)
+static void atl2_vlan_mode(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct atl2_adapter *adapter = netdev_priv(netdev);
 	u32 ctrl;
@@ -391,7 +392,8 @@ static void atl2_restore_vlan(struct atl2_adapter *adapter)
 	atl2_vlan_mode(adapter->netdev, adapter->netdev->features);
 }
 
-static u32 atl2_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t atl2_fix_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -405,9 +407,10 @@ static u32 atl2_fix_features(struct net_device *netdev, u32 features)
 	return features;
 }
 
-static int atl2_set_features(struct net_device *netdev, u32 features)
+static int atl2_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
-	u32 changed = netdev->features ^ features;
+	netdev_features_t changed = netdev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		atl2_vlan_mode(netdev, features);
diff --git a/drivers/net/ethernet/atheros/atlx/atlx.c b/drivers/net/ethernet/atheros/atlx/atlx.c
index aabcf4b5745a..8ff7411094d5 100644
--- a/drivers/net/ethernet/atheros/atlx/atlx.c
+++ b/drivers/net/ethernet/atheros/atlx/atlx.c
@@ -211,7 +211,7 @@ static void atlx_link_chg_task(struct work_struct *work)
 	spin_unlock_irqrestore(&adapter->lock, flags);
 }
 
-static void __atlx_vlan_mode(u32 features, u32 *ctrl)
+static void __atlx_vlan_mode(netdev_features_t features, u32 *ctrl)
 {
 	if (features & NETIF_F_HW_VLAN_RX) {
 		/* enable VLAN tag insert/strip */
@@ -222,7 +222,8 @@ static void __atlx_vlan_mode(u32 features, u32 *ctrl)
 	}
 }
 
-static void atlx_vlan_mode(struct net_device *netdev, u32 features)
+static void atlx_vlan_mode(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct atlx_adapter *adapter = netdev_priv(netdev);
 	unsigned long flags;
@@ -242,7 +243,8 @@ static void atlx_restore_vlan(struct atlx_adapter *adapter)
 	atlx_vlan_mode(adapter->netdev, adapter->netdev->features);
 }
 
-static u32 atlx_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t atlx_fix_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -256,9 +258,10 @@ static u32 atlx_fix_features(struct net_device *netdev, u32 features)
 	return features;
 }
 
-static int atlx_set_features(struct net_device *netdev, u32 features)
+static int atlx_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
-	u32 changed = netdev->features ^ features;
+	netdev_features_t changed = netdev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		atlx_vlan_mode(netdev, features);
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 32d1f92a2479..7203f37d2ef3 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -7571,8 +7571,8 @@ bnx2_set_phys_id(struct net_device *dev, enum ethtool_phys_id_state state)
 	return 0;
 }
 
-static u32
-bnx2_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t
+bnx2_fix_features(struct net_device *dev, netdev_features_t features)
 {
 	struct bnx2 *bp = netdev_priv(dev);
 
@@ -7583,7 +7583,7 @@ bnx2_fix_features(struct net_device *dev, u32 features)
 }
 
 static int
-bnx2_set_features(struct net_device *dev, u32 features)
+bnx2_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct bnx2 *bp = netdev_priv(dev);
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 0d60b9e633ad..8336c784db49 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3398,7 +3398,8 @@ int bnx2x_change_mtu(struct net_device *dev, int new_mtu)
 	return bnx2x_reload_if_running(dev);
 }
 
-u32 bnx2x_fix_features(struct net_device *dev, u32 features)
+netdev_features_t bnx2x_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 
@@ -3409,7 +3410,7 @@ u32 bnx2x_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-int bnx2x_set_features(struct net_device *dev, u32 features)
+int bnx2x_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	u32 flags = bp->flags;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 41eb17e7720f..80c5ed08e419 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -533,8 +533,9 @@ int bnx2x_change_mtu(struct net_device *dev, int new_mtu);
  */
 int bnx2x_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type);
 #endif
-u32 bnx2x_fix_features(struct net_device *dev, u32 features);
-int bnx2x_set_features(struct net_device *dev, u32 features);
+netdev_features_t bnx2x_fix_features(struct net_device *dev,
+	netdev_features_t features);
+int bnx2x_set_features(struct net_device *dev, netdev_features_t features);
 
 /**
  * bnx2x_tx_timeout - tx timeout netdev callback
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index cd3623416a4e..365cd47e2298 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6968,7 +6968,7 @@ static int tg3_phy_lpbk_set(struct tg3 *tp, u32 speed, bool extlpbk)
 	return 0;
 }
 
-static void tg3_set_loopback(struct net_device *dev, u32 features)
+static void tg3_set_loopback(struct net_device *dev, netdev_features_t features)
 {
 	struct tg3 *tp = netdev_priv(dev);
 
@@ -6994,7 +6994,8 @@ static void tg3_set_loopback(struct net_device *dev, u32 features)
 	}
 }
 
-static u32 tg3_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t tg3_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct tg3 *tp = netdev_priv(dev);
 
@@ -7004,9 +7005,9 @@ static u32 tg3_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int tg3_set_features(struct net_device *dev, u32 features)
+static int tg3_set_features(struct net_device *dev, netdev_features_t features)
 {
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 
 	if ((changed & NETIF_F_LOOPBACK) && netif_running(dev))
 		tg3_set_loopback(dev, features);
@@ -15313,7 +15314,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 	u32 sndmbx, rcvmbx, intmbx;
 	char str[40];
 	u64 dma_mask, persist_dma_mask;
-	u32 features = 0;
+	netdev_features_t features = 0;
 
 	printk_once(KERN_INFO "%s\n", version);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 26d0fd2d9c9d..a971796b2262 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -850,7 +850,8 @@ static int t1_set_mac_addr(struct net_device *dev, void *p)
 	return 0;
 }
 
-static u32 t1_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t t1_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -864,9 +865,9 @@ static u32 t1_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int t1_set_features(struct net_device *dev, u32 features)
+static int t1_set_features(struct net_device *dev, netdev_features_t features)
 {
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 	struct adapter *adapter = dev->ml_priv;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index f9b602300040..47a84359d4e4 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -742,7 +742,7 @@ static inline void setup_ring_params(struct adapter *adapter, u64 addr,
 /*
  * Enable/disable VLAN acceleration.
  */
-void t1_vlan_mode(struct adapter *adapter, u32 features)
+void t1_vlan_mode(struct adapter *adapter, netdev_features_t features)
 {
 	struct sge *sge = adapter->sge;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.h b/drivers/net/ethernet/chelsio/cxgb/sge.h
index e03980bcdd65..b9bf16b385f7 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.h
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.h
@@ -79,7 +79,7 @@ irqreturn_t t1_interrupt(int irq, void *cookie);
 int t1_poll(struct napi_struct *, int);
 
 netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev);
-void t1_vlan_mode(struct adapter *adapter, u32 features);
+void t1_vlan_mode(struct adapter *adapter, netdev_features_t features);
 void t1_sge_start(struct sge *);
 void t1_sge_stop(struct sge *);
 int t1_sge_intr_error_handler(struct sge *);
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 053560da6347..63ffaa7e255f 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -2532,7 +2532,7 @@ static void t3_synchronize_rx(struct adapter *adap, const struct port_info *p)
 	}
 }
 
-static void cxgb_vlan_mode(struct net_device *dev, u32 features)
+static void cxgb_vlan_mode(struct net_device *dev, netdev_features_t features)
 {
 	struct port_info *pi = netdev_priv(dev);
 	struct adapter *adapter = pi->adapter;
@@ -2553,7 +2553,8 @@ static void cxgb_vlan_mode(struct net_device *dev, u32 features)
 	t3_synchronize_rx(adapter, pi);
 }
 
-static u32 cxgb_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t cxgb_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -2567,9 +2568,9 @@ static u32 cxgb_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int cxgb_set_features(struct net_device *dev, u32 features)
+static int cxgb_set_features(struct net_device *dev, netdev_features_t features)
 {
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		cxgb_vlan_mode(dev, features);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 48ffe11d9aa9..fd6d460ea475 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1856,10 +1856,10 @@ static int set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 	return err;
 }
 
-static int cxgb_set_features(struct net_device *dev, u32 features)
+static int cxgb_set_features(struct net_device *dev, netdev_features_t features)
 {
 	const struct port_info *pi = netdev_priv(dev);
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 	int err;
 
 	if (!(changed & NETIF_F_HW_VLAN_RX))
@@ -3538,7 +3538,7 @@ static int __devinit init_one(struct pci_dev *pdev,
 {
 	int func, i, err;
 	struct port_info *pi;
-	unsigned int highdma = 0;
+	bool highdma = false;
 	struct adapter *adapter = NULL;
 
 	printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
@@ -3564,7 +3564,7 @@ static int __devinit init_one(struct pci_dev *pdev,
 	}
 
 	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		highdma = NETIF_F_HIGHDMA;
+		highdma = true;
 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
 		if (err) {
 			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
@@ -3638,7 +3638,9 @@ static int __devinit init_one(struct pci_dev *pdev,
 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 			NETIF_F_RXCSUM | NETIF_F_RXHASH |
 			NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
-		netdev->features |= netdev->hw_features | highdma;
+		if (highdma)
+			netdev->hw_features |= NETIF_F_HIGHDMA;
+		netdev->features |= netdev->hw_features;
 		netdev->vlan_features = netdev->features & VLAN_FEAT;
 
 		netdev->priv_flags |= IFF_UNICAST_FLT;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index ee81d8e798ea..8155cfecae19 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -1092,7 +1092,8 @@ static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
 	return ret;
 }
 
-static u32 cxgb4vf_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -1106,10 +1107,11 @@ static u32 cxgb4vf_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int cxgb4vf_set_features(struct net_device *dev, u32 features)
+static int cxgb4vf_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct port_info *pi = netdev_priv(dev);
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 438f4580bf66..26be1dfc1577 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -474,10 +474,11 @@ static int dm9000_nway_reset(struct net_device *dev)
 	return mii_nway_restart(&dm->mii);
 }
 
-static int dm9000_set_features(struct net_device *dev, u32 features)
+static int dm9000_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	board_info_t *dm = to_dm9000_board(dev);
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 	unsigned long flags;
 
 	if (!(changed & NETIF_F_RXCSUM))
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 83199fd0d62b..ff3e8b0f0da3 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2306,7 +2306,7 @@ void gfar_check_rx_parser_mode(struct gfar_private *priv)
 }
 
 /* Enables and disables VLAN insertion/extraction */
-void gfar_vlan_mode(struct net_device *dev, u32 features)
+void gfar_vlan_mode(struct net_device *dev, netdev_features_t features)
 {
 	struct gfar_private *priv = netdev_priv(dev);
 	struct gfar __iomem *regs = NULL;
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 9aa43773e8e3..cda6cb2eb1d2 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -1179,9 +1179,9 @@ extern void gfar_phy_test(struct mii_bus *bus, struct phy_device *phydev,
 extern void gfar_configure_coalescing(struct gfar_private *priv,
 		unsigned long tx_mask, unsigned long rx_mask);
 void gfar_init_sysfs(struct net_device *dev);
-int gfar_set_features(struct net_device *dev, u32 features);
+int gfar_set_features(struct net_device *dev, netdev_features_t features);
 extern void gfar_check_rx_parser_mode(struct gfar_private *priv);
-extern void gfar_vlan_mode(struct net_device *dev, u32 features);
+extern void gfar_vlan_mode(struct net_device *dev, netdev_features_t features);
 
 extern const struct ethtool_ops gfar_ethtool_ops;
 
diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index 212736bab6bb..1ea0eb9ee643 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -519,12 +519,12 @@ static int gfar_sringparam(struct net_device *dev, struct ethtool_ringparam *rva
 	return err;
 }
 
-int gfar_set_features(struct net_device *dev, u32 features)
+int gfar_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct gfar_private *priv = netdev_priv(dev);
 	unsigned long flags;
 	int err = 0, i = 0;
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 
 	if (changed & (NETIF_F_HW_VLAN_TX|NETIF_F_HW_VLAN_RX))
 		gfar_vlan_mode(dev, features);
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index b1cd41b9c61c..e877371680a9 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -735,7 +735,8 @@ static void netdev_get_drvinfo(struct net_device *dev,
 		sizeof(info->version) - 1);
 }
 
-static u32 ibmveth_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t ibmveth_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	/*
 	 * Since the ibmveth firmware interface does not have the
@@ -838,7 +839,8 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
 	return rc1 ? rc1 : rc2;
 }
 
-static int ibmveth_set_features(struct net_device *dev, u32 features)
+static int ibmveth_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct ibmveth_adapter *adapter = netdev_priv(dev);
 	int rx_csum = !!(features & NETIF_F_RXCSUM);
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index cf480b554622..82f4ef142259 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -167,7 +167,8 @@ static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter,
                                        struct sk_buff *skb);
 
 static bool e1000_vlan_used(struct e1000_adapter *adapter);
-static void e1000_vlan_mode(struct net_device *netdev, u32 features);
+static void e1000_vlan_mode(struct net_device *netdev,
+			    netdev_features_t features);
 static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
 static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid);
 static void e1000_restore_vlan(struct e1000_adapter *adapter);
@@ -806,7 +807,8 @@ static int e1000_is_need_ioport(struct pci_dev *pdev)
 	}
 }
 
-static u32 e1000_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t e1000_fix_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -820,10 +822,11 @@ static u32 e1000_fix_features(struct net_device *netdev, u32 features)
 	return features;
 }
 
-static int e1000_set_features(struct net_device *netdev, u32 features)
+static int e1000_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
-	u32 changed = features ^ netdev->features;
+	netdev_features_t changed = features ^ netdev->features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		e1000_vlan_mode(netdev, features);
@@ -4577,7 +4580,8 @@ static void e1000_vlan_filter_on_off(struct e1000_adapter *adapter,
 		e1000_irq_enable(adapter);
 }
 
-static void e1000_vlan_mode(struct net_device *netdev, u32 features)
+static void e1000_vlan_mode(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index a855db1ad249..d85fac626a80 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5859,10 +5859,11 @@ static void e1000_eeprom_checks(struct e1000_adapter *adapter)
 	}
 }
 
-static int e1000_set_features(struct net_device *netdev, u32 features)
+static int e1000_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
-	u32 changed = features ^ netdev->features;
+	netdev_features_t changed = features ^ netdev->features;
 
 	if (changed & (NETIF_F_TSO | NETIF_F_TSO6))
 		adapter->flags |= FLAG_TSO_FORCE;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index ced544499f1b..1fcba22c6403 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -145,7 +145,7 @@ static bool igb_clean_rx_irq(struct igb_q_vector *, int);
 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
 static void igb_tx_timeout(struct net_device *);
 static void igb_reset_task(struct work_struct *);
-static void igb_vlan_mode(struct net_device *netdev, u32 features);
+static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
 static void igb_vlan_rx_add_vid(struct net_device *, u16);
 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
 static void igb_restore_vlan(struct igb_adapter *);
@@ -1742,7 +1742,8 @@ void igb_reset(struct igb_adapter *adapter)
 	igb_get_phy_info(hw);
 }
 
-static u32 igb_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t igb_fix_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -1756,9 +1757,10 @@ static u32 igb_fix_features(struct net_device *netdev, u32 features)
 	return features;
 }
 
-static int igb_set_features(struct net_device *netdev, u32 features)
+static int igb_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
-	u32 changed = netdev->features ^ features;
+	netdev_features_t changed = netdev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		igb_vlan_mode(netdev, features);
@@ -6467,7 +6469,7 @@ s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
 	return 0;
 }
 
-static void igb_vlan_mode(struct net_device *netdev, u32 features)
+static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index cca78124be31..2a05658938bd 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -2532,7 +2532,8 @@ static void igbvf_print_device_info(struct igbvf_adapter *adapter)
 	dev_info(&pdev->dev, "Address: %pM\n", netdev->dev_addr);
 }
 
-static int igbvf_set_features(struct net_device *netdev, u32 features)
+static int igbvf_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct igbvf_adapter *adapter = netdev_priv(netdev);
 
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index e21148f8b160..247cf9219e03 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -325,8 +325,8 @@ ixgb_reset(struct ixgb_adapter *adapter)
 	}
 }
 
-static u32
-ixgb_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t
+ixgb_fix_features(struct net_device *netdev, netdev_features_t features)
 {
 	/*
 	 * Tx VLAN insertion does not work per HW design when Rx stripping is
@@ -339,10 +339,10 @@ ixgb_fix_features(struct net_device *netdev, u32 features)
 }
 
 static int
-ixgb_set_features(struct net_device *netdev, u32 features)
+ixgb_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	u32 changed = features ^ netdev->features;
+	netdev_features_t changed = features ^ netdev->features;
 
 	if (!(changed & (NETIF_F_RXCSUM|NETIF_F_HW_VLAN_RX)))
 		return 0;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 8ef92d1a6aa1..820fc040c241 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7174,7 +7174,8 @@ void ixgbe_do_reset(struct net_device *netdev)
 		ixgbe_reset(adapter);
 }
 
-static u32 ixgbe_fix_features(struct net_device *netdev, u32 data)
+static netdev_features_t ixgbe_fix_features(struct net_device *netdev,
+	netdev_features_t data)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
@@ -7204,7 +7205,8 @@ static u32 ixgbe_fix_features(struct net_device *netdev, u32 data)
 	return data;
 }
 
-static int ixgbe_set_features(struct net_device *netdev, u32 data)
+static int ixgbe_set_features(struct net_device *netdev,
+	netdev_features_t data)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	bool need_reset = false;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 4c8e19951d57..3e6ec088c50d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -3249,7 +3249,8 @@ static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev,
 	return stats;
 }
 
-static int ixgbevf_set_features(struct net_device *netdev, u32 features)
+static int ixgbevf_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 7d88c7c28a7c..df3ab831b1ad 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -1917,7 +1917,7 @@ jme_map_tx_skb(struct jme_adapter *jme, struct sk_buff *skb, int idx)
 	struct jme_ring *txring = &(jme->txring[0]);
 	struct txdesc *txdesc = txring->desc, *ctxdesc;
 	struct jme_buffer_info *txbi = txring->bufinf, *ctxbi;
-	u8 hidma = jme->dev->features & NETIF_F_HIGHDMA;
+	u8 hidma = !!(jme->dev->features & NETIF_F_HIGHDMA);
 	int i, nr_frags = skb_shinfo(skb)->nr_frags;
 	int mask = jme->tx_ring_mask;
 	const struct skb_frag_struct *frag;
@@ -2620,8 +2620,8 @@ jme_set_msglevel(struct net_device *netdev, u32 value)
 	jme->msg_enable = value;
 }
 
-static u32
-jme_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t
+jme_fix_features(struct net_device *netdev, netdev_features_t features)
 {
 	if (netdev->mtu > 1900)
 		features &= ~(NETIF_F_ALL_TSO | NETIF_F_ALL_CSUM);
@@ -2629,7 +2629,7 @@ jme_fix_features(struct net_device *netdev, u32 features)
 }
 
 static int
-jme_set_features(struct net_device *netdev, u32 features)
+jme_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	struct jme_adapter *jme = netdev_priv(netdev);
 
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index f6b4304ca459..157c5c17fdcc 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -1579,10 +1579,10 @@ mv643xx_eth_set_ringparam(struct net_device *dev, struct ethtool_ringparam *er)
 
 
 static int
-mv643xx_eth_set_features(struct net_device *dev, u32 features)
+mv643xx_eth_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct mv643xx_eth_private *mp = netdev_priv(dev);
-	u32 rx_csum = features & NETIF_F_RXCSUM;
+	int rx_csum = !!(features & NETIF_F_RXCSUM);
 
 	wrlp(mp, PORT_CONFIG, rx_csum ? 0x02000000 : 0x00000000);
 
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 553d1a315b3a..c79dc5447658 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -1275,7 +1275,7 @@ static void rx_set_checksum(struct sky2_port *sky2)
 }
 
 /* Enable/disable receive hash calculation (RSS) */
-static void rx_set_rss(struct net_device *dev, u32 features)
+static void rx_set_rss(struct net_device *dev, netdev_features_t features)
 {
 	struct sky2_port *sky2 = netdev_priv(dev);
 	struct sky2_hw *hw = sky2->hw;
@@ -1396,7 +1396,7 @@ static int sky2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 #define SKY2_VLAN_OFFLOADS (NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO)
 
-static void sky2_vlan_mode(struct net_device *dev, u32 features)
+static void sky2_vlan_mode(struct net_device *dev, netdev_features_t features)
 {
 	struct sky2_port *sky2 = netdev_priv(dev);
 	struct sky2_hw *hw = sky2->hw;
@@ -4282,7 +4282,8 @@ static int sky2_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom
 	return sky2_vpd_write(sky2->hw, cap, data, eeprom->offset, eeprom->len);
 }
 
-static u32 sky2_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t sky2_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	const struct sky2_port *sky2 = netdev_priv(dev);
 	const struct sky2_hw *hw = sky2->hw;
@@ -4306,13 +4307,13 @@ static u32 sky2_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int sky2_set_features(struct net_device *dev, u32 features)
+static int sky2_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct sky2_port *sky2 = netdev_priv(dev);
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 
 	if (changed & NETIF_F_RXCSUM) {
-		u32 on = features & NETIF_F_RXCSUM;
+		int on = !!(features & NETIF_F_RXCSUM);
 		sky2_write32(sky2->hw, Q_ADDR(rxqaddr[sky2->port], Q_CSR),
 			     on ? BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM);
 	}
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index 3b67fe65404a..8d846bd09711 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -6588,7 +6588,8 @@ static void netdev_get_ethtool_stats(struct net_device *dev,
  *
  * Return 0 if successful; otherwise an error code.
  */
-static int netdev_set_features(struct net_device *dev, u32 features)
+static int netdev_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct dev_priv *priv = netdev_priv(dev);
 	struct dev_info *hw_priv = priv->adapter;
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 0778edcf7b9a..20b72ecb020a 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -1491,7 +1491,7 @@ myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget)
 	 * access to avoid theoretical race condition with functions that
 	 * change NETIF_F_LRO flag at runtime.
 	 */
-	bool lro_enabled = ACCESS_ONCE(mgp->dev->features) & NETIF_F_LRO;
+	bool lro_enabled = !!(ACCESS_ONCE(mgp->dev->features) & NETIF_F_LRO);
 
 	while (rx_done->entry[idx].length != 0 && work_done < budget) {
 		length = ntohs(rx_done->entry[idx].length);
@@ -3149,7 +3149,8 @@ static int myri10ge_set_mac_address(struct net_device *dev, void *addr)
 	return 0;
 }
 
-static u32 myri10ge_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t myri10ge_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	if (!(features & NETIF_F_RXCSUM))
 		features &= ~NETIF_F_LRO;
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index e6c90a5ac5d4..76ae47627200 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -6616,10 +6616,10 @@ static void s2io_ethtool_get_strings(struct net_device *dev,
 	}
 }
 
-static int s2io_set_features(struct net_device *dev, u32 features)
+static int s2io_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct s2io_nic *sp = netdev_priv(dev);
-	u32 changed = (features ^ dev->features) & NETIF_F_LRO;
+	netdev_features_t changed = (features ^ dev->features) & NETIF_F_LRO;
 
 	if (changed && netif_running(dev)) {
 		int rc;
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index a83197d757c1..16d4d8e913c3 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -2662,9 +2662,10 @@ static void vxge_poll_vp_lockup(unsigned long data)
 	mod_timer(&vdev->vp_lockup_timer, jiffies + HZ / 1000);
 }
 
-static u32 vxge_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t vxge_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 
 	/* Enabling RTH requires some of the logic in vxge_device_register and a
 	 * vpath reset.  Due to these restrictions, only allow modification
@@ -2676,10 +2677,10 @@ static u32 vxge_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int vxge_set_features(struct net_device *dev, u32 features)
+static int vxge_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct vxgedev *vdev = netdev_priv(dev);
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 
 	if (!(changed & NETIF_F_RXHASH))
 		return 0;
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index e8a5ae356407..01bb7bfe14e6 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -4536,7 +4536,7 @@ static int nv_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam*
 	return 0;
 }
 
-static int nv_set_loopback(struct net_device *dev, u32 features)
+static int nv_set_loopback(struct net_device *dev, netdev_features_t features)
 {
 	struct fe_priv *np = netdev_priv(dev);
 	unsigned long flags;
@@ -4591,7 +4591,8 @@ static int nv_set_loopback(struct net_device *dev, u32 features)
 	return retval;
 }
 
-static u32 nv_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t nv_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	/* vlan is dependent on rx checksum offload */
 	if (features & (NETIF_F_HW_VLAN_TX|NETIF_F_HW_VLAN_RX))
@@ -4600,7 +4601,7 @@ static u32 nv_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static void nv_vlan_mode(struct net_device *dev, u32 features)
+static void nv_vlan_mode(struct net_device *dev, netdev_features_t features)
 {
 	struct fe_priv *np = get_nvpriv(dev);
 
@@ -4621,11 +4622,11 @@ static void nv_vlan_mode(struct net_device *dev, u32 features)
 	spin_unlock_irq(&np->lock);
 }
 
-static int nv_set_features(struct net_device *dev, u32 features)
+static int nv_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 	int retval;
 
 	if ((changed & NETIF_F_LOOPBACK) && netif_running(dev)) {
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 48406ca382f1..964e9c0948bc 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -2109,10 +2109,11 @@ static int pch_gbe_change_mtu(struct net_device *netdev, int new_mtu)
  * Returns
  *	0:		HW state updated successfully
  */
-static int pch_gbe_set_features(struct net_device *netdev, u32 features)
+static int pch_gbe_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct pch_gbe_adapter *adapter = netdev_priv(netdev);
-	u32 changed = features ^ netdev->features;
+	netdev_features_t changed = features ^ netdev->features;
 
 	if (!(changed & NETIF_F_RXCSUM))
 		return 0;
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 8cf3173ba488..7dd9a4b107e6 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -544,7 +544,8 @@ static void netxen_set_multicast_list(struct net_device *dev)
 	adapter->set_multi(dev);
 }
 
-static u32 netxen_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t netxen_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	if (!(features & NETIF_F_RXCSUM)) {
 		netdev_info(dev, "disabling LRO as RXCSUM is off\n");
@@ -555,7 +556,8 @@ static u32 netxen_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int netxen_set_features(struct net_device *dev, u32 features)
+static int netxen_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct netxen_adapter *adapter = netdev_priv(dev);
 	int hw_lro;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 7ed53dbb8646..60976fc4ccc6 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -1466,8 +1466,9 @@ void qlcnic_advert_link_change(struct qlcnic_adapter *adapter, int linkup);
 
 int qlcnic_fw_cmd_set_mtu(struct qlcnic_adapter *adapter, int mtu);
 int qlcnic_change_mtu(struct net_device *netdev, int new_mtu);
-u32 qlcnic_fix_features(struct net_device *netdev, u32 features);
-int qlcnic_set_features(struct net_device *netdev, u32 features);
+netdev_features_t qlcnic_fix_features(struct net_device *netdev,
+	netdev_features_t features);
+int qlcnic_set_features(struct net_device *netdev, netdev_features_t features);
 int qlcnic_config_hw_lro(struct qlcnic_adapter *adapter, int enable);
 int qlcnic_config_bridged_mode(struct qlcnic_adapter *adapter, u32 enable);
 int qlcnic_send_lro_cleanup(struct qlcnic_adapter *adapter);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index bcb81e47543a..b528e52a8ee1 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -817,12 +817,13 @@ int qlcnic_change_mtu(struct net_device *netdev, int mtu)
 }
 
 
-u32 qlcnic_fix_features(struct net_device *netdev, u32 features)
+netdev_features_t qlcnic_fix_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 
 	if ((adapter->flags & QLCNIC_ESWITCH_ENABLED)) {
-		u32 changed = features ^ netdev->features;
+		netdev_features_t changed = features ^ netdev->features;
 		features ^= changed & (NETIF_F_ALL_CSUM | NETIF_F_RXCSUM);
 	}
 
@@ -833,10 +834,10 @@ u32 qlcnic_fix_features(struct net_device *netdev, u32 features)
 }
 
 
-int qlcnic_set_features(struct net_device *netdev, u32 features)
+int qlcnic_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
-	u32 changed = netdev->features ^ features;
+	netdev_features_t changed = netdev->features ^ features;
 	int hw_lro = (features & NETIF_F_LRO) ? QLCNIC_LRO_ENABLED : 0;
 
 	if (!(changed & NETIF_F_LRO))
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 0bd163828e33..823f845ddc04 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -792,7 +792,7 @@ qlcnic_set_netdev_features(struct qlcnic_adapter *adapter,
 		struct qlcnic_esw_func_cfg *esw_cfg)
 {
 	struct net_device *netdev = adapter->netdev;
-	unsigned long features, vlan_features;
+	netdev_features_t features, vlan_features;
 
 	features = (NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 			NETIF_F_IPV6_CSUM | NETIF_F_GRO);
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index c92afcd912e2..1ce4e08037b8 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -2307,7 +2307,7 @@ static int ql_napi_poll_msix(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
-static void qlge_vlan_mode(struct net_device *ndev, u32 features)
+static void qlge_vlan_mode(struct net_device *ndev, netdev_features_t features)
 {
 	struct ql_adapter *qdev = netdev_priv(ndev);
 
@@ -2323,7 +2323,8 @@ static void qlge_vlan_mode(struct net_device *ndev, u32 features)
 	}
 }
 
-static u32 qlge_fix_features(struct net_device *ndev, u32 features)
+static netdev_features_t qlge_fix_features(struct net_device *ndev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -2337,9 +2338,10 @@ static u32 qlge_fix_features(struct net_device *ndev, u32 features)
 	return features;
 }
 
-static int qlge_set_features(struct net_device *ndev, u32 features)
+static int qlge_set_features(struct net_device *ndev,
+	netdev_features_t features)
 {
-	u32 changed = ndev->features ^ features;
+	netdev_features_t changed = ndev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		qlge_vlan_mode(ndev, features);
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index 6cfc5dc0f76e..87cff10f7be7 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -1392,7 +1392,7 @@ static void cp_set_msglevel(struct net_device *dev, u32 value)
 	cp->msg_enable = value;
 }
 
-static int cp_set_features(struct net_device *dev, u32 features)
+static int cp_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct cp_private *cp = netdev_priv(dev);
 	unsigned long flags;
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index cdf66d68d849..2dfb0c0ea01b 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -1553,7 +1553,8 @@ static int rtl8169_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 	return ret;
 }
 
-static u32 rtl8169_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t rtl8169_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 
@@ -1567,7 +1568,8 @@ static u32 rtl8169_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int rtl8169_set_features(struct net_device *dev, u32 features)
+static int rtl8169_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	void __iomem *ioaddr = tp->mmio_addr;
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index d5731f1fe6d6..14e134d3b4d7 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1900,7 +1900,7 @@ static void efx_set_multicast_list(struct net_device *net_dev)
 	/* Otherwise efx_start_port() will do this */
 }
 
-static int efx_set_features(struct net_device *net_dev, u32 data)
+static int efx_set_features(struct net_device *net_dev, netdev_features_t data)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index b8e251a1ee48..c49502bab6a3 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -908,7 +908,7 @@ struct efx_nic_type {
 	unsigned int phys_addr_channels;
 	unsigned int tx_dc_base;
 	unsigned int rx_dc_base;
-	u32 offload_features;
+	netdev_features_t offload_features;
 };
 
 /**************************************************************************
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 20546bbbb8db..643ca97a2d9a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1419,7 +1419,8 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-static u32 stmmac_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t stmmac_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 8592523b0bb5..3dd13d606d00 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -123,7 +123,7 @@ struct tun_struct {
 	gid_t			group;
 
 	struct net_device	*dev;
-	u32			set_features;
+	netdev_features_t	set_features;
 #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
 			  NETIF_F_TSO6|NETIF_F_UFO)
 	struct fasync_struct	*fasync;
@@ -454,7 +454,8 @@ tun_net_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-static u32 tun_net_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t tun_net_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 
@@ -1196,7 +1197,7 @@ static int tun_get_iff(struct net *net, struct tun_struct *tun,
  * privs required. */
 static int set_offload(struct tun_struct *tun, unsigned long arg)
 {
-	u32 features = 0;
+	netdev_features_t features = 0;
 
 	if (arg & TUN_F_CSUM) {
 		features |= NETIF_F_HW_CSUM;
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index a5b9b12ef268..7d62c39f65cf 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -728,7 +728,8 @@ static int smsc75xx_change_mtu(struct net_device *netdev, int new_mtu)
 }
 
 /* Enable or disable Rx checksum offload engine */
-static int smsc75xx_set_features(struct net_device *netdev, u32 features)
+static int smsc75xx_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct usbnet *dev = netdev_priv(netdev);
 	struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index eff67678c5a6..56f3894d701a 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -516,7 +516,8 @@ static void smsc95xx_status(struct usbnet *dev, struct urb *urb)
 }
 
 /* Enable or disable Tx & Rx checksum offload engines */
-static int smsc95xx_set_features(struct net_device *netdev, u32 features)
+static int smsc95xx_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct usbnet *dev = netdev_priv(netdev);
 	u32 read_buf;
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index e662cbc8bfbd..77f723415c9c 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -262,11 +262,11 @@ vmxnet3_get_strings(struct net_device *netdev, u32 stringset, u8 *buf)
 	}
 }
 
-int vmxnet3_set_features(struct net_device *netdev, u32 features)
+int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 	unsigned long flags;
-	u32 changed = features ^ netdev->features;
+	netdev_features_t changed = features ^ netdev->features;
 
 	if (changed & (NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_HW_VLAN_RX)) {
 		if (features & NETIF_F_RXCSUM)
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index b18eac1dccaa..ed54797db191 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -401,7 +401,7 @@ void
 vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter);
 
 int
-vmxnet3_set_features(struct net_device *netdev, u32 features);
+vmxnet3_set_features(struct net_device *netdev, netdev_features_t features);
 
 int
 vmxnet3_create_queues(struct vmxnet3_adapter *adapter,
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 182562952c79..0b5c18feb303 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -165,7 +165,8 @@ static int xenvif_change_mtu(struct net_device *dev, int mtu)
 	return 0;
 }
 
-static u32 xenvif_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t xenvif_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct xenvif *vif = netdev_priv(dev);
 
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 226faab23603..a6e379fbf377 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -203,7 +203,7 @@ static void xennet_sysfs_delif(struct net_device *netdev);
 
 static int xennet_can_sg(struct net_device *dev)
 {
-	return dev->features & NETIF_F_SG;
+	return !!(dev->features & NETIF_F_SG);
 }
 
 
@@ -1190,7 +1190,8 @@ static void xennet_uninit(struct net_device *dev)
 	gnttab_free_grant_references(np->gref_rx_head);
 }
 
-static u32 xennet_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t xennet_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct netfront_info *np = netdev_priv(dev);
 	int val;
@@ -1216,7 +1217,8 @@ static u32 xennet_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int xennet_set_features(struct net_device *dev, u32 features)
+static int xennet_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
 		netdev_info(dev, "Reducing MTU because no SG offload");
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index e4c1176ee25b..a64f9e789b0a 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -3202,7 +3202,8 @@ static int qeth_l3_stop(struct net_device *dev)
 	return 0;
 }
 
-static u32 qeth_l3_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t qeth_l3_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct qeth_card *card = dev->ml_priv;
 
@@ -3216,7 +3217,8 @@ static u32 qeth_l3_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int qeth_l3_set_features(struct net_device *dev, u32 features)
+static int qeth_l3_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct qeth_card *card = dev->ml_priv;
 	u32 changed = dev->features ^ features;
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 32640edf4d78..af5238121826 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -10,6 +10,10 @@
 #ifndef _LINUX_NETDEV_FEATURES_H
 #define _LINUX_NETDEV_FEATURES_H
 
+#include <linux/types.h>
+
+typedef u32 netdev_features_t;
+
 /* Net device feature bits; if you change something,
  * also update netdev_features_strings[] in ethtool.c */
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9cf6e90b171d..b35ffd735ecc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -847,12 +847,13 @@ struct netdev_tc_txq {
  *	Called to release previously enslaved netdev.
  *
  *      Feature/offload setting functions.
- * u32 (*ndo_fix_features)(struct net_device *dev, u32 features);
+ * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
+ *		netdev_features_t features);
  *	Adjusts the requested feature flags according to device-specific
  *	constraints, and returns the resulting flags. Must not modify
  *	the device state.
  *
- * int (*ndo_set_features)(struct net_device *dev, u32 features);
+ * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features);
  *	Called to update device configuration to new features. Passed
  *	feature set might be less than what was returned by ndo_fix_features()).
  *	Must return >0 or -errno if it changed dev->features itself.
@@ -946,10 +947,10 @@ struct net_device_ops {
 						 struct net_device *slave_dev);
 	int			(*ndo_del_slave)(struct net_device *dev,
 						 struct net_device *slave_dev);
-	u32			(*ndo_fix_features)(struct net_device *dev,
-						    u32 features);
+	netdev_features_t	(*ndo_fix_features)(struct net_device *dev,
+						    netdev_features_t features);
 	int			(*ndo_set_features)(struct net_device *dev,
-						    u32 features);
+						    netdev_features_t features);
 };
 
 /*
@@ -999,13 +1000,13 @@ struct net_device {
 	struct list_head	unreg_list;
 
 	/* currently active device features */
-	u32			features;
+	netdev_features_t	features;
 	/* user-changeable features */
-	u32			hw_features;
+	netdev_features_t	hw_features;
 	/* user-requested features */
-	u32			wanted_features;
+	netdev_features_t	wanted_features;
 	/* mask of features inheritable by VLAN devices */
-	u32			vlan_features;
+	netdev_features_t	vlan_features;
 
 	/* Interface index. Unique device identifier	*/
 	int			ifindex;
@@ -1439,7 +1440,7 @@ struct packet_type {
 					 struct packet_type *,
 					 struct net_device *);
 	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
-						u32 features);
+						netdev_features_t features);
 	int			(*gso_send_check)(struct sk_buff *skb);
 	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
 					       struct sk_buff *skb);
@@ -2444,7 +2445,8 @@ extern int		netdev_set_master(struct net_device *dev, struct net_device *master)
 extern int netdev_set_bond_master(struct net_device *dev,
 				  struct net_device *master);
 extern int skb_checksum_help(struct sk_buff *skb);
-extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features);
+extern struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+	netdev_features_t features);
 #ifdef CONFIG_BUG
 extern void netdev_rx_csum_fault(struct net_device *dev);
 #else
@@ -2471,11 +2473,13 @@ extern const char *netdev_drivername(const struct net_device *dev);
 
 extern void linkwatch_run_queue(void);
 
-static inline u32 netdev_get_wanted_features(struct net_device *dev)
+static inline netdev_features_t netdev_get_wanted_features(
+	struct net_device *dev)
 {
 	return (dev->features & ~dev->hw_features) | dev->wanted_features;
 }
-u32 netdev_increment_features(u32 all, u32 one, u32 mask);
+netdev_features_t netdev_increment_features(netdev_features_t all,
+	netdev_features_t one, netdev_features_t mask);
 int __netdev_update_features(struct net_device *dev);
 void netdev_update_features(struct net_device *dev);
 void netdev_change_features(struct net_device *dev);
@@ -2483,21 +2487,22 @@ void netdev_change_features(struct net_device *dev);
 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 					struct net_device *dev);
 
-u32 netif_skb_features(struct sk_buff *skb);
+netdev_features_t netif_skb_features(struct sk_buff *skb);
 
-static inline int net_gso_ok(u32 features, int gso_type)
+static inline int net_gso_ok(netdev_features_t features, int gso_type)
 {
-	int feature = gso_type << NETIF_F_GSO_SHIFT;
+	netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT;
 	return (features & feature) == feature;
 }
 
-static inline int skb_gso_ok(struct sk_buff *skb, u32 features)
+static inline int skb_gso_ok(struct sk_buff *skb, netdev_features_t features)
 {
 	return net_gso_ok(features, skb_shinfo(skb)->gso_type) &&
 	       (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST));
 }
 
-static inline int netif_needs_gso(struct sk_buff *skb, int features)
+static inline int netif_needs_gso(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	return skb_is_gso(skb) && (!skb_gso_ok(skb, features) ||
 		unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index abad8a0941e8..a10e487c0864 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -30,6 +30,7 @@
 #include <linux/dmaengine.h>
 #include <linux/hrtimer.h>
 #include <linux/dma-mapping.h>
+#include <linux/netdev_features.h>
 
 /* Don't change this without changing skb_csum_unnecessary! */
 #define CHECKSUM_NONE 0
@@ -2106,7 +2107,8 @@ extern void	       skb_split(struct sk_buff *skb,
 extern int	       skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
 				 int shiftlen);
 
-extern struct sk_buff *skb_segment(struct sk_buff *skb, u32 features);
+extern struct sk_buff *skb_segment(struct sk_buff *skb,
+				   netdev_features_t features);
 
 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 				       int len, void *buffer)
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 6f7eb800974a..e182e13d6391 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -38,7 +38,7 @@ struct net_protocol {
 	void			(*err_handler)(struct sk_buff *skb, u32 info);
 	int			(*gso_send_check)(struct sk_buff *skb);
 	struct sk_buff	       *(*gso_segment)(struct sk_buff *skb,
-					       u32 features);
+					       netdev_features_t features);
 	struct sk_buff	      **(*gro_receive)(struct sk_buff **head,
 					       struct sk_buff *skb);
 	int			(*gro_complete)(struct sk_buff *skb);
@@ -57,7 +57,7 @@ struct inet6_protocol {
 
 	int	(*gso_send_check)(struct sk_buff *skb);
 	struct sk_buff *(*gso_segment)(struct sk_buff *skb,
-				       u32 features);
+				       netdev_features_t features);
 	struct sk_buff **(*gro_receive)(struct sk_buff **head,
 					struct sk_buff *skb);
 	int	(*gro_complete)(struct sk_buff *skb);
diff --git a/include/net/sock.h b/include/net/sock.h
index 67cd4581b6da..1331008ad885 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -306,8 +306,8 @@ struct sock {
 	kmemcheck_bitfield_end(flags);
 	int			sk_wmem_queued;
 	gfp_t			sk_allocation;
-	int			sk_route_caps;
-	int			sk_route_nocaps;
+	netdev_features_t	sk_route_caps;
+	netdev_features_t	sk_route_nocaps;
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
 	int			sk_rcvlowat;
@@ -1393,7 +1393,7 @@ static inline int sk_can_gso(const struct sock *sk)
 
 extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
 
-static inline void sk_nocaps_add(struct sock *sk, int flags)
+static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
 {
 	sk->sk_route_nocaps |= flags;
 	sk->sk_route_caps &= ~flags;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index bb18c4d69aba..113160b84588 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1430,7 +1430,8 @@ extern struct request_sock_ops tcp6_request_sock_ops;
 extern void tcp_v4_destroy_sock(struct sock *sk);
 
 extern int tcp_v4_gso_send_check(struct sk_buff *skb);
-extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features);
+extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
+				       netdev_features_t features);
 extern struct sk_buff **tcp_gro_receive(struct sk_buff **head,
 					struct sk_buff *skb);
 extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head,
diff --git a/include/net/udp.h b/include/net/udp.h
index 3b285f402f48..f54a5156b248 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -258,5 +258,6 @@ extern void udp4_proc_exit(void);
 extern void udp_init(void);
 
 extern int udp4_ufo_send_check(struct sk_buff *skb);
-extern struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features);
+extern struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
+	netdev_features_t features);
 #endif	/* _UDP_H */
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 993599e66e5a..8e75003d62f6 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -777,6 +777,18 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
 	return string(buf, end, uuid, spec);
 }
 
+static
+char *netdev_feature_string(char *buf, char *end, const u8 *addr,
+		      struct printf_spec spec)
+{
+	spec.flags |= SPECIAL | SMALL | ZEROPAD;
+	if (spec.field_width == -1)
+		spec.field_width = 2 + 2 * sizeof(netdev_features_t);
+	spec.base = 16;
+
+	return number(buf, end, *(const netdev_features_t *)addr, spec);
+}
+
 int kptr_restrict __read_mostly;
 
 /*
@@ -824,6 +836,7 @@ int kptr_restrict __read_mostly;
  *       Do not use this feature without some mechanism to verify the
  *       correctness of the format string and va_list arguments.
  * - 'K' For a kernel pointer that should be hidden from unprivileged users
+ * - 'NF' For a netdev_features_t
  *
  * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
  * function pointers are really function descriptors, which contain a
@@ -896,6 +909,12 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 		       has_capability_noaudit(current, CAP_SYSLOG))))
 			ptr = NULL;
 		break;
+	case 'N':
+		switch (fmt[1]) {
+		case 'F':
+			return netdev_feature_string(buf, end, ptr, spec);
+		}
+		break;
 	}
 	spec.flags |= SMALL;
 	if (spec.field_width == -1) {
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 6a4e0cb897b7..2b5fcde1f629 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -591,7 +591,8 @@ static void vlan_dev_uninit(struct net_device *dev)
 	}
 }
 
-static u32 vlan_dev_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
 	u32 old_features = features;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index feb77ea7b58e..772bad34794c 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -186,7 +186,8 @@ static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 	strcpy(info->bus_info, "N/A");
 }
 
-static u32 br_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t br_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct net_bridge *br = netdev_priv(dev);
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f603e5b0b930..0a942fbccc9a 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -296,10 +296,11 @@ int br_min_mtu(const struct net_bridge *br)
 /*
  * Recomputes features using slave's features
  */
-u32 br_features_recompute(struct net_bridge *br, u32 features)
+netdev_features_t br_features_recompute(struct net_bridge *br,
+	netdev_features_t features)
 {
 	struct net_bridge_port *p;
-	u32 mask;
+	netdev_features_t mask;
 
 	if (list_empty(&br->port_list))
 		return features;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index d7d6fb05411f..4027029aa5e4 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -387,7 +387,8 @@ extern int br_add_if(struct net_bridge *br,
 extern int br_del_if(struct net_bridge *br,
 	      struct net_device *dev);
 extern int br_min_mtu(const struct net_bridge *br);
-extern u32 br_features_recompute(struct net_bridge *br, u32 features);
+extern netdev_features_t br_features_recompute(struct net_bridge *br,
+	netdev_features_t features);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 185e246d61fd..f1cca59c4638 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1914,7 +1914,8 @@ EXPORT_SYMBOL(skb_checksum_help);
  *	It may return NULL if the skb requires no segmentation.  This is
  *	only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_type *ptype;
@@ -1944,9 +1945,9 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
 		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
 			dev->ethtool_ops->get_drvinfo(dev, &info);
 
-		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
-		     info.driver, dev ? dev->features : 0L,
-		     skb->sk ? skb->sk->sk_route_caps : 0L,
+		WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d ip_summed=%d\n",
+		     info.driver, dev ? &dev->features : NULL,
+		     skb->sk ? &skb->sk->sk_route_caps : NULL,
 		     skb->len, skb->data_len, skb->ip_summed);
 
 		if (skb_header_cloned(skb) &&
@@ -2055,7 +2056,7 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
  *	This function segments the given skb and stores the list of segments
  *	in skb->next.
  */
-static int dev_gso_segment(struct sk_buff *skb, int features)
+static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
 {
 	struct sk_buff *segs;
 
@@ -2094,7 +2095,7 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 	}
 }
 
-static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
 {
 	return ((features & NETIF_F_GEN_CSUM) ||
 		((features & NETIF_F_V4_CSUM) &&
@@ -2105,7 +2106,8 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 		 protocol == htons(ETH_P_FCOE)));
 }
 
-static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
+static netdev_features_t harmonize_features(struct sk_buff *skb,
+	__be16 protocol, netdev_features_t features)
 {
 	if (!can_checksum_protocol(features, protocol)) {
 		features &= ~NETIF_F_ALL_CSUM;
@@ -2117,10 +2119,10 @@ static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features
 	return features;
 }
 
-u32 netif_skb_features(struct sk_buff *skb)
+netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
 	__be16 protocol = skb->protocol;
-	u32 features = skb->dev->features;
+	netdev_features_t features = skb->dev->features;
 
 	if (protocol == htons(ETH_P_8021Q)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2166,7 +2168,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 	unsigned int skb_len;
 
 	if (likely(!skb->next)) {
-		u32 features;
+		netdev_features_t features;
 
 		/*
 		 * If device doesn't need skb->dst, release it right now while
@@ -5350,7 +5352,8 @@ static void rollback_registered(struct net_device *dev)
 	list_del(&single);
 }
 
-static u32 netdev_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t netdev_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	/* Fix illegal checksum combinations */
 	if ((features & NETIF_F_HW_CSUM) &&
@@ -5412,7 +5415,7 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features)
 
 int __netdev_update_features(struct net_device *dev)
 {
-	u32 features;
+	netdev_features_t features;
 	int err = 0;
 
 	ASSERT_RTNL();
@@ -5428,16 +5431,16 @@ int __netdev_update_features(struct net_device *dev)
 	if (dev->features == features)
 		return 0;
 
-	netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n",
-		dev->features, features);
+	netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
+		&dev->features, &features);
 
 	if (dev->netdev_ops->ndo_set_features)
 		err = dev->netdev_ops->ndo_set_features(dev, features);
 
 	if (unlikely(err < 0)) {
 		netdev_err(dev,
-			"set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
-			err, features, dev->features);
+			"set_features() failed (%d); wanted %pNF, left %pNF\n",
+			err, &features, &dev->features);
 		return -1;
 	}
 
@@ -6361,7 +6364,8 @@ static int dev_cpu_callback(struct notifier_block *nfb,
  *	@one to the master device with current feature set @all.  Will not
  *	enable anything that is off in @mask. Returns the new feature set.
  */
-u32 netdev_increment_features(u32 all, u32 one, u32 mask)
+netdev_features_t netdev_increment_features(netdev_features_t all,
+	netdev_features_t one, netdev_features_t mask)
 {
 	if (mask & NETIF_F_GEN_CSUM)
 		mask |= NETIF_F_ALL_CSUM;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a354919a32ac..f135f1c92c9d 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -171,7 +171,7 @@ static void __ethtool_get_strings(struct net_device *dev,
 		ops->get_strings(dev, stringset, data);
 }
 
-static u32 ethtool_get_feature_mask(u32 eth_cmd)
+static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
 {
 	/* feature masks of legacy discrete ethtool ops */
 
@@ -205,7 +205,7 @@ static u32 ethtool_get_feature_mask(u32 eth_cmd)
 static int ethtool_get_one_feature(struct net_device *dev,
 	char __user *useraddr, u32 ethcmd)
 {
-	u32 mask = ethtool_get_feature_mask(ethcmd);
+	netdev_features_t mask = ethtool_get_feature_mask(ethcmd);
 	struct ethtool_value edata = {
 		.cmd = ethcmd,
 		.data = !!(dev->features & mask),
@@ -220,7 +220,7 @@ static int ethtool_set_one_feature(struct net_device *dev,
 	void __user *useraddr, u32 ethcmd)
 {
 	struct ethtool_value edata;
-	u32 mask;
+	netdev_features_t mask;
 
 	if (copy_from_user(&edata, useraddr, sizeof(edata)))
 		return -EFAULT;
@@ -260,8 +260,7 @@ static u32 __ethtool_get_flags(struct net_device *dev)
 
 static int __ethtool_set_flags(struct net_device *dev, u32 data)
 {
-	u32 features = 0;
-	u32 changed;
+	netdev_features_t features = 0, changed;
 
 	if (data & ~ETH_ALL_FLAGS)
 		return -EINVAL;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8d2c5b32f172..cbc003b2914a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2670,7 +2670,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
  *	a pointer to the first in a list of new skbs for the segments.
  *	In case of error it returns ERR_PTR(err).
  */
-struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 {
 	struct sk_buff *segs = NULL;
 	struct sk_buff *tail = NULL;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b2bbcd0ebd19..15dc4c4828de 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1250,7 +1250,8 @@ out:
 	return err;
 }
 
-static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features)
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct iphdr *iph;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 34f5db1e1c8b..50c359645665 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2653,7 +2653,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL(compat_tcp_getsockopt);
 #endif
 
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct tcphdr *th;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6854f581313f..b867ea23ece9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2247,7 +2247,8 @@ int udp4_ufo_send_check(struct sk_buff *skb)
 	return 0;
 }
 
-struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features)
+struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	unsigned int mss;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 282dc7a91f32..ee3319487c4f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -769,7 +769,8 @@ out:
 	return err;
 }
 
-static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u32 features)
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct ipv6hdr *ipv6h;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b4a4a15fa96f..ccfb0451b1c3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1300,7 +1300,8 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
 	return 0;
 }
 
-static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features)
+static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	unsigned int mss;
-- 
cgit v1.2.3


From a19f2a6df28e0ccb4103b77cc17c03b62f4d573e Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 15 Nov 2011 15:29:55 +0000
Subject: net: Define enum for net device features.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Define feature values by bit position instead of direct 2**i values
and force the values to be of type netdev_features_t.

Cleaned and extended from patch by Mahesh Bandewar <maheshb@google.com>:
+ added netdev_features_t casts
+ included bits under NETIF_F_GSO_MASK
+ moved feature #defines out of struct net_device definition

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h | 133 ++++++++++++++++++++++++++++------------
 1 file changed, 93 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index af5238121826..04ac8f8433e9 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -14,52 +14,105 @@
 
 typedef u32 netdev_features_t;
 
-/* Net device feature bits; if you change something,
- * also update netdev_features_strings[] in ethtool.c */
-
-#define NETIF_F_SG		1	/* Scatter/gather IO. */
-#define NETIF_F_IP_CSUM		2	/* Can checksum TCP/UDP over IPv4. */
-#define NETIF_F_NO_CSUM		4	/* Does not require checksum. F.e. loopack. */
-#define NETIF_F_HW_CSUM		8	/* Can checksum all the packets. */
-#define NETIF_F_IPV6_CSUM	16	/* Can checksum TCP/UDP over IPV6 */
-#define NETIF_F_HIGHDMA		32	/* Can DMA to high memory. */
-#define NETIF_F_FRAGLIST	64	/* Scatter/gather IO. */
-#define NETIF_F_HW_VLAN_TX	128	/* Transmit VLAN hw acceleration */
-#define NETIF_F_HW_VLAN_RX	256	/* Receive VLAN hw acceleration */
-#define NETIF_F_HW_VLAN_FILTER	512	/* Receive filtering on VLAN */
-#define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
-#define NETIF_F_GSO		2048	/* Enable software GSO. */
-#define NETIF_F_LLTX		4096	/* LockLess TX - deprecated. Please */
+enum {
+	NETIF_F_SG_BIT,			/* Scatter/gather IO. */
+	NETIF_F_IP_CSUM_BIT,		/* Can checksum TCP/UDP over IPv4. */
+	NETIF_F_NO_CSUM_BIT,		/* Does not require checksum. F.e. loopack. */
+	NETIF_F_HW_CSUM_BIT,		/* Can checksum all the packets. */
+	NETIF_F_IPV6_CSUM_BIT,		/* Can checksum TCP/UDP over IPV6 */
+	NETIF_F_HIGHDMA_BIT,		/* Can DMA to high memory. */
+	NETIF_F_FRAGLIST_BIT,		/* Scatter/gather IO. */
+	NETIF_F_HW_VLAN_TX_BIT,		/* Transmit VLAN hw acceleration */
+	NETIF_F_HW_VLAN_RX_BIT,		/* Receive VLAN hw acceleration */
+	NETIF_F_HW_VLAN_FILTER_BIT,	/* Receive filtering on VLAN */
+	NETIF_F_VLAN_CHALLENGED_BIT,	/* Device cannot handle VLAN packets */
+	NETIF_F_GSO_BIT,		/* Enable software GSO. */
+	NETIF_F_LLTX_BIT,		/* LockLess TX - deprecated. Please */
 					/* do not use LLTX in new drivers */
-#define NETIF_F_NETNS_LOCAL	8192	/* Does not change network namespaces */
-#define NETIF_F_GRO		16384	/* Generic receive offload */
-#define NETIF_F_LRO		32768	/* large receive offload */
-
-/* the GSO_MASK reserves bits 16 through 23 */
-#define NETIF_F_FCOE_CRC	(1 << 24) /* FCoE CRC32 */
-#define NETIF_F_SCTP_CSUM	(1 << 25) /* SCTP checksum offload */
-#define NETIF_F_FCOE_MTU	(1 << 26) /* Supports max FCoE MTU, 2158 bytes*/
-#define NETIF_F_NTUPLE		(1 << 27) /* N-tuple filters supported */
-#define NETIF_F_RXHASH		(1 << 28) /* Receive hashing offload */
-#define NETIF_F_RXCSUM		(1 << 29) /* Receive checksumming offload */
-#define NETIF_F_NOCACHE_COPY	(1 << 30) /* Use no-cache copyfromuser */
-#define NETIF_F_LOOPBACK	(1 << 31) /* Enable loopback */
-
-/* Segmentation offload features */
-#define NETIF_F_GSO_SHIFT	16
-#define NETIF_F_GSO_MASK	0x00ff0000
-#define NETIF_F_TSO		(SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
-#define NETIF_F_UFO		(SKB_GSO_UDP << NETIF_F_GSO_SHIFT)
-#define NETIF_F_GSO_ROBUST	(SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
-#define NETIF_F_TSO_ECN		(SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT)
-#define NETIF_F_TSO6		(SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT)
-#define NETIF_F_FSO		(SKB_GSO_FCOE << NETIF_F_GSO_SHIFT)
+	NETIF_F_NETNS_LOCAL_BIT,	/* Does not change network namespaces */
+	NETIF_F_GRO_BIT,		/* Generic receive offload */
+	NETIF_F_LRO_BIT,		/* large receive offload */
+
+	/**/NETIF_F_GSO_SHIFT,		/* keep the order of SKB_GSO_* bits */
+	NETIF_F_TSO_BIT			/* ... TCPv4 segmentation */
+		= NETIF_F_GSO_SHIFT,
+	NETIF_F_UFO_BIT,		/* ... UDPv4 fragmentation */
+	NETIF_F_GSO_ROBUST_BIT,		/* ... ->SKB_GSO_DODGY */
+	NETIF_F_TSO_ECN_BIT,		/* ... TCP ECN support */
+	NETIF_F_TSO6_BIT,		/* ... TCPv6 segmentation */
+	NETIF_F_FSO_BIT,		/* ... FCoE segmentation */
+	NETIF_F_GSO_RESERVED1,		/* ... free (fill GSO_MASK to 8 bits) */
+	/**/NETIF_F_GSO_LAST,		/* [can't be last bit, see GSO_MASK] */
+	NETIF_F_GSO_RESERVED2		/* ... free (fill GSO_MASK to 8 bits) */
+		= NETIF_F_GSO_LAST,
+
+	NETIF_F_FCOE_CRC_BIT,		/* FCoE CRC32 */
+	NETIF_F_SCTP_CSUM_BIT,		/* SCTP checksum offload */
+	NETIF_F_FCOE_MTU_BIT,		/* Supports max FCoE MTU, 2158 bytes*/
+	NETIF_F_NTUPLE_BIT,		/* N-tuple filters supported */
+	NETIF_F_RXHASH_BIT,		/* Receive hashing offload */
+	NETIF_F_RXCSUM_BIT,		/* Receive checksumming offload */
+	NETIF_F_NOCACHE_COPY_BIT,	/* Use no-cache copyfromuser */
+	NETIF_F_LOOPBACK_BIT,		/* Enable loopback */
+
+	/*
+	 * Add your fresh new feature above and remember to update
+	 * netdev_features_strings[] in net/core/ethtool.c and maybe
+	 * some feature mask #defines below. Please also describe it
+	 * in Documentation/networking/netdev-features.txt.
+	 */
+
+	/**/NETDEV_FEATURE_COUNT
+};
+
+/* copy'n'paste compression ;) */
+#define __NETIF_F_BIT(bit)	((netdev_features_t)1 << (bit))
+#define __NETIF_F(name)		__NETIF_F_BIT(NETIF_F_##name##_BIT)
+
+#define NETIF_F_FCOE_CRC	__NETIF_F(FCOE_CRC)
+#define NETIF_F_FCOE_MTU	__NETIF_F(FCOE_MTU)
+#define NETIF_F_FRAGLIST	__NETIF_F(FRAGLIST)
+#define NETIF_F_FSO		__NETIF_F(FSO)
+#define NETIF_F_GRO		__NETIF_F(GRO)
+#define NETIF_F_GSO		__NETIF_F(GSO)
+#define NETIF_F_GSO_ROBUST	__NETIF_F(GSO_ROBUST)
+#define NETIF_F_HIGHDMA		__NETIF_F(HIGHDMA)
+#define NETIF_F_HW_CSUM		__NETIF_F(HW_CSUM)
+#define NETIF_F_HW_VLAN_FILTER	__NETIF_F(HW_VLAN_FILTER)
+#define NETIF_F_HW_VLAN_RX	__NETIF_F(HW_VLAN_RX)
+#define NETIF_F_HW_VLAN_TX	__NETIF_F(HW_VLAN_TX)
+#define NETIF_F_IP_CSUM		__NETIF_F(IP_CSUM)
+#define NETIF_F_IPV6_CSUM	__NETIF_F(IPV6_CSUM)
+#define NETIF_F_LLTX		__NETIF_F(LLTX)
+#define NETIF_F_LOOPBACK	__NETIF_F(LOOPBACK)
+#define NETIF_F_LRO		__NETIF_F(LRO)
+#define NETIF_F_NETNS_LOCAL	__NETIF_F(NETNS_LOCAL)
+#define NETIF_F_NOCACHE_COPY	__NETIF_F(NOCACHE_COPY)
+#define NETIF_F_NO_CSUM		__NETIF_F(NO_CSUM)
+#define NETIF_F_NTUPLE		__NETIF_F(NTUPLE)
+#define NETIF_F_RXCSUM		__NETIF_F(RXCSUM)
+#define NETIF_F_RXHASH		__NETIF_F(RXHASH)
+#define NETIF_F_SCTP_CSUM	__NETIF_F(SCTP_CSUM)
+#define NETIF_F_SG		__NETIF_F(SG)
+#define NETIF_F_TSO6		__NETIF_F(TSO6)
+#define NETIF_F_TSO_ECN		__NETIF_F(TSO_ECN)
+#define NETIF_F_TSO		__NETIF_F(TSO)
+#define NETIF_F_UFO		__NETIF_F(UFO)
+#define NETIF_F_VLAN_CHALLENGED	__NETIF_F(VLAN_CHALLENGED)
 
 /* Features valid for ethtool to change */
 /* = all defined minus driver/device-class-related */
 #define NETIF_F_NEVER_CHANGE	(NETIF_F_VLAN_CHALLENGED | \
 				 NETIF_F_LLTX | NETIF_F_NETNS_LOCAL)
-#define NETIF_F_ETHTOOL_BITS	(0xff3fffff & ~NETIF_F_NEVER_CHANGE)
+
+/* remember that ((t)1 << t_BITS) is undefined in C99 */
+#define NETIF_F_ETHTOOL_BITS	((__NETIF_F_BIT(NETDEV_FEATURE_COUNT - 1) | \
+		(__NETIF_F_BIT(NETDEV_FEATURE_COUNT - 1) - 1)) & \
+		~NETIF_F_NEVER_CHANGE)
+
+/* Segmentation offload feature mask */
+#define NETIF_F_GSO_MASK	(__NETIF_F_BIT(NETIF_F_GSO_LAST + 1) - \
+		__NETIF_F_BIT(NETIF_F_GSO_SHIFT))
 
 /* List of features with software fallbacks. */
 #define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | \
-- 
cgit v1.2.3


From a861a8b233e9024303fb8e73e465e81ad7119d5a Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 15 Nov 2011 15:29:55 +0000
Subject: net: extend netdev_features_t to 64 bits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 04ac8f8433e9..20e3a1f9892d 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -12,7 +12,7 @@
 
 #include <linux/types.h>
 
-typedef u32 netdev_features_t;
+typedef u64 netdev_features_t;
 
 enum {
 	NETIF_F_SG_BIT,			/* Scatter/gather IO. */
-- 
cgit v1.2.3


From 34324dc2bf27c1773045fea63cb11f7e2a6ad2b9 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 15 Nov 2011 15:29:55 +0000
Subject: net: remove NETIF_F_NO_CSUM feature bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only distinct use is checking if NETIF_F_NOCACHE_COPY should be
enabled by default. The check heuristics is altered a bit here,
so it hits other people than before. The default shouldn't be
trusted for performance-critical cases anyway.

For all other uses NETIF_F_NO_CSUM is equivalent to NETIF_F_HW_CSUM.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ieee802154/fakehard.c   |  2 +-
 drivers/misc/sgi-xp/xpnet.c     |  2 +-
 drivers/net/bonding/bond_main.c |  2 +-
 drivers/net/can/dev.c           |  2 +-
 drivers/net/can/slcan.c         |  2 +-
 drivers/net/dummy.c             |  2 +-
 drivers/net/ifb.c               |  2 +-
 drivers/net/loopback.c          |  2 +-
 drivers/net/veth.c              |  2 +-
 include/linux/netdev_features.h |  5 ++---
 include/linux/skbuff.h          |  1 -
 net/bridge/br_device.c          |  4 ++--
 net/core/dev.c                  | 21 ++++++---------------
 net/core/ethtool.c              |  1 -
 14 files changed, 19 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ieee802154/fakehard.c b/drivers/ieee802154/fakehard.c
index eb0e2ccc79ae..73d453159408 100644
--- a/drivers/ieee802154/fakehard.c
+++ b/drivers/ieee802154/fakehard.c
@@ -343,7 +343,7 @@ static void ieee802154_fake_setup(struct net_device *dev)
 {
 	dev->addr_len		= IEEE802154_ADDR_LEN;
 	memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN);
-	dev->features		= NETIF_F_NO_CSUM;
+	dev->features		= NETIF_F_HW_CSUM;
 	dev->needed_tailroom	= 2; /* FCS */
 	dev->mtu		= 127;
 	dev->tx_queue_len	= 10;
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 42f067347bc7..3fac67a5204c 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -576,7 +576,7 @@ xpnet_init(void)
 	 * report an error if the data is not retrievable and the
 	 * packet will be dropped.
 	 */
-	xpnet_device->features = NETIF_F_NO_CSUM;
+	xpnet_device->features = NETIF_F_HW_CSUM;
 
 	result = register_netdev(xpnet_device);
 	if (result != 0) {
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index ac5337a04639..25a44d94be17 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4361,7 +4361,7 @@ static void bond_setup(struct net_device *bond_dev)
 				NETIF_F_HW_VLAN_RX |
 				NETIF_F_HW_VLAN_FILTER;
 
-	bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM);
+	bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM);
 	bond_dev->features |= bond_dev->hw_features;
 }
 
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 25695bde0549..120f1ab5a2ce 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -454,7 +454,7 @@ static void can_setup(struct net_device *dev)
 
 	/* New-style flags. */
 	dev->flags = IFF_NOARP;
-	dev->features = NETIF_F_NO_CSUM;
+	dev->features = NETIF_F_HW_CSUM;
 }
 
 struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index a979b006f459..3f1ebcc2cb83 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -387,7 +387,7 @@ static void slc_setup(struct net_device *dev)
 
 	/* New-style flags. */
 	dev->flags		= IFF_NOARP;
-	dev->features           = NETIF_F_NO_CSUM;
+	dev->features           = NETIF_F_HW_CSUM;
 }
 
 /******************************************
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index a7c5e8831e8c..087648ea1edb 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -134,7 +134,7 @@ static void dummy_setup(struct net_device *dev)
 	dev->flags |= IFF_NOARP;
 	dev->flags &= ~IFF_MULTICAST;
 	dev->features	|= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO;
-	dev->features	|= NETIF_F_NO_CSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX;
+	dev->features	|= NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX;
 	random_ether_addr(dev->dev_addr);
 }
 
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 46b5f5fd686b..e05b645bbc32 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -164,7 +164,7 @@ static const struct net_device_ops ifb_netdev_ops = {
 	.ndo_validate_addr = eth_validate_addr,
 };
 
-#define IFB_FEATURES (NETIF_F_NO_CSUM | NETIF_F_SG  | NETIF_F_FRAGLIST	| \
+#define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG  | NETIF_F_FRAGLIST	| \
 		      NETIF_F_TSO_ECN | NETIF_F_TSO | NETIF_F_TSO6	| \
 		      NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_TX)
 
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 4ce9e5f2c069..b71998d0b5b4 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -169,7 +169,7 @@ static void loopback_setup(struct net_device *dev)
 	dev->features 		= NETIF_F_SG | NETIF_F_FRAGLIST
 		| NETIF_F_ALL_TSO
 		| NETIF_F_UFO
-		| NETIF_F_NO_CSUM
+		| NETIF_F_HW_CSUM
 		| NETIF_F_RXCSUM
 		| NETIF_F_HIGHDMA
 		| NETIF_F_LLTX
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index d32a75fb6d21..b576812bdc59 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -271,7 +271,7 @@ static void veth_setup(struct net_device *dev)
 	dev->features |= NETIF_F_LLTX;
 	dev->destructor = veth_dev_free;
 
-	dev->hw_features = NETIF_F_NO_CSUM | NETIF_F_SG | NETIF_F_RXCSUM;
+	dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_RXCSUM;
 }
 
 /*
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 20e3a1f9892d..77f5202977ce 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -17,7 +17,7 @@ typedef u64 netdev_features_t;
 enum {
 	NETIF_F_SG_BIT,			/* Scatter/gather IO. */
 	NETIF_F_IP_CSUM_BIT,		/* Can checksum TCP/UDP over IPv4. */
-	NETIF_F_NO_CSUM_BIT,		/* Does not require checksum. F.e. loopack. */
+	__UNUSED_NETIF_F_1,
 	NETIF_F_HW_CSUM_BIT,		/* Can checksum all the packets. */
 	NETIF_F_IPV6_CSUM_BIT,		/* Can checksum TCP/UDP over IPV6 */
 	NETIF_F_HIGHDMA_BIT,		/* Can DMA to high memory. */
@@ -88,7 +88,6 @@ enum {
 #define NETIF_F_LRO		__NETIF_F(LRO)
 #define NETIF_F_NETNS_LOCAL	__NETIF_F(NETNS_LOCAL)
 #define NETIF_F_NOCACHE_COPY	__NETIF_F(NOCACHE_COPY)
-#define NETIF_F_NO_CSUM		__NETIF_F(NO_CSUM)
 #define NETIF_F_NTUPLE		__NETIF_F(NTUPLE)
 #define NETIF_F_RXCSUM		__NETIF_F(RXCSUM)
 #define NETIF_F_RXHASH		__NETIF_F(RXHASH)
@@ -118,7 +117,7 @@ enum {
 #define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | \
 				 NETIF_F_TSO6 | NETIF_F_UFO)
 
-#define NETIF_F_GEN_CSUM	(NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)
+#define NETIF_F_GEN_CSUM	NETIF_F_HW_CSUM
 #define NETIF_F_V4_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)
 #define NETIF_F_V6_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
 #define NETIF_F_ALL_CSUM	(NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a10e487c0864..b93117389cfe 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -88,7 +88,6 @@
  *	at device setup time.
  *	NETIF_F_HW_CSUM	- it is clever device, it is able to checksum
  *			  everything.
- *	NETIF_F_NO_CSUM - loopback or reliable single hop media.
  *	NETIF_F_IP_CSUM - device is dumb. It is able to csum only
  *			  TCP/UDP over IPv4. Sigh. Vendors like this
  *			  way by an unknown reason. Though, see comment above
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 772bad34794c..a3754ac262c3 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -342,10 +342,10 @@ void br_dev_setup(struct net_device *dev)
 	dev->priv_flags = IFF_EBRIDGE;
 
 	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
-			NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
+			NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX |
 			NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_TX;
 	dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
-			   NETIF_F_GSO_MASK | NETIF_F_NO_CSUM |
+			   NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
 			   NETIF_F_HW_VLAN_TX;
 
 	br->dev = dev;
diff --git a/net/core/dev.c b/net/core/dev.c
index f1cca59c4638..26c49d55e79d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5362,12 +5362,6 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
 		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
 	}
 
-	if ((features & NETIF_F_NO_CSUM) &&
-	    (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-		netdev_warn(dev, "mixed no checksumming and other settings.\n");
-		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
-	}
-
 	/* Fix illegal SG+CSUM combinations. */
 	if ((features & NETIF_F_SG) &&
 	    !(features & NETIF_F_ALL_CSUM)) {
@@ -5624,11 +5618,12 @@ int register_netdevice(struct net_device *dev)
 	dev->wanted_features = dev->features & dev->hw_features;
 
 	/* Turn on no cache copy if HW is doing checksum */
-	dev->hw_features |= NETIF_F_NOCACHE_COPY;
-	if ((dev->features & NETIF_F_ALL_CSUM) &&
-	    !(dev->features & NETIF_F_NO_CSUM)) {
-		dev->wanted_features |= NETIF_F_NOCACHE_COPY;
-		dev->features |= NETIF_F_NOCACHE_COPY;
+	if (!(dev->flags & IFF_LOOPBACK)) {
+		dev->hw_features |= NETIF_F_NOCACHE_COPY;
+		if (dev->features & NETIF_F_ALL_CSUM) {
+			dev->wanted_features |= NETIF_F_NOCACHE_COPY;
+			dev->features |= NETIF_F_NOCACHE_COPY;
+		}
 	}
 
 	/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
@@ -6374,10 +6369,6 @@ netdev_features_t netdev_increment_features(netdev_features_t all,
 	all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
 	all &= one | ~NETIF_F_ALL_FOR_ALL;
 
-	/* If device needs checksumming, downgrade to it. */
-	if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM))
-		all &= ~NETIF_F_NO_CSUM;
-
 	/* If one device supports hw checksumming, set for all. */
 	if (all & NETIF_F_GEN_CSUM)
 		all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index bbf84fe0096e..d2eff9ec88be 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -43,7 +43,6 @@ EXPORT_SYMBOL(ethtool_op_get_link);
 static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
 	[NETIF_F_SG_BIT] =               "tx-scatter-gather",
 	[NETIF_F_IP_CSUM_BIT] =          "tx-checksum-ipv4",
-	[NETIF_F_NO_CSUM_BIT] =          "tx-checksum-unneeded",
 	[NETIF_F_HW_CSUM_BIT] =          "tx-checksum-ip-generic",
 	[NETIF_F_IPV6_CSUM_BIT] =        "tx-checksum-ipv6",
 	[NETIF_F_HIGHDMA_BIT] =          "highdma",
-- 
cgit v1.2.3


From 61dc3461b9549bc10a2f16d254250680cadafcce Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 16 Nov 2011 11:09:08 +0000
Subject: team: convert overall spinlock to mutex

No need to have spinlock for this purpose. So convert this to mutex and
avoid current schedule while atomic problems in netlink code.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 32 ++++++++++++++++----------------
 include/linux/if_team.h |  2 +-
 2 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index e5390c73a75d..7db219cd3153 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -443,9 +443,9 @@ static void __team_compute_features(struct team *team)
 
 static void team_compute_features(struct team *team)
 {
-	spin_lock(&team->lock);
+	mutex_lock(&team->lock);
 	__team_compute_features(team);
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 }
 
 static int team_port_enter(struct team *team, struct team_port *port)
@@ -647,7 +647,7 @@ static int team_init(struct net_device *dev)
 	int i;
 
 	team->dev = dev;
-	spin_lock_init(&team->lock);
+	mutex_init(&team->lock);
 
 	team->pcpu_stats = alloc_percpu(struct team_pcpu_stats);
 	if (!team->pcpu_stats)
@@ -672,13 +672,13 @@ static void team_uninit(struct net_device *dev)
 	struct team_port *port;
 	struct team_port *tmp;
 
-	spin_lock(&team->lock);
+	mutex_lock(&team->lock);
 	list_for_each_entry_safe(port, tmp, &team->port_list, list)
 		team_port_del(team, port->dev);
 
 	__team_change_mode(team, NULL); /* cleanup */
 	__team_options_unregister(team, team_options, ARRAY_SIZE(team_options));
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 }
 
 static void team_destructor(struct net_device *dev)
@@ -784,7 +784,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 	 * Alhough this is reader, it's guarded by team lock. It's not possible
 	 * to traverse list in reverse under rcu_read_lock
 	 */
-	spin_lock(&team->lock);
+	mutex_lock(&team->lock);
 	list_for_each_entry(port, &team->port_list, list) {
 		err = dev_set_mtu(port->dev, new_mtu);
 		if (err) {
@@ -793,7 +793,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 			goto unwind;
 		}
 	}
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 
 	dev->mtu = new_mtu;
 
@@ -802,7 +802,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 unwind:
 	list_for_each_entry_continue_reverse(port, &team->port_list, list)
 		dev_set_mtu(port->dev, dev->mtu);
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 
 	return err;
 }
@@ -880,9 +880,9 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev)
 	struct team *team = netdev_priv(dev);
 	int err;
 
-	spin_lock(&team->lock);
+	mutex_lock(&team->lock);
 	err = team_port_add(team, port_dev);
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 	return err;
 }
 
@@ -891,9 +891,9 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev)
 	struct team *team = netdev_priv(dev);
 	int err;
 
-	spin_lock(&team->lock);
+	mutex_lock(&team->lock);
 	err = team_port_del(team, port_dev);
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 	return err;
 }
 
@@ -1064,13 +1064,13 @@ static struct team *team_nl_team_get(struct genl_info *info)
 	}
 
 	team = netdev_priv(dev);
-	spin_lock(&team->lock);
+	mutex_lock(&team->lock);
 	return team;
 }
 
 static void team_nl_team_put(struct team *team)
 {
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 	dev_put(team->dev);
 }
 
@@ -1486,9 +1486,9 @@ static void team_port_change_check(struct team_port *port, bool linkup)
 {
 	struct team *team = port->team;
 
-	spin_lock(&team->lock);
+	mutex_lock(&team->lock);
 	__team_port_change_check(port, linkup);
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 }
 
 /************************************
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 14f6388f5460..a6eac126a99a 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -92,7 +92,7 @@ struct team {
 	struct net_device *dev; /* associated netdevice */
 	struct team_pcpu_stats __percpu *pcpu_stats;
 
-	spinlock_t lock; /* used for overall locking, e.g. port lists write */
+	struct mutex lock; /* used for overall locking, e.g. port lists write */
 
 	/*
 	 * port lists with port count
-- 
cgit v1.2.3


From 358b838291f618278080bbed435b755f9b46748e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 16 Nov 2011 11:09:09 +0000
Subject: team: replicate options on register

Since multiple team instances are putting defined options into their
option list, during register each option must be cloned before added
into list. This resolves uncool memory corruptions when using multiple
teams.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c                   | 76 +++++++++++++++++++++++++++----
 drivers/net/team/team_mode_activebackup.c |  5 +-
 include/linux/if_team.h                   |  8 ++--
 3 files changed, 72 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 7db219cd3153..f3092749b072 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -80,30 +80,78 @@ EXPORT_SYMBOL(team_port_set_team_mac);
  * Options handling
  *******************/
 
-void team_options_register(struct team *team, struct team_option *option,
-			   size_t option_count)
+struct team_option *__team_find_option(struct team *team, const char *opt_name)
+{
+	struct team_option *option;
+
+	list_for_each_entry(option, &team->option_list, list) {
+		if (strcmp(option->name, opt_name) == 0)
+			return option;
+	}
+	return NULL;
+}
+
+int team_options_register(struct team *team,
+			  const struct team_option *option,
+			  size_t option_count)
 {
 	int i;
+	struct team_option *dst_opts[option_count];
+	int err;
+
+	memset(dst_opts, 0, sizeof(dst_opts));
+	for (i = 0; i < option_count; i++, option++) {
+		struct team_option *dst_opt;
+
+		if (__team_find_option(team, option->name)) {
+			err = -EEXIST;
+			goto rollback;
+		}
+		dst_opt = kmalloc(sizeof(*option), GFP_KERNEL);
+		if (!dst_opt) {
+			err = -ENOMEM;
+			goto rollback;
+		}
+		memcpy(dst_opt, option, sizeof(*option));
+		dst_opts[i] = dst_opt;
+	}
+
+	for (i = 0; i < option_count; i++)
+		list_add_tail(&dst_opts[i]->list, &team->option_list);
 
-	for (i = 0; i < option_count; i++, option++)
-		list_add_tail(&option->list, &team->option_list);
+	return 0;
+
+rollback:
+	for (i = 0; i < option_count; i++)
+		kfree(dst_opts[i]);
+
+	return err;
 }
+
 EXPORT_SYMBOL(team_options_register);
 
 static void __team_options_change_check(struct team *team,
 					struct team_option *changed_option);
 
 static void __team_options_unregister(struct team *team,
-				      struct team_option *option,
+				      const struct team_option *option,
 				      size_t option_count)
 {
 	int i;
 
-	for (i = 0; i < option_count; i++, option++)
-		list_del(&option->list);
+	for (i = 0; i < option_count; i++, option++) {
+		struct team_option *del_opt;
+
+		del_opt = __team_find_option(team, option->name);
+		if (del_opt) {
+			list_del(&del_opt->list);
+			kfree(del_opt);
+		}
+	}
 }
 
-void team_options_unregister(struct team *team, struct team_option *option,
+void team_options_unregister(struct team *team,
+			     const struct team_option *option,
 			     size_t option_count)
 {
 	__team_options_unregister(team, option, option_count);
@@ -632,7 +680,7 @@ static int team_mode_option_set(struct team *team, void *arg)
 	return team_change_mode(team, *str);
 }
 
-static struct team_option team_options[] = {
+static const struct team_option team_options[] = {
 	{
 		.name = "mode",
 		.type = TEAM_OPTION_TYPE_STRING,
@@ -645,6 +693,7 @@ static int team_init(struct net_device *dev)
 {
 	struct team *team = netdev_priv(dev);
 	int i;
+	int err;
 
 	team->dev = dev;
 	mutex_init(&team->lock);
@@ -660,10 +709,17 @@ static int team_init(struct net_device *dev)
 	team_adjust_ops(team);
 
 	INIT_LIST_HEAD(&team->option_list);
-	team_options_register(team, team_options, ARRAY_SIZE(team_options));
+	err = team_options_register(team, team_options, ARRAY_SIZE(team_options));
+	if (err)
+		goto err_options_register;
 	netif_carrier_off(dev);
 
 	return 0;
+
+err_options_register:
+	free_percpu(team->pcpu_stats);
+
+	return err;
 }
 
 static void team_uninit(struct net_device *dev)
diff --git a/drivers/net/team/team_mode_activebackup.c b/drivers/net/team/team_mode_activebackup.c
index 6fe920c440b3..b34427502b54 100644
--- a/drivers/net/team/team_mode_activebackup.c
+++ b/drivers/net/team/team_mode_activebackup.c
@@ -83,7 +83,7 @@ static int ab_active_port_set(struct team *team, void *arg)
 	return -ENOENT;
 }
 
-static struct team_option ab_options[] = {
+static const struct team_option ab_options[] = {
 	{
 		.name = "activeport",
 		.type = TEAM_OPTION_TYPE_U32,
@@ -94,8 +94,7 @@ static struct team_option ab_options[] = {
 
 int ab_init(struct team *team)
 {
-	team_options_register(team, ab_options, ARRAY_SIZE(ab_options));
-	return 0;
+	return team_options_register(team, ab_options, ARRAY_SIZE(ab_options));
 }
 
 void ab_exit(struct team *team)
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index a6eac126a99a..828181fbad5d 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -140,11 +140,11 @@ static inline struct team_port *team_get_port_by_index_rcu(struct team *team,
 }
 
 extern int team_port_set_team_mac(struct team_port *port);
-extern void team_options_register(struct team *team,
-				  struct team_option *option,
-				  size_t option_count);
+extern int team_options_register(struct team *team,
+				 const struct team_option *option,
+				 size_t option_count);
 extern void team_options_unregister(struct team *team,
-				    struct team_option *option,
+				    const struct team_option *option,
 				    size_t option_count);
 extern int team_mode_register(struct team_mode *mode);
 extern int team_mode_unregister(struct team_mode *mode);
-- 
cgit v1.2.3


From 28011cf19b75df9d3f35489a7599a97ec0b3f1a0 Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Wed, 16 Nov 2011 18:36:59 -0500
Subject: net: Add ethtool to mii advertisment conversion helpers

Translating between ethtool advertisement settings and MII
advertisements are common operations for ethernet drivers.  This patch
adds a set of helper functions that implements the conversion.  The
patch then modifies a couple of the drivers to use the new functions.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2.c |  15 +---
 drivers/net/ethernet/broadcom/tg3.c  |  53 +++--------
 drivers/net/ethernet/sun/niu.c       |  15 +---
 drivers/net/mii.c                    |  48 +++-------
 drivers/net/phy/phy_device.c         |  20 +----
 include/linux/mii.h                  | 166 +++++++++++++++++++++++++++++++++++
 6 files changed, 197 insertions(+), 120 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 7203f37d2ef3..6b7cd1e80ada 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -2064,21 +2064,12 @@ __acquires(&bp->phy_lock)
 		bnx2_read_phy(bp, MII_CTRL1000, &adv1000_reg);
 		adv1000_reg &= PHY_ALL_1000_SPEED;
 
-		if (bp->advertising & ADVERTISED_10baseT_Half)
-			new_adv_reg |= ADVERTISE_10HALF;
-		if (bp->advertising & ADVERTISED_10baseT_Full)
-			new_adv_reg |= ADVERTISE_10FULL;
-		if (bp->advertising & ADVERTISED_100baseT_Half)
-			new_adv_reg |= ADVERTISE_100HALF;
-		if (bp->advertising & ADVERTISED_100baseT_Full)
-			new_adv_reg |= ADVERTISE_100FULL;
-		if (bp->advertising & ADVERTISED_1000baseT_Full)
-			new_adv1000_reg |= ADVERTISE_1000FULL;
-
+		new_adv_reg = ethtool_adv_to_mii_100bt(bp->advertising);
 		new_adv_reg |= ADVERTISE_CSMA;
-
 		new_adv_reg |= bnx2_phy_get_pause_adv(bp);
 
+		new_adv1000_reg |= ethtool_adv_to_mii_1000T(bp->advertising);
+
 		if ((adv1000_reg != new_adv1000_reg) ||
 			(adv_reg != new_adv_reg) ||
 			((bmcr & BMCR_ANENABLE) == 0)) {
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 365cd47e2298..024ca1d4d028 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -3594,15 +3594,7 @@ static int tg3_phy_autoneg_cfg(struct tg3 *tp, u32 advertise, u32 flowctrl)
 	u32 val, new_adv;
 
 	new_adv = ADVERTISE_CSMA;
-	if (advertise & ADVERTISED_10baseT_Half)
-		new_adv |= ADVERTISE_10HALF;
-	if (advertise & ADVERTISED_10baseT_Full)
-		new_adv |= ADVERTISE_10FULL;
-	if (advertise & ADVERTISED_100baseT_Half)
-		new_adv |= ADVERTISE_100HALF;
-	if (advertise & ADVERTISED_100baseT_Full)
-		new_adv |= ADVERTISE_100FULL;
-
+	new_adv |= ethtool_adv_to_mii_100bt(advertise);
 	new_adv |= tg3_advert_flowctrl_1000T(flowctrl);
 
 	err = tg3_writephy(tp, MII_ADVERTISE, new_adv);
@@ -3612,11 +3604,7 @@ static int tg3_phy_autoneg_cfg(struct tg3 *tp, u32 advertise, u32 flowctrl)
 	if (tp->phy_flags & TG3_PHYFLG_10_100_ONLY)
 		goto done;
 
-	new_adv = 0;
-	if (advertise & ADVERTISED_1000baseT_Half)
-		new_adv |= ADVERTISE_1000HALF;
-	if (advertise & ADVERTISED_1000baseT_Full)
-		new_adv |= ADVERTISE_1000FULL;
+	new_adv = ethtool_adv_to_mii_1000T(advertise);
 
 	if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 ||
 	    tp->pci_chip_rev_id == CHIPREV_ID_5701_B0)
@@ -3790,14 +3778,7 @@ static int tg3_copper_is_advertising_all(struct tg3 *tp, u32 mask)
 {
 	u32 adv_reg, all_mask = 0;
 
-	if (mask & ADVERTISED_10baseT_Half)
-		all_mask |= ADVERTISE_10HALF;
-	if (mask & ADVERTISED_10baseT_Full)
-		all_mask |= ADVERTISE_10FULL;
-	if (mask & ADVERTISED_100baseT_Half)
-		all_mask |= ADVERTISE_100HALF;
-	if (mask & ADVERTISED_100baseT_Full)
-		all_mask |= ADVERTISE_100FULL;
+	all_mask = ethtool_adv_to_mii_100bt(mask);
 
 	if (tg3_readphy(tp, MII_ADVERTISE, &adv_reg))
 		return 0;
@@ -3808,11 +3789,7 @@ static int tg3_copper_is_advertising_all(struct tg3 *tp, u32 mask)
 	if (!(tp->phy_flags & TG3_PHYFLG_10_100_ONLY)) {
 		u32 tg3_ctrl;
 
-		all_mask = 0;
-		if (mask & ADVERTISED_1000baseT_Half)
-			all_mask |= ADVERTISE_1000HALF;
-		if (mask & ADVERTISED_1000baseT_Full)
-			all_mask |= ADVERTISE_1000FULL;
+		all_mask = ethtool_adv_to_mii_1000T(mask);
 
 		if (tg3_readphy(tp, MII_CTRL1000, &tg3_ctrl))
 			return 0;
@@ -4903,23 +4880,19 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, int force_reset)
 	    (tp->phy_flags & TG3_PHYFLG_PARALLEL_DETECT)) {
 		/* do nothing, just check for link up at the end */
 	} else if (tp->link_config.autoneg == AUTONEG_ENABLE) {
-		u32 adv, new_adv;
+		u32 adv, newadv;
 
 		err |= tg3_readphy(tp, MII_ADVERTISE, &adv);
-		new_adv = adv & ~(ADVERTISE_1000XFULL | ADVERTISE_1000XHALF |
-				  ADVERTISE_1000XPAUSE |
-				  ADVERTISE_1000XPSE_ASYM |
-				  ADVERTISE_SLCT);
-
-		new_adv |= tg3_advert_flowctrl_1000X(tp->link_config.flowctrl);
+		newadv = adv & ~(ADVERTISE_1000XFULL | ADVERTISE_1000XHALF |
+				 ADVERTISE_1000XPAUSE |
+				 ADVERTISE_1000XPSE_ASYM |
+				 ADVERTISE_SLCT);
 
-		if (tp->link_config.advertising & ADVERTISED_1000baseT_Half)
-			new_adv |= ADVERTISE_1000XHALF;
-		if (tp->link_config.advertising & ADVERTISED_1000baseT_Full)
-			new_adv |= ADVERTISE_1000XFULL;
+		newadv |= tg3_advert_flowctrl_1000X(tp->link_config.flowctrl);
+		newadv |= ethtool_adv_to_mii_1000X(tp->link_config.advertising);
 
-		if ((new_adv != adv) || !(bmcr & BMCR_ANENABLE)) {
-			tg3_writephy(tp, MII_ADVERTISE, new_adv);
+		if ((newadv != adv) || !(bmcr & BMCR_ANENABLE)) {
+			tg3_writephy(tp, MII_ADVERTISE, newadv);
 			bmcr |= BMCR_ANENABLE | BMCR_ANRESTART;
 			tg3_writephy(tp, MII_BMCR, bmcr);
 
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 3ebeb9d400fb..9997be525089 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -1151,19 +1151,8 @@ static int link_status_mii(struct niu *np, int *link_up_p)
 		supported |= SUPPORTED_1000baseT_Full;
 	lp->supported = supported;
 
-	advertising = 0;
-	if (advert & ADVERTISE_10HALF)
-		advertising |= ADVERTISED_10baseT_Half;
-	if (advert & ADVERTISE_10FULL)
-		advertising |= ADVERTISED_10baseT_Full;
-	if (advert & ADVERTISE_100HALF)
-		advertising |= ADVERTISED_100baseT_Half;
-	if (advert & ADVERTISE_100FULL)
-		advertising |= ADVERTISED_100baseT_Full;
-	if (ctrl1000 & ADVERTISE_1000HALF)
-		advertising |= ADVERTISED_1000baseT_Half;
-	if (ctrl1000 & ADVERTISE_1000FULL)
-		advertising |= ADVERTISED_1000baseT_Full;
+	advertising = mii_adv_to_ethtool_100bt(advert);
+	advertising |= mii_adv_to_ethtool_1000T(ctrl1000);
 
 	if (bmcr & BMCR_ANENABLE) {
 		int neg, neg1000;
diff --git a/drivers/net/mii.c b/drivers/net/mii.c
index c62e7816d548..d0a296272713 100644
--- a/drivers/net/mii.c
+++ b/drivers/net/mii.c
@@ -41,20 +41,8 @@ static u32 mii_get_an(struct mii_if_info *mii, u16 addr)
 	advert = mii->mdio_read(mii->dev, mii->phy_id, addr);
 	if (advert & LPA_LPACK)
 		result |= ADVERTISED_Autoneg;
-	if (advert & ADVERTISE_10HALF)
-		result |= ADVERTISED_10baseT_Half;
-	if (advert & ADVERTISE_10FULL)
-		result |= ADVERTISED_10baseT_Full;
-	if (advert & ADVERTISE_100HALF)
-		result |= ADVERTISED_100baseT_Half;
-	if (advert & ADVERTISE_100FULL)
-		result |= ADVERTISED_100baseT_Full;
-	if (advert & ADVERTISE_PAUSE_CAP)
-		result |= ADVERTISED_Pause;
-	if (advert & ADVERTISE_PAUSE_ASYM)
-		result |= ADVERTISED_Asym_Pause;
-
-	return result;
+
+	return result | mii_adv_to_ethtool_100bt(advert);
 }
 
 /**
@@ -104,19 +92,13 @@ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
 		ecmd->autoneg = AUTONEG_ENABLE;
 
 		ecmd->advertising |= mii_get_an(mii, MII_ADVERTISE);
-		if (ctrl1000 & ADVERTISE_1000HALF)
-			ecmd->advertising |= ADVERTISED_1000baseT_Half;
-		if (ctrl1000 & ADVERTISE_1000FULL)
-			ecmd->advertising |= ADVERTISED_1000baseT_Full;
+		if (mii->supports_gmii)
+			ecmd->advertising |= mii_adv_to_ethtool_1000T(ctrl1000);
 
 		if (bmsr & BMSR_ANEGCOMPLETE) {
 			ecmd->lp_advertising = mii_get_an(mii, MII_LPA);
-			if (stat1000 & LPA_1000HALF)
-				ecmd->lp_advertising |=
-					ADVERTISED_1000baseT_Half;
-			if (stat1000 & LPA_1000FULL)
-				ecmd->lp_advertising |=
-					ADVERTISED_1000baseT_Full;
+			ecmd->lp_advertising |=
+					     mii_lpa_to_ethtool_1000T(stat1000);
 		} else {
 			ecmd->lp_advertising = 0;
 		}
@@ -204,20 +186,10 @@ int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
 			advert2 = mii->mdio_read(dev, mii->phy_id, MII_CTRL1000);
 			tmp2 = advert2 & ~(ADVERTISE_1000HALF | ADVERTISE_1000FULL);
 		}
-		if (ecmd->advertising & ADVERTISED_10baseT_Half)
-			tmp |= ADVERTISE_10HALF;
-		if (ecmd->advertising & ADVERTISED_10baseT_Full)
-			tmp |= ADVERTISE_10FULL;
-		if (ecmd->advertising & ADVERTISED_100baseT_Half)
-			tmp |= ADVERTISE_100HALF;
-		if (ecmd->advertising & ADVERTISED_100baseT_Full)
-			tmp |= ADVERTISE_100FULL;
-		if (mii->supports_gmii) {
-			if (ecmd->advertising & ADVERTISED_1000baseT_Half)
-				tmp2 |= ADVERTISE_1000HALF;
-			if (ecmd->advertising & ADVERTISED_1000baseT_Full)
-				tmp2 |= ADVERTISE_1000FULL;
-		}
+		tmp |= ethtool_adv_to_mii_100bt(ecmd->advertising);
+
+		if (mii->supports_gmii)
+			tmp2 |= ethtool_adv_to_mii_1000T(ecmd->advertising);
 		if (advert != tmp) {
 			mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp);
 			mii->advertising = tmp;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 83a5a5afec67..edb905f80115 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -563,20 +563,9 @@ static int genphy_config_advert(struct phy_device *phydev)
 	if (adv < 0)
 		return adv;
 
-	adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | 
+	adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP |
 		 ADVERTISE_PAUSE_ASYM);
-	if (advertise & ADVERTISED_10baseT_Half)
-		adv |= ADVERTISE_10HALF;
-	if (advertise & ADVERTISED_10baseT_Full)
-		adv |= ADVERTISE_10FULL;
-	if (advertise & ADVERTISED_100baseT_Half)
-		adv |= ADVERTISE_100HALF;
-	if (advertise & ADVERTISED_100baseT_Full)
-		adv |= ADVERTISE_100FULL;
-	if (advertise & ADVERTISED_Pause)
-		adv |= ADVERTISE_PAUSE_CAP;
-	if (advertise & ADVERTISED_Asym_Pause)
-		adv |= ADVERTISE_PAUSE_ASYM;
+	adv |= ethtool_adv_to_mii_100bt(advertise);
 
 	if (adv != oldadv) {
 		err = phy_write(phydev, MII_ADVERTISE, adv);
@@ -595,10 +584,7 @@ static int genphy_config_advert(struct phy_device *phydev)
 			return adv;
 
 		adv &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF);
-		if (advertise & SUPPORTED_1000baseT_Half)
-			adv |= ADVERTISE_1000HALF;
-		if (advertise & SUPPORTED_1000baseT_Full)
-			adv |= ADVERTISE_1000FULL;
+		adv |= ethtool_adv_to_mii_1000T(advertise);
 
 		if (adv != oldadv) {
 			err = phy_write(phydev, MII_CTRL1000, adv);
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 27748230aa69..6697b9112014 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -9,6 +9,7 @@
 #define __LINUX_MII_H__
 
 #include <linux/types.h>
+#include <linux/ethtool.h>
 
 /* Generic MII registers. */
 #define MII_BMCR		0x00	/* Basic mode control register */
@@ -239,6 +240,171 @@ static inline unsigned int mii_duplex (unsigned int duplex_lock,
 	return 0;
 }
 
+/**
+ * ethtool_adv_to_mii_100bt
+ * @ethadv: the ethtool advertisement settings
+ *
+ * A small helper function that translates ethtool advertisement
+ * settings to phy autonegotiation advertisements for the
+ * MII_ADVERTISE register.
+ */
+static inline u32 ethtool_adv_to_mii_100bt(u32 ethadv)
+{
+	u32 result = 0;
+
+	if (ethadv & ADVERTISED_10baseT_Half)
+		result |= ADVERTISE_10HALF;
+	if (ethadv & ADVERTISED_10baseT_Full)
+		result |= ADVERTISE_10FULL;
+	if (ethadv & ADVERTISED_100baseT_Half)
+		result |= ADVERTISE_100HALF;
+	if (ethadv & ADVERTISED_100baseT_Full)
+		result |= ADVERTISE_100FULL;
+	if (ethadv & ADVERTISED_Pause)
+		result |= ADVERTISE_PAUSE_CAP;
+	if (ethadv & ADVERTISED_Asym_Pause)
+		result |= ADVERTISE_PAUSE_ASYM;
+
+	return result;
+}
+
+/**
+ * mii_adv_to_ethtool_100bt
+ * @adv: value of the MII_ADVERTISE register
+ *
+ * A small helper function that translates MII_ADVERTISE bits
+ * to ethtool advertisement settings.
+ */
+static inline u32 mii_adv_to_ethtool_100bt(u32 adv)
+{
+	u32 result = 0;
+
+	if (adv & ADVERTISE_10HALF)
+		result |= ADVERTISED_10baseT_Half;
+	if (adv & ADVERTISE_10FULL)
+		result |= ADVERTISED_10baseT_Full;
+	if (adv & ADVERTISE_100HALF)
+		result |= ADVERTISED_100baseT_Half;
+	if (adv & ADVERTISE_100FULL)
+		result |= ADVERTISED_100baseT_Full;
+	if (adv & ADVERTISE_PAUSE_CAP)
+		result |= ADVERTISED_Pause;
+	if (adv & ADVERTISE_PAUSE_ASYM)
+		result |= ADVERTISED_Asym_Pause;
+
+	return result;
+}
+
+/**
+ * ethtool_adv_to_mii_1000T
+ * @ethadv: the ethtool advertisement settings
+ *
+ * A small helper function that translates ethtool advertisement
+ * settings to phy autonegotiation advertisements for the
+ * MII_CTRL1000 register when in 1000T mode.
+ */
+static inline u32 ethtool_adv_to_mii_1000T(u32 ethadv)
+{
+	u32 result = 0;
+
+	if (ethadv & ADVERTISED_1000baseT_Half)
+		result |= ADVERTISE_1000HALF;
+	if (ethadv & ADVERTISED_1000baseT_Full)
+		result |= ADVERTISE_1000FULL;
+
+	return result;
+}
+
+/**
+ * mii_adv_to_ethtool_1000T
+ * @adv: value of the MII_CTRL1000 register
+ *
+ * A small helper function that translates MII_CTRL1000
+ * bits, when in 1000Base-T mode, to ethtool
+ * advertisement settings.
+ */
+static inline u32 mii_adv_to_ethtool_1000T(u32 adv)
+{
+	u32 result = 0;
+
+	if (adv & ADVERTISE_1000HALF)
+		result |= ADVERTISED_1000baseT_Half;
+	if (adv & ADVERTISE_1000FULL)
+		result |= ADVERTISED_1000baseT_Full;
+
+	return result;
+}
+
+#define mii_lpa_to_ethtool_100bt(lpa)	mii_adv_to_ethtool_100bt(lpa)
+
+/**
+ * mii_lpa_to_ethtool_1000T
+ * @adv: value of the MII_STAT1000 register
+ *
+ * A small helper function that translates MII_STAT1000
+ * bits, when in 1000Base-T mode, to ethtool
+ * advertisement settings.
+ */
+static inline u32 mii_lpa_to_ethtool_1000T(u32 lpa)
+{
+	u32 result = 0;
+
+	if (lpa & LPA_1000HALF)
+		result |= ADVERTISED_1000baseT_Half;
+	if (lpa & LPA_1000FULL)
+		result |= ADVERTISED_1000baseT_Full;
+
+	return result;
+}
+
+/**
+ * ethtool_adv_to_mii_1000X
+ * @ethadv: the ethtool advertisement settings
+ *
+ * A small helper function that translates ethtool advertisement
+ * settings to phy autonegotiation advertisements for the
+ * MII_CTRL1000 register when in 1000Base-X mode.
+ */
+static inline u32 ethtool_adv_to_mii_1000X(u32 ethadv)
+{
+	u32 result = 0;
+
+	if (ethadv & ADVERTISED_1000baseT_Half)
+		result |= ADVERTISE_1000XHALF;
+	if (ethadv & ADVERTISED_1000baseT_Full)
+		result |= ADVERTISE_1000XFULL;
+	if (ethadv & ADVERTISED_Pause)
+		result |= ADVERTISE_1000XPAUSE;
+	if (ethadv & ADVERTISED_Asym_Pause)
+		result |= ADVERTISE_1000XPSE_ASYM;
+
+	return result;
+}
+
+/**
+ * mii_adv_to_ethtool_1000X
+ * @adv: value of the MII_CTRL1000 register
+ *
+ * A small helper function that translates MII_CTRL1000
+ * bits, when in 1000Base-X mode, to ethtool
+ * advertisement settings.
+ */
+static inline u32 mii_adv_to_ethtool_1000X(u32 adv)
+{
+	u32 result = 0;
+
+	if (adv & ADVERTISE_1000XHALF)
+		result |= ADVERTISED_1000baseT_Half;
+	if (adv & ADVERTISE_1000XFULL)
+		result |= ADVERTISED_1000baseT_Full;
+	if (adv & ADVERTISE_1000XPAUSE)
+		result |= ADVERTISED_Pause;
+	if (adv & ADVERTISE_1000XPSE_ASYM)
+		result |= ADVERTISED_Asym_Pause;
+
+	return result;
+}
+
 /**
  * mii_advertise_flowctrl - get flow control advertisement flags
  * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both)
-- 
cgit v1.2.3


From 0345e1864283207bc236120dd3e13ff2391fa85f Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Wed, 16 Nov 2011 14:05:33 +0000
Subject: net: verify GSO flag bits against netdev features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b35ffd735ecc..31da3bbe7b1b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2492,6 +2492,15 @@ netdev_features_t netif_skb_features(struct sk_buff *skb);
 static inline int net_gso_ok(netdev_features_t features, int gso_type)
 {
 	netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT;
+
+	/* check flags correspondence */
+	BUILD_BUG_ON(SKB_GSO_TCPV4   != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_UDP     != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_DODGY   != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_TCPV6   != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_FCOE    != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT));
+
 	return (features & feature) == feature;
 }
 
-- 
cgit v1.2.3


From ccf5ff69fbbd8d877377f5786369cf5aa78a15fc Mon Sep 17 00:00:00 2001
From: david decotigny <david.decotigny@google.com>
Date: Wed, 16 Nov 2011 12:15:10 +0000
Subject: net: new counter for tx_timeout errors in sysfs

This adds the /sys/class/net/DEV/queues/Q/tx_timeout attribute
containing the total number of timeout events on the given queue. It
is always available with CONFIG_SYSFS, independently of
CONFIG_RPS/XPS.

Credits to Stephen Hemminger for a preliminary version of this patch.

Tested:
  without CONFIG_SYSFS (compilation only)
  with sysfs and without CONFIG_RPS & CONFIG_XPS
  with sysfs and without CONFIG_RPS
  with sysfs and without CONFIG_XPS
  with defaults

Signed-off-by: David Decotigny <david.decotigny@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 12 ++++++++++--
 net/core/net-sysfs.c      | 37 +++++++++++++++++++++++++++++++------
 net/sched/sch_generic.c   |  1 +
 3 files changed, 42 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 31da3bbe7b1b..4d5698aa828b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -532,7 +532,7 @@ struct netdev_queue {
 	struct Qdisc		*qdisc;
 	unsigned long		state;
 	struct Qdisc		*qdisc_sleeping;
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
 	struct kobject		kobj;
 #endif
 #if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
@@ -547,6 +547,12 @@ struct netdev_queue {
 	 * please use this field instead of dev->trans_start
 	 */
 	unsigned long		trans_start;
+
+	/*
+	 * Number of TX timeouts for this queue
+	 * (/sys/class/net/DEV/Q/trans_timeout)
+	 */
+	unsigned long		trans_timeout;
 } ____cacheline_aligned_in_smp;
 
 static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
@@ -1109,9 +1115,11 @@ struct net_device {
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
 	struct kset		*queues_kset;
+#endif
 
+#ifdef CONFIG_RPS
 	struct netdev_rx_queue	*_rx;
 
 	/* Number of RX queues allocated at register_netdev() time */
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a64382f201b8..602b1419998c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -780,7 +780,7 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 #endif
 }
 
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SYSFS
 /*
  * netdev_queue sysfs structures and functions.
  */
@@ -826,6 +826,23 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
 	.store = netdev_queue_attr_store,
 };
 
+static ssize_t show_trans_timeout(struct netdev_queue *queue,
+				  struct netdev_queue_attribute *attribute,
+				  char *buf)
+{
+	unsigned long trans_timeout;
+
+	spin_lock_irq(&queue->_xmit_lock);
+	trans_timeout = queue->trans_timeout;
+	spin_unlock_irq(&queue->_xmit_lock);
+
+	return sprintf(buf, "%lu", trans_timeout);
+}
+
+static struct netdev_queue_attribute queue_trans_timeout =
+	__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
+
+#ifdef CONFIG_XPS
 static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
 {
 	struct net_device *dev = queue->dev;
@@ -1020,12 +1037,17 @@ error:
 
 static struct netdev_queue_attribute xps_cpus_attribute =
     __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+#endif /* CONFIG_XPS */
 
 static struct attribute *netdev_queue_default_attrs[] = {
+	&queue_trans_timeout.attr,
+#ifdef CONFIG_XPS
 	&xps_cpus_attribute.attr,
+#endif
 	NULL
 };
 
+#ifdef CONFIG_XPS
 static void netdev_queue_release(struct kobject *kobj)
 {
 	struct netdev_queue *queue = to_netdev_queue(kobj);
@@ -1076,10 +1098,13 @@ static void netdev_queue_release(struct kobject *kobj)
 	memset(kobj, 0, sizeof(*kobj));
 	dev_put(queue->dev);
 }
+#endif /* CONFIG_XPS */
 
 static struct kobj_type netdev_queue_ktype = {
 	.sysfs_ops = &netdev_queue_sysfs_ops,
+#ifdef CONFIG_XPS
 	.release = netdev_queue_release,
+#endif
 	.default_attrs = netdev_queue_default_attrs,
 };
 
@@ -1102,12 +1127,12 @@ static int netdev_queue_add_kobject(struct net_device *net, int index)
 
 	return error;
 }
-#endif /* CONFIG_XPS */
+#endif /* CONFIG_SYSFS */
 
 int
 netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 {
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SYSFS
 	int i;
 	int error = 0;
 
@@ -1125,14 +1150,14 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 	return error;
 #else
 	return 0;
-#endif
+#endif /* CONFIG_SYSFS */
 }
 
 static int register_queue_kobjects(struct net_device *net)
 {
 	int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
 
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
 	net->queues_kset = kset_create_and_add("queues",
 	    NULL, &net->dev.kobj);
 	if (!net->queues_kset)
@@ -1173,7 +1198,7 @@ static void remove_queue_kobjects(struct net_device *net)
 
 	net_rx_queue_update_kobjects(net, real_rx, 0);
 	netdev_queue_update_kobjects(net, real_tx, 0);
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
 	kset_unregister(net->queues_kset);
 #endif
 }
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 69fca2798804..79ac1458c2ba 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -246,6 +246,7 @@ static void dev_watchdog(unsigned long arg)
 				    time_after(jiffies, (trans_start +
 							 dev->watchdog_timeo))) {
 					some_queue_timedout = 1;
+					txq->trans_timeout++;
 					break;
 				}
 			}
-- 
cgit v1.2.3


From 029632fbb7b7c9d85063cc9eb470de6c54873df3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 25 Oct 2011 10:00:11 +0200
Subject: sched: Make separate sched*.c translation units

Since once needs to do something at conferences and fixing compile
warnings doesn't actually require much if any attention I decided
to break up the sched.c #include "*.c" fest.

This further modularizes the scheduler code.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-x0fcd3mnp8f9c99grcpewmhi@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/latencytop.h |    3 +-
 include/linux/sched.h      |    9 +
 kernel/Makefile            |   10 +-
 kernel/sched.c             | 1878 ++------------------------------------------
 kernel/sched.h             | 1064 +++++++++++++++++++++++++
 kernel/sched_autogroup.c   |   33 +-
 kernel/sched_autogroup.h   |   26 +-
 kernel/sched_debug.c       |    4 +-
 kernel/sched_fair.c        |  580 +++++++++++++-
 kernel/sched_idletask.c    |    4 +-
 kernel/sched_rt.c          |  209 ++++-
 kernel/sched_stats.c       |  111 +++
 kernel/sched_stats.h       |  103 ---
 kernel/sched_stoptask.c    |    4 +-
 14 files changed, 2059 insertions(+), 1979 deletions(-)
 create mode 100644 kernel/sched.h
 create mode 100644 kernel/sched_stats.c

(limited to 'include/linux')

diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h
index b0e99898527c..e23121f9d82a 100644
--- a/include/linux/latencytop.h
+++ b/include/linux/latencytop.h
@@ -10,6 +10,8 @@
 #define _INCLUDE_GUARD_LATENCYTOP_H_
 
 #include <linux/compiler.h>
+struct task_struct;
+
 #ifdef CONFIG_LATENCYTOP
 
 #define LT_SAVECOUNT		32
@@ -23,7 +25,6 @@ struct latency_record {
 };
 
 
-struct task_struct;
 
 extern int latencytop_enabled;
 void __account_scheduler_latency(struct task_struct *task, int usecs, int inter);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 68daf4f27e2c..8db17b7622ec 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -925,6 +925,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
 	return to_cpumask(sg->cpumask);
 }
 
+/**
+ * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
+ * @group: The group whose first cpu is to be returned.
+ */
+static inline unsigned int group_first_cpu(struct sched_group *group)
+{
+	return cpumask_first(sched_group_cpus(group));
+}
+
 struct sched_domain_attr {
 	int relax_domain_level;
 };
diff --git a/kernel/Makefile b/kernel/Makefile
index e898c5b9d02c..1a4d37d7f39a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
+obj-y     = fork.o exec_domain.o panic.o printk.o \
 	    cpu.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
@@ -10,8 +10,12 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o sched_clock.o cred.o \
-	    async.o range.o
-obj-y += groups.o
+	    async.o range.o groups.o
+
+obj-y += sched.o sched_idletask.o sched_fair.o sched_rt.o sched_stoptask.o
+obj-$(CONFIG_SCHED_AUTOGROUP) += sched_autogroup.o
+obj-$(CONFIG_SCHEDSTATS) += sched_stats.o
+obj-$(CONFIG_SCHED_DEBUG) += sched_debug.o
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace debug files and internal ftrace files
diff --git a/kernel/sched.c b/kernel/sched.c
index c9e3ab6e299e..2ffcceed8862 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -56,7 +56,6 @@
 #include <linux/percpu.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <linux/stop_machine.h>
 #include <linux/sysctl.h>
 #include <linux/syscalls.h>
 #include <linux/times.h>
@@ -72,133 +71,20 @@
 #include <linux/ftrace.h>
 #include <linux/slab.h>
 #include <linux/init_task.h>
-#include <linux/jump_label.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
-#include <asm/mutex.h>
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #endif
 
-#include "sched_cpupri.h"
+#include "sched.h"
 #include "workqueue_sched.h"
-#include "sched_autogroup.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
 
-/*
- * Convert user-nice values [ -20 ... 0 ... 19 ]
- * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
- * and back.
- */
-#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
-#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
-#define TASK_NICE(p)		PRIO_TO_NICE((p)->static_prio)
-
-/*
- * 'User priority' is the nice value converted to something we
- * can work with better when scaling various scheduler parameters,
- * it's a [ 0 ... 39 ] range.
- */
-#define USER_PRIO(p)		((p)-MAX_RT_PRIO)
-#define TASK_USER_PRIO(p)	USER_PRIO((p)->static_prio)
-#define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))
-
-/*
- * Helpers for converting nanosecond timing to jiffy resolution
- */
-#define NS_TO_JIFFIES(TIME)	((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
-
-#define NICE_0_LOAD		SCHED_LOAD_SCALE
-#define NICE_0_SHIFT		SCHED_LOAD_SHIFT
-
-/*
- * These are the 'tuning knobs' of the scheduler:
- *
- * default timeslice is 100 msecs (used only for SCHED_RR tasks).
- * Timeslices get refilled after they expire.
- */
-#define DEF_TIMESLICE		(100 * HZ / 1000)
-
-/*
- * single value that denotes runtime == period, ie unlimited time.
- */
-#define RUNTIME_INF	((u64)~0ULL)
-
-static inline int rt_policy(int policy)
-{
-	if (policy == SCHED_FIFO || policy == SCHED_RR)
-		return 1;
-	return 0;
-}
-
-static inline int task_has_rt_policy(struct task_struct *p)
-{
-	return rt_policy(p->policy);
-}
-
-/*
- * This is the priority-queue data structure of the RT scheduling class:
- */
-struct rt_prio_array {
-	DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
-	struct list_head queue[MAX_RT_PRIO];
-};
-
-struct rt_bandwidth {
-	/* nests inside the rq lock: */
-	raw_spinlock_t		rt_runtime_lock;
-	ktime_t			rt_period;
-	u64			rt_runtime;
-	struct hrtimer		rt_period_timer;
-};
-
-static struct rt_bandwidth def_rt_bandwidth;
-
-static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
-
-static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
-{
-	struct rt_bandwidth *rt_b =
-		container_of(timer, struct rt_bandwidth, rt_period_timer);
-	ktime_t now;
-	int overrun;
-	int idle = 0;
-
-	for (;;) {
-		now = hrtimer_cb_get_time(timer);
-		overrun = hrtimer_forward(timer, now, rt_b->rt_period);
-
-		if (!overrun)
-			break;
-
-		idle = do_sched_rt_period_timer(rt_b, overrun);
-	}
-
-	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
-}
-
-static
-void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
-{
-	rt_b->rt_period = ns_to_ktime(period);
-	rt_b->rt_runtime = runtime;
-
-	raw_spin_lock_init(&rt_b->rt_runtime_lock);
-
-	hrtimer_init(&rt_b->rt_period_timer,
-			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	rt_b->rt_period_timer.function = sched_rt_period_timer;
-}
-
-static inline int rt_bandwidth_enabled(void)
-{
-	return sysctl_sched_rt_runtime >= 0;
-}
-
-static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
+void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
 {
 	unsigned long delta;
 	ktime_t soft, hard, now;
@@ -218,609 +104,12 @@ static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
 	}
 }
 
-static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
-{
-	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
-		return;
-
-	if (hrtimer_active(&rt_b->rt_period_timer))
-		return;
-
-	raw_spin_lock(&rt_b->rt_runtime_lock);
-	start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
-	raw_spin_unlock(&rt_b->rt_runtime_lock);
-}
-
-#ifdef CONFIG_RT_GROUP_SCHED
-static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
-{
-	hrtimer_cancel(&rt_b->rt_period_timer);
-}
-#endif
-
-/*
- * sched_domains_mutex serializes calls to init_sched_domains,
- * detach_destroy_domains and partition_sched_domains.
- */
-static DEFINE_MUTEX(sched_domains_mutex);
-
-#ifdef CONFIG_CGROUP_SCHED
-
-#include <linux/cgroup.h>
-
-struct cfs_rq;
-
-static LIST_HEAD(task_groups);
-
-struct cfs_bandwidth {
-#ifdef CONFIG_CFS_BANDWIDTH
-	raw_spinlock_t lock;
-	ktime_t period;
-	u64 quota, runtime;
-	s64 hierarchal_quota;
-	u64 runtime_expires;
-
-	int idle, timer_active;
-	struct hrtimer period_timer, slack_timer;
-	struct list_head throttled_cfs_rq;
-
-	/* statistics */
-	int nr_periods, nr_throttled;
-	u64 throttled_time;
-#endif
-};
-
-/* task group related information */
-struct task_group {
-	struct cgroup_subsys_state css;
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	/* schedulable entities of this group on each cpu */
-	struct sched_entity **se;
-	/* runqueue "owned" by this group on each cpu */
-	struct cfs_rq **cfs_rq;
-	unsigned long shares;
-
-	atomic_t load_weight;
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
-	struct sched_rt_entity **rt_se;
-	struct rt_rq **rt_rq;
-
-	struct rt_bandwidth rt_bandwidth;
-#endif
-
-	struct rcu_head rcu;
-	struct list_head list;
-
-	struct task_group *parent;
-	struct list_head siblings;
-	struct list_head children;
-
-#ifdef CONFIG_SCHED_AUTOGROUP
-	struct autogroup *autogroup;
-#endif
-
-	struct cfs_bandwidth cfs_bandwidth;
-};
-
-/* task_group_lock serializes the addition/removal of task groups */
-static DEFINE_SPINLOCK(task_group_lock);
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-# define ROOT_TASK_GROUP_LOAD	NICE_0_LOAD
-
-/*
- * A weight of 0 or 1 can cause arithmetics problems.
- * A weight of a cfs_rq is the sum of weights of which entities
- * are queued on this cfs_rq, so a weight of a entity should not be
- * too large, so as the shares value of a task group.
- * (The default weight is 1024 - so there's no practical
- *  limitation from this.)
- */
-#define MIN_SHARES	(1UL <<  1)
-#define MAX_SHARES	(1UL << 18)
-
-static int root_task_group_load = ROOT_TASK_GROUP_LOAD;
-#endif
-
-/* Default task group.
- *	Every task in system belong to this group at bootup.
- */
-struct task_group root_task_group;
-
-#endif	/* CONFIG_CGROUP_SCHED */
-
-/* CFS-related fields in a runqueue */
-struct cfs_rq {
-	struct load_weight load;
-	unsigned long nr_running, h_nr_running;
-
-	u64 exec_clock;
-	u64 min_vruntime;
-#ifndef CONFIG_64BIT
-	u64 min_vruntime_copy;
-#endif
-
-	struct rb_root tasks_timeline;
-	struct rb_node *rb_leftmost;
-
-	struct list_head tasks;
-	struct list_head *balance_iterator;
-
-	/*
-	 * 'curr' points to currently running entity on this cfs_rq.
-	 * It is set to NULL otherwise (i.e when none are currently running).
-	 */
-	struct sched_entity *curr, *next, *last, *skip;
-
-#ifdef	CONFIG_SCHED_DEBUG
-	unsigned int nr_spread_over;
-#endif
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
-
-	/*
-	 * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
-	 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
-	 * (like users, containers etc.)
-	 *
-	 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
-	 * list is used during load balance.
-	 */
-	int on_list;
-	struct list_head leaf_cfs_rq_list;
-	struct task_group *tg;	/* group that "owns" this runqueue */
-
-#ifdef CONFIG_SMP
-	/*
-	 * the part of load.weight contributed by tasks
-	 */
-	unsigned long task_weight;
-
-	/*
-	 *   h_load = weight * f(tg)
-	 *
-	 * Where f(tg) is the recursive weight fraction assigned to
-	 * this group.
-	 */
-	unsigned long h_load;
-
-	/*
-	 * Maintaining per-cpu shares distribution for group scheduling
-	 *
-	 * load_stamp is the last time we updated the load average
-	 * load_last is the last time we updated the load average and saw load
-	 * load_unacc_exec_time is currently unaccounted execution time
-	 */
-	u64 load_avg;
-	u64 load_period;
-	u64 load_stamp, load_last, load_unacc_exec_time;
-
-	unsigned long load_contribution;
-#endif
-#ifdef CONFIG_CFS_BANDWIDTH
-	int runtime_enabled;
-	u64 runtime_expires;
-	s64 runtime_remaining;
-
-	u64 throttled_timestamp;
-	int throttled, throttle_count;
-	struct list_head throttled_list;
-#endif
-#endif
-};
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-#ifdef CONFIG_CFS_BANDWIDTH
-static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
-{
-	return &tg->cfs_bandwidth;
-}
-
-static inline u64 default_cfs_period(void);
-static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun);
-static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b);
-
-static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
-{
-	struct cfs_bandwidth *cfs_b =
-		container_of(timer, struct cfs_bandwidth, slack_timer);
-	do_sched_cfs_slack_timer(cfs_b);
-
-	return HRTIMER_NORESTART;
-}
-
-static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
-{
-	struct cfs_bandwidth *cfs_b =
-		container_of(timer, struct cfs_bandwidth, period_timer);
-	ktime_t now;
-	int overrun;
-	int idle = 0;
-
-	for (;;) {
-		now = hrtimer_cb_get_time(timer);
-		overrun = hrtimer_forward(timer, now, cfs_b->period);
-
-		if (!overrun)
-			break;
-
-		idle = do_sched_cfs_period_timer(cfs_b, overrun);
-	}
-
-	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
-}
-
-static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
-{
-	raw_spin_lock_init(&cfs_b->lock);
-	cfs_b->runtime = 0;
-	cfs_b->quota = RUNTIME_INF;
-	cfs_b->period = ns_to_ktime(default_cfs_period());
-
-	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
-	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	cfs_b->period_timer.function = sched_cfs_period_timer;
-	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	cfs_b->slack_timer.function = sched_cfs_slack_timer;
-}
-
-static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
-{
-	cfs_rq->runtime_enabled = 0;
-	INIT_LIST_HEAD(&cfs_rq->throttled_list);
-}
-
-/* requires cfs_b->lock, may release to reprogram timer */
-static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
-{
-	/*
-	 * The timer may be active because we're trying to set a new bandwidth
-	 * period or because we're racing with the tear-down path
-	 * (timer_active==0 becomes visible before the hrtimer call-back
-	 * terminates).  In either case we ensure that it's re-programmed
-	 */
-	while (unlikely(hrtimer_active(&cfs_b->period_timer))) {
-		raw_spin_unlock(&cfs_b->lock);
-		/* ensure cfs_b->lock is available while we wait */
-		hrtimer_cancel(&cfs_b->period_timer);
-
-		raw_spin_lock(&cfs_b->lock);
-		/* if someone else restarted the timer then we're done */
-		if (cfs_b->timer_active)
-			return;
-	}
-
-	cfs_b->timer_active = 1;
-	start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
-}
-
-static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
-{
-	hrtimer_cancel(&cfs_b->period_timer);
-	hrtimer_cancel(&cfs_b->slack_timer);
-}
-
-#ifdef HAVE_JUMP_LABEL
-static struct jump_label_key __cfs_bandwidth_used;
-
-static inline bool cfs_bandwidth_used(void)
-{
-	return static_branch(&__cfs_bandwidth_used);
-}
-
-static void account_cfs_bandwidth_used(int enabled, int was_enabled)
-{
-	/* only need to count groups transitioning between enabled/!enabled */
-	if (enabled && !was_enabled)
-		jump_label_inc(&__cfs_bandwidth_used);
-	else if (!enabled && was_enabled)
-		jump_label_dec(&__cfs_bandwidth_used);
-}
-#else /* !HAVE_JUMP_LABEL */
-/* static_branch doesn't help unless supported */
-static int cfs_bandwidth_used(void)
-{
-	return 1;
-}
-static void account_cfs_bandwidth_used(int enabled, int was_enabled) {}
-#endif /* HAVE_JUMP_LABEL */
-#else /* !CONFIG_CFS_BANDWIDTH */
-static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
-static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
-static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
-
-static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
-{
-	return NULL;
-}
-#endif /* CONFIG_CFS_BANDWIDTH */
-#endif /* CONFIG_FAIR_GROUP_SCHED */
-
-/* Real-Time classes' related field in a runqueue: */
-struct rt_rq {
-	struct rt_prio_array active;
-	unsigned long rt_nr_running;
-#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	struct {
-		int curr; /* highest queued rt task prio */
-#ifdef CONFIG_SMP
-		int next; /* next highest */
-#endif
-	} highest_prio;
-#endif
-#ifdef CONFIG_SMP
-	unsigned long rt_nr_migratory;
-	unsigned long rt_nr_total;
-	int overloaded;
-	struct plist_head pushable_tasks;
-#endif
-	int rt_throttled;
-	u64 rt_time;
-	u64 rt_runtime;
-	/* Nests inside the rq lock: */
-	raw_spinlock_t rt_runtime_lock;
-
-#ifdef CONFIG_RT_GROUP_SCHED
-	unsigned long rt_nr_boosted;
-
-	struct rq *rq;
-	struct list_head leaf_rt_rq_list;
-	struct task_group *tg;
-#endif
-};
-
-#ifdef CONFIG_SMP
-
-/*
- * We add the notion of a root-domain which will be used to define per-domain
- * variables. Each exclusive cpuset essentially defines an island domain by
- * fully partitioning the member cpus from any other cpuset. Whenever a new
- * exclusive cpuset is created, we also create and attach a new root-domain
- * object.
- *
- */
-struct root_domain {
-	atomic_t refcount;
-	atomic_t rto_count;
-	struct rcu_head rcu;
-	cpumask_var_t span;
-	cpumask_var_t online;
-
-	/*
-	 * The "RT overload" flag: it gets set if a CPU has more than
-	 * one runnable RT task.
-	 */
-	cpumask_var_t rto_mask;
-	struct cpupri cpupri;
-};
-
-/*
- * By default the system creates a single root-domain with all cpus as
- * members (mimicking the global state we have today).
- */
-static struct root_domain def_root_domain;
-
-#endif /* CONFIG_SMP */
-
-/*
- * This is the main, per-CPU runqueue data structure.
- *
- * Locking rule: those places that want to lock multiple runqueues
- * (such as the load balancing or the thread migration code), lock
- * acquire operations must be ordered by ascending &runqueue.
- */
-struct rq {
-	/* runqueue lock: */
-	raw_spinlock_t lock;
-
-	/*
-	 * nr_running and cpu_load should be in the same cacheline because
-	 * remote CPUs use both these fields when doing load calculation.
-	 */
-	unsigned long nr_running;
-	#define CPU_LOAD_IDX_MAX 5
-	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
-	unsigned long last_load_update_tick;
-#ifdef CONFIG_NO_HZ
-	u64 nohz_stamp;
-	unsigned char nohz_balance_kick;
-#endif
-	int skip_clock_update;
-
-	/* capture load from *all* tasks on this cpu: */
-	struct load_weight load;
-	unsigned long nr_load_updates;
-	u64 nr_switches;
-
-	struct cfs_rq cfs;
-	struct rt_rq rt;
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	/* list of leaf cfs_rq on this cpu: */
-	struct list_head leaf_cfs_rq_list;
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-	struct list_head leaf_rt_rq_list;
-#endif
-
-	/*
-	 * This is part of a global counter where only the total sum
-	 * over all CPUs matters. A task can increase this counter on
-	 * one CPU and if it got migrated afterwards it may decrease
-	 * it on another CPU. Always updated under the runqueue lock:
-	 */
-	unsigned long nr_uninterruptible;
-
-	struct task_struct *curr, *idle, *stop;
-	unsigned long next_balance;
-	struct mm_struct *prev_mm;
-
-	u64 clock;
-	u64 clock_task;
-
-	atomic_t nr_iowait;
-
-#ifdef CONFIG_SMP
-	struct root_domain *rd;
-	struct sched_domain *sd;
-
-	unsigned long cpu_power;
-
-	unsigned char idle_balance;
-	/* For active balancing */
-	int post_schedule;
-	int active_balance;
-	int push_cpu;
-	struct cpu_stop_work active_balance_work;
-	/* cpu of this runqueue: */
-	int cpu;
-	int online;
-
-	u64 rt_avg;
-	u64 age_stamp;
-	u64 idle_stamp;
-	u64 avg_idle;
-#endif
-
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-	u64 prev_irq_time;
-#endif
-#ifdef CONFIG_PARAVIRT
-	u64 prev_steal_time;
-#endif
-#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
-	u64 prev_steal_time_rq;
-#endif
-
-	/* calc_load related fields */
-	unsigned long calc_load_update;
-	long calc_load_active;
-
-#ifdef CONFIG_SCHED_HRTICK
-#ifdef CONFIG_SMP
-	int hrtick_csd_pending;
-	struct call_single_data hrtick_csd;
-#endif
-	struct hrtimer hrtick_timer;
-#endif
-
-#ifdef CONFIG_SCHEDSTATS
-	/* latency stats */
-	struct sched_info rq_sched_info;
-	unsigned long long rq_cpu_time;
-	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
-
-	/* sys_sched_yield() stats */
-	unsigned int yld_count;
-
-	/* schedule() stats */
-	unsigned int sched_switch;
-	unsigned int sched_count;
-	unsigned int sched_goidle;
-
-	/* try_to_wake_up() stats */
-	unsigned int ttwu_count;
-	unsigned int ttwu_local;
-#endif
-
-#ifdef CONFIG_SMP
-	struct llist_head wake_list;
-#endif
-};
-
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-
-
-static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
-
-static inline int cpu_of(struct rq *rq)
-{
-#ifdef CONFIG_SMP
-	return rq->cpu;
-#else
-	return 0;
-#endif
-}
-
-#define rcu_dereference_check_sched_domain(p) \
-	rcu_dereference_check((p), \
-			      lockdep_is_held(&sched_domains_mutex))
-
-/*
- * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
- * See detach_destroy_domains: synchronize_sched for details.
- *
- * The domain tree of any CPU may only be accessed from within
- * preempt-disabled sections.
- */
-#define for_each_domain(cpu, __sd) \
-	for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
-
-#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
-#define this_rq()		(&__get_cpu_var(runqueues))
-#define task_rq(p)		cpu_rq(task_cpu(p))
-#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
-#define raw_rq()		(&__raw_get_cpu_var(runqueues))
-
-#ifdef CONFIG_CGROUP_SCHED
-
-/*
- * Return the group to which this tasks belongs.
- *
- * We use task_subsys_state_check() and extend the RCU verification with
- * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each
- * task it moves into the cgroup. Therefore by holding either of those locks,
- * we pin the task to the current cgroup.
- */
-static inline struct task_group *task_group(struct task_struct *p)
-{
-	struct task_group *tg;
-	struct cgroup_subsys_state *css;
-
-	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
-			lockdep_is_held(&p->pi_lock) ||
-			lockdep_is_held(&task_rq(p)->lock));
-	tg = container_of(css, struct task_group, css);
-
-	return autogroup_task_group(p, tg);
-}
-
-/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
-{
-#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
-	struct task_group *tg = task_group(p);
-#endif
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	p->se.cfs_rq = tg->cfs_rq[cpu];
-	p->se.parent = tg->se[cpu];
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
-	p->rt.rt_rq  = tg->rt_rq[cpu];
-	p->rt.parent = tg->rt_se[cpu];
-#endif
-}
-
-#else /* CONFIG_CGROUP_SCHED */
-
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline struct task_group *task_group(struct task_struct *p)
-{
-	return NULL;
-}
-
-#endif /* CONFIG_CGROUP_SCHED */
+DEFINE_MUTEX(sched_domains_mutex);
+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
 static void update_rq_clock_task(struct rq *rq, s64 delta);
 
-static void update_rq_clock(struct rq *rq)
+void update_rq_clock(struct rq *rq)
 {
 	s64 delta;
 
@@ -832,40 +121,10 @@ static void update_rq_clock(struct rq *rq)
 	update_rq_clock_task(rq, delta);
 }
 
-/*
- * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
- */
-#ifdef CONFIG_SCHED_DEBUG
-# define const_debug __read_mostly
-#else
-# define const_debug static const
-#endif
-
-/**
- * runqueue_is_locked - Returns true if the current cpu runqueue is locked
- * @cpu: the processor in question.
- *
- * This interface allows printk to be called with the runqueue lock
- * held and know whether or not it is OK to wake up the klogd.
- */
-int runqueue_is_locked(int cpu)
-{
-	return raw_spin_is_locked(&cpu_rq(cpu)->lock);
-}
-
 /*
  * Debugging: various feature bits
  */
 
-#define SCHED_FEAT(name, enabled)	\
-	__SCHED_FEAT_##name ,
-
-enum {
-#include "sched_features.h"
-};
-
-#undef SCHED_FEAT
-
 #define SCHED_FEAT(name, enabled)	\
 	(1UL << __SCHED_FEAT_##name) * enabled |
 
@@ -965,8 +224,6 @@ late_initcall(sched_init_debug);
 
 #endif
 
-#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
-
 /*
  * Number of tasks to iterate in a single balance run.
  * Limited because this is done with IRQs disabled.
@@ -981,126 +238,21 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
  */
 const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;
 
-/*
- * period over which we measure -rt task cpu usage in us.
- * default: 1s
- */
-unsigned int sysctl_sched_rt_period = 1000000;
-
-static __read_mostly int scheduler_running;
-
-/*
- * part of the period that we allow rt tasks to run in us.
- * default: 0.95s
- */
-int sysctl_sched_rt_runtime = 950000;
-
-static inline u64 global_rt_period(void)
-{
-	return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
-}
-
-static inline u64 global_rt_runtime(void)
-{
-	if (sysctl_sched_rt_runtime < 0)
-		return RUNTIME_INF;
-
-	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
-}
-
-#ifndef prepare_arch_switch
-# define prepare_arch_switch(next)	do { } while (0)
-#endif
-#ifndef finish_arch_switch
-# define finish_arch_switch(prev)	do { } while (0)
-#endif
-
-static inline int task_current(struct rq *rq, struct task_struct *p)
-{
-	return rq->curr == p;
-}
-
-static inline int task_running(struct rq *rq, struct task_struct *p)
-{
-#ifdef CONFIG_SMP
-	return p->on_cpu;
-#else
-	return task_current(rq, p);
-#endif
-}
-
-#ifndef __ARCH_WANT_UNLOCKED_CTXSW
-static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
-{
-#ifdef CONFIG_SMP
-	/*
-	 * We can optimise this out completely for !SMP, because the
-	 * SMP rebalancing from interrupt is the only thing that cares
-	 * here.
-	 */
-	next->on_cpu = 1;
-#endif
-}
-
-static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
-{
-#ifdef CONFIG_SMP
-	/*
-	 * After ->on_cpu is cleared, the task can be moved to a different CPU.
-	 * We must ensure this doesn't happen until the switch is completely
-	 * finished.
-	 */
-	smp_wmb();
-	prev->on_cpu = 0;
-#endif
-#ifdef CONFIG_DEBUG_SPINLOCK
-	/* this is a valid case when another task releases the spinlock */
-	rq->lock.owner = current;
-#endif
-	/*
-	 * If we are tracking spinlock dependencies then we have to
-	 * fix up the runqueue lock - which gets 'carried over' from
-	 * prev into current:
-	 */
-	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+/*
+ * period over which we measure -rt task cpu usage in us.
+ * default: 1s
+ */
+unsigned int sysctl_sched_rt_period = 1000000;
 
-	raw_spin_unlock_irq(&rq->lock);
-}
+__read_mostly int scheduler_running;
+
+/*
+ * part of the period that we allow rt tasks to run in us.
+ * default: 0.95s
+ */
+int sysctl_sched_rt_runtime = 950000;
 
-#else /* __ARCH_WANT_UNLOCKED_CTXSW */
-static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
-{
-#ifdef CONFIG_SMP
-	/*
-	 * We can optimise this out completely for !SMP, because the
-	 * SMP rebalancing from interrupt is the only thing that cares
-	 * here.
-	 */
-	next->on_cpu = 1;
-#endif
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
-	raw_spin_unlock_irq(&rq->lock);
-#else
-	raw_spin_unlock(&rq->lock);
-#endif
-}
 
-static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
-{
-#ifdef CONFIG_SMP
-	/*
-	 * After ->on_cpu is cleared, the task can be moved to a different CPU.
-	 * We must ensure this doesn't happen until the switch is completely
-	 * finished.
-	 */
-	smp_wmb();
-	prev->on_cpu = 0;
-#endif
-#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
-	local_irq_enable();
-#endif
-}
-#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
 
 /*
  * __task_rq_lock - lock the rq @p resides on.
@@ -1183,20 +335,6 @@ static struct rq *this_rq_lock(void)
  * rq->lock.
  */
 
-/*
- * Use hrtick when:
- *  - enabled by features
- *  - hrtimer is actually high res
- */
-static inline int hrtick_enabled(struct rq *rq)
-{
-	if (!sched_feat(HRTICK))
-		return 0;
-	if (!cpu_active(cpu_of(rq)))
-		return 0;
-	return hrtimer_is_hres_active(&rq->hrtick_timer);
-}
-
 static void hrtick_clear(struct rq *rq)
 {
 	if (hrtimer_active(&rq->hrtick_timer))
@@ -1240,7 +378,7 @@ static void __hrtick_start(void *arg)
  *
  * called with rq->lock held and irqs disabled
  */
-static void hrtick_start(struct rq *rq, u64 delay)
+void hrtick_start(struct rq *rq, u64 delay)
 {
 	struct hrtimer *timer = &rq->hrtick_timer;
 	ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
@@ -1284,7 +422,7 @@ static __init void init_hrtick(void)
  *
  * called with rq->lock held and irqs disabled
  */
-static void hrtick_start(struct rq *rq, u64 delay)
+void hrtick_start(struct rq *rq, u64 delay)
 {
 	__hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
 			HRTIMER_MODE_REL_PINNED, 0);
@@ -1335,7 +473,7 @@ static inline void init_hrtick(void)
 #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
 #endif
 
-static void resched_task(struct task_struct *p)
+void resched_task(struct task_struct *p)
 {
 	int cpu;
 
@@ -1356,7 +494,7 @@ static void resched_task(struct task_struct *p)
 		smp_send_reschedule(cpu);
 }
 
-static void resched_cpu(int cpu)
+void resched_cpu(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long flags;
@@ -1449,12 +587,7 @@ static inline bool got_nohz_idle_kick(void)
 
 #endif /* CONFIG_NO_HZ */
 
-static u64 sched_avg_period(void)
-{
-	return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
-}
-
-static void sched_avg_update(struct rq *rq)
+void sched_avg_update(struct rq *rq)
 {
 	s64 period = sched_avg_period();
 
@@ -1470,193 +603,23 @@ static void sched_avg_update(struct rq *rq)
 	}
 }
 
-static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
-{
-	rq->rt_avg += rt_delta;
-	sched_avg_update(rq);
-}
-
 #else /* !CONFIG_SMP */
-static void resched_task(struct task_struct *p)
+void resched_task(struct task_struct *p)
 {
 	assert_raw_spin_locked(&task_rq(p)->lock);
 	set_tsk_need_resched(p);
 }
-
-static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
-{
-}
-
-static void sched_avg_update(struct rq *rq)
-{
-}
 #endif /* CONFIG_SMP */
 
-#if BITS_PER_LONG == 32
-# define WMULT_CONST	(~0UL)
-#else
-# define WMULT_CONST	(1UL << 32)
-#endif
-
-#define WMULT_SHIFT	32
-
-/*
- * Shift right and round:
- */
-#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
-
-/*
- * delta *= weight / lw
- */
-static unsigned long
-calc_delta_mine(unsigned long delta_exec, unsigned long weight,
-		struct load_weight *lw)
-{
-	u64 tmp;
-
-	/*
-	 * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched
-	 * entities since MIN_SHARES = 2. Treat weight as 1 if less than
-	 * 2^SCHED_LOAD_RESOLUTION.
-	 */
-	if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION)))
-		tmp = (u64)delta_exec * scale_load_down(weight);
-	else
-		tmp = (u64)delta_exec;
-
-	if (!lw->inv_weight) {
-		unsigned long w = scale_load_down(lw->weight);
-
-		if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
-			lw->inv_weight = 1;
-		else if (unlikely(!w))
-			lw->inv_weight = WMULT_CONST;
-		else
-			lw->inv_weight = WMULT_CONST / w;
-	}
-
-	/*
-	 * Check whether we'd overflow the 64-bit multiplication:
-	 */
-	if (unlikely(tmp > WMULT_CONST))
-		tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
-			WMULT_SHIFT/2);
-	else
-		tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
-
-	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
-}
-
-static inline void update_load_add(struct load_weight *lw, unsigned long inc)
-{
-	lw->weight += inc;
-	lw->inv_weight = 0;
-}
-
-static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
-{
-	lw->weight -= dec;
-	lw->inv_weight = 0;
-}
-
-static inline void update_load_set(struct load_weight *lw, unsigned long w)
-{
-	lw->weight = w;
-	lw->inv_weight = 0;
-}
-
-/*
- * To aid in avoiding the subversion of "niceness" due to uneven distribution
- * of tasks with abnormal "nice" values across CPUs the contribution that
- * each task makes to its run queue's load is weighted according to its
- * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
- * scaled version of the new time slice allocation that they receive on time
- * slice expiry etc.
- */
-
-#define WEIGHT_IDLEPRIO                3
-#define WMULT_IDLEPRIO         1431655765
-
-/*
- * Nice levels are multiplicative, with a gentle 10% change for every
- * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
- * nice 1, it will get ~10% less CPU time than another CPU-bound task
- * that remained on nice 0.
- *
- * The "10% effect" is relative and cumulative: from _any_ nice level,
- * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
- * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
- * If a task goes up by ~10% and another task goes down by ~10% then
- * the relative distance between them is ~25%.)
- */
-static const int prio_to_weight[40] = {
- /* -20 */     88761,     71755,     56483,     46273,     36291,
- /* -15 */     29154,     23254,     18705,     14949,     11916,
- /* -10 */      9548,      7620,      6100,      4904,      3906,
- /*  -5 */      3121,      2501,      1991,      1586,      1277,
- /*   0 */      1024,       820,       655,       526,       423,
- /*   5 */       335,       272,       215,       172,       137,
- /*  10 */       110,        87,        70,        56,        45,
- /*  15 */        36,        29,        23,        18,        15,
-};
-
-/*
- * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated.
- *
- * In cases where the weight does not change often, we can use the
- * precalculated inverse to speed up arithmetics by turning divisions
- * into multiplications:
- */
-static const u32 prio_to_wmult[40] = {
- /* -20 */     48388,     59856,     76040,     92818,    118348,
- /* -15 */    147320,    184698,    229616,    287308,    360437,
- /* -10 */    449829,    563644,    704093,    875809,   1099582,
- /*  -5 */   1376151,   1717300,   2157191,   2708050,   3363326,
- /*   0 */   4194304,   5237765,   6557202,   8165337,  10153587,
- /*   5 */  12820798,  15790321,  19976592,  24970740,  31350126,
- /*  10 */  39045157,  49367440,  61356676,  76695844,  95443717,
- /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
-};
-
-/* Time spent by the tasks of the cpu accounting group executing in ... */
-enum cpuacct_stat_index {
-	CPUACCT_STAT_USER,	/* ... user mode */
-	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */
-
-	CPUACCT_STAT_NSTATS,
-};
-
-#ifdef CONFIG_CGROUP_CPUACCT
-static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
-static void cpuacct_update_stats(struct task_struct *tsk,
-		enum cpuacct_stat_index idx, cputime_t val);
-#else
-static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
-static inline void cpuacct_update_stats(struct task_struct *tsk,
-		enum cpuacct_stat_index idx, cputime_t val) {}
-#endif
-
-static inline void inc_cpu_load(struct rq *rq, unsigned long load)
-{
-	update_load_add(&rq->load, load);
-}
-
-static inline void dec_cpu_load(struct rq *rq, unsigned long load)
-{
-	update_load_sub(&rq->load, load);
-}
-
 #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
 			(defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH)))
-typedef int (*tg_visitor)(struct task_group *, void *);
-
 /*
  * Iterate task_group tree rooted at *from, calling @down when first entering a
  * node and @up when leaving it for the final time.
  *
  * Caller must hold rcu_lock or sufficient equivalent.
  */
-static int walk_tg_tree_from(struct task_group *from,
+int walk_tg_tree_from(struct task_group *from,
 			     tg_visitor down, tg_visitor up, void *data)
 {
 	struct task_group *parent, *child;
@@ -1673,284 +636,27 @@ down:
 		goto down;
 
 up:
-		continue;
-	}
-	ret = (*up)(parent, data);
-	if (ret || parent == from)
-		goto out;
-
-	child = parent;
-	parent = parent->parent;
-	if (parent)
-		goto up;
-out:
-	return ret;
-}
-
-/*
- * Iterate the full tree, calling @down when first entering a node and @up when
- * leaving it for the final time.
- *
- * Caller must hold rcu_lock or sufficient equivalent.
- */
-
-static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
-{
-	return walk_tg_tree_from(&root_task_group, down, up, data);
-}
-
-static int tg_nop(struct task_group *tg, void *data)
-{
-	return 0;
-}
-#endif
-
-#ifdef CONFIG_SMP
-/* Used instead of source_load when we know the type == 0 */
-static unsigned long weighted_cpuload(const int cpu)
-{
-	return cpu_rq(cpu)->load.weight;
-}
-
-/*
- * Return a low guess at the load of a migration-source cpu weighted
- * according to the scheduling class and "nice" value.
- *
- * We want to under-estimate the load of migration sources, to
- * balance conservatively.
- */
-static unsigned long source_load(int cpu, int type)
-{
-	struct rq *rq = cpu_rq(cpu);
-	unsigned long total = weighted_cpuload(cpu);
-
-	if (type == 0 || !sched_feat(LB_BIAS))
-		return total;
-
-	return min(rq->cpu_load[type-1], total);
-}
-
-/*
- * Return a high guess at the load of a migration-target cpu weighted
- * according to the scheduling class and "nice" value.
- */
-static unsigned long target_load(int cpu, int type)
-{
-	struct rq *rq = cpu_rq(cpu);
-	unsigned long total = weighted_cpuload(cpu);
-
-	if (type == 0 || !sched_feat(LB_BIAS))
-		return total;
-
-	return max(rq->cpu_load[type-1], total);
-}
-
-static unsigned long power_of(int cpu)
-{
-	return cpu_rq(cpu)->cpu_power;
-}
-
-static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
-
-static unsigned long cpu_avg_load_per_task(int cpu)
-{
-	struct rq *rq = cpu_rq(cpu);
-	unsigned long nr_running = ACCESS_ONCE(rq->nr_running);
-
-	if (nr_running)
-		return rq->load.weight / nr_running;
-
-	return 0;
-}
-
-#ifdef CONFIG_PREEMPT
-
-static void double_rq_lock(struct rq *rq1, struct rq *rq2);
-
-/*
- * fair double_lock_balance: Safely acquires both rq->locks in a fair
- * way at the expense of forcing extra atomic operations in all
- * invocations.  This assures that the double_lock is acquired using the
- * same underlying policy as the spinlock_t on this architecture, which
- * reduces latency compared to the unfair variant below.  However, it
- * also adds more overhead and therefore may reduce throughput.
- */
-static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
-	__releases(this_rq->lock)
-	__acquires(busiest->lock)
-	__acquires(this_rq->lock)
-{
-	raw_spin_unlock(&this_rq->lock);
-	double_rq_lock(this_rq, busiest);
-
-	return 1;
-}
-
-#else
-/*
- * Unfair double_lock_balance: Optimizes throughput at the expense of
- * latency by eliminating extra atomic operations when the locks are
- * already in proper order on entry.  This favors lower cpu-ids and will
- * grant the double lock to lower cpus over higher ids under contention,
- * regardless of entry order into the function.
- */
-static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
-	__releases(this_rq->lock)
-	__acquires(busiest->lock)
-	__acquires(this_rq->lock)
-{
-	int ret = 0;
-
-	if (unlikely(!raw_spin_trylock(&busiest->lock))) {
-		if (busiest < this_rq) {
-			raw_spin_unlock(&this_rq->lock);
-			raw_spin_lock(&busiest->lock);
-			raw_spin_lock_nested(&this_rq->lock,
-					      SINGLE_DEPTH_NESTING);
-			ret = 1;
-		} else
-			raw_spin_lock_nested(&busiest->lock,
-					      SINGLE_DEPTH_NESTING);
-	}
-	return ret;
-}
-
-#endif /* CONFIG_PREEMPT */
-
-/*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
- */
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
-{
-	if (unlikely(!irqs_disabled())) {
-		/* printk() doesn't work good under rq->lock */
-		raw_spin_unlock(&this_rq->lock);
-		BUG_ON(1);
-	}
-
-	return _double_lock_balance(this_rq, busiest);
-}
-
-static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
-	__releases(busiest->lock)
-{
-	raw_spin_unlock(&busiest->lock);
-	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
-}
-
-/*
- * double_rq_lock - safely lock two runqueues
- *
- * Note this does not disable interrupts like task_rq_lock,
- * you need to do so manually before calling.
- */
-static void double_rq_lock(struct rq *rq1, struct rq *rq2)
-	__acquires(rq1->lock)
-	__acquires(rq2->lock)
-{
-	BUG_ON(!irqs_disabled());
-	if (rq1 == rq2) {
-		raw_spin_lock(&rq1->lock);
-		__acquire(rq2->lock);	/* Fake it out ;) */
-	} else {
-		if (rq1 < rq2) {
-			raw_spin_lock(&rq1->lock);
-			raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
-		} else {
-			raw_spin_lock(&rq2->lock);
-			raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
-		}
-	}
-}
-
-/*
- * double_rq_unlock - safely unlock two runqueues
- *
- * Note this does not restore interrupts like task_rq_unlock,
- * you need to do so manually after calling.
- */
-static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
-	__releases(rq1->lock)
-	__releases(rq2->lock)
-{
-	raw_spin_unlock(&rq1->lock);
-	if (rq1 != rq2)
-		raw_spin_unlock(&rq2->lock);
-	else
-		__release(rq2->lock);
-}
-
-#else /* CONFIG_SMP */
-
-/*
- * double_rq_lock - safely lock two runqueues
- *
- * Note this does not disable interrupts like task_rq_lock,
- * you need to do so manually before calling.
- */
-static void double_rq_lock(struct rq *rq1, struct rq *rq2)
-	__acquires(rq1->lock)
-	__acquires(rq2->lock)
-{
-	BUG_ON(!irqs_disabled());
-	BUG_ON(rq1 != rq2);
-	raw_spin_lock(&rq1->lock);
-	__acquire(rq2->lock);	/* Fake it out ;) */
-}
-
-/*
- * double_rq_unlock - safely unlock two runqueues
- *
- * Note this does not restore interrupts like task_rq_unlock,
- * you need to do so manually after calling.
- */
-static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
-	__releases(rq1->lock)
-	__releases(rq2->lock)
-{
-	BUG_ON(rq1 != rq2);
-	raw_spin_unlock(&rq1->lock);
-	__release(rq2->lock);
-}
-
-#endif
-
-static void calc_load_account_idle(struct rq *this_rq);
-static void update_sysctl(void);
-static int get_update_sysctl_factor(void);
-static void update_cpu_load(struct rq *this_rq);
-
-static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
-{
-	set_task_rq(p, cpu);
-#ifdef CONFIG_SMP
-	/*
-	 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
-	 * successfully executed on another CPU. We must ensure that updates of
-	 * per-task data have been completed by this moment.
-	 */
-	smp_wmb();
-	task_thread_info(p)->cpu = cpu;
-#endif
-}
-
-static const struct sched_class rt_sched_class;
-
-#define sched_class_highest (&stop_sched_class)
-#define for_each_class(class) \
-   for (class = sched_class_highest; class; class = class->next)
-
-#include "sched_stats.h"
+		continue;
+	}
+	ret = (*up)(parent, data);
+	if (ret || parent == from)
+		goto out;
 
-static void inc_nr_running(struct rq *rq)
-{
-	rq->nr_running++;
+	child = parent;
+	parent = parent->parent;
+	if (parent)
+		goto up;
+out:
+	return ret;
 }
 
-static void dec_nr_running(struct rq *rq)
+int tg_nop(struct task_group *tg, void *data)
 {
-	rq->nr_running--;
+	return 0;
 }
+#endif
+
+void update_cpu_load(struct rq *this_rq);
 
 static void set_load_weight(struct task_struct *p)
 {
@@ -1987,7 +693,7 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 /*
  * activate_task - move a task to the runqueue.
  */
-static void activate_task(struct rq *rq, struct task_struct *p, int flags)
+void activate_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (task_contributes_to_load(p))
 		rq->nr_uninterruptible--;
@@ -1998,7 +704,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int flags)
 /*
  * deactivate_task - remove a task from the runqueue.
  */
-static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
+void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (task_contributes_to_load(p))
 		rq->nr_uninterruptible++;
@@ -2223,15 +929,6 @@ static int irqtime_account_si_update(void)
 
 #endif
 
-#include "sched_idletask.c"
-#include "sched_fair.c"
-#include "sched_rt.c"
-#include "sched_autogroup.c"
-#include "sched_stoptask.c"
-#ifdef CONFIG_SCHED_DEBUG
-# include "sched_debug.c"
-#endif
-
 void sched_set_stop_task(int cpu, struct task_struct *stop)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
@@ -2329,7 +1026,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 		p->sched_class->prio_changed(rq, p, oldprio);
 }
 
-static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
+void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 {
 	const struct sched_class *class;
 
@@ -2355,38 +1052,6 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 }
 
 #ifdef CONFIG_SMP
-/*
- * Is this task likely cache-hot:
- */
-static int
-task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
-{
-	s64 delta;
-
-	if (p->sched_class != &fair_sched_class)
-		return 0;
-
-	if (unlikely(p->policy == SCHED_IDLE))
-		return 0;
-
-	/*
-	 * Buddy candidates are cache hot:
-	 */
-	if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
-			(&p->se == cfs_rq_of(&p->se)->next ||
-			 &p->se == cfs_rq_of(&p->se)->last))
-		return 1;
-
-	if (sysctl_sched_migration_cost == -1)
-		return 1;
-	if (sysctl_sched_migration_cost == 0)
-		return 0;
-
-	delta = now - p->se.exec_start;
-
-	return delta < (s64)sysctl_sched_migration_cost;
-}
-
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 #ifdef CONFIG_SCHED_DEBUG
@@ -3469,7 +2134,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
  */
 static atomic_long_t calc_load_tasks_idle;
 
-static void calc_load_account_idle(struct rq *this_rq)
+void calc_load_account_idle(struct rq *this_rq)
 {
 	long delta;
 
@@ -3613,7 +2278,7 @@ static void calc_global_nohz(unsigned long ticks)
 	 */
 }
 #else
-static void calc_load_account_idle(struct rq *this_rq)
+void calc_load_account_idle(struct rq *this_rq)
 {
 }
 
@@ -3756,7 +2421,7 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
  * scheduler tick (TICK_NSEC). With tickless idle this will not be called
  * every tick. We fix it up based on jiffies.
  */
-static void update_cpu_load(struct rq *this_rq)
+void update_cpu_load(struct rq *this_rq)
 {
 	unsigned long this_load = this_rq->load.weight;
 	unsigned long curr_jiffies = jiffies;
@@ -6148,53 +4813,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 #endif
 }
 
-/*
- * Increase the granularity value when there are more CPUs,
- * because with more CPUs the 'effective latency' as visible
- * to users decreases. But the relationship is not linear,
- * so pick a second-best guess by going with the log2 of the
- * number of CPUs.
- *
- * This idea comes from the SD scheduler of Con Kolivas:
- */
-static int get_update_sysctl_factor(void)
-{
-	unsigned int cpus = min_t(int, num_online_cpus(), 8);
-	unsigned int factor;
-
-	switch (sysctl_sched_tunable_scaling) {
-	case SCHED_TUNABLESCALING_NONE:
-		factor = 1;
-		break;
-	case SCHED_TUNABLESCALING_LINEAR:
-		factor = cpus;
-		break;
-	case SCHED_TUNABLESCALING_LOG:
-	default:
-		factor = 1 + ilog2(cpus);
-		break;
-	}
-
-	return factor;
-}
-
-static void update_sysctl(void)
-{
-	unsigned int factor = get_update_sysctl_factor();
-
-#define SET_SYSCTL(name) \
-	(sysctl_##name = (factor) * normalized_sysctl_##name)
-	SET_SYSCTL(sched_min_granularity);
-	SET_SYSCTL(sched_latency);
-	SET_SYSCTL(sched_wakeup_granularity);
-#undef SET_SYSCTL
-}
-
-static inline void sched_init_granularity(void)
-{
-	update_sysctl();
-}
-
 #ifdef CONFIG_SMP
 void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
@@ -6381,30 +4999,6 @@ static void calc_global_load_remove(struct rq *rq)
 	rq->calc_load_active = 0;
 }
 
-#ifdef CONFIG_CFS_BANDWIDTH
-static void unthrottle_offline_cfs_rqs(struct rq *rq)
-{
-	struct cfs_rq *cfs_rq;
-
-	for_each_leaf_cfs_rq(rq, cfs_rq) {
-		struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
-
-		if (!cfs_rq->runtime_enabled)
-			continue;
-
-		/*
-		 * clock_task is not advancing so we just need to make sure
-		 * there's some valid quota amount
-		 */
-		cfs_rq->runtime_remaining = cfs_b->quota;
-		if (cfs_rq_throttled(cfs_rq))
-			unthrottle_cfs_rq(cfs_rq);
-	}
-}
-#else
-static void unthrottle_offline_cfs_rqs(struct rq *rq) {}
-#endif
-
 /*
  * Migrate all tasks from the rq, sleeping tasks will be migrated by
  * try_to_wake_up()->select_task_rq().
@@ -7010,6 +5604,12 @@ out:
 	return -ENOMEM;
 }
 
+/*
+ * By default the system creates a single root-domain with all cpus as
+ * members (mimicking the global state we have today).
+ */
+struct root_domain def_root_domain;
+
 static void init_defrootdomain(void)
 {
 	init_rootdomain(&def_root_domain);
@@ -7418,6 +6018,11 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 	update_group_power(sd, cpu);
 }
 
+int __weak arch_sd_sibling_asym_packing(void)
+{
+       return 0*SD_ASYM_PACKING;
+}
+
 /*
  * Initializers for schedule domains
  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
@@ -8053,29 +6658,6 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
 	}
 }
 
-static int update_runtime(struct notifier_block *nfb,
-				unsigned long action, void *hcpu)
-{
-	int cpu = (int)(long)hcpu;
-
-	switch (action) {
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		disable_runtime(cpu_rq(cpu));
-		return NOTIFY_OK;
-
-	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		enable_runtime(cpu_rq(cpu));
-		return NOTIFY_OK;
-
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
 void __init sched_init_smp(void)
 {
 	cpumask_var_t non_isolated_cpus;
@@ -8124,104 +6706,11 @@ int in_sched_functions(unsigned long addr)
 		&& addr < (unsigned long)__sched_text_end);
 }
 
-static void init_cfs_rq(struct cfs_rq *cfs_rq)
-{
-	cfs_rq->tasks_timeline = RB_ROOT;
-	INIT_LIST_HEAD(&cfs_rq->tasks);
-	cfs_rq->min_vruntime = (u64)(-(1LL << 20));
-#ifndef CONFIG_64BIT
-	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
-#endif
-}
-
-static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
-{
-	struct rt_prio_array *array;
-	int i;
-
-	array = &rt_rq->active;
-	for (i = 0; i < MAX_RT_PRIO; i++) {
-		INIT_LIST_HEAD(array->queue + i);
-		__clear_bit(i, array->bitmap);
-	}
-	/* delimiter for bitsearch: */
-	__set_bit(MAX_RT_PRIO, array->bitmap);
-
-#if defined CONFIG_SMP
-	rt_rq->highest_prio.curr = MAX_RT_PRIO;
-	rt_rq->highest_prio.next = MAX_RT_PRIO;
-	rt_rq->rt_nr_migratory = 0;
-	rt_rq->overloaded = 0;
-	plist_head_init(&rt_rq->pushable_tasks);
-#endif
-
-	rt_rq->rt_time = 0;
-	rt_rq->rt_throttled = 0;
-	rt_rq->rt_runtime = 0;
-	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
-}
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
-				struct sched_entity *se, int cpu,
-				struct sched_entity *parent)
-{
-	struct rq *rq = cpu_rq(cpu);
-
-	cfs_rq->tg = tg;
-	cfs_rq->rq = rq;
-#ifdef CONFIG_SMP
-	/* allow initial update_cfs_load() to truncate */
-	cfs_rq->load_stamp = 1;
-#endif
-	init_cfs_rq_runtime(cfs_rq);
-
-	tg->cfs_rq[cpu] = cfs_rq;
-	tg->se[cpu] = se;
-
-	/* se could be NULL for root_task_group */
-	if (!se)
-		return;
-
-	if (!parent)
-		se->cfs_rq = &rq->cfs;
-	else
-		se->cfs_rq = parent->my_q;
-
-	se->my_q = cfs_rq;
-	update_load_set(&se->load, 0);
-	se->parent = parent;
-}
+#ifdef CONFIG_CGROUP_SCHED
+struct task_group root_task_group;
 #endif
 
-#ifdef CONFIG_RT_GROUP_SCHED
-static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
-		struct sched_rt_entity *rt_se, int cpu,
-		struct sched_rt_entity *parent)
-{
-	struct rq *rq = cpu_rq(cpu);
-
-	rt_rq->highest_prio.curr = MAX_RT_PRIO;
-	rt_rq->rt_nr_boosted = 0;
-	rt_rq->rq = rq;
-	rt_rq->tg = tg;
-
-	tg->rt_rq[cpu] = rt_rq;
-	tg->rt_se[cpu] = rt_se;
-
-	if (!rt_se)
-		return;
-
-	if (!parent)
-		rt_se->rt_rq = &rq->rt;
-	else
-		rt_se->rt_rq = parent->my_q;
-
-	rt_se->my_q = rt_rq;
-	rt_se->parent = parent;
-	INIT_LIST_HEAD(&rt_se->run_list);
-}
-#endif
+DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
 
 void __init sched_init(void)
 {
@@ -8294,7 +6783,7 @@ void __init sched_init(void)
 		init_cfs_rq(&rq->cfs);
 		init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
-		root_task_group.shares = root_task_group_load;
+		root_task_group.shares = ROOT_TASK_GROUP_LOAD;
 		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
 		/*
 		 * How much cpu bandwidth does root_task_group get?
@@ -8357,10 +6846,6 @@ void __init sched_init(void)
 	INIT_HLIST_HEAD(&init_task.preempt_notifiers);
 #endif
 
-#ifdef CONFIG_SMP
-	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
-#endif
-
 #ifdef CONFIG_RT_MUTEXES
 	plist_head_init(&init_task.pi_waiters);
 #endif
@@ -8388,17 +6873,11 @@ void __init sched_init(void)
 
 #ifdef CONFIG_SMP
 	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
-#ifdef CONFIG_NO_HZ
-	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
-	alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT);
-	atomic_set(&nohz.load_balancer, nr_cpu_ids);
-	atomic_set(&nohz.first_pick_cpu, nr_cpu_ids);
-	atomic_set(&nohz.second_pick_cpu, nr_cpu_ids);
-#endif
 	/* May be allocated at isolcpus cmdline parse time */
 	if (cpu_isolated_map == NULL)
 		zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
-#endif /* SMP */
+#endif
+	init_sched_fair_class();
 
 	scheduler_running = 1;
 }
@@ -8550,169 +7029,14 @@ void set_curr_task(int cpu, struct task_struct *p)
 
 #endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-static void free_fair_sched_group(struct task_group *tg)
-{
-	int i;
-
-	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
-
-	for_each_possible_cpu(i) {
-		if (tg->cfs_rq)
-			kfree(tg->cfs_rq[i]);
-		if (tg->se)
-			kfree(tg->se[i]);
-	}
-
-	kfree(tg->cfs_rq);
-	kfree(tg->se);
-}
-
-static
-int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
-{
-	struct cfs_rq *cfs_rq;
-	struct sched_entity *se;
-	int i;
-
-	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
-	if (!tg->cfs_rq)
-		goto err;
-	tg->se = kzalloc(sizeof(se) * nr_cpu_ids, GFP_KERNEL);
-	if (!tg->se)
-		goto err;
-
-	tg->shares = NICE_0_LOAD;
-
-	init_cfs_bandwidth(tg_cfs_bandwidth(tg));
-
-	for_each_possible_cpu(i) {
-		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
-				      GFP_KERNEL, cpu_to_node(i));
-		if (!cfs_rq)
-			goto err;
-
-		se = kzalloc_node(sizeof(struct sched_entity),
-				  GFP_KERNEL, cpu_to_node(i));
-		if (!se)
-			goto err_free_rq;
-
-		init_cfs_rq(cfs_rq);
-		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
-	}
-
-	return 1;
-
-err_free_rq:
-	kfree(cfs_rq);
-err:
-	return 0;
-}
-
-static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
-{
-	struct rq *rq = cpu_rq(cpu);
-	unsigned long flags;
-
-	/*
-	* Only empty task groups can be destroyed; so we can speculatively
-	* check on_list without danger of it being re-added.
-	*/
-	if (!tg->cfs_rq[cpu]->on_list)
-		return;
-
-	raw_spin_lock_irqsave(&rq->lock, flags);
-	list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-#else /* !CONFIG_FAIR_GROUP_SCHED */
-static inline void free_fair_sched_group(struct task_group *tg)
-{
-}
-
-static inline
-int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
-{
-	return 1;
-}
-
-static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
-{
-}
-#endif /* CONFIG_FAIR_GROUP_SCHED */
-
 #ifdef CONFIG_RT_GROUP_SCHED
-static void free_rt_sched_group(struct task_group *tg)
-{
-	int i;
-
-	if (tg->rt_se)
-		destroy_rt_bandwidth(&tg->rt_bandwidth);
-
-	for_each_possible_cpu(i) {
-		if (tg->rt_rq)
-			kfree(tg->rt_rq[i]);
-		if (tg->rt_se)
-			kfree(tg->rt_se[i]);
-	}
-
-	kfree(tg->rt_rq);
-	kfree(tg->rt_se);
-}
-
-static
-int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
-{
-	struct rt_rq *rt_rq;
-	struct sched_rt_entity *rt_se;
-	int i;
-
-	tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
-	if (!tg->rt_rq)
-		goto err;
-	tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL);
-	if (!tg->rt_se)
-		goto err;
-
-	init_rt_bandwidth(&tg->rt_bandwidth,
-			ktime_to_ns(def_rt_bandwidth.rt_period), 0);
-
-	for_each_possible_cpu(i) {
-		rt_rq = kzalloc_node(sizeof(struct rt_rq),
-				     GFP_KERNEL, cpu_to_node(i));
-		if (!rt_rq)
-			goto err;
-
-		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
-				     GFP_KERNEL, cpu_to_node(i));
-		if (!rt_se)
-			goto err_free_rq;
-
-		init_rt_rq(rt_rq, cpu_rq(i));
-		rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
-		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
-	}
-
-	return 1;
-
-err_free_rq:
-	kfree(rt_rq);
-err:
-	return 0;
-}
 #else /* !CONFIG_RT_GROUP_SCHED */
-static inline void free_rt_sched_group(struct task_group *tg)
-{
-}
-
-static inline
-int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
-{
-	return 1;
-}
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_CGROUP_SCHED
+/* task_group_lock serializes the addition/removal of task groups */
+static DEFINE_SPINLOCK(task_group_lock);
+
 static void free_sched_group(struct task_group *tg)
 {
 	free_fair_sched_group(tg);
@@ -8818,47 +7142,6 @@ void sched_move_task(struct task_struct *tsk)
 #endif /* CONFIG_CGROUP_SCHED */
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static DEFINE_MUTEX(shares_mutex);
-
-int sched_group_set_shares(struct task_group *tg, unsigned long shares)
-{
-	int i;
-	unsigned long flags;
-
-	/*
-	 * We can't change the weight of the root cgroup.
-	 */
-	if (!tg->se[0])
-		return -EINVAL;
-
-	shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES));
-
-	mutex_lock(&shares_mutex);
-	if (tg->shares == shares)
-		goto done;
-
-	tg->shares = shares;
-	for_each_possible_cpu(i) {
-		struct rq *rq = cpu_rq(i);
-		struct sched_entity *se;
-
-		se = tg->se[i];
-		/* Propagate contribution to hierarchy */
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		for_each_sched_entity(se)
-			update_cfs_shares(group_cfs_rq(se));
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-	}
-
-done:
-	mutex_unlock(&shares_mutex);
-	return 0;
-}
-
-unsigned long sched_group_shares(struct task_group *tg)
-{
-	return tg->shares;
-}
 #endif
 
 #if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH)
@@ -8883,7 +7166,7 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
 	struct task_struct *g, *p;
 
 	do_each_thread(g, p) {
-		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
+		if (rt_task(p) && task_rq(p)->rt.tg == tg)
 			return 1;
 	} while_each_thread(g, p);
 
@@ -9235,7 +7518,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
 static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 {
 	int i, ret = 0, runtime_enabled, runtime_was_enabled;
-	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
+	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
 
 	if (tg == &root_task_group)
 		return -EINVAL;
@@ -9264,7 +7547,6 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 	runtime_enabled = quota != RUNTIME_INF;
 	runtime_was_enabled = cfs_b->quota != RUNTIME_INF;
 	account_cfs_bandwidth_used(runtime_enabled, runtime_was_enabled);
-
 	raw_spin_lock_irq(&cfs_b->lock);
 	cfs_b->period = ns_to_ktime(period);
 	cfs_b->quota = quota;
@@ -9280,13 +7562,13 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 
 	for_each_possible_cpu(i) {
 		struct cfs_rq *cfs_rq = tg->cfs_rq[i];
-		struct rq *rq = rq_of(cfs_rq);
+		struct rq *rq = cfs_rq->rq;
 
 		raw_spin_lock_irq(&rq->lock);
 		cfs_rq->runtime_enabled = runtime_enabled;
 		cfs_rq->runtime_remaining = 0;
 
-		if (cfs_rq_throttled(cfs_rq))
+		if (cfs_rq->throttled)
 			unthrottle_cfs_rq(cfs_rq);
 		raw_spin_unlock_irq(&rq->lock);
 	}
@@ -9300,7 +7582,7 @@ int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
 {
 	u64 quota, period;
 
-	period = ktime_to_ns(tg_cfs_bandwidth(tg)->period);
+	period = ktime_to_ns(tg->cfs_bandwidth.period);
 	if (cfs_quota_us < 0)
 		quota = RUNTIME_INF;
 	else
@@ -9313,10 +7595,10 @@ long tg_get_cfs_quota(struct task_group *tg)
 {
 	u64 quota_us;
 
-	if (tg_cfs_bandwidth(tg)->quota == RUNTIME_INF)
+	if (tg->cfs_bandwidth.quota == RUNTIME_INF)
 		return -1;
 
-	quota_us = tg_cfs_bandwidth(tg)->quota;
+	quota_us = tg->cfs_bandwidth.quota;
 	do_div(quota_us, NSEC_PER_USEC);
 
 	return quota_us;
@@ -9327,7 +7609,7 @@ int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
 	u64 quota, period;
 
 	period = (u64)cfs_period_us * NSEC_PER_USEC;
-	quota = tg_cfs_bandwidth(tg)->quota;
+	quota = tg->cfs_bandwidth.quota;
 
 	if (period <= 0)
 		return -EINVAL;
@@ -9339,7 +7621,7 @@ long tg_get_cfs_period(struct task_group *tg)
 {
 	u64 cfs_period_us;
 
-	cfs_period_us = ktime_to_ns(tg_cfs_bandwidth(tg)->period);
+	cfs_period_us = ktime_to_ns(tg->cfs_bandwidth.period);
 	do_div(cfs_period_us, NSEC_PER_USEC);
 
 	return cfs_period_us;
@@ -9399,13 +7681,13 @@ static u64 normalize_cfs_quota(struct task_group *tg,
 static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
 {
 	struct cfs_schedulable_data *d = data;
-	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
+	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
 	s64 quota = 0, parent_quota = -1;
 
 	if (!tg->parent) {
 		quota = RUNTIME_INF;
 	} else {
-		struct cfs_bandwidth *parent_b = tg_cfs_bandwidth(tg->parent);
+		struct cfs_bandwidth *parent_b = &tg->parent->cfs_bandwidth;
 
 		quota = normalize_cfs_quota(tg, d);
 		parent_quota = parent_b->hierarchal_quota;
@@ -9449,7 +7731,7 @@ static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft,
 		struct cgroup_map_cb *cb)
 {
 	struct task_group *tg = cgroup_tg(cgrp);
-	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
+	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
 
 	cb->fill(cb, "nr_periods", cfs_b->nr_periods);
 	cb->fill(cb, "nr_throttled", cfs_b->nr_throttled);
@@ -9748,7 +8030,7 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
  *
  * called with rq->lock held.
  */
-static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
+void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
 	int cpu;
@@ -9790,7 +8072,7 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 /*
  * Charge the system/user time to the task's accounting group.
  */
-static void cpuacct_update_stats(struct task_struct *tsk,
+void cpuacct_update_stats(struct task_struct *tsk,
 		enum cpuacct_stat_index idx, cputime_t val)
 {
 	struct cpuacct *ca;
diff --git a/kernel/sched.h b/kernel/sched.h
new file mode 100644
index 000000000000..675261ce3c4a
--- /dev/null
+++ b/kernel/sched.h
@@ -0,0 +1,1064 @@
+
+#include <linux/sched.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/stop_machine.h>
+
+#include "sched_cpupri.h"
+
+extern __read_mostly int scheduler_running;
+
+/*
+ * Convert user-nice values [ -20 ... 0 ... 19 ]
+ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
+ * and back.
+ */
+#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
+#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
+#define TASK_NICE(p)		PRIO_TO_NICE((p)->static_prio)
+
+/*
+ * 'User priority' is the nice value converted to something we
+ * can work with better when scaling various scheduler parameters,
+ * it's a [ 0 ... 39 ] range.
+ */
+#define USER_PRIO(p)		((p)-MAX_RT_PRIO)
+#define TASK_USER_PRIO(p)	USER_PRIO((p)->static_prio)
+#define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))
+
+/*
+ * Helpers for converting nanosecond timing to jiffy resolution
+ */
+#define NS_TO_JIFFIES(TIME)	((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
+
+#define NICE_0_LOAD		SCHED_LOAD_SCALE
+#define NICE_0_SHIFT		SCHED_LOAD_SHIFT
+
+/*
+ * These are the 'tuning knobs' of the scheduler:
+ *
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define DEF_TIMESLICE		(100 * HZ / 1000)
+
+/*
+ * single value that denotes runtime == period, ie unlimited time.
+ */
+#define RUNTIME_INF	((u64)~0ULL)
+
+static inline int rt_policy(int policy)
+{
+	if (policy == SCHED_FIFO || policy == SCHED_RR)
+		return 1;
+	return 0;
+}
+
+static inline int task_has_rt_policy(struct task_struct *p)
+{
+	return rt_policy(p->policy);
+}
+
+/*
+ * This is the priority-queue data structure of the RT scheduling class:
+ */
+struct rt_prio_array {
+	DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
+	struct list_head queue[MAX_RT_PRIO];
+};
+
+struct rt_bandwidth {
+	/* nests inside the rq lock: */
+	raw_spinlock_t		rt_runtime_lock;
+	ktime_t			rt_period;
+	u64			rt_runtime;
+	struct hrtimer		rt_period_timer;
+};
+
+extern struct mutex sched_domains_mutex;
+
+#ifdef CONFIG_CGROUP_SCHED
+
+#include <linux/cgroup.h>
+
+struct cfs_rq;
+struct rt_rq;
+
+static LIST_HEAD(task_groups);
+
+struct cfs_bandwidth {
+#ifdef CONFIG_CFS_BANDWIDTH
+	raw_spinlock_t lock;
+	ktime_t period;
+	u64 quota, runtime;
+	s64 hierarchal_quota;
+	u64 runtime_expires;
+
+	int idle, timer_active;
+	struct hrtimer period_timer, slack_timer;
+	struct list_head throttled_cfs_rq;
+
+	/* statistics */
+	int nr_periods, nr_throttled;
+	u64 throttled_time;
+#endif
+};
+
+/* task group related information */
+struct task_group {
+	struct cgroup_subsys_state css;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	/* schedulable entities of this group on each cpu */
+	struct sched_entity **se;
+	/* runqueue "owned" by this group on each cpu */
+	struct cfs_rq **cfs_rq;
+	unsigned long shares;
+
+	atomic_t load_weight;
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	struct sched_rt_entity **rt_se;
+	struct rt_rq **rt_rq;
+
+	struct rt_bandwidth rt_bandwidth;
+#endif
+
+	struct rcu_head rcu;
+	struct list_head list;
+
+	struct task_group *parent;
+	struct list_head siblings;
+	struct list_head children;
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+	struct autogroup *autogroup;
+#endif
+
+	struct cfs_bandwidth cfs_bandwidth;
+};
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+#define ROOT_TASK_GROUP_LOAD	NICE_0_LOAD
+
+/*
+ * A weight of 0 or 1 can cause arithmetics problems.
+ * A weight of a cfs_rq is the sum of weights of which entities
+ * are queued on this cfs_rq, so a weight of a entity should not be
+ * too large, so as the shares value of a task group.
+ * (The default weight is 1024 - so there's no practical
+ *  limitation from this.)
+ */
+#define MIN_SHARES	(1UL <<  1)
+#define MAX_SHARES	(1UL << 18)
+#endif
+
+/* Default task group.
+ *	Every task in system belong to this group at bootup.
+ */
+extern struct task_group root_task_group;
+
+typedef int (*tg_visitor)(struct task_group *, void *);
+
+extern int walk_tg_tree_from(struct task_group *from,
+			     tg_visitor down, tg_visitor up, void *data);
+
+/*
+ * Iterate the full tree, calling @down when first entering a node and @up when
+ * leaving it for the final time.
+ *
+ * Caller must hold rcu_lock or sufficient equivalent.
+ */
+static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
+{
+	return walk_tg_tree_from(&root_task_group, down, up, data);
+}
+
+extern int tg_nop(struct task_group *tg, void *data);
+
+extern void free_fair_sched_group(struct task_group *tg);
+extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
+extern void unregister_fair_sched_group(struct task_group *tg, int cpu);
+extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
+			struct sched_entity *se, int cpu,
+			struct sched_entity *parent);
+extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
+extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
+
+extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
+extern void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
+extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
+
+extern void free_rt_sched_group(struct task_group *tg);
+extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
+extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
+		struct sched_rt_entity *rt_se, int cpu,
+		struct sched_rt_entity *parent);
+
+#else /* CONFIG_CGROUP_SCHED */
+
+struct cfs_bandwidth { };
+
+#endif	/* CONFIG_CGROUP_SCHED */
+
+/* CFS-related fields in a runqueue */
+struct cfs_rq {
+	struct load_weight load;
+	unsigned long nr_running, h_nr_running;
+
+	u64 exec_clock;
+	u64 min_vruntime;
+#ifndef CONFIG_64BIT
+	u64 min_vruntime_copy;
+#endif
+
+	struct rb_root tasks_timeline;
+	struct rb_node *rb_leftmost;
+
+	struct list_head tasks;
+	struct list_head *balance_iterator;
+
+	/*
+	 * 'curr' points to currently running entity on this cfs_rq.
+	 * It is set to NULL otherwise (i.e when none are currently running).
+	 */
+	struct sched_entity *curr, *next, *last, *skip;
+
+#ifdef	CONFIG_SCHED_DEBUG
+	unsigned int nr_spread_over;
+#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
+
+	/*
+	 * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
+	 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
+	 * (like users, containers etc.)
+	 *
+	 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
+	 * list is used during load balance.
+	 */
+	int on_list;
+	struct list_head leaf_cfs_rq_list;
+	struct task_group *tg;	/* group that "owns" this runqueue */
+
+#ifdef CONFIG_SMP
+	/*
+	 * the part of load.weight contributed by tasks
+	 */
+	unsigned long task_weight;
+
+	/*
+	 *   h_load = weight * f(tg)
+	 *
+	 * Where f(tg) is the recursive weight fraction assigned to
+	 * this group.
+	 */
+	unsigned long h_load;
+
+	/*
+	 * Maintaining per-cpu shares distribution for group scheduling
+	 *
+	 * load_stamp is the last time we updated the load average
+	 * load_last is the last time we updated the load average and saw load
+	 * load_unacc_exec_time is currently unaccounted execution time
+	 */
+	u64 load_avg;
+	u64 load_period;
+	u64 load_stamp, load_last, load_unacc_exec_time;
+
+	unsigned long load_contribution;
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_CFS_BANDWIDTH
+	int runtime_enabled;
+	u64 runtime_expires;
+	s64 runtime_remaining;
+
+	u64 throttled_timestamp;
+	int throttled, throttle_count;
+	struct list_head throttled_list;
+#endif /* CONFIG_CFS_BANDWIDTH */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+};
+
+static inline int rt_bandwidth_enabled(void)
+{
+	return sysctl_sched_rt_runtime >= 0;
+}
+
+/* Real-Time classes' related field in a runqueue: */
+struct rt_rq {
+	struct rt_prio_array active;
+	unsigned long rt_nr_running;
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
+	struct {
+		int curr; /* highest queued rt task prio */
+#ifdef CONFIG_SMP
+		int next; /* next highest */
+#endif
+	} highest_prio;
+#endif
+#ifdef CONFIG_SMP
+	unsigned long rt_nr_migratory;
+	unsigned long rt_nr_total;
+	int overloaded;
+	struct plist_head pushable_tasks;
+#endif
+	int rt_throttled;
+	u64 rt_time;
+	u64 rt_runtime;
+	/* Nests inside the rq lock: */
+	raw_spinlock_t rt_runtime_lock;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	unsigned long rt_nr_boosted;
+
+	struct rq *rq;
+	struct list_head leaf_rt_rq_list;
+	struct task_group *tg;
+#endif
+};
+
+#ifdef CONFIG_SMP
+
+/*
+ * We add the notion of a root-domain which will be used to define per-domain
+ * variables. Each exclusive cpuset essentially defines an island domain by
+ * fully partitioning the member cpus from any other cpuset. Whenever a new
+ * exclusive cpuset is created, we also create and attach a new root-domain
+ * object.
+ *
+ */
+struct root_domain {
+	atomic_t refcount;
+	atomic_t rto_count;
+	struct rcu_head rcu;
+	cpumask_var_t span;
+	cpumask_var_t online;
+
+	/*
+	 * The "RT overload" flag: it gets set if a CPU has more than
+	 * one runnable RT task.
+	 */
+	cpumask_var_t rto_mask;
+	struct cpupri cpupri;
+};
+
+extern struct root_domain def_root_domain;
+
+#endif /* CONFIG_SMP */
+
+/*
+ * This is the main, per-CPU runqueue data structure.
+ *
+ * Locking rule: those places that want to lock multiple runqueues
+ * (such as the load balancing or the thread migration code), lock
+ * acquire operations must be ordered by ascending &runqueue.
+ */
+struct rq {
+	/* runqueue lock: */
+	raw_spinlock_t lock;
+
+	/*
+	 * nr_running and cpu_load should be in the same cacheline because
+	 * remote CPUs use both these fields when doing load calculation.
+	 */
+	unsigned long nr_running;
+	#define CPU_LOAD_IDX_MAX 5
+	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
+	unsigned long last_load_update_tick;
+#ifdef CONFIG_NO_HZ
+	u64 nohz_stamp;
+	unsigned char nohz_balance_kick;
+#endif
+	int skip_clock_update;
+
+	/* capture load from *all* tasks on this cpu: */
+	struct load_weight load;
+	unsigned long nr_load_updates;
+	u64 nr_switches;
+
+	struct cfs_rq cfs;
+	struct rt_rq rt;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	/* list of leaf cfs_rq on this cpu: */
+	struct list_head leaf_cfs_rq_list;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+	struct list_head leaf_rt_rq_list;
+#endif
+
+	/*
+	 * This is part of a global counter where only the total sum
+	 * over all CPUs matters. A task can increase this counter on
+	 * one CPU and if it got migrated afterwards it may decrease
+	 * it on another CPU. Always updated under the runqueue lock:
+	 */
+	unsigned long nr_uninterruptible;
+
+	struct task_struct *curr, *idle, *stop;
+	unsigned long next_balance;
+	struct mm_struct *prev_mm;
+
+	u64 clock;
+	u64 clock_task;
+
+	atomic_t nr_iowait;
+
+#ifdef CONFIG_SMP
+	struct root_domain *rd;
+	struct sched_domain *sd;
+
+	unsigned long cpu_power;
+
+	unsigned char idle_balance;
+	/* For active balancing */
+	int post_schedule;
+	int active_balance;
+	int push_cpu;
+	struct cpu_stop_work active_balance_work;
+	/* cpu of this runqueue: */
+	int cpu;
+	int online;
+
+	u64 rt_avg;
+	u64 age_stamp;
+	u64 idle_stamp;
+	u64 avg_idle;
+#endif
+
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	u64 prev_irq_time;
+#endif
+#ifdef CONFIG_PARAVIRT
+	u64 prev_steal_time;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+	u64 prev_steal_time_rq;
+#endif
+
+	/* calc_load related fields */
+	unsigned long calc_load_update;
+	long calc_load_active;
+
+#ifdef CONFIG_SCHED_HRTICK
+#ifdef CONFIG_SMP
+	int hrtick_csd_pending;
+	struct call_single_data hrtick_csd;
+#endif
+	struct hrtimer hrtick_timer;
+#endif
+
+#ifdef CONFIG_SCHEDSTATS
+	/* latency stats */
+	struct sched_info rq_sched_info;
+	unsigned long long rq_cpu_time;
+	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
+
+	/* sys_sched_yield() stats */
+	unsigned int yld_count;
+
+	/* schedule() stats */
+	unsigned int sched_switch;
+	unsigned int sched_count;
+	unsigned int sched_goidle;
+
+	/* try_to_wake_up() stats */
+	unsigned int ttwu_count;
+	unsigned int ttwu_local;
+#endif
+
+#ifdef CONFIG_SMP
+	struct llist_head wake_list;
+#endif
+};
+
+static inline int cpu_of(struct rq *rq)
+{
+#ifdef CONFIG_SMP
+	return rq->cpu;
+#else
+	return 0;
+#endif
+}
+
+DECLARE_PER_CPU(struct rq, runqueues);
+
+#define rcu_dereference_check_sched_domain(p) \
+	rcu_dereference_check((p), \
+			      lockdep_is_held(&sched_domains_mutex))
+
+/*
+ * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
+ * See detach_destroy_domains: synchronize_sched for details.
+ *
+ * The domain tree of any CPU may only be accessed from within
+ * preempt-disabled sections.
+ */
+#define for_each_domain(cpu, __sd) \
+	for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
+
+#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
+#define this_rq()		(&__get_cpu_var(runqueues))
+#define task_rq(p)		cpu_rq(task_cpu(p))
+#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
+#define raw_rq()		(&__raw_get_cpu_var(runqueues))
+
+#include "sched_stats.h"
+#include "sched_autogroup.h"
+
+#ifdef CONFIG_CGROUP_SCHED
+
+/*
+ * Return the group to which this tasks belongs.
+ *
+ * We use task_subsys_state_check() and extend the RCU verification with
+ * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each
+ * task it moves into the cgroup. Therefore by holding either of those locks,
+ * we pin the task to the current cgroup.
+ */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	struct task_group *tg;
+	struct cgroup_subsys_state *css;
+
+	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
+			lockdep_is_held(&p->pi_lock) ||
+			lockdep_is_held(&task_rq(p)->lock));
+	tg = container_of(css, struct task_group, css);
+
+	return autogroup_task_group(p, tg);
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+{
+#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
+	struct task_group *tg = task_group(p);
+#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	p->se.cfs_rq = tg->cfs_rq[cpu];
+	p->se.parent = tg->se[cpu];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	p->rt.rt_rq  = tg->rt_rq[cpu];
+	p->rt.parent = tg->rt_se[cpu];
+#endif
+}
+
+#else /* CONFIG_CGROUP_SCHED */
+
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	return NULL;
+}
+
+#endif /* CONFIG_CGROUP_SCHED */
+
+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+	set_task_rq(p, cpu);
+#ifdef CONFIG_SMP
+	/*
+	 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+	 * successfuly executed on another CPU. We must ensure that updates of
+	 * per-task data have been completed by this moment.
+	 */
+	smp_wmb();
+	task_thread_info(p)->cpu = cpu;
+#endif
+}
+
+/*
+ * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
+ */
+#ifdef CONFIG_SCHED_DEBUG
+# define const_debug __read_mostly
+#else
+# define const_debug const
+#endif
+
+extern const_debug unsigned int sysctl_sched_features;
+
+#define SCHED_FEAT(name, enabled)	\
+	__SCHED_FEAT_##name ,
+
+enum {
+#include "sched_features.h"
+};
+
+#undef SCHED_FEAT
+
+#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
+
+static inline u64 global_rt_period(void)
+{
+	return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
+}
+
+static inline u64 global_rt_runtime(void)
+{
+	if (sysctl_sched_rt_runtime < 0)
+		return RUNTIME_INF;
+
+	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
+}
+
+
+
+static inline int task_current(struct rq *rq, struct task_struct *p)
+{
+	return rq->curr == p;
+}
+
+static inline int task_running(struct rq *rq, struct task_struct *p)
+{
+#ifdef CONFIG_SMP
+	return p->on_cpu;
+#else
+	return task_current(rq, p);
+#endif
+}
+
+
+#ifndef prepare_arch_switch
+# define prepare_arch_switch(next)	do { } while (0)
+#endif
+#ifndef finish_arch_switch
+# define finish_arch_switch(prev)	do { } while (0)
+#endif
+
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
+static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * We can optimise this out completely for !SMP, because the
+	 * SMP rebalancing from interrupt is the only thing that cares
+	 * here.
+	 */
+	next->on_cpu = 1;
+#endif
+}
+
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * After ->on_cpu is cleared, the task can be moved to a different CPU.
+	 * We must ensure this doesn't happen until the switch is completely
+	 * finished.
+	 */
+	smp_wmb();
+	prev->on_cpu = 0;
+#endif
+#ifdef CONFIG_DEBUG_SPINLOCK
+	/* this is a valid case when another task releases the spinlock */
+	rq->lock.owner = current;
+#endif
+	/*
+	 * If we are tracking spinlock dependencies then we have to
+	 * fix up the runqueue lock - which gets 'carried over' from
+	 * prev into current:
+	 */
+	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+
+	raw_spin_unlock_irq(&rq->lock);
+}
+
+#else /* __ARCH_WANT_UNLOCKED_CTXSW */
+static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * We can optimise this out completely for !SMP, because the
+	 * SMP rebalancing from interrupt is the only thing that cares
+	 * here.
+	 */
+	next->on_cpu = 1;
+#endif
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+	raw_spin_unlock_irq(&rq->lock);
+#else
+	raw_spin_unlock(&rq->lock);
+#endif
+}
+
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * After ->on_cpu is cleared, the task can be moved to a different CPU.
+	 * We must ensure this doesn't happen until the switch is completely
+	 * finished.
+	 */
+	smp_wmb();
+	prev->on_cpu = 0;
+#endif
+#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+	local_irq_enable();
+#endif
+}
+#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
+
+
+static inline void update_load_add(struct load_weight *lw, unsigned long inc)
+{
+	lw->weight += inc;
+	lw->inv_weight = 0;
+}
+
+static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
+{
+	lw->weight -= dec;
+	lw->inv_weight = 0;
+}
+
+static inline void update_load_set(struct load_weight *lw, unsigned long w)
+{
+	lw->weight = w;
+	lw->inv_weight = 0;
+}
+
+/*
+ * To aid in avoiding the subversion of "niceness" due to uneven distribution
+ * of tasks with abnormal "nice" values across CPUs the contribution that
+ * each task makes to its run queue's load is weighted according to its
+ * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
+ * scaled version of the new time slice allocation that they receive on time
+ * slice expiry etc.
+ */
+
+#define WEIGHT_IDLEPRIO                3
+#define WMULT_IDLEPRIO         1431655765
+
+/*
+ * Nice levels are multiplicative, with a gentle 10% change for every
+ * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
+ * nice 1, it will get ~10% less CPU time than another CPU-bound task
+ * that remained on nice 0.
+ *
+ * The "10% effect" is relative and cumulative: from _any_ nice level,
+ * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
+ * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
+ * If a task goes up by ~10% and another task goes down by ~10% then
+ * the relative distance between them is ~25%.)
+ */
+static const int prio_to_weight[40] = {
+ /* -20 */     88761,     71755,     56483,     46273,     36291,
+ /* -15 */     29154,     23254,     18705,     14949,     11916,
+ /* -10 */      9548,      7620,      6100,      4904,      3906,
+ /*  -5 */      3121,      2501,      1991,      1586,      1277,
+ /*   0 */      1024,       820,       655,       526,       423,
+ /*   5 */       335,       272,       215,       172,       137,
+ /*  10 */       110,        87,        70,        56,        45,
+ /*  15 */        36,        29,        23,        18,        15,
+};
+
+/*
+ * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated.
+ *
+ * In cases where the weight does not change often, we can use the
+ * precalculated inverse to speed up arithmetics by turning divisions
+ * into multiplications:
+ */
+static const u32 prio_to_wmult[40] = {
+ /* -20 */     48388,     59856,     76040,     92818,    118348,
+ /* -15 */    147320,    184698,    229616,    287308,    360437,
+ /* -10 */    449829,    563644,    704093,    875809,   1099582,
+ /*  -5 */   1376151,   1717300,   2157191,   2708050,   3363326,
+ /*   0 */   4194304,   5237765,   6557202,   8165337,  10153587,
+ /*   5 */  12820798,  15790321,  19976592,  24970740,  31350126,
+ /*  10 */  39045157,  49367440,  61356676,  76695844,  95443717,
+ /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
+};
+
+/* Time spent by the tasks of the cpu accounting group executing in ... */
+enum cpuacct_stat_index {
+	CPUACCT_STAT_USER,	/* ... user mode */
+	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */
+
+	CPUACCT_STAT_NSTATS,
+};
+
+
+#define sched_class_highest (&stop_sched_class)
+#define for_each_class(class) \
+   for (class = sched_class_highest; class; class = class->next)
+
+extern const struct sched_class stop_sched_class;
+extern const struct sched_class rt_sched_class;
+extern const struct sched_class fair_sched_class;
+extern const struct sched_class idle_sched_class;
+
+
+#ifdef CONFIG_SMP
+
+extern void trigger_load_balance(struct rq *rq, int cpu);
+extern void idle_balance(int this_cpu, struct rq *this_rq);
+
+#else	/* CONFIG_SMP */
+
+static inline void idle_balance(int cpu, struct rq *rq)
+{
+}
+
+#endif
+
+extern void sysrq_sched_debug_show(void);
+extern void sched_init_granularity(void);
+extern void update_max_interval(void);
+extern void update_group_power(struct sched_domain *sd, int cpu);
+extern int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu);
+extern void init_sched_rt_class(void);
+extern void init_sched_fair_class(void);
+
+extern void resched_task(struct task_struct *p);
+extern void resched_cpu(int cpu);
+
+extern struct rt_bandwidth def_rt_bandwidth;
+extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+
+extern void update_cpu_load(struct rq *this_rq);
+
+#ifdef CONFIG_CGROUP_CPUACCT
+extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+extern void cpuacct_update_stats(struct task_struct *tsk,
+		enum cpuacct_stat_index idx, cputime_t val);
+#else
+static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+static inline void cpuacct_update_stats(struct task_struct *tsk,
+		enum cpuacct_stat_index idx, cputime_t val) {}
+#endif
+
+static inline void inc_nr_running(struct rq *rq)
+{
+	rq->nr_running++;
+}
+
+static inline void dec_nr_running(struct rq *rq)
+{
+	rq->nr_running--;
+}
+
+extern void update_rq_clock(struct rq *rq);
+
+extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
+extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
+
+extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
+
+extern const_debug unsigned int sysctl_sched_time_avg;
+extern const_debug unsigned int sysctl_sched_nr_migrate;
+extern const_debug unsigned int sysctl_sched_migration_cost;
+
+static inline u64 sched_avg_period(void)
+{
+	return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
+}
+
+void calc_load_account_idle(struct rq *this_rq);
+
+#ifdef CONFIG_SCHED_HRTICK
+
+/*
+ * Use hrtick when:
+ *  - enabled by features
+ *  - hrtimer is actually high res
+ */
+static inline int hrtick_enabled(struct rq *rq)
+{
+	if (!sched_feat(HRTICK))
+		return 0;
+	if (!cpu_active(cpu_of(rq)))
+		return 0;
+	return hrtimer_is_hres_active(&rq->hrtick_timer);
+}
+
+void hrtick_start(struct rq *rq, u64 delay);
+
+#endif /* CONFIG_SCHED_HRTICK */
+
+#ifdef CONFIG_SMP
+extern void sched_avg_update(struct rq *rq);
+static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
+{
+	rq->rt_avg += rt_delta;
+	sched_avg_update(rq);
+}
+#else
+static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
+static inline void sched_avg_update(struct rq *rq) { }
+#endif
+
+extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_PREEMPT
+
+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
+/*
+ * fair double_lock_balance: Safely acquires both rq->locks in a fair
+ * way at the expense of forcing extra atomic operations in all
+ * invocations.  This assures that the double_lock is acquired using the
+ * same underlying policy as the spinlock_t on this architecture, which
+ * reduces latency compared to the unfair variant below.  However, it
+ * also adds more overhead and therefore may reduce throughput.
+ */
+static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	raw_spin_unlock(&this_rq->lock);
+	double_rq_lock(this_rq, busiest);
+
+	return 1;
+}
+
+#else
+/*
+ * Unfair double_lock_balance: Optimizes throughput at the expense of
+ * latency by eliminating extra atomic operations when the locks are
+ * already in proper order on entry.  This favors lower cpu-ids and will
+ * grant the double lock to lower cpus over higher ids under contention,
+ * regardless of entry order into the function.
+ */
+static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	int ret = 0;
+
+	if (unlikely(!raw_spin_trylock(&busiest->lock))) {
+		if (busiest < this_rq) {
+			raw_spin_unlock(&this_rq->lock);
+			raw_spin_lock(&busiest->lock);
+			raw_spin_lock_nested(&this_rq->lock,
+					      SINGLE_DEPTH_NESTING);
+			ret = 1;
+		} else
+			raw_spin_lock_nested(&busiest->lock,
+					      SINGLE_DEPTH_NESTING);
+	}
+	return ret;
+}
+
+#endif /* CONFIG_PREEMPT */
+
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+{
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work good under rq->lock */
+		raw_spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
+
+	return _double_lock_balance(this_rq, busiest);
+}
+
+static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(busiest->lock)
+{
+	raw_spin_unlock(&busiest->lock);
+	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+}
+
+/*
+ * double_rq_lock - safely lock two runqueues
+ *
+ * Note this does not disable interrupts like task_rq_lock,
+ * you need to do so manually before calling.
+ */
+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
+	__acquires(rq1->lock)
+	__acquires(rq2->lock)
+{
+	BUG_ON(!irqs_disabled());
+	if (rq1 == rq2) {
+		raw_spin_lock(&rq1->lock);
+		__acquire(rq2->lock);	/* Fake it out ;) */
+	} else {
+		if (rq1 < rq2) {
+			raw_spin_lock(&rq1->lock);
+			raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
+		} else {
+			raw_spin_lock(&rq2->lock);
+			raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
+		}
+	}
+}
+
+/*
+ * double_rq_unlock - safely unlock two runqueues
+ *
+ * Note this does not restore interrupts like task_rq_unlock,
+ * you need to do so manually after calling.
+ */
+static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+	__releases(rq1->lock)
+	__releases(rq2->lock)
+{
+	raw_spin_unlock(&rq1->lock);
+	if (rq1 != rq2)
+		raw_spin_unlock(&rq2->lock);
+	else
+		__release(rq2->lock);
+}
+
+#else /* CONFIG_SMP */
+
+/*
+ * double_rq_lock - safely lock two runqueues
+ *
+ * Note this does not disable interrupts like task_rq_lock,
+ * you need to do so manually before calling.
+ */
+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
+	__acquires(rq1->lock)
+	__acquires(rq2->lock)
+{
+	BUG_ON(!irqs_disabled());
+	BUG_ON(rq1 != rq2);
+	raw_spin_lock(&rq1->lock);
+	__acquire(rq2->lock);	/* Fake it out ;) */
+}
+
+/*
+ * double_rq_unlock - safely unlock two runqueues
+ *
+ * Note this does not restore interrupts like task_rq_unlock,
+ * you need to do so manually after calling.
+ */
+static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+	__releases(rq1->lock)
+	__releases(rq2->lock)
+{
+	BUG_ON(rq1 != rq2);
+	raw_spin_unlock(&rq1->lock);
+	__release(rq2->lock);
+}
+
+#endif
+
+extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
+extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
+extern void print_cfs_stats(struct seq_file *m, int cpu);
+extern void print_rt_stats(struct seq_file *m, int cpu);
+
+extern void init_cfs_rq(struct cfs_rq *cfs_rq);
+extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
+extern void unthrottle_offline_cfs_rqs(struct rq *rq);
+
+extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
index 429242f3c484..e8a1f83ee0e7 100644
--- a/kernel/sched_autogroup.c
+++ b/kernel/sched_autogroup.c
@@ -1,15 +1,19 @@
 #ifdef CONFIG_SCHED_AUTOGROUP
 
+#include "sched.h"
+
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
 #include <linux/utsname.h>
+#include <linux/security.h>
+#include <linux/export.h>
 
 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
 static struct autogroup autogroup_default;
 static atomic_t autogroup_seq_nr;
 
-static void __init autogroup_init(struct task_struct *init_task)
+void __init autogroup_init(struct task_struct *init_task)
 {
 	autogroup_default.tg = &root_task_group;
 	kref_init(&autogroup_default.kref);
@@ -17,7 +21,7 @@ static void __init autogroup_init(struct task_struct *init_task)
 	init_task->signal->autogroup = &autogroup_default;
 }
 
-static inline void autogroup_free(struct task_group *tg)
+void autogroup_free(struct task_group *tg)
 {
 	kfree(tg->autogroup);
 }
@@ -59,10 +63,6 @@ static inline struct autogroup *autogroup_task_get(struct task_struct *p)
 	return ag;
 }
 
-#ifdef CONFIG_RT_GROUP_SCHED
-static void free_rt_sched_group(struct task_group *tg);
-#endif
-
 static inline struct autogroup *autogroup_create(void)
 {
 	struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
@@ -108,8 +108,7 @@ out_fail:
 	return autogroup_kref_get(&autogroup_default);
 }
 
-static inline bool
-task_wants_autogroup(struct task_struct *p, struct task_group *tg)
+bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 {
 	if (tg != &root_task_group)
 		return false;
@@ -127,22 +126,6 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 	return true;
 }
 
-static inline bool task_group_is_autogroup(struct task_group *tg)
-{
-	return !!tg->autogroup;
-}
-
-static inline struct task_group *
-autogroup_task_group(struct task_struct *p, struct task_group *tg)
-{
-	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
-
-	if (enabled && task_wants_autogroup(p, tg))
-		return p->signal->autogroup->tg;
-
-	return tg;
-}
-
 static void
 autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 {
@@ -263,7 +246,7 @@ out:
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_SCHED_DEBUG
-static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
+int autogroup_path(struct task_group *tg, char *buf, int buflen)
 {
 	if (!task_group_is_autogroup(tg))
 		return 0;
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
index c2f0e7248dca..8bd047142816 100644
--- a/kernel/sched_autogroup.h
+++ b/kernel/sched_autogroup.h
@@ -1,5 +1,8 @@
 #ifdef CONFIG_SCHED_AUTOGROUP
 
+#include <linux/kref.h>
+#include <linux/rwsem.h>
+
 struct autogroup {
 	/*
 	 * reference doesn't mean how many thread attach to this
@@ -13,9 +16,28 @@ struct autogroup {
 	int			nice;
 };
 
-static inline bool task_group_is_autogroup(struct task_group *tg);
+extern void autogroup_init(struct task_struct *init_task);
+extern void autogroup_free(struct task_group *tg);
+
+static inline bool task_group_is_autogroup(struct task_group *tg)
+{
+	return !!tg->autogroup;
+}
+
+extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg);
+
 static inline struct task_group *
-autogroup_task_group(struct task_struct *p, struct task_group *tg);
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+
+	if (enabled && task_wants_autogroup(p, tg))
+		return p->signal->autogroup->tg;
+
+	return tg;
+}
+
+extern int autogroup_path(struct task_group *tg, char *buf, int buflen);
 
 #else /* !CONFIG_SCHED_AUTOGROUP */
 
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index a6710a112b4f..ce1a85f2ddcb 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -16,6 +16,8 @@
 #include <linux/kallsyms.h>
 #include <linux/utsname.h>
 
+#include "sched.h"
+
 static DEFINE_SPINLOCK(sched_debug_lock);
 
 /*
@@ -373,7 +375,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static void sysrq_sched_debug_show(void)
+void sysrq_sched_debug_show(void)
 {
 	sched_debug_show(NULL, NULL);
 }
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a608593df243..cd3b64219d9f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -23,6 +23,13 @@
 #include <linux/latencytop.h>
 #include <linux/sched.h>
 #include <linux/cpumask.h>
+#include <linux/slab.h>
+#include <linux/profile.h>
+#include <linux/interrupt.h>
+
+#include <trace/events/sched.h>
+
+#include "sched.h"
 
 /*
  * Targeted preemption latency for CPU-bound tasks:
@@ -103,7 +110,110 @@ unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL;
 unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
 #endif
 
-static const struct sched_class fair_sched_class;
+/*
+ * Increase the granularity value when there are more CPUs,
+ * because with more CPUs the 'effective latency' as visible
+ * to users decreases. But the relationship is not linear,
+ * so pick a second-best guess by going with the log2 of the
+ * number of CPUs.
+ *
+ * This idea comes from the SD scheduler of Con Kolivas:
+ */
+static int get_update_sysctl_factor(void)
+{
+	unsigned int cpus = min_t(int, num_online_cpus(), 8);
+	unsigned int factor;
+
+	switch (sysctl_sched_tunable_scaling) {
+	case SCHED_TUNABLESCALING_NONE:
+		factor = 1;
+		break;
+	case SCHED_TUNABLESCALING_LINEAR:
+		factor = cpus;
+		break;
+	case SCHED_TUNABLESCALING_LOG:
+	default:
+		factor = 1 + ilog2(cpus);
+		break;
+	}
+
+	return factor;
+}
+
+static void update_sysctl(void)
+{
+	unsigned int factor = get_update_sysctl_factor();
+
+#define SET_SYSCTL(name) \
+	(sysctl_##name = (factor) * normalized_sysctl_##name)
+	SET_SYSCTL(sched_min_granularity);
+	SET_SYSCTL(sched_latency);
+	SET_SYSCTL(sched_wakeup_granularity);
+#undef SET_SYSCTL
+}
+
+void sched_init_granularity(void)
+{
+	update_sysctl();
+}
+
+#if BITS_PER_LONG == 32
+# define WMULT_CONST	(~0UL)
+#else
+# define WMULT_CONST	(1UL << 32)
+#endif
+
+#define WMULT_SHIFT	32
+
+/*
+ * Shift right and round:
+ */
+#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+
+/*
+ * delta *= weight / lw
+ */
+static unsigned long
+calc_delta_mine(unsigned long delta_exec, unsigned long weight,
+		struct load_weight *lw)
+{
+	u64 tmp;
+
+	/*
+	 * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched
+	 * entities since MIN_SHARES = 2. Treat weight as 1 if less than
+	 * 2^SCHED_LOAD_RESOLUTION.
+	 */
+	if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION)))
+		tmp = (u64)delta_exec * scale_load_down(weight);
+	else
+		tmp = (u64)delta_exec;
+
+	if (!lw->inv_weight) {
+		unsigned long w = scale_load_down(lw->weight);
+
+		if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
+			lw->inv_weight = 1;
+		else if (unlikely(!w))
+			lw->inv_weight = WMULT_CONST;
+		else
+			lw->inv_weight = WMULT_CONST / w;
+	}
+
+	/*
+	 * Check whether we'd overflow the 64-bit multiplication:
+	 */
+	if (unlikely(tmp > WMULT_CONST))
+		tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+			WMULT_SHIFT/2);
+	else
+		tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
+
+	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
+}
+
+
+const struct sched_class fair_sched_class;
 
 /**************************************************************
  * CFS operations on generic schedulable entities:
@@ -413,7 +523,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
 }
 
-static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
+struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
 {
 	struct rb_node *left = cfs_rq->rb_leftmost;
 
@@ -434,7 +544,7 @@ static struct sched_entity *__pick_next_entity(struct sched_entity *se)
 }
 
 #ifdef CONFIG_SCHED_DEBUG
-static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
+struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 {
 	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
 
@@ -684,7 +794,7 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	update_load_add(&cfs_rq->load, se->load.weight);
 	if (!parent_entity(se))
-		inc_cpu_load(rq_of(cfs_rq), se->load.weight);
+		update_load_add(&rq_of(cfs_rq)->load, se->load.weight);
 	if (entity_is_task(se)) {
 		add_cfs_task_weight(cfs_rq, se->load.weight);
 		list_add(&se->group_node, &cfs_rq->tasks);
@@ -697,7 +807,7 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	update_load_sub(&cfs_rq->load, se->load.weight);
 	if (!parent_entity(se))
-		dec_cpu_load(rq_of(cfs_rq), se->load.weight);
+		update_load_sub(&rq_of(cfs_rq)->load, se->load.weight);
 	if (entity_is_task(se)) {
 		add_cfs_task_weight(cfs_rq, -se->load.weight);
 		list_del_init(&se->group_node);
@@ -1287,6 +1397,32 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
  */
 
 #ifdef CONFIG_CFS_BANDWIDTH
+
+#ifdef HAVE_JUMP_LABEL
+static struct jump_label_key __cfs_bandwidth_used;
+
+static inline bool cfs_bandwidth_used(void)
+{
+	return static_branch(&__cfs_bandwidth_used);
+}
+
+void account_cfs_bandwidth_used(int enabled, int was_enabled)
+{
+	/* only need to count groups transitioning between enabled/!enabled */
+	if (enabled && !was_enabled)
+		jump_label_inc(&__cfs_bandwidth_used);
+	else if (!enabled && was_enabled)
+		jump_label_dec(&__cfs_bandwidth_used);
+}
+#else /* HAVE_JUMP_LABEL */
+static bool cfs_bandwidth_used(void)
+{
+	return true;
+}
+
+void account_cfs_bandwidth_used(int enabled, int was_enabled) {}
+#endif /* HAVE_JUMP_LABEL */
+
 /*
  * default period for cfs group bandwidth.
  * default: 0.1s, units: nanoseconds
@@ -1308,7 +1444,7 @@ static inline u64 sched_cfs_bandwidth_slice(void)
  *
  * requires cfs_b->lock
  */
-static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
+void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
 {
 	u64 now;
 
@@ -1320,6 +1456,11 @@ static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
 	cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period);
 }
 
+static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
+{
+	return &tg->cfs_bandwidth;
+}
+
 /* returns 0 on failure to allocate runtime */
 static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
@@ -1530,7 +1671,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	raw_spin_unlock(&cfs_b->lock);
 }
 
-static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
@@ -1839,7 +1980,112 @@ static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 
 	throttle_cfs_rq(cfs_rq);
 }
-#else
+
+static inline u64 default_cfs_period(void);
+static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun);
+static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b);
+
+static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
+{
+	struct cfs_bandwidth *cfs_b =
+		container_of(timer, struct cfs_bandwidth, slack_timer);
+	do_sched_cfs_slack_timer(cfs_b);
+
+	return HRTIMER_NORESTART;
+}
+
+static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
+{
+	struct cfs_bandwidth *cfs_b =
+		container_of(timer, struct cfs_bandwidth, period_timer);
+	ktime_t now;
+	int overrun;
+	int idle = 0;
+
+	for (;;) {
+		now = hrtimer_cb_get_time(timer);
+		overrun = hrtimer_forward(timer, now, cfs_b->period);
+
+		if (!overrun)
+			break;
+
+		idle = do_sched_cfs_period_timer(cfs_b, overrun);
+	}
+
+	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
+}
+
+void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+{
+	raw_spin_lock_init(&cfs_b->lock);
+	cfs_b->runtime = 0;
+	cfs_b->quota = RUNTIME_INF;
+	cfs_b->period = ns_to_ktime(default_cfs_period());
+
+	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
+	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	cfs_b->period_timer.function = sched_cfs_period_timer;
+	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	cfs_b->slack_timer.function = sched_cfs_slack_timer;
+}
+
+static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+{
+	cfs_rq->runtime_enabled = 0;
+	INIT_LIST_HEAD(&cfs_rq->throttled_list);
+}
+
+/* requires cfs_b->lock, may release to reprogram timer */
+void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+{
+	/*
+	 * The timer may be active because we're trying to set a new bandwidth
+	 * period or because we're racing with the tear-down path
+	 * (timer_active==0 becomes visible before the hrtimer call-back
+	 * terminates).  In either case we ensure that it's re-programmed
+	 */
+	while (unlikely(hrtimer_active(&cfs_b->period_timer))) {
+		raw_spin_unlock(&cfs_b->lock);
+		/* ensure cfs_b->lock is available while we wait */
+		hrtimer_cancel(&cfs_b->period_timer);
+
+		raw_spin_lock(&cfs_b->lock);
+		/* if someone else restarted the timer then we're done */
+		if (cfs_b->timer_active)
+			return;
+	}
+
+	cfs_b->timer_active = 1;
+	start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
+}
+
+static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+{
+	hrtimer_cancel(&cfs_b->period_timer);
+	hrtimer_cancel(&cfs_b->slack_timer);
+}
+
+void unthrottle_offline_cfs_rqs(struct rq *rq)
+{
+	struct cfs_rq *cfs_rq;
+
+	for_each_leaf_cfs_rq(rq, cfs_rq) {
+		struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+
+		if (!cfs_rq->runtime_enabled)
+			continue;
+
+		/*
+		 * clock_task is not advancing so we just need to make sure
+		 * there's some valid quota amount
+		 */
+		cfs_rq->runtime_remaining = cfs_b->quota;
+		if (cfs_rq_throttled(cfs_rq))
+			unthrottle_cfs_rq(cfs_rq);
+	}
+}
+
+#else /* CONFIG_CFS_BANDWIDTH */
 static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
 				     unsigned long delta_exec) {}
 static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
@@ -1861,8 +2107,22 @@ static inline int throttled_lb_pair(struct task_group *tg,
 {
 	return 0;
 }
+
+void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 #endif
 
+static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
+{
+	return NULL;
+}
+static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
+void unthrottle_offline_cfs_rqs(struct rq *rq) {}
+
+#endif /* CONFIG_CFS_BANDWIDTH */
+
 /**************************************************
  * CFS operations on tasks:
  */
@@ -2029,6 +2289,61 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 }
 
 #ifdef CONFIG_SMP
+/* Used instead of source_load when we know the type == 0 */
+static unsigned long weighted_cpuload(const int cpu)
+{
+	return cpu_rq(cpu)->load.weight;
+}
+
+/*
+ * Return a low guess at the load of a migration-source cpu weighted
+ * according to the scheduling class and "nice" value.
+ *
+ * We want to under-estimate the load of migration sources, to
+ * balance conservatively.
+ */
+static unsigned long source_load(int cpu, int type)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long total = weighted_cpuload(cpu);
+
+	if (type == 0 || !sched_feat(LB_BIAS))
+		return total;
+
+	return min(rq->cpu_load[type-1], total);
+}
+
+/*
+ * Return a high guess at the load of a migration-target cpu weighted
+ * according to the scheduling class and "nice" value.
+ */
+static unsigned long target_load(int cpu, int type)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long total = weighted_cpuload(cpu);
+
+	if (type == 0 || !sched_feat(LB_BIAS))
+		return total;
+
+	return max(rq->cpu_load[type-1], total);
+}
+
+static unsigned long power_of(int cpu)
+{
+	return cpu_rq(cpu)->cpu_power;
+}
+
+static unsigned long cpu_avg_load_per_task(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long nr_running = ACCESS_ONCE(rq->nr_running);
+
+	if (nr_running)
+		return rq->load.weight / nr_running;
+
+	return 0;
+}
+
 
 static void task_waking_fair(struct task_struct *p)
 {
@@ -2782,6 +3097,38 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
 	check_preempt_curr(this_rq, p, 0);
 }
 
+/*
+ * Is this task likely cache-hot:
+ */
+static int
+task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
+{
+	s64 delta;
+
+	if (p->sched_class != &fair_sched_class)
+		return 0;
+
+	if (unlikely(p->policy == SCHED_IDLE))
+		return 0;
+
+	/*
+	 * Buddy candidates are cache hot:
+	 */
+	if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
+			(&p->se == cfs_rq_of(&p->se)->next ||
+			 &p->se == cfs_rq_of(&p->se)->last))
+		return 1;
+
+	if (sysctl_sched_migration_cost == -1)
+		return 1;
+	if (sysctl_sched_migration_cost == 0)
+		return 0;
+
+	delta = now - p->se.exec_start;
+
+	return delta < (s64)sysctl_sched_migration_cost;
+}
+
 /*
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
@@ -3161,15 +3508,6 @@ struct sg_lb_stats {
 	int group_has_capacity; /* Is there extra capacity in the group? */
 };
 
-/**
- * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
- * @group: The group whose first cpu is to be returned.
- */
-static inline unsigned int group_first_cpu(struct sched_group *group)
-{
-	return cpumask_first(sched_group_cpus(group));
-}
-
 /**
  * get_sd_load_idx - Obtain the load index for a given sched domain.
  * @sd: The sched_domain whose load_idx is to be obtained.
@@ -3419,7 +3757,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 	sdg->sgp->power = power;
 }
 
-static void update_group_power(struct sched_domain *sd, int cpu)
+void update_group_power(struct sched_domain *sd, int cpu)
 {
 	struct sched_domain *child = sd->child;
 	struct sched_group *group, *sdg = sd->groups;
@@ -3685,11 +4023,6 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 	} while (sg != sd->groups);
 }
 
-int __weak arch_sd_sibling_asym_packing(void)
-{
-       return 0*SD_ASYM_PACKING;
-}
-
 /**
  * check_asym_packing - Check to see if the group is packed into the
  *			sched doman.
@@ -4053,7 +4386,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
 #define MAX_PINNED_INTERVAL	512
 
 /* Working cpumask for load_balance and load_balance_newidle. */
-static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
 
 static int need_active_balance(struct sched_domain *sd, int idle,
 			       int busiest_cpu, int this_cpu)
@@ -4256,7 +4589,7 @@ out:
  * idle_balance is called by schedule() if this_cpu is about to become
  * idle. Attempts to pull tasks from other CPUs.
  */
-static void idle_balance(int this_cpu, struct rq *this_rq)
+void idle_balance(int this_cpu, struct rq *this_rq)
 {
 	struct sched_domain *sd;
 	int pulled_task = 0;
@@ -4631,7 +4964,7 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10;
  * Scale the max load_balance interval with the number of CPUs in the system.
  * This trades load-balance latency on larger machines for less cross talk.
  */
-static void update_max_interval(void)
+void update_max_interval(void)
 {
 	max_load_balance_interval = HZ*num_online_cpus()/10;
 }
@@ -4833,7 +5166,7 @@ static inline int on_null_domain(int cpu)
 /*
  * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing.
  */
-static inline void trigger_load_balance(struct rq *rq, int cpu)
+void trigger_load_balance(struct rq *rq, int cpu)
 {
 	/* Don't need to rebalance while attached to NULL domain */
 	if (time_after_eq(jiffies, rq->next_balance) &&
@@ -4855,15 +5188,6 @@ static void rq_offline_fair(struct rq *rq)
 	update_sysctl();
 }
 
-#else	/* CONFIG_SMP */
-
-/*
- * on UP we do not need to balance between CPUs:
- */
-static inline void idle_balance(int cpu, struct rq *rq)
-{
-}
-
 #endif /* CONFIG_SMP */
 
 /*
@@ -5006,6 +5330,16 @@ static void set_curr_task_fair(struct rq *rq)
 	}
 }
 
+void init_cfs_rq(struct cfs_rq *cfs_rq)
+{
+	cfs_rq->tasks_timeline = RB_ROOT;
+	INIT_LIST_HEAD(&cfs_rq->tasks);
+	cfs_rq->min_vruntime = (u64)(-(1LL << 20));
+#ifndef CONFIG_64BIT
+	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
+#endif
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void task_move_group_fair(struct task_struct *p, int on_rq)
 {
@@ -5028,7 +5362,161 @@ static void task_move_group_fair(struct task_struct *p, int on_rq)
 	if (!on_rq)
 		p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime;
 }
+
+void free_fair_sched_group(struct task_group *tg)
+{
+	int i;
+
+	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
+
+	for_each_possible_cpu(i) {
+		if (tg->cfs_rq)
+			kfree(tg->cfs_rq[i]);
+		if (tg->se)
+			kfree(tg->se[i]);
+	}
+
+	kfree(tg->cfs_rq);
+	kfree(tg->se);
+}
+
+int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
+{
+	struct cfs_rq *cfs_rq;
+	struct sched_entity *se;
+	int i;
+
+	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
+	if (!tg->cfs_rq)
+		goto err;
+	tg->se = kzalloc(sizeof(se) * nr_cpu_ids, GFP_KERNEL);
+	if (!tg->se)
+		goto err;
+
+	tg->shares = NICE_0_LOAD;
+
+	init_cfs_bandwidth(tg_cfs_bandwidth(tg));
+
+	for_each_possible_cpu(i) {
+		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
+				      GFP_KERNEL, cpu_to_node(i));
+		if (!cfs_rq)
+			goto err;
+
+		se = kzalloc_node(sizeof(struct sched_entity),
+				  GFP_KERNEL, cpu_to_node(i));
+		if (!se)
+			goto err_free_rq;
+
+		init_cfs_rq(cfs_rq);
+		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
+	}
+
+	return 1;
+
+err_free_rq:
+	kfree(cfs_rq);
+err:
+	return 0;
+}
+
+void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	/*
+	* Only empty task groups can be destroyed; so we can speculatively
+	* check on_list without danger of it being re-added.
+	*/
+	if (!tg->cfs_rq[cpu]->on_list)
+		return;
+
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
+			struct sched_entity *se, int cpu,
+			struct sched_entity *parent)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	cfs_rq->tg = tg;
+	cfs_rq->rq = rq;
+#ifdef CONFIG_SMP
+	/* allow initial update_cfs_load() to truncate */
+	cfs_rq->load_stamp = 1;
 #endif
+	init_cfs_rq_runtime(cfs_rq);
+
+	tg->cfs_rq[cpu] = cfs_rq;
+	tg->se[cpu] = se;
+
+	/* se could be NULL for root_task_group */
+	if (!se)
+		return;
+
+	if (!parent)
+		se->cfs_rq = &rq->cfs;
+	else
+		se->cfs_rq = parent->my_q;
+
+	se->my_q = cfs_rq;
+	update_load_set(&se->load, 0);
+	se->parent = parent;
+}
+
+static DEFINE_MUTEX(shares_mutex);
+
+int sched_group_set_shares(struct task_group *tg, unsigned long shares)
+{
+	int i;
+	unsigned long flags;
+
+	/*
+	 * We can't change the weight of the root cgroup.
+	 */
+	if (!tg->se[0])
+		return -EINVAL;
+
+	shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES));
+
+	mutex_lock(&shares_mutex);
+	if (tg->shares == shares)
+		goto done;
+
+	tg->shares = shares;
+	for_each_possible_cpu(i) {
+		struct rq *rq = cpu_rq(i);
+		struct sched_entity *se;
+
+		se = tg->se[i];
+		/* Propagate contribution to hierarchy */
+		raw_spin_lock_irqsave(&rq->lock, flags);
+		for_each_sched_entity(se)
+			update_cfs_shares(group_cfs_rq(se));
+		raw_spin_unlock_irqrestore(&rq->lock, flags);
+	}
+
+done:
+	mutex_unlock(&shares_mutex);
+	return 0;
+}
+#else /* CONFIG_FAIR_GROUP_SCHED */
+
+void free_fair_sched_group(struct task_group *tg) { }
+
+int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
+{
+	return 1;
+}
+
+void unregister_fair_sched_group(struct task_group *tg, int cpu) { }
+
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
 
 static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
 {
@@ -5048,7 +5536,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
 /*
  * All the scheduling class methods:
  */
-static const struct sched_class fair_sched_class = {
+const struct sched_class fair_sched_class = {
 	.next			= &idle_sched_class,
 	.enqueue_task		= enqueue_task_fair,
 	.dequeue_task		= dequeue_task_fair,
@@ -5085,7 +5573,7 @@ static const struct sched_class fair_sched_class = {
 };
 
 #ifdef CONFIG_SCHED_DEBUG
-static void print_cfs_stats(struct seq_file *m, int cpu)
+void print_cfs_stats(struct seq_file *m, int cpu)
 {
 	struct cfs_rq *cfs_rq;
 
@@ -5095,3 +5583,19 @@ static void print_cfs_stats(struct seq_file *m, int cpu)
 	rcu_read_unlock();
 }
 #endif
+
+__init void init_sched_fair_class(void)
+{
+#ifdef CONFIG_SMP
+	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
+
+#ifdef CONFIG_NO_HZ
+	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
+	alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT);
+	atomic_set(&nohz.load_balancer, nr_cpu_ids);
+	atomic_set(&nohz.first_pick_cpu, nr_cpu_ids);
+	atomic_set(&nohz.second_pick_cpu, nr_cpu_ids);
+#endif
+#endif /* SMP */
+
+}
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 0a51882534ea..91b4c957f289 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -1,3 +1,5 @@
+#include "sched.h"
+
 /*
  * idle-task scheduling class.
  *
@@ -71,7 +73,7 @@ static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task
 /*
  * Simple, special scheduling class for the per-CPU idle tasks:
  */
-static const struct sched_class idle_sched_class = {
+const struct sched_class idle_sched_class = {
 	/* .next is NULL */
 	/* no enqueue/yield_task for idle tasks */
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index d95e861122cf..023b35502509 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -3,7 +3,92 @@
  * policies)
  */
 
+#include "sched.h"
+
+#include <linux/slab.h>
+
+static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
+
+struct rt_bandwidth def_rt_bandwidth;
+
+static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
+{
+	struct rt_bandwidth *rt_b =
+		container_of(timer, struct rt_bandwidth, rt_period_timer);
+	ktime_t now;
+	int overrun;
+	int idle = 0;
+
+	for (;;) {
+		now = hrtimer_cb_get_time(timer);
+		overrun = hrtimer_forward(timer, now, rt_b->rt_period);
+
+		if (!overrun)
+			break;
+
+		idle = do_sched_rt_period_timer(rt_b, overrun);
+	}
+
+	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
+}
+
+void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
+{
+	rt_b->rt_period = ns_to_ktime(period);
+	rt_b->rt_runtime = runtime;
+
+	raw_spin_lock_init(&rt_b->rt_runtime_lock);
+
+	hrtimer_init(&rt_b->rt_period_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rt_b->rt_period_timer.function = sched_rt_period_timer;
+}
+
+static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
+{
+	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
+		return;
+
+	if (hrtimer_active(&rt_b->rt_period_timer))
+		return;
+
+	raw_spin_lock(&rt_b->rt_runtime_lock);
+	start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
+	raw_spin_unlock(&rt_b->rt_runtime_lock);
+}
+
+void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
+{
+	struct rt_prio_array *array;
+	int i;
+
+	array = &rt_rq->active;
+	for (i = 0; i < MAX_RT_PRIO; i++) {
+		INIT_LIST_HEAD(array->queue + i);
+		__clear_bit(i, array->bitmap);
+	}
+	/* delimiter for bitsearch: */
+	__set_bit(MAX_RT_PRIO, array->bitmap);
+
+#if defined CONFIG_SMP
+	rt_rq->highest_prio.curr = MAX_RT_PRIO;
+	rt_rq->highest_prio.next = MAX_RT_PRIO;
+	rt_rq->rt_nr_migratory = 0;
+	rt_rq->overloaded = 0;
+	plist_head_init(&rt_rq->pushable_tasks);
+#endif
+
+	rt_rq->rt_time = 0;
+	rt_rq->rt_throttled = 0;
+	rt_rq->rt_runtime = 0;
+	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
+}
+
 #ifdef CONFIG_RT_GROUP_SCHED
+static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
+{
+	hrtimer_cancel(&rt_b->rt_period_timer);
+}
 
 #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
 
@@ -25,6 +110,91 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
 	return rt_se->rt_rq;
 }
 
+void free_rt_sched_group(struct task_group *tg)
+{
+	int i;
+
+	if (tg->rt_se)
+		destroy_rt_bandwidth(&tg->rt_bandwidth);
+
+	for_each_possible_cpu(i) {
+		if (tg->rt_rq)
+			kfree(tg->rt_rq[i]);
+		if (tg->rt_se)
+			kfree(tg->rt_se[i]);
+	}
+
+	kfree(tg->rt_rq);
+	kfree(tg->rt_se);
+}
+
+void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
+		struct sched_rt_entity *rt_se, int cpu,
+		struct sched_rt_entity *parent)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	rt_rq->highest_prio.curr = MAX_RT_PRIO;
+	rt_rq->rt_nr_boosted = 0;
+	rt_rq->rq = rq;
+	rt_rq->tg = tg;
+
+	tg->rt_rq[cpu] = rt_rq;
+	tg->rt_se[cpu] = rt_se;
+
+	if (!rt_se)
+		return;
+
+	if (!parent)
+		rt_se->rt_rq = &rq->rt;
+	else
+		rt_se->rt_rq = parent->my_q;
+
+	rt_se->my_q = rt_rq;
+	rt_se->parent = parent;
+	INIT_LIST_HEAD(&rt_se->run_list);
+}
+
+int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
+{
+	struct rt_rq *rt_rq;
+	struct sched_rt_entity *rt_se;
+	int i;
+
+	tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
+	if (!tg->rt_rq)
+		goto err;
+	tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL);
+	if (!tg->rt_se)
+		goto err;
+
+	init_rt_bandwidth(&tg->rt_bandwidth,
+			ktime_to_ns(def_rt_bandwidth.rt_period), 0);
+
+	for_each_possible_cpu(i) {
+		rt_rq = kzalloc_node(sizeof(struct rt_rq),
+				     GFP_KERNEL, cpu_to_node(i));
+		if (!rt_rq)
+			goto err;
+
+		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+				     GFP_KERNEL, cpu_to_node(i));
+		if (!rt_se)
+			goto err_free_rq;
+
+		init_rt_rq(rt_rq, cpu_rq(i));
+		rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
+		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
+	}
+
+	return 1;
+
+err_free_rq:
+	kfree(rt_rq);
+err:
+	return 0;
+}
+
 #else /* CONFIG_RT_GROUP_SCHED */
 
 #define rt_entity_is_task(rt_se) (1)
@@ -47,6 +217,12 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
 	return &rq->rt;
 }
 
+void free_rt_sched_group(struct task_group *tg) { }
+
+int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
+{
+	return 1;
+}
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_SMP
@@ -556,6 +732,28 @@ static void enable_runtime(struct rq *rq)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
+int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	int cpu = (int)(long)hcpu;
+
+	switch (action) {
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		disable_runtime(cpu_rq(cpu));
+		return NOTIFY_OK;
+
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		enable_runtime(cpu_rq(cpu));
+		return NOTIFY_OK;
+
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
 static int balance_runtime(struct rt_rq *rt_rq)
 {
 	int more = 0;
@@ -1178,8 +1376,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 /* Only try algorithms three times */
 #define RT_MAX_TRIES 3
 
-static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
-
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
@@ -1653,13 +1849,14 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 		pull_rt_task(rq);
 }
 
-static inline void init_sched_rt_class(void)
+void init_sched_rt_class(void)
 {
 	unsigned int i;
 
-	for_each_possible_cpu(i)
+	for_each_possible_cpu(i) {
 		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
 					GFP_KERNEL, cpu_to_node(i));
+	}
 }
 #endif /* CONFIG_SMP */
 
@@ -1800,7 +1997,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 		return 0;
 }
 
-static const struct sched_class rt_sched_class = {
+const struct sched_class rt_sched_class = {
 	.next			= &fair_sched_class,
 	.enqueue_task		= enqueue_task_rt,
 	.dequeue_task		= dequeue_task_rt,
@@ -1835,7 +2032,7 @@ static const struct sched_class rt_sched_class = {
 #ifdef CONFIG_SCHED_DEBUG
 extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
 
-static void print_rt_stats(struct seq_file *m, int cpu)
+void print_rt_stats(struct seq_file *m, int cpu)
 {
 	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
diff --git a/kernel/sched_stats.c b/kernel/sched_stats.c
new file mode 100644
index 000000000000..2a581ba8e190
--- /dev/null
+++ b/kernel/sched_stats.c
@@ -0,0 +1,111 @@
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+
+#include "sched.h"
+
+/*
+ * bump this up when changing the output format or the meaning of an existing
+ * format, so that tools can adapt (or abort)
+ */
+#define SCHEDSTAT_VERSION 15
+
+static int show_schedstat(struct seq_file *seq, void *v)
+{
+	int cpu;
+	int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
+	char *mask_str = kmalloc(mask_len, GFP_KERNEL);
+
+	if (mask_str == NULL)
+		return -ENOMEM;
+
+	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
+	seq_printf(seq, "timestamp %lu\n", jiffies);
+	for_each_online_cpu(cpu) {
+		struct rq *rq = cpu_rq(cpu);
+#ifdef CONFIG_SMP
+		struct sched_domain *sd;
+		int dcount = 0;
+#endif
+
+		/* runqueue-specific stats */
+		seq_printf(seq,
+		    "cpu%d %u %u %u %u %u %u %llu %llu %lu",
+		    cpu, rq->yld_count,
+		    rq->sched_switch, rq->sched_count, rq->sched_goidle,
+		    rq->ttwu_count, rq->ttwu_local,
+		    rq->rq_cpu_time,
+		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
+
+		seq_printf(seq, "\n");
+
+#ifdef CONFIG_SMP
+		/* domain-specific stats */
+		rcu_read_lock();
+		for_each_domain(cpu, sd) {
+			enum cpu_idle_type itype;
+
+			cpumask_scnprintf(mask_str, mask_len,
+					  sched_domain_span(sd));
+			seq_printf(seq, "domain%d %s", dcount++, mask_str);
+			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
+					itype++) {
+				seq_printf(seq, " %u %u %u %u %u %u %u %u",
+				    sd->lb_count[itype],
+				    sd->lb_balanced[itype],
+				    sd->lb_failed[itype],
+				    sd->lb_imbalance[itype],
+				    sd->lb_gained[itype],
+				    sd->lb_hot_gained[itype],
+				    sd->lb_nobusyq[itype],
+				    sd->lb_nobusyg[itype]);
+			}
+			seq_printf(seq,
+				   " %u %u %u %u %u %u %u %u %u %u %u %u\n",
+			    sd->alb_count, sd->alb_failed, sd->alb_pushed,
+			    sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
+			    sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
+			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
+			    sd->ttwu_move_balance);
+		}
+		rcu_read_unlock();
+#endif
+	}
+	kfree(mask_str);
+	return 0;
+}
+
+static int schedstat_open(struct inode *inode, struct file *file)
+{
+	unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
+	char *buf = kmalloc(size, GFP_KERNEL);
+	struct seq_file *m;
+	int res;
+
+	if (!buf)
+		return -ENOMEM;
+	res = single_open(file, show_schedstat, NULL);
+	if (!res) {
+		m = file->private_data;
+		m->buf = buf;
+		m->size = size;
+	} else
+		kfree(buf);
+	return res;
+}
+
+static const struct file_operations proc_schedstat_operations = {
+	.open    = schedstat_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+};
+
+static int __init proc_schedstat_init(void)
+{
+	proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
+	return 0;
+}
+module_init(proc_schedstat_init);
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 87f9e36ea56e..ea2b6f0ec868 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -1,108 +1,5 @@
 
 #ifdef CONFIG_SCHEDSTATS
-/*
- * bump this up when changing the output format or the meaning of an existing
- * format, so that tools can adapt (or abort)
- */
-#define SCHEDSTAT_VERSION 15
-
-static int show_schedstat(struct seq_file *seq, void *v)
-{
-	int cpu;
-	int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
-	char *mask_str = kmalloc(mask_len, GFP_KERNEL);
-
-	if (mask_str == NULL)
-		return -ENOMEM;
-
-	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
-	seq_printf(seq, "timestamp %lu\n", jiffies);
-	for_each_online_cpu(cpu) {
-		struct rq *rq = cpu_rq(cpu);
-#ifdef CONFIG_SMP
-		struct sched_domain *sd;
-		int dcount = 0;
-#endif
-
-		/* runqueue-specific stats */
-		seq_printf(seq,
-		    "cpu%d %u %u %u %u %u %u %llu %llu %lu",
-		    cpu, rq->yld_count,
-		    rq->sched_switch, rq->sched_count, rq->sched_goidle,
-		    rq->ttwu_count, rq->ttwu_local,
-		    rq->rq_cpu_time,
-		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
-
-		seq_printf(seq, "\n");
-
-#ifdef CONFIG_SMP
-		/* domain-specific stats */
-		rcu_read_lock();
-		for_each_domain(cpu, sd) {
-			enum cpu_idle_type itype;
-
-			cpumask_scnprintf(mask_str, mask_len,
-					  sched_domain_span(sd));
-			seq_printf(seq, "domain%d %s", dcount++, mask_str);
-			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
-					itype++) {
-				seq_printf(seq, " %u %u %u %u %u %u %u %u",
-				    sd->lb_count[itype],
-				    sd->lb_balanced[itype],
-				    sd->lb_failed[itype],
-				    sd->lb_imbalance[itype],
-				    sd->lb_gained[itype],
-				    sd->lb_hot_gained[itype],
-				    sd->lb_nobusyq[itype],
-				    sd->lb_nobusyg[itype]);
-			}
-			seq_printf(seq,
-				   " %u %u %u %u %u %u %u %u %u %u %u %u\n",
-			    sd->alb_count, sd->alb_failed, sd->alb_pushed,
-			    sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
-			    sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
-			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
-			    sd->ttwu_move_balance);
-		}
-		rcu_read_unlock();
-#endif
-	}
-	kfree(mask_str);
-	return 0;
-}
-
-static int schedstat_open(struct inode *inode, struct file *file)
-{
-	unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
-	char *buf = kmalloc(size, GFP_KERNEL);
-	struct seq_file *m;
-	int res;
-
-	if (!buf)
-		return -ENOMEM;
-	res = single_open(file, show_schedstat, NULL);
-	if (!res) {
-		m = file->private_data;
-		m->buf = buf;
-		m->size = size;
-	} else
-		kfree(buf);
-	return res;
-}
-
-static const struct file_operations proc_schedstat_operations = {
-	.open    = schedstat_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = single_release,
-};
-
-static int __init proc_schedstat_init(void)
-{
-	proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
-	return 0;
-}
-module_init(proc_schedstat_init);
 
 /*
  * Expects runqueue lock to be held for atomicity of update
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 8b44e7fa7fb3..7b386e86fd23 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -1,3 +1,5 @@
+#include "sched.h"
+
 /*
  * stop-task scheduling class.
  *
@@ -80,7 +82,7 @@ get_rr_interval_stop(struct rq *rq, struct task_struct *task)
 /*
  * Simple, special scheduling class for the per-CPU stop tasks:
  */
-static const struct sched_class stop_sched_class = {
+const struct sched_class stop_sched_class = {
 	.next			= &rt_sched_class,
 
 	.enqueue_task		= enqueue_task_stop,
-- 
cgit v1.2.3


From 907d0ed1c84114d4e8dafd66af982515d3739c90 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Wed, 16 Nov 2011 10:13:35 +0100
Subject: drivercore: Generalize module_platform_driver

This patch generalizes the module_platform_driver macro and introduces a new
module_driver macro. The module_driver macro takes a driver name, a register
and a unregister function for this driver type. Using these it construct the
module init and exit sections which register and unregister the driver. Since
such init/exit sections are commonly found in drivers this macro can be used
to eliminate a lot of boilerplate code.

The macro is not intended to be used by driver modules directly, instead it
should be used to generate bus specific macros for registering drivers like
the module_platform_driver macro.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Jonathan Cameron <jic23@kernel.org>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h          | 21 +++++++++++++++++++++
 include/linux/platform_device.h | 12 ++----------
 2 files changed, 23 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index c6335982774c..341fb740d851 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -922,4 +922,25 @@ extern long sysfs_deprecated;
 #define sysfs_deprecated 0
 #endif
 
+/**
+ * module_driver() - Helper macro for drivers that don't do anything
+ * special in module init/exit. This eliminates a lot of boilerplate.
+ * Each module may only use this macro once, and calling it replaces
+ * module_init() and module_exit().
+ *
+ * Use this macro to construct bus specific macros for registering
+ * drivers, and do not use it on its own.
+ */
+#define module_driver(__driver, __register, __unregister) \
+static int __init __driver##_init(void) \
+{ \
+	return __register(&(__driver)); \
+} \
+module_init(__driver##_init); \
+static void __exit __driver##_exit(void) \
+{ \
+	__unregister(&(__driver)); \
+} \
+module_exit(__driver##_exit);
+
 #endif /* _DEVICE_H_ */
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 2a23f7d1a825..165a8d175370 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -196,16 +196,8 @@ static inline void platform_set_drvdata(struct platform_device *pdev, void *data
  * calling it replaces module_init() and module_exit()
  */
 #define module_platform_driver(__platform_driver) \
-static int __init __platform_driver##_init(void) \
-{ \
-	return platform_driver_register(&(__platform_driver)); \
-} \
-module_init(__platform_driver##_init); \
-static void __exit __platform_driver##_exit(void) \
-{ \
-	platform_driver_unregister(&(__platform_driver)); \
-} \
-module_exit(__platform_driver##_exit);
+	module_driver(__platform_driver, platform_driver_register, \
+			platform_driver_unregister)
 
 extern struct platform_device *platform_create_bundle(struct platform_driver *driver,
 					int (*probe)(struct platform_device *),
-- 
cgit v1.2.3


From 7c92784a546d2945b6d6973a30f7134be78eb7a4 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Wed, 16 Nov 2011 10:13:36 +0100
Subject: I2C: Add helper macro for i2c_driver boilerplate

This patch introduces the module_i2c_driver macro which is a convenience macro
for I2C driver modules similar to module_platform_driver. It is intended to be
used by drivers which init/exit section does nothing but register/unregister
the I2C driver. By using this macro it is possible to eliminate a few lines of
boilerplate code per I2C driver.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Jonathan Cameron <jic23@cam.ac.uk>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index a81bf6d23b3e..7e92854fe9cc 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -485,6 +485,19 @@ static inline int i2c_adapter_id(struct i2c_adapter *adap)
 {
 	return adap->nr;
 }
+
+/**
+ * module_i2c_driver() - Helper macro for registering a I2C driver
+ * @__i2c_driver: i2c_driver struct
+ *
+ * Helper macro for I2C drivers which do not do anything special in module
+ * init/exit. This eliminates a lot of boilerplate. Each module may only
+ * use this macro once, and calling it replaces module_init() and module_exit()
+ */
+#define module_i2c_driver(__i2c_driver) \
+	module_driver(__i2c_driver, i2c_add_driver, \
+			i2c_del_driver)
+
 #endif /* I2C */
 #endif /* __KERNEL__ */
 
-- 
cgit v1.2.3


From 3acbb0142d48713a8f65cde678a54f419801c189 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Wed, 16 Nov 2011 10:13:37 +0100
Subject: SPI: Add helper macro for spi_driver boilerplate

This patch introduces the module_spi_driver macro which is a convenience macro
for SPI driver modules similar to module_platform_driver. It is intended to be
used by drivers which init/exit section does nothing but register/unregister
the SPI driver. By using this macro it is possible to eliminate a few lines of
boilerplate code per SPI driver.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/spi/spi.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index bb4f5fbbbd8e..176fce9cc6b1 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -200,6 +200,17 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
 		driver_unregister(&sdrv->driver);
 }
 
+/**
+ * module_spi_driver() - Helper macro for registering a SPI driver
+ * @__spi_driver: spi_driver struct
+ *
+ * Helper macro for SPI drivers which do not do anything special in module
+ * init/exit. This eliminates a lot of boilerplate. Each module may only
+ * use this macro once, and calling it replaces module_init() and module_exit()
+ */
+#define module_spi_driver(__spi_driver) \
+	module_driver(__spi_driver, spi_register_driver, \
+			spi_unregister_driver)
 
 /**
  * struct spi_master - interface to SPI master controller
-- 
cgit v1.2.3


From f6f8285132907757ef84ef8dae0a1244b8cde6ac Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Nov 2011 12:58:07 -0800
Subject: pstore: pass allocated memory region back to caller

The buf_lock cannot be held while populating the inodes, so make the backend
pass forward an allocated and filled buffer instead. This solves the following
backtrace. The effect is that "buf" is only ever used to notify the backends
that something was written to it, and shouldn't be used in the read path.

To replace the buf_lock during the read path, isolate the open/read/close
loop with a separate mutex to maintain serialized access to the backend.

Note that is is up to the pstore backend to cope if the (*write)() path is
called in the middle of the read path.

[   59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847
[   59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount
[   59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1
[   59.691019] Call Trace:
[   59.691019]  [<810252d5>] __might_sleep+0xc3/0xca
[   59.691019]  [<810a26e6>] kmem_cache_alloc+0x32/0xf3
[   59.691019]  [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4
[   59.691019]  [<810b68b1>] alloc_inode+0x2a/0x64
[   59.691019]  [<810b6903>] new_inode+0x18/0x43
[   59.691019]  [<81142447>] pstore_get_inode.isra.1+0x11/0x98
[   59.691019]  [<81142623>] pstore_mkfile+0xae/0x26f
[   59.691019]  [<810a2a66>] ? kmem_cache_free+0x19/0xb1
[   59.691019]  [<8116c821>] ? ida_get_new_above+0x140/0x158
[   59.691019]  [<811708ea>] ? __init_rwsem+0x1e/0x2c
[   59.691019]  [<810b67e8>] ? inode_init_always+0x111/0x1b0
[   59.691019]  [<8102127e>] ? should_resched+0xd/0x27
[   59.691019]  [<8137977f>] ? _cond_resched+0xd/0x21
[   59.691019]  [<81142abf>] pstore_get_records+0x52/0xa7
[   59.691019]  [<8114254b>] pstore_fill_super+0x7d/0x91
[   59.691019]  [<810a7ff5>] mount_single+0x46/0x82
[   59.691019]  [<8114231a>] pstore_mount+0x15/0x17
[   59.691019]  [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98
[   59.691019]  [<810a8199>] mount_fs+0x5a/0x12d
[   59.691019]  [<810b9174>] ? alloc_vfsmnt+0xa4/0x14a
[   59.691019]  [<810b9474>] vfs_kern_mount+0x4f/0x7d
[   59.691019]  [<810b9d7e>] do_kern_mount+0x34/0xb2
[   59.691019]  [<810bb15f>] do_mount+0x5fc/0x64a
[   59.691019]  [<810912fb>] ? strndup_user+0x2e/0x3f
[   59.691019]  [<810bb3cb>] sys_mount+0x66/0x99
[   59.691019]  [<8137b537>] sysenter_do_call+0x12/0x26

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/acpi/apei/erst.c   | 31 ++++++++++++++++++++++---------
 drivers/firmware/efivars.c |  9 +++++++--
 fs/pstore/platform.c       | 13 ++++++++-----
 include/linux/pstore.h     |  4 +++-
 4 files changed, 40 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 127408069ca7..631b9477b99c 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -932,7 +932,8 @@ static int erst_check_table(struct acpi_table_erst *erst_tab)
 static int erst_open_pstore(struct pstore_info *psi);
 static int erst_close_pstore(struct pstore_info *psi);
 static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
-			   struct timespec *time, struct pstore_info *psi);
+			   struct timespec *time, char **buf,
+			   struct pstore_info *psi);
 static int erst_writer(enum pstore_type_id type, u64 *id, unsigned int part,
 		       size_t size, struct pstore_info *psi);
 static int erst_clearer(enum pstore_type_id type, u64 id,
@@ -986,17 +987,23 @@ static int erst_close_pstore(struct pstore_info *psi)
 }
 
 static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
-			   struct timespec *time, struct pstore_info *psi)
+			   struct timespec *time, char **buf,
+			   struct pstore_info *psi)
 {
 	int rc;
 	ssize_t len = 0;
 	u64 record_id;
-	struct cper_pstore_record *rcd = (struct cper_pstore_record *)
-					(erst_info.buf - sizeof(*rcd));
+	struct cper_pstore_record *rcd;
+	size_t rcd_len = sizeof(*rcd) + erst_info.bufsize;
 
 	if (erst_disable)
 		return -ENODEV;
 
+	rcd = kmalloc(rcd_len, GFP_KERNEL);
+	if (!rcd) {
+		rc = -ENOMEM;
+		goto out;
+	}
 skip:
 	rc = erst_get_record_id_next(&reader_pos, &record_id);
 	if (rc)
@@ -1004,22 +1011,27 @@ skip:
 
 	/* no more record */
 	if (record_id == APEI_ERST_INVALID_RECORD_ID) {
-		rc = -1;
+		rc = -EINVAL;
 		goto out;
 	}
 
-	len = erst_read(record_id, &rcd->hdr, sizeof(*rcd) +
-			erst_info.bufsize);
+	len = erst_read(record_id, &rcd->hdr, rcd_len);
 	/* The record may be cleared by others, try read next record */
 	if (len == -ENOENT)
 		goto skip;
-	else if (len < 0) {
-		rc = -1;
+	else if (len < sizeof(*rcd)) {
+		rc = -EIO;
 		goto out;
 	}
 	if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0)
 		goto skip;
 
+	*buf = kmalloc(len, GFP_KERNEL);
+	if (*buf == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	memcpy(*buf, rcd->data, len - sizeof(*rcd));
 	*id = record_id;
 	if (uuid_le_cmp(rcd->sec_hdr.section_type,
 			CPER_SECTION_TYPE_DMESG) == 0)
@@ -1037,6 +1049,7 @@ skip:
 	time->tv_nsec = 0;
 
 out:
+	kfree(rcd);
 	return (rc < 0) ? rc : (len - sizeof(*rcd));
 }
 
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index 8370f72d87ff..a54a6b972ced 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -457,7 +457,8 @@ static int efi_pstore_close(struct pstore_info *psi)
 }
 
 static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
-			       struct timespec *timespec, struct pstore_info *psi)
+			       struct timespec *timespec,
+			       char **buf, struct pstore_info *psi)
 {
 	efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
 	struct efivars *efivars = psi->data;
@@ -478,7 +479,11 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
 				timespec->tv_nsec = 0;
 				get_var_data_locked(efivars, &efivars->walk_entry->var);
 				size = efivars->walk_entry->var.DataSize;
-				memcpy(psi->buf, efivars->walk_entry->var.Data, size);
+				*buf = kmalloc(size, GFP_KERNEL);
+				if (*buf == NULL)
+					return -ENOMEM;
+				memcpy(*buf, efivars->walk_entry->var.Data,
+				       size);
 				efivars->walk_entry = list_entry(efivars->walk_entry->list.next,
 					           struct efivar_entry, list);
 				return size;
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 2bd620f0d796..57bbf9078ac8 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -167,6 +167,7 @@ int pstore_register(struct pstore_info *psi)
 	}
 
 	psinfo = psi;
+	mutex_init(&psinfo->read_mutex);
 	spin_unlock(&pstore_lock);
 
 	if (owner && !try_module_get(owner)) {
@@ -195,30 +196,32 @@ EXPORT_SYMBOL_GPL(pstore_register);
 void pstore_get_records(int quiet)
 {
 	struct pstore_info *psi = psinfo;
+	char			*buf = NULL;
 	ssize_t			size;
 	u64			id;
 	enum pstore_type_id	type;
 	struct timespec		time;
 	int			failed = 0, rc;
-	unsigned long		flags;
 
 	if (!psi)
 		return;
 
-	spin_lock_irqsave(&psinfo->buf_lock, flags);
+	mutex_lock(&psi->read_mutex);
 	rc = psi->open(psi);
 	if (rc)
 		goto out;
 
-	while ((size = psi->read(&id, &type, &time, psi)) > 0) {
-		rc = pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
+	while ((size = psi->read(&id, &type, &time, &buf, psi)) > 0) {
+		rc = pstore_mkfile(type, psi->name, id, buf, (size_t)size,
 				  time, psi);
+		kfree(buf);
+		buf = NULL;
 		if (rc && (rc != -EEXIST || !quiet))
 			failed++;
 	}
 	psi->close(psi);
 out:
-	spin_unlock_irqrestore(&psinfo->buf_lock, flags);
+	mutex_unlock(&psi->read_mutex);
 
 	if (failed)
 		printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n",
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index ea567321ae3c..2ca8cde5459d 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -35,10 +35,12 @@ struct pstore_info {
 	spinlock_t	buf_lock;	/* serialize access to 'buf' */
 	char		*buf;
 	size_t		bufsize;
+	struct mutex	read_mutex;	/* serialize open/read/close */
 	int		(*open)(struct pstore_info *psi);
 	int		(*close)(struct pstore_info *psi);
 	ssize_t		(*read)(u64 *id, enum pstore_type_id *type,
-			struct timespec *time, struct pstore_info *psi);
+			struct timespec *time, char **buf,
+			struct pstore_info *psi);
 	int		(*write)(enum pstore_type_id type, u64 *id,
 			unsigned int part, size_t size, struct pstore_info *psi);
 	int		(*erase)(enum pstore_type_id type, u64 id,
-- 
cgit v1.2.3


From 3d6d8d20ec4fd3b256632edb373a9c504724b8a9 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Nov 2011 13:13:29 -0800
Subject: pstore: pass reason to backend write callback

This allows a backend to filter on the dmesg reason as well as the pstore
reason. When ramoops is switched to pstore, this is needed since it has
no interest in storing non-crash dmesg details.

Drop pstore_write() as it has no users, and handling the "reason" here
has no obviously correct value.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/acpi/apei/erst.c   |  6 ++++--
 drivers/firmware/efivars.c |  8 +++++---
 fs/pstore/platform.c       | 30 +-----------------------------
 include/linux/pstore.h     | 12 +++++-------
 4 files changed, 15 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 631b9477b99c..6a9e3bad13f4 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -934,7 +934,8 @@ static int erst_close_pstore(struct pstore_info *psi);
 static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
 			   struct timespec *time, char **buf,
 			   struct pstore_info *psi);
-static int erst_writer(enum pstore_type_id type, u64 *id, unsigned int part,
+static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
+		       u64 *id, unsigned int part,
 		       size_t size, struct pstore_info *psi);
 static int erst_clearer(enum pstore_type_id type, u64 id,
 			struct pstore_info *psi);
@@ -1053,7 +1054,8 @@ out:
 	return (rc < 0) ? rc : (len - sizeof(*rcd));
 }
 
-static int erst_writer(enum pstore_type_id type, u64 *id, unsigned int part,
+static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
+		       u64 *id, unsigned int part,
 		       size_t size, struct pstore_info *psi)
 {
 	struct cper_pstore_record *rcd = (struct cper_pstore_record *)
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index a54a6b972ced..0a53a05a850d 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -495,7 +495,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
 	return 0;
 }
 
-static int efi_pstore_write(enum pstore_type_id type, u64 *id,
+static int efi_pstore_write(enum pstore_type_id type,
+		enum kmsg_dump_reason reason, u64 *id,
 		unsigned int part, size_t size, struct pstore_info *psi)
 {
 	char name[DUMP_NAME_LEN];
@@ -565,7 +566,7 @@ static int efi_pstore_write(enum pstore_type_id type, u64 *id,
 static int efi_pstore_erase(enum pstore_type_id type, u64 id,
 			    struct pstore_info *psi)
 {
-	efi_pstore_write(type, &id, (unsigned int)id, 0, psi);
+	efi_pstore_write(type, 0, &id, (unsigned int)id, 0, psi);
 
 	return 0;
 }
@@ -586,7 +587,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
 	return -1;
 }
 
-static int efi_pstore_write(enum pstore_type_id type, u64 *id,
+static int efi_pstore_write(enum pstore_type_id type,
+		enum kmsg_dump_reason reason, u64 *id,
 		unsigned int part, size_t size, struct pstore_info *psi)
 {
 	return 0;
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 57bbf9078ac8..f146d89179bf 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -122,7 +122,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 		memcpy(dst, s1 + s1_start, l1_cpy);
 		memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
 
-		ret = psinfo->write(PSTORE_TYPE_DMESG, &id, part,
+		ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part,
 				   hsize + l1_cpy + l2_cpy, psinfo);
 		if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())
 			pstore_new_entry = 1;
@@ -243,33 +243,5 @@ static void pstore_timefunc(unsigned long dummy)
 	mod_timer(&pstore_timer, jiffies + PSTORE_INTERVAL);
 }
 
-/*
- * Call platform driver to write a record to the
- * persistent store.
- */
-int pstore_write(enum pstore_type_id type, char *buf, size_t size)
-{
-	u64		id;
-	int		ret;
-	unsigned long	flags;
-
-	if (!psinfo)
-		return -ENODEV;
-
-	if (size > psinfo->bufsize)
-		return -EFBIG;
-
-	spin_lock_irqsave(&psinfo->buf_lock, flags);
-	memcpy(psinfo->buf, buf, size);
-	ret = psinfo->write(type, &id, 0, size, psinfo);
-	if (ret == 0 && pstore_is_mounted())
-		pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf,
-			      size, CURRENT_TIME, psinfo);
-	spin_unlock_irqrestore(&psinfo->buf_lock, flags);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(pstore_write);
-
 module_param(backend, charp, 0444);
 MODULE_PARM_DESC(backend, "Pstore backend to use");
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index 2ca8cde5459d..e1461e143be2 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -22,6 +22,9 @@
 #ifndef _LINUX_PSTORE_H
 #define _LINUX_PSTORE_H
 
+#include <linux/time.h>
+#include <linux/kmsg_dump.h>
+
 /* types */
 enum pstore_type_id {
 	PSTORE_TYPE_DMESG	= 0,
@@ -41,7 +44,8 @@ struct pstore_info {
 	ssize_t		(*read)(u64 *id, enum pstore_type_id *type,
 			struct timespec *time, char **buf,
 			struct pstore_info *psi);
-	int		(*write)(enum pstore_type_id type, u64 *id,
+	int		(*write)(enum pstore_type_id type,
+			enum kmsg_dump_reason reason, u64 *id,
 			unsigned int part, size_t size, struct pstore_info *psi);
 	int		(*erase)(enum pstore_type_id type, u64 id,
 			struct pstore_info *psi);
@@ -50,18 +54,12 @@ struct pstore_info {
 
 #ifdef CONFIG_PSTORE
 extern int pstore_register(struct pstore_info *);
-extern int pstore_write(enum pstore_type_id type, char *buf, size_t size);
 #else
 static inline int
 pstore_register(struct pstore_info *psi)
 {
 	return -ENODEV;
 }
-static inline int
-pstore_write(enum pstore_type_id type, char *buf, size_t size)
-{
-	return -ENODEV;
-}
 #endif
 
 #endif /*_LINUX_PSTORE_H*/
-- 
cgit v1.2.3


From adc9300e78e6091a7eaa1821213836379d4dbaa8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 17 Nov 2011 03:13:26 +0000
Subject: net: use jump_label to shortcut RPS if not setup

Most machines dont use RPS/RFS, and pay a fair amount of instructions in
netif_receive_skb() / netif_rx() / get_rps_cpu() just to discover
RPS/RFS is not setup.

Add a jump_label named rps_needed.

If no device rps_map or global rps_sock_flow_table is setup,
netif_receive_skb() / netif_rx() do a single instruction instead of many
ones, including conditional jumps.

jmp +0    (if CONFIG_JUMP_LABEL=y)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h  |  5 +++++
 net/core/dev.c             | 21 +++++++++------------
 net/core/net-sysfs.c       |  7 +++++--
 net/core/sysctl_net_core.c |  9 +++++++--
 4 files changed, 26 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4d5698aa828b..0bbe030fc014 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -214,6 +214,11 @@ enum {
 #include <linux/cache.h>
 #include <linux/skbuff.h>
 
+#ifdef CONFIG_RPS
+#include <linux/jump_label.h>
+extern struct jump_label_key rps_needed;
+#endif
+
 struct neighbour;
 struct neigh_parms;
 struct sk_buff;
diff --git a/net/core/dev.c b/net/core/dev.c
index 26c49d55e79d..f78959996148 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2711,6 +2711,8 @@ EXPORT_SYMBOL(__skb_get_rxhash);
 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
 
+struct jump_label_key rps_needed __read_mostly;
+
 static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	    struct rps_dev_flow *rflow, u16 next_cpu)
@@ -2994,7 +2996,7 @@ int netif_rx(struct sk_buff *skb)
 
 	trace_netif_rx(skb);
 #ifdef CONFIG_RPS
-	{
+	if (static_branch(&rps_needed))	{
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
@@ -3009,14 +3011,13 @@ int netif_rx(struct sk_buff *skb)
 
 		rcu_read_unlock();
 		preempt_enable();
-	}
-#else
+	} else
+#endif
 	{
 		unsigned int qtail;
 		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
 		put_cpu();
 	}
-#endif
 	return ret;
 }
 EXPORT_SYMBOL(netif_rx);
@@ -3359,7 +3360,7 @@ int netif_receive_skb(struct sk_buff *skb)
 		return NET_RX_SUCCESS;
 
 #ifdef CONFIG_RPS
-	{
+	if (static_branch(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu, ret;
 
@@ -3370,16 +3371,12 @@ int netif_receive_skb(struct sk_buff *skb)
 		if (cpu >= 0) {
 			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 			rcu_read_unlock();
-		} else {
-			rcu_read_unlock();
-			ret = __netif_receive_skb(skb);
+			return ret;
 		}
-
-		return ret;
+		rcu_read_unlock();
 	}
-#else
-	return __netif_receive_skb(skb);
 #endif
+	return __netif_receive_skb(skb);
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 602b1419998c..db6c2f83633f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -606,9 +606,12 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 	rcu_assign_pointer(queue->rps_map, map);
 	spin_unlock(&rps_map_lock);
 
-	if (old_map)
+	if (map)
+		jump_label_inc(&rps_needed);
+	if (old_map) {
 		kfree_rcu(old_map, rcu);
-
+		jump_label_dec(&rps_needed);
+	}
 	free_cpumask_var(mask);
 	return len;
 }
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 77a65f031488..d05559d4d9cd 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -68,8 +68,13 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
 
 		if (sock_table != orig_sock_table) {
 			rcu_assign_pointer(rps_sock_flow_table, sock_table);
-			synchronize_rcu();
-			vfree(orig_sock_table);
+			if (sock_table)
+				jump_label_inc(&rps_needed);
+			if (orig_sock_table) {
+				jump_label_dec(&rps_needed);
+				synchronize_rcu();
+				vfree(orig_sock_table);
+			}
 		}
 	}
 
-- 
cgit v1.2.3


From f3a6a4b6cfc80e57bf16bb12f9425bec1a5731a9 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 17 Nov 2011 14:38:33 -0800
Subject: USB: Add helper macro for usb_driver boilerplate

This patch introduces the module_usb_driver macro which is a convenience
macro for USB driver modules similar to module_platform_driver. It is
intended to be used by drivers which init/exit section does nothing but
register/unregister the USB driver. By using this macro it is possible
to eliminate a few lines of boilerplate code per USB driver.

Based on work done by Lars-Peter Clausen <lars@metafoo.de> for other
busses (i2c and spi).

Cc: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index d3d0c1374334..5d258c3180ae 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -953,6 +953,18 @@ extern int usb_register_driver(struct usb_driver *, struct module *,
 
 extern void usb_deregister(struct usb_driver *);
 
+/**
+ * module_usb_driver() - Helper macro for registering a USB driver
+ * @__usb_driver: usb_driver struct
+ *
+ * Helper macro for USB drivers which do not do anything special in module
+ * init/exit. This eliminates a lot of boilerplate. Each module may only
+ * use this macro once, and calling it replaces module_init() and module_exit()
+ */
+#define module_usb_driver(__usb_driver) \
+	module_driver(__usb_driver, usb_register, \
+		       usb_deregister)
+
 extern int usb_register_device_driver(struct usb_device_driver *,
 			struct module *);
 extern void usb_deregister_device_driver(struct usb_device_driver *);
-- 
cgit v1.2.3


From b14dab792dee3245b628e046d80a7fad5573fea6 Mon Sep 17 00:00:00 2001
From: Jassi Brar <jaswinder.singh@linaro.org>
Date: Thu, 13 Oct 2011 12:33:30 +0530
Subject: DMAEngine: Define interleaved transfer request api

Define a new api that could be used for doing fancy data transfers
like interleaved to contiguous copy and vice-versa.
Traditional SG_list based transfers tend to be very inefficient in
such cases as where the interleave and chunk are only a few bytes,
which call for a very condensed api to convey pattern of the transfer.
This api supports all 4 variants of scatter-gather and contiguous transfer.

Of course, neither can this api help transfers that don't lend to DMA by
nature, i.e, scattered tiny read/writes with no periodic pattern.

Also since now we support SLAVE channels that might not provide
device_prep_slave_sg callback but device_prep_interleaved_dma,
remove the BUG_ON check.

Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
Acked-by: Barry Song <Baohua.Song@csr.com>
[renamed dmaxfer_template to dma_interleaved_template
 did fixup after the enum dma_transfer_merge]
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 Documentation/dmaengine.txt |  8 +++++
 drivers/dma/dmaengine.c     |  4 +--
 include/linux/dmaengine.h   | 78 +++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 85 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt
index 94b7e0f96b38..bbe6cb3d1856 100644
--- a/Documentation/dmaengine.txt
+++ b/Documentation/dmaengine.txt
@@ -75,6 +75,10 @@ The slave DMA usage consists of following steps:
    slave_sg	- DMA a list of scatter gather buffers from/to a peripheral
    dma_cyclic	- Perform a cyclic DMA operation from/to a peripheral till the
 		  operation is explicitly stopped.
+   interleaved_dma - This is common to Slave as well as M2M clients. For slave
+		 address of devices' fifo could be already known to the driver.
+		 Various types of operations could be expressed by setting
+		 appropriate values to the 'dma_interleaved_template' members.
 
    A non-NULL return of this transfer API represents a "descriptor" for
    the given transaction.
@@ -89,6 +93,10 @@ The slave DMA usage consists of following steps:
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_data_direction direction);
 
+	struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
+		struct dma_chan *chan, struct dma_interleaved_template *xt,
+		unsigned long flags);
+
    The peripheral driver is expected to have mapped the scatterlist for
    the DMA operation prior to calling device_prep_slave_sg, and must
    keep the scatterlist mapped until the DMA operation has completed.
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index b48967b499da..a6c6051ec858 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -693,12 +693,12 @@ int dma_async_device_register(struct dma_device *device)
 		!device->device_prep_dma_interrupt);
 	BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) &&
 		!device->device_prep_dma_sg);
-	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
-		!device->device_prep_slave_sg);
 	BUG_ON(dma_has_cap(DMA_CYCLIC, device->cap_mask) &&
 		!device->device_prep_dma_cyclic);
 	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
 		!device->device_control);
+	BUG_ON(dma_has_cap(DMA_INTERLEAVE, device->cap_mask) &&
+		!device->device_prep_interleaved_dma);
 
 	BUG_ON(!device->device_alloc_chan_resources);
 	BUG_ON(!device->device_free_chan_resources);
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index a865b3a354cd..5532bb8b500c 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -71,10 +71,10 @@ enum dma_transaction_type {
 	DMA_ASYNC_TX,
 	DMA_SLAVE,
 	DMA_CYCLIC,
-};
-
+	DMA_INTERLEAVE,
 /* last transaction type for creation of the capabilities mask */
-#define DMA_TX_TYPE_END (DMA_CYCLIC + 1)
+	DMA_TX_TYPE_END,
+};
 
 /**
  * enum dma_transfer_direction - dma transfer mode and direction indicator
@@ -90,6 +90,74 @@ enum dma_transfer_direction {
 	DMA_DEV_TO_DEV,
 };
 
+/**
+ * Interleaved Transfer Request
+ * ----------------------------
+ * A chunk is collection of contiguous bytes to be transfered.
+ * The gap(in bytes) between two chunks is called inter-chunk-gap(ICG).
+ * ICGs may or maynot change between chunks.
+ * A FRAME is the smallest series of contiguous {chunk,icg} pairs,
+ *  that when repeated an integral number of times, specifies the transfer.
+ * A transfer template is specification of a Frame, the number of times
+ *  it is to be repeated and other per-transfer attributes.
+ *
+ * Practically, a client driver would have ready a template for each
+ *  type of transfer it is going to need during its lifetime and
+ *  set only 'src_start' and 'dst_start' before submitting the requests.
+ *
+ *
+ *  |      Frame-1        |       Frame-2       | ~ |       Frame-'numf'  |
+ *  |====....==.===...=...|====....==.===...=...| ~ |====....==.===...=...|
+ *
+ *    ==  Chunk size
+ *    ... ICG
+ */
+
+/**
+ * struct data_chunk - Element of scatter-gather list that makes a frame.
+ * @size: Number of bytes to read from source.
+ *	  size_dst := fn(op, size_src), so doesn't mean much for destination.
+ * @icg: Number of bytes to jump after last src/dst address of this
+ *	 chunk and before first src/dst address for next chunk.
+ *	 Ignored for dst(assumed 0), if dst_inc is true and dst_sgl is false.
+ *	 Ignored for src(assumed 0), if src_inc is true and src_sgl is false.
+ */
+struct data_chunk {
+	size_t size;
+	size_t icg;
+};
+
+/**
+ * struct dma_interleaved_template - Template to convey DMAC the transfer pattern
+ *	 and attributes.
+ * @src_start: Bus address of source for the first chunk.
+ * @dst_start: Bus address of destination for the first chunk.
+ * @dir: Specifies the type of Source and Destination.
+ * @src_inc: If the source address increments after reading from it.
+ * @dst_inc: If the destination address increments after writing to it.
+ * @src_sgl: If the 'icg' of sgl[] applies to Source (scattered read).
+ *		Otherwise, source is read contiguously (icg ignored).
+ *		Ignored if src_inc is false.
+ * @dst_sgl: If the 'icg' of sgl[] applies to Destination (scattered write).
+ *		Otherwise, destination is filled contiguously (icg ignored).
+ *		Ignored if dst_inc is false.
+ * @numf: Number of frames in this template.
+ * @frame_size: Number of chunks in a frame i.e, size of sgl[].
+ * @sgl: Array of {chunk,icg} pairs that make up a frame.
+ */
+struct dma_interleaved_template {
+	dma_addr_t src_start;
+	dma_addr_t dst_start;
+	enum dma_transfer_direction dir;
+	bool src_inc;
+	bool dst_inc;
+	bool src_sgl;
+	bool dst_sgl;
+	size_t numf;
+	size_t frame_size;
+	struct data_chunk sgl[0];
+};
+
 /**
  * enum dma_ctrl_flags - DMA flags to augment operation preparation,
  *  control completion, and communicate status.
@@ -445,6 +513,7 @@ struct dma_tx_state {
  * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio.
  *	The function takes a buffer of size buf_len. The callback function will
  *	be called after period_len bytes have been transferred.
+ * @device_prep_interleaved_dma: Transfer expression in a generic way.
  * @device_control: manipulate all pending operations on a channel, returns
  *	zero or error code
  * @device_tx_status: poll for transaction completion, the optional
@@ -509,6 +578,9 @@ struct dma_device {
 	struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)(
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction);
+	struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
+		struct dma_chan *chan, struct dma_interleaved_template *xt,
+		unsigned long flags);
 	int (*device_control)(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		unsigned long arg);
 
-- 
cgit v1.2.3


From ca21a146a45a179a2a7bc86d938a2fbf571a7510 Mon Sep 17 00:00:00 2001
From: Rongjun Ying <Rongjun.Ying@csr.com>
Date: Thu, 27 Oct 2011 19:22:39 -0700
Subject: dmaengine: add CSR SiRFprimaII DMAC driver

Cc: Jassi Brar <jaswinder.singh@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Rongjun Ying <rongjun.ying@csr.com>
Signed-off-by: Barry Song <Baohua.Song@csr.com>
[fixed direction enums and cyclic api based on changes
 already merged]
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 MAINTAINERS                 |   1 +
 drivers/dma/Kconfig         |   7 +
 drivers/dma/Makefile        |   1 +
 drivers/dma/sirf-dma.c      | 717 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sirfsoc_dma.h |   6 +
 5 files changed, 732 insertions(+)
 create mode 100644 drivers/dma/sirf-dma.c
 create mode 100644 include/linux/sirfsoc_dma.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 4808256446f2..1b141d71ea13 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -749,6 +749,7 @@ M:	Barry Song <baohua.song@csr.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-prima2/
+F:	drivers/dma/sirf-dma*
 
 ARM/EBSA110 MACHINE SUPPORT
 M:	Russell King <linux@arm.linux.org.uk>
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 7ec0d6cef0c3..f1a274994bb1 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -187,6 +187,13 @@ config TIMB_DMA
 	help
 	  Enable support for the Timberdale FPGA DMA engine.
 
+config SIRF_DMA
+	tristate "CSR SiRFprimaII DMA support"
+	depends on ARCH_PRIMA2
+	select DMA_ENGINE
+	help
+	  Enable support for the CSR SiRFprimaII DMA engine.
+
 config ARCH_HAS_ASYNC_TX_FIND_CHANNEL
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 30cf3b1f0c5c..009a222e8283 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
 obj-$(CONFIG_IMX_DMA) += imx-dma.o
 obj-$(CONFIG_MXS_DMA) += mxs-dma.o
 obj-$(CONFIG_TIMB_DMA) += timb_dma.o
+obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
 obj-$(CONFIG_PCH_DMA) += pch_dma.o
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
new file mode 100644
index 000000000000..55ec67997670
--- /dev/null
+++ b/drivers/dma/sirf-dma.c
@@ -0,0 +1,717 @@
+/*
+ * DMA controller driver for CSR SiRFprimaII
+ *
+ * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company.
+ *
+ * Licensed under GPLv2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/sirfsoc_dma.h>
+
+#define SIRFSOC_DMA_DESCRIPTORS                 16
+#define SIRFSOC_DMA_CHANNELS                    16
+
+#define SIRFSOC_DMA_CH_ADDR                     0x00
+#define SIRFSOC_DMA_CH_XLEN                     0x04
+#define SIRFSOC_DMA_CH_YLEN                     0x08
+#define SIRFSOC_DMA_CH_CTRL                     0x0C
+
+#define SIRFSOC_DMA_WIDTH_0                     0x100
+#define SIRFSOC_DMA_CH_VALID                    0x140
+#define SIRFSOC_DMA_CH_INT                      0x144
+#define SIRFSOC_DMA_INT_EN                      0x148
+#define SIRFSOC_DMA_CH_LOOP_CTRL                0x150
+
+#define SIRFSOC_DMA_MODE_CTRL_BIT               4
+#define SIRFSOC_DMA_DIR_CTRL_BIT                5
+
+/* xlen and dma_width register is in 4 bytes boundary */
+#define SIRFSOC_DMA_WORD_LEN			4
+
+struct sirfsoc_dma_desc {
+	struct dma_async_tx_descriptor	desc;
+	struct list_head		node;
+
+	/* SiRFprimaII 2D-DMA parameters */
+
+	int             xlen;           /* DMA xlen */
+	int             ylen;           /* DMA ylen */
+	int             width;          /* DMA width */
+	int             dir;
+	bool            cyclic;         /* is loop DMA? */
+	u32             addr;		/* DMA buffer address */
+};
+
+struct sirfsoc_dma_chan {
+	struct dma_chan			chan;
+	struct list_head		free;
+	struct list_head		prepared;
+	struct list_head		queued;
+	struct list_head		active;
+	struct list_head		completed;
+	dma_cookie_t			completed_cookie;
+	unsigned long			happened_cyclic;
+	unsigned long			completed_cyclic;
+
+	/* Lock for this structure */
+	spinlock_t			lock;
+
+	int				mode;
+};
+
+struct sirfsoc_dma {
+	struct dma_device		dma;
+	struct tasklet_struct		tasklet;
+	struct sirfsoc_dma_chan		channels[SIRFSOC_DMA_CHANNELS];
+	void __iomem			*base;
+	int				irq;
+};
+
+#define DRV_NAME	"sirfsoc_dma"
+
+/* Convert struct dma_chan to struct sirfsoc_dma_chan */
+static inline
+struct sirfsoc_dma_chan *dma_chan_to_sirfsoc_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct sirfsoc_dma_chan, chan);
+}
+
+/* Convert struct dma_chan to struct sirfsoc_dma */
+static inline struct sirfsoc_dma *dma_chan_to_sirfsoc_dma(struct dma_chan *c)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(c);
+	return container_of(schan, struct sirfsoc_dma, channels[c->chan_id]);
+}
+
+/* Execute all queued DMA descriptors */
+static void sirfsoc_dma_execute(struct sirfsoc_dma_chan *schan)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
+	int cid = schan->chan.chan_id;
+	struct sirfsoc_dma_desc *sdesc = NULL;
+
+	/*
+	 * lock has been held by functions calling this, so we don't hold
+	 * lock again
+	 */
+
+	sdesc = list_first_entry(&schan->queued, struct sirfsoc_dma_desc,
+		node);
+	/* Move the first queued descriptor to active list */
+	list_move_tail(&schan->queued, &schan->active);
+
+	/* Start the DMA transfer */
+	writel_relaxed(sdesc->width, sdma->base + SIRFSOC_DMA_WIDTH_0 +
+		cid * 4);
+	writel_relaxed(cid | (schan->mode << SIRFSOC_DMA_MODE_CTRL_BIT) |
+		(sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT),
+		sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL);
+	writel_relaxed(sdesc->xlen, sdma->base + cid * 0x10 +
+		SIRFSOC_DMA_CH_XLEN);
+	writel_relaxed(sdesc->ylen, sdma->base + cid * 0x10 +
+		SIRFSOC_DMA_CH_YLEN);
+	writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) |
+		(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
+
+	/*
+	 * writel has an implict memory write barrier to make sure data is
+	 * flushed into memory before starting DMA
+	 */
+	writel(sdesc->addr >> 2, sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR);
+
+	if (sdesc->cyclic) {
+		writel((1 << cid) | 1 << (cid + 16) |
+			readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL),
+			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+		schan->happened_cyclic = schan->completed_cyclic = 0;
+	}
+}
+
+/* Interrupt handler */
+static irqreturn_t sirfsoc_dma_irq(int irq, void *data)
+{
+	struct sirfsoc_dma *sdma = data;
+	struct sirfsoc_dma_chan *schan;
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	u32 is;
+	int ch;
+
+	is = readl(sdma->base + SIRFSOC_DMA_CH_INT);
+	while ((ch = fls(is) - 1) >= 0) {
+		is &= ~(1 << ch);
+		writel_relaxed(1 << ch, sdma->base + SIRFSOC_DMA_CH_INT);
+		schan = &sdma->channels[ch];
+
+		spin_lock(&schan->lock);
+
+		sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc,
+			node);
+		if (!sdesc->cyclic) {
+			/* Execute queued descriptors */
+			list_splice_tail_init(&schan->active, &schan->completed);
+			if (!list_empty(&schan->queued))
+				sirfsoc_dma_execute(schan);
+		} else
+			schan->happened_cyclic++;
+
+		spin_unlock(&schan->lock);
+	}
+
+	/* Schedule tasklet */
+	tasklet_schedule(&sdma->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/* process completed descriptors */
+static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma)
+{
+	dma_cookie_t last_cookie = 0;
+	struct sirfsoc_dma_chan *schan;
+	struct sirfsoc_dma_desc *sdesc;
+	struct dma_async_tx_descriptor *desc;
+	unsigned long flags;
+	unsigned long happened_cyclic;
+	LIST_HEAD(list);
+	int i;
+
+	for (i = 0; i < sdma->dma.chancnt; i++) {
+		schan = &sdma->channels[i];
+
+		/* Get all completed descriptors */
+		spin_lock_irqsave(&schan->lock, flags);
+		if (!list_empty(&schan->completed)) {
+			list_splice_tail_init(&schan->completed, &list);
+			spin_unlock_irqrestore(&schan->lock, flags);
+
+			/* Execute callbacks and run dependencies */
+			list_for_each_entry(sdesc, &list, node) {
+				desc = &sdesc->desc;
+
+				if (desc->callback)
+					desc->callback(desc->callback_param);
+
+				last_cookie = desc->cookie;
+				dma_run_dependencies(desc);
+			}
+
+			/* Free descriptors */
+			spin_lock_irqsave(&schan->lock, flags);
+			list_splice_tail_init(&list, &schan->free);
+			schan->completed_cookie = last_cookie;
+			spin_unlock_irqrestore(&schan->lock, flags);
+		} else {
+			/* for cyclic channel, desc is always in active list */
+			sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc,
+				node);
+
+			if (!sdesc || (sdesc && !sdesc->cyclic)) {
+				/* without active cyclic DMA */
+				spin_unlock_irqrestore(&schan->lock, flags);
+				continue;
+			}
+
+			/* cyclic DMA */
+			happened_cyclic = schan->happened_cyclic;
+			spin_unlock_irqrestore(&schan->lock, flags);
+
+			desc = &sdesc->desc;
+			while (happened_cyclic != schan->completed_cyclic) {
+				if (desc->callback)
+					desc->callback(desc->callback_param);
+				schan->completed_cyclic++;
+			}
+		}
+	}
+}
+
+/* DMA Tasklet */
+static void sirfsoc_dma_tasklet(unsigned long data)
+{
+	struct sirfsoc_dma *sdma = (void *)data;
+
+	sirfsoc_dma_process_completed(sdma);
+}
+
+/* Submit descriptor to hardware */
+static dma_cookie_t sirfsoc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(txd->chan);
+	struct sirfsoc_dma_desc *sdesc;
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	sdesc = container_of(txd, struct sirfsoc_dma_desc, desc);
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	/* Move descriptor to queue */
+	list_move_tail(&sdesc->node, &schan->queued);
+
+	/* Update cookie */
+	cookie = schan->chan.cookie + 1;
+	if (cookie <= 0)
+		cookie = 1;
+
+	schan->chan.cookie = cookie;
+	sdesc->desc.cookie = cookie;
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return cookie;
+}
+
+static int sirfsoc_dma_slave_config(struct sirfsoc_dma_chan *schan,
+	struct dma_slave_config *config)
+{
+	unsigned long flags;
+
+	if ((config->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) ||
+		(config->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES))
+		return -EINVAL;
+
+	spin_lock_irqsave(&schan->lock, flags);
+	schan->mode = (config->src_maxburst == 4 ? 1 : 0);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return 0;
+}
+
+static int sirfsoc_dma_terminate_all(struct sirfsoc_dma_chan *schan)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
+	int cid = schan->chan.chan_id;
+	unsigned long flags;
+
+	writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) &
+		~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
+	writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID);
+
+	writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL)
+		& ~((1 << cid) | 1 << (cid + 16)),
+			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+
+	spin_lock_irqsave(&schan->lock, flags);
+	list_splice_tail_init(&schan->active, &schan->free);
+	list_splice_tail_init(&schan->queued, &schan->free);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return 0;
+}
+
+static int sirfsoc_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+	unsigned long arg)
+{
+	struct dma_slave_config *config;
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		return sirfsoc_dma_terminate_all(schan);
+	case DMA_SLAVE_CONFIG:
+		config = (struct dma_slave_config *)arg;
+		return sirfsoc_dma_slave_config(schan, config);
+
+	default:
+		break;
+	}
+
+	return -ENOSYS;
+}
+
+/* Alloc channel resources */
+static int sirfsoc_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc;
+	unsigned long flags;
+	LIST_HEAD(descs);
+	int i;
+
+	/* Alloc descriptors for this channel */
+	for (i = 0; i < SIRFSOC_DMA_DESCRIPTORS; i++) {
+		sdesc = kzalloc(sizeof(*sdesc), GFP_KERNEL);
+		if (!sdesc) {
+			dev_notice(sdma->dma.dev, "Memory allocation error. "
+				"Allocated only %u descriptors\n", i);
+			break;
+		}
+
+		dma_async_tx_descriptor_init(&sdesc->desc, chan);
+		sdesc->desc.flags = DMA_CTRL_ACK;
+		sdesc->desc.tx_submit = sirfsoc_dma_tx_submit;
+
+		list_add_tail(&sdesc->node, &descs);
+	}
+
+	/* Return error only if no descriptors were allocated */
+	if (i == 0)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	list_splice_tail_init(&descs, &schan->free);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return i;
+}
+
+/* Free channel resources */
+static void sirfsoc_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc, *tmp;
+	unsigned long flags;
+	LIST_HEAD(descs);
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	/* Channel must be idle */
+	BUG_ON(!list_empty(&schan->prepared));
+	BUG_ON(!list_empty(&schan->queued));
+	BUG_ON(!list_empty(&schan->active));
+	BUG_ON(!list_empty(&schan->completed));
+
+	/* Move data */
+	list_splice_tail_init(&schan->free, &descs);
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	/* Free descriptors */
+	list_for_each_entry_safe(sdesc, tmp, &descs, node)
+		kfree(sdesc);
+}
+
+/* Send pending descriptor to hardware */
+static void sirfsoc_dma_issue_pending(struct dma_chan *chan)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	if (list_empty(&schan->active) && !list_empty(&schan->queued))
+		sirfsoc_dma_execute(schan);
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+}
+
+/* Check request completion status */
+static enum dma_status
+sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+	struct dma_tx_state *txstate)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	unsigned long flags;
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+
+	spin_lock_irqsave(&schan->lock, flags);
+	last_used = schan->chan.cookie;
+	last_complete = schan->completed_cookie;
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	dma_set_tx_state(txstate, last_complete, last_used, 0);
+	return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static struct dma_async_tx_descriptor *sirfsoc_dma_prep_interleaved(
+	struct dma_chan *chan, struct dma_interleaved_template *xt,
+	unsigned long flags)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	unsigned long iflags;
+	int ret;
+
+	if ((xt->dir != DMA_MEM_TO_DEV) || (xt->dir != DMA_DEV_TO_MEM)) {
+		ret = -EINVAL;
+		goto err_dir;
+	}
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&schan->lock, iflags);
+	if (!list_empty(&schan->free)) {
+		sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc,
+			node);
+		list_del(&sdesc->node);
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	if (!sdesc) {
+		/* try to free completed descriptors */
+		sirfsoc_dma_process_completed(sdma);
+		ret = 0;
+		goto no_desc;
+	}
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&schan->lock, iflags);
+
+	/*
+	 * Number of chunks in a frame can only be 1 for prima2
+	 * and ylen (number of frame - 1) must be at least 0
+	 */
+	if ((xt->frame_size == 1) && (xt->numf > 0)) {
+		sdesc->cyclic = 0;
+		sdesc->xlen = xt->sgl[0].size / SIRFSOC_DMA_WORD_LEN;
+		sdesc->width = (xt->sgl[0].size + xt->sgl[0].icg) /
+				SIRFSOC_DMA_WORD_LEN;
+		sdesc->ylen = xt->numf - 1;
+		if (xt->dir == DMA_MEM_TO_DEV) {
+			sdesc->addr = xt->src_start;
+			sdesc->dir = 1;
+		} else {
+			sdesc->addr = xt->dst_start;
+			sdesc->dir = 0;
+		}
+
+		list_add_tail(&sdesc->node, &schan->prepared);
+	} else {
+		pr_err("sirfsoc DMA Invalid xfer\n");
+		ret = -EINVAL;
+		goto err_xfer;
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	return &sdesc->desc;
+err_xfer:
+	spin_unlock_irqrestore(&schan->lock, iflags);
+no_desc:
+err_dir:
+	return ERR_PTR(ret);
+}
+
+static struct dma_async_tx_descriptor *
+sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr,
+	size_t buf_len, size_t period_len,
+	enum dma_transfer_direction direction)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	unsigned long iflags;
+
+	/*
+	 * we only support cycle transfer with 2 period
+	 * If the X-length is set to 0, it would be the loop mode.
+	 * The DMA address keeps increasing until reaching the end of a loop
+	 * area whose size is defined by (DMA_WIDTH x (Y_LENGTH + 1)). Then
+	 * the DMA address goes back to the beginning of this area.
+	 * In loop mode, the DMA data region is divided into two parts, BUFA
+	 * and BUFB. DMA controller generates interrupts twice in each loop:
+	 * when the DMA address reaches the end of BUFA or the end of the
+	 * BUFB
+	 */
+	if (buf_len !=  2 * period_len)
+		return ERR_PTR(-EINVAL);
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&schan->lock, iflags);
+	if (!list_empty(&schan->free)) {
+		sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc,
+			node);
+		list_del(&sdesc->node);
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	if (!sdesc)
+		return 0;
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&schan->lock, iflags);
+	sdesc->addr = addr;
+	sdesc->cyclic = 1;
+	sdesc->xlen = 0;
+	sdesc->ylen = buf_len / SIRFSOC_DMA_WORD_LEN - 1;
+	sdesc->width = 1;
+	list_add_tail(&sdesc->node, &schan->prepared);
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	return &sdesc->desc;
+}
+
+/*
+ * The DMA controller consists of 16 independent DMA channels.
+ * Each channel is allocated to a different function
+ */
+bool sirfsoc_dma_filter_id(struct dma_chan *chan, void *chan_id)
+{
+	unsigned int ch_nr = (unsigned int) chan_id;
+
+	if (ch_nr == chan->chan_id +
+		chan->device->dev_id * SIRFSOC_DMA_CHANNELS)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(sirfsoc_dma_filter_id);
+
+static int __devinit sirfsoc_dma_probe(struct platform_device *op)
+{
+	struct device_node *dn = op->dev.of_node;
+	struct device *dev = &op->dev;
+	struct dma_device *dma;
+	struct sirfsoc_dma *sdma;
+	struct sirfsoc_dma_chan *schan;
+	struct resource res;
+	ulong regs_start, regs_size;
+	u32 id;
+	int ret, i;
+
+	sdma = devm_kzalloc(dev, sizeof(*sdma), GFP_KERNEL);
+	if (!sdma) {
+		dev_err(dev, "Memory exhausted!\n");
+		return -ENOMEM;
+	}
+
+	if (of_property_read_u32(dn, "cell-index", &id)) {
+		dev_err(dev, "Fail to get DMAC index\n");
+		ret = -ENODEV;
+		goto free_mem;
+	}
+
+	sdma->irq = irq_of_parse_and_map(dn, 0);
+	if (sdma->irq == NO_IRQ) {
+		dev_err(dev, "Error mapping IRQ!\n");
+		ret = -EINVAL;
+		goto free_mem;
+	}
+
+	ret = of_address_to_resource(dn, 0, &res);
+	if (ret) {
+		dev_err(dev, "Error parsing memory region!\n");
+		goto free_mem;
+	}
+
+	regs_start = res.start;
+	regs_size = resource_size(&res);
+
+	sdma->base = devm_ioremap(dev, regs_start, regs_size);
+	if (!sdma->base) {
+		dev_err(dev, "Error mapping memory region!\n");
+		ret = -ENOMEM;
+		goto irq_dispose;
+	}
+
+	ret = devm_request_irq(dev, sdma->irq, &sirfsoc_dma_irq, 0, DRV_NAME,
+		sdma);
+	if (ret) {
+		dev_err(dev, "Error requesting IRQ!\n");
+		ret = -EINVAL;
+		goto unmap_mem;
+	}
+
+	dma = &sdma->dma;
+	dma->dev = dev;
+	dma->chancnt = SIRFSOC_DMA_CHANNELS;
+
+	dma->device_alloc_chan_resources = sirfsoc_dma_alloc_chan_resources;
+	dma->device_free_chan_resources = sirfsoc_dma_free_chan_resources;
+	dma->device_issue_pending = sirfsoc_dma_issue_pending;
+	dma->device_control = sirfsoc_dma_control;
+	dma->device_tx_status = sirfsoc_dma_tx_status;
+	dma->device_prep_interleaved_dma = sirfsoc_dma_prep_interleaved;
+	dma->device_prep_dma_cyclic = sirfsoc_dma_prep_cyclic;
+
+	INIT_LIST_HEAD(&dma->channels);
+	dma_cap_set(DMA_SLAVE, dma->cap_mask);
+	dma_cap_set(DMA_CYCLIC, dma->cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, dma->cap_mask);
+	dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+
+	for (i = 0; i < dma->chancnt; i++) {
+		schan = &sdma->channels[i];
+
+		schan->chan.device = dma;
+		schan->chan.cookie = 1;
+		schan->completed_cookie = schan->chan.cookie;
+
+		INIT_LIST_HEAD(&schan->free);
+		INIT_LIST_HEAD(&schan->prepared);
+		INIT_LIST_HEAD(&schan->queued);
+		INIT_LIST_HEAD(&schan->active);
+		INIT_LIST_HEAD(&schan->completed);
+
+		spin_lock_init(&schan->lock);
+		list_add_tail(&schan->chan.device_node, &dma->channels);
+	}
+
+	tasklet_init(&sdma->tasklet, sirfsoc_dma_tasklet, (unsigned long)sdma);
+
+	/* Register DMA engine */
+	dev_set_drvdata(dev, sdma);
+	ret = dma_async_device_register(dma);
+	if (ret)
+		goto free_irq;
+
+	dev_info(dev, "initialized SIRFSOC DMAC driver\n");
+
+	return 0;
+
+free_irq:
+	devm_free_irq(dev, sdma->irq, sdma);
+irq_dispose:
+	irq_dispose_mapping(sdma->irq);
+unmap_mem:
+	iounmap(sdma->base);
+free_mem:
+	devm_kfree(dev, sdma);
+	return ret;
+}
+
+static int __devexit sirfsoc_dma_remove(struct platform_device *op)
+{
+	struct device *dev = &op->dev;
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+
+	dma_async_device_unregister(&sdma->dma);
+	devm_free_irq(dev, sdma->irq, sdma);
+	irq_dispose_mapping(sdma->irq);
+	iounmap(sdma->base);
+	devm_kfree(dev, sdma);
+	return 0;
+}
+
+static struct of_device_id sirfsoc_dma_match[] = {
+	{ .compatible = "sirf,prima2-dmac", },
+	{},
+};
+
+static struct platform_driver sirfsoc_dma_driver = {
+	.probe		= sirfsoc_dma_probe,
+	.remove		= __devexit_p(sirfsoc_dma_remove),
+	.driver = {
+		.name = DRV_NAME,
+		.owner = THIS_MODULE,
+		.of_match_table	= sirfsoc_dma_match,
+	},
+};
+
+static int __init sirfsoc_dma_init(void)
+{
+	return platform_driver_register(&sirfsoc_dma_driver);
+}
+module_init(sirfsoc_dma_init);
+
+static void __exit sirfsoc_dma_exit(void)
+{
+	platform_driver_unregister(&sirfsoc_dma_driver);
+}
+module_exit(sirfsoc_dma_exit);
+
+MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>, "
+	"Barry Song <baohua.song@csr.com>");
+MODULE_DESCRIPTION("SIRFSOC DMA control driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/sirfsoc_dma.h b/include/linux/sirfsoc_dma.h
new file mode 100644
index 000000000000..29d959333d81
--- /dev/null
+++ b/include/linux/sirfsoc_dma.h
@@ -0,0 +1,6 @@
+#ifndef _SIRFSOC_DMA_H_
+#define _SIRFSOC_DMA_H_
+
+bool sirfsoc_dma_filter_id(struct dma_chan *chan, void *chan_id);
+
+#endif
-- 
cgit v1.2.3


From 1a087c6ad975bcc193b4bab2e9d61f9c6c547138 Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@gnudd.com>
Date: Fri, 18 Nov 2011 14:50:21 +0100
Subject: debugfs: add tools to printk 32-bit registers

Some debugfs file I deal with are mostly blocks of registers,
i.e. lines of the form "<name> = 0x<value>". Some files are only
registers, some include registers blocks among other material.  This
patch introduces data structures and functions to deal with both
cases.  I expect more users of this over time.

Signed-off-by: Alessandro Rubini <rubini@gnudd.com>
Acked-by: Giancarlo Asnaghi <giancarlo.asnaghi@st.com>
Cc: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/filesystems/debugfs.txt | 32 ++++++++++++-
 fs/debugfs/file.c                     | 90 +++++++++++++++++++++++++++++++++++
 include/linux/debugfs.h               | 26 ++++++++++
 3 files changed, 147 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt
index 742cc06e138f..f04066a37f4c 100644
--- a/Documentation/filesystems/debugfs.txt
+++ b/Documentation/filesystems/debugfs.txt
@@ -97,7 +97,8 @@ A read on the resulting file will yield either Y (for non-zero values) or
 N, followed by a newline.  If written to, it will accept either upper- or
 lower-case values, or 1 or 0.  Any other input will be silently ignored.
 
-Finally, a block of arbitrary binary data can be exported with:
+Another option is exporting a block of arbitrary binary data, with
+this structure and function:
 
     struct debugfs_blob_wrapper {
 	void *data;
@@ -115,6 +116,35 @@ can be used to export binary information, but there does not appear to be
 any code which does so in the mainline.  Note that all files created with
 debugfs_create_blob() are read-only.
 
+If you want to dump a block of registers (something that happens quite
+often during development, even if little such code reaches mainline.
+Debugfs offers two functions: one to make a registers-only file, and
+another to insert a register block in the middle of another sequential
+file.
+
+    struct debugfs_reg32 {
+	char *name;
+	unsigned long offset;
+    };
+
+    struct debugfs_regset32 {
+	struct debugfs_reg32 *regs;
+	int nregs;
+	void __iomem *base;
+    };
+
+    struct dentry *debugfs_create_regset32(const char *name, mode_t mode,
+				     struct dentry *parent,
+				     struct debugfs_regset32 *regset);
+
+    int debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs,
+			 int nregs, void __iomem *base, char *prefix);
+
+The "base" argument may be 0, but you may want to build the reg32 array
+using __stringify, and a number of register names (macros) are actually
+byte offsets over a base for the register block.
+
+
 There are a couple of other directory-oriented helper functions:
 
     struct dentry *debugfs_rename(struct dentry *old_dir, 
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 90f76575c056..f31a27c60fc6 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -15,6 +15,7 @@
 
 #include <linux/module.h>
 #include <linux/fs.h>
+#include <linux/seq_file.h>
 #include <linux/pagemap.h>
 #include <linux/namei.h>
 #include <linux/debugfs.h>
@@ -525,3 +526,92 @@ struct dentry *debugfs_create_blob(const char *name, mode_t mode,
 	return debugfs_create_file(name, mode, parent, blob, &fops_blob);
 }
 EXPORT_SYMBOL_GPL(debugfs_create_blob);
+
+/*
+ * The regset32 stuff is used to print 32-bit registers using the
+ * seq_file utilities. We offer printing a register set in an already-opened
+ * sequential file or create a debugfs file that only prints a regset32.
+ */
+
+/**
+ * debugfs_print_regs32 - use seq_print to describe a set of registers
+ * @s: the seq_file structure being used to generate output
+ * @regs: an array if struct debugfs_reg32 structures
+ * @mregs: the length of the above array
+ * @base: the base address to be used in reading the registers
+ * @prefix: a string to be prefixed to every output line
+ *
+ * This function outputs a text block describing the current values of
+ * some 32-bit hardware registers. It is meant to be used within debugfs
+ * files based on seq_file that need to show registers, intermixed with other
+ * information. The prefix argument may be used to specify a leading string,
+ * because some peripherals have several blocks of identical registers,
+ * for example configuration of dma channels
+ */
+int debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs,
+			   int nregs, void __iomem *base, char *prefix)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < nregs; i++, regs++) {
+		if (prefix)
+			ret += seq_printf(s, "%s", prefix);
+		ret += seq_printf(s, "%s = 0x%08x\n", regs->name,
+				  readl((void *)(base + regs->offset)));
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(debugfs_print_regs32);
+
+static int debugfs_show_regset32(struct seq_file *s, void *data)
+{
+	struct debugfs_regset32 *regset = s->private;
+
+	debugfs_print_regs32(s, regset->regs, regset->nregs, regset->base, "");
+	return 0;
+}
+
+static int debugfs_open_regset32(struct inode *inode, struct file *file)
+{
+	return single_open(file, debugfs_show_regset32, inode->i_private);
+}
+
+static const struct file_operations fops_regset32 = {
+	.open =		debugfs_open_regset32,
+	.read =		seq_read,
+	.llseek =	seq_lseek,
+	.release =	single_release,
+};
+
+/**
+ * debugfs_create_regset32 - create a debugfs file that returns register values
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have
+ * @parent: a pointer to the parent dentry for this file.  This should be a
+ *          directory dentry if set.  If this parameter is %NULL, then the
+ *          file will be created in the root of the debugfs filesystem.
+ * @regset: a pointer to a struct debugfs_regset32, which contains a pointer
+ *          to an array of register definitions, the array size and the base
+ *          address where the register bank is to be found.
+ *
+ * This function creates a file in debugfs with the given name that reports
+ * the names and values of a set of 32-bit registers. If the @mode variable
+ * is so set it can be read from. Writing is not supported.
+ *
+ * This function will return a pointer to a dentry if it succeeds.  This
+ * pointer must be passed to the debugfs_remove() function when the file is
+ * to be removed (no automatic cleanup happens if your module is unloaded,
+ * you are responsible here.)  If an error occurs, %NULL will be returned.
+ *
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.  It is not wise to check for this value, but rather, check for
+ * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
+ * code.
+ */
+struct dentry *debugfs_create_regset32(const char *name, mode_t mode,
+				       struct dentry *parent,
+				       struct debugfs_regset32 *regset)
+{
+	return debugfs_create_file(name, mode, parent, regset, &fops_regset32);
+}
+EXPORT_SYMBOL_GPL(debugfs_create_regset32);
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index e7d9b20ddc5b..5e6b01f6db4c 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -16,6 +16,7 @@
 #define _DEBUGFS_H_
 
 #include <linux/fs.h>
+#include <linux/seq_file.h>
 
 #include <linux/types.h>
 
@@ -26,6 +27,17 @@ struct debugfs_blob_wrapper {
 	unsigned long size;
 };
 
+struct debugfs_reg32 {
+	char *name;
+	unsigned long offset;
+};
+
+struct debugfs_regset32 {
+	struct debugfs_reg32 *regs;
+	int nregs;
+	void __iomem *base;
+};
+
 extern struct dentry *arch_debugfs_dir;
 
 #if defined(CONFIG_DEBUG_FS)
@@ -74,6 +86,13 @@ struct dentry *debugfs_create_blob(const char *name, mode_t mode,
 				  struct dentry *parent,
 				  struct debugfs_blob_wrapper *blob);
 
+struct dentry *debugfs_create_regset32(const char *name, mode_t mode,
+				     struct dentry *parent,
+				     struct debugfs_regset32 *regset);
+
+int debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs,
+			 int nregs, void __iomem *base, char *prefix);
+
 bool debugfs_initialized(void);
 
 #else
@@ -188,6 +207,13 @@ static inline struct dentry *debugfs_create_blob(const char *name, mode_t mode,
 	return ERR_PTR(-ENODEV);
 }
 
+static inline struct dentry *debugfs_create_regset32(const char *name,
+				   mode_t mode, struct dentry *parent,
+				   struct debugfs_regset32 *regset)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 static inline bool debugfs_initialized(void)
 {
 	return false;
-- 
cgit v1.2.3


From 0720a06a7518c9d0c0125bd5d1f3b6264c55c3dd Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 17 Nov 2011 16:42:19 -0500
Subject: NLS: improve UTF8 -> UTF16 string conversion routine

The utf8s_to_utf16s conversion routine needs to be improved.  Unlike
its utf16s_to_utf8s sibling, it doesn't accept arguments specifying
the maximum length of the output buffer or the endianness of its
16-bit output.

This patch (as1501) adds the two missing arguments, and adjusts the
only two places in the kernel where the function is called.  A
follow-on patch will add a third caller that does utilize the new
capabilities.

The two conversion routines are still annoyingly inconsistent in the
way they handle invalid byte combinations.  But that's a subject for a
different patch.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/hv/hv_kvp.c | 10 ++++++----
 fs/fat/namei_vfat.c |  3 ++-
 fs/nls/nls_base.c   | 43 +++++++++++++++++++++++++++++++++----------
 include/linux/nls.h |  5 +++--
 4 files changed, 44 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index 89f52440fcf4..0e8343f585bb 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -212,11 +212,13 @@ kvp_respond_to_host(char *key, char *value, int error)
 	 * The windows host expects the key/value pair to be encoded
 	 * in utf16.
 	 */
-	keylen = utf8s_to_utf16s(key_name, strlen(key_name),
-				(wchar_t *)kvp_data->data.key);
+	keylen = utf8s_to_utf16s(key_name, strlen(key_name), UTF16_HOST_ENDIAN,
+				(wchar_t *) kvp_data->data.key,
+				HV_KVP_EXCHANGE_MAX_KEY_SIZE / 2);
 	kvp_data->data.key_size = 2*(keylen + 1); /* utf16 encoding */
-	valuelen = utf8s_to_utf16s(value, strlen(value),
-				(wchar_t *)kvp_data->data.value);
+	valuelen = utf8s_to_utf16s(value, strlen(value), UTF16_HOST_ENDIAN,
+				(wchar_t *) kvp_data->data.value,
+				HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2);
 	kvp_data->data.value_size = 2*(valuelen + 1); /* utf16 encoding */
 
 	kvp_data->data.value_type = REG_SZ; /* all our values are strings */
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index a87a65663c25..c25cf151b84b 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
 	int charlen;
 
 	if (utf8) {
-		*outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname);
+		*outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
+				(wchar_t *) outname, FAT_LFN_LEN + 2);
 		if (*outlen < 0)
 			return *outlen;
 		else if (*outlen > FAT_LFN_LEN)
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 44a88a9fa2c8..0eb059ec6f28 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
 }
 EXPORT_SYMBOL(utf32_to_utf8);
 
-int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
+static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian)
+{
+	switch (endian) {
+	default:
+		*s = (wchar_t) c;
+		break;
+	case UTF16_LITTLE_ENDIAN:
+		*s = __cpu_to_le16(c);
+		break;
+	case UTF16_BIG_ENDIAN:
+		*s = __cpu_to_be16(c);
+		break;
+	}
+}
+
+int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
+		wchar_t *pwcs, int maxlen)
 {
 	u16 *op;
 	int size;
 	unicode_t u;
 
 	op = pwcs;
-	while (*s && len > 0) {
+	while (len > 0 && maxlen > 0 && *s) {
 		if (*s & 0x80) {
 			size = utf8_to_utf32(s, len, &u);
 			if (size < 0)
 				return -EINVAL;
+			s += size;
+			len -= size;
 
 			if (u >= PLANE_SIZE) {
+				if (maxlen < 2)
+					break;
 				u -= PLANE_SIZE;
-				*op++ = (wchar_t) (SURROGATE_PAIR |
-						((u >> 10) & SURROGATE_BITS));
-				*op++ = (wchar_t) (SURROGATE_PAIR |
+				put_utf16(op++, SURROGATE_PAIR |
+						((u >> 10) & SURROGATE_BITS),
+						endian);
+				put_utf16(op++, SURROGATE_PAIR |
 						SURROGATE_LOW |
-						(u & SURROGATE_BITS));
+						(u & SURROGATE_BITS),
+						endian);
+				maxlen -= 2;
 			} else {
-				*op++ = (wchar_t) u;
+				put_utf16(op++, u, endian);
+				maxlen--;
 			}
-			s += size;
-			len -= size;
 		} else {
-			*op++ = *s++;
+			put_utf16(op++, *s++, endian);
 			len--;
+			maxlen--;
 		}
 	}
 	return op - pwcs;
diff --git a/include/linux/nls.h b/include/linux/nls.h
index d47beef08dfd..5dc635f8d79e 100644
--- a/include/linux/nls.h
+++ b/include/linux/nls.h
@@ -43,7 +43,7 @@ enum utf16_endian {
 	UTF16_BIG_ENDIAN
 };
 
-/* nls.c */
+/* nls_base.c */
 extern int register_nls(struct nls_table *);
 extern int unregister_nls(struct nls_table *);
 extern struct nls_table *load_nls(char *);
@@ -52,7 +52,8 @@ extern struct nls_table *load_nls_default(void);
 
 extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
 extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
-extern int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs);
+extern int utf8s_to_utf16s(const u8 *s, int len,
+		enum utf16_endian endian, wchar_t *pwcs, int maxlen);
 extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
 		enum utf16_endian endian, u8 *s, int maxlen);
 
-- 
cgit v1.2.3


From be9b7335e70696bee731c152429b1737e42fe163 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Thu, 25 Aug 2011 00:24:21 -0400
Subject: mm: add vm_area_add_early()

The existing vm_area_register_early() allows for early vmalloc space
allocation.  However upcoming cleanups in the ARM architecture require
that some fixed locations in the vmalloc area be reserved also very early.

The name "vm_area_register_early" would have been a good name for the
reservation part without the allocation.  Since it is already in use with
different semantics, let's create vm_area_add_early() instead.

Both vm_area_register_early() and vm_area_add_early() can be used together
meaning that the former is now implemented using the later where it is
ensured that no conflicting areas are added, but no attempt is made to
make the allocation scheme in vm_area_register_early() more sophisticated.
After all, you must know what you're doing when using those functions.

Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org
---
 include/linux/vmalloc.h |  1 +
 mm/vmalloc.c            | 29 +++++++++++++++++++++++++++--
 2 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 687fb11e2010..4115d6aa80be 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -131,6 +131,7 @@ extern long vwrite(char *buf, char *addr, unsigned long count);
  */
 extern rwlock_t vmlist_lock;
 extern struct vm_struct *vmlist;
+extern __init void vm_area_add_early(struct vm_struct *vm);
 extern __init void vm_area_register_early(struct vm_struct *vm, size_t align);
 
 #ifdef CONFIG_SMP
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index b669aa6f6caf..3f2b59221b78 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1117,6 +1117,32 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
 }
 EXPORT_SYMBOL(vm_map_ram);
 
+/**
+ * vm_area_add_early - add vmap area early during boot
+ * @vm: vm_struct to add
+ *
+ * This function is used to add fixed kernel vm area to vmlist before
+ * vmalloc_init() is called.  @vm->addr, @vm->size, and @vm->flags
+ * should contain proper values and the other fields should be zero.
+ *
+ * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
+ */
+void __init vm_area_add_early(struct vm_struct *vm)
+{
+	struct vm_struct *tmp, **p;
+
+	BUG_ON(vmap_initialized);
+	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
+		if (tmp->addr >= vm->addr) {
+			BUG_ON(tmp->addr < vm->addr + vm->size);
+			break;
+		} else
+			BUG_ON(tmp->addr + tmp->size > vm->addr);
+	}
+	vm->next = *p;
+	*p = vm;
+}
+
 /**
  * vm_area_register_early - register vmap area early during boot
  * @vm: vm_struct to register
@@ -1139,8 +1165,7 @@ void __init vm_area_register_early(struct vm_struct *vm, size_t align)
 
 	vm->addr = (void *)addr;
 
-	vm->next = vmlist;
-	vmlist = vm;
+	vm_area_add_early(vm);
 }
 
 void __init vmalloc_init(void)
-- 
cgit v1.2.3


From 56c978f1da1f630ef18aa668a9748c6c23ab819b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 18 Nov 2011 02:20:05 +0000
Subject: net: Remove LL_ALLOCATED_SPACE

net: Remove LL_ALLOCATED_SPACE

The macro LL_ALLOCATED_SPACE was ill-conceived.  It applies the
alignment to the sum of needed_headroom and needed_tailroom.  As
the amount that is then reserved for head room is needed_headroom
with alignment, this means that the tail room left may be too small.

Now that all uses of LL_ALLOCATED_SPACE have been removed, this
patch finally removes the macro itself.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0bbe030fc014..3eb383a9b5ed 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -279,16 +279,11 @@ struct hh_cache {
  *
  * We could use other alignment values, but we must maintain the
  * relationship HH alignment <= LL alignment.
- *
- * LL_ALLOCATED_SPACE also takes into account the tailroom the device
- * may need.
  */
 #define LL_RESERVED_SPACE(dev) \
 	((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
 #define LL_RESERVED_SPACE_EXTRA(dev,extra) \
 	((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
-#define LL_ALLOCATED_SPACE(dev) \
-	((((dev)->hard_header_len+(dev)->needed_headroom+(dev)->needed_tailroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
 
 struct header_ops {
 	int	(*create) (struct sk_buff *skb, struct net_device *dev,
-- 
cgit v1.2.3


From bdb6e697b2a76c541960b86ab8fda88f3de1adf2 Mon Sep 17 00:00:00 2001
From: Dinesh Kumar Sharma <dinesh.sharma@stericsson.com>
Date: Fri, 18 Nov 2011 01:22:05 +0000
Subject: Phonet: set the pipe handle using setsockopt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This provides flexibility to set the pipe handle
using setsockopt. The pipe can be enabled (if disabled) later
using ioctl.

Signed-off-by: Hemant Ramdasi <hemant.ramdasi@stericsson.com>
Signed-off-by: Dinesh Kumar Sharma <dinesh.sharma@stericsson.com>
Acked-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phonet.h |   2 +
 net/phonet/pep.c       | 106 ++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 97 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phonet.h b/include/linux/phonet.h
index f53a4167c5f4..f48bfc80cb4b 100644
--- a/include/linux/phonet.h
+++ b/include/linux/phonet.h
@@ -38,6 +38,7 @@
 #define PNPIPE_ENCAP		1
 #define PNPIPE_IFINDEX		2
 #define PNPIPE_HANDLE		3
+#define PNPIPE_INITSTATE	4
 
 #define PNADDR_ANY		0
 #define PNADDR_BROADCAST	0xFC
@@ -49,6 +50,7 @@
 
 /* ioctls */
 #define SIOCPNGETOBJECT		(SIOCPROTOPRIVATE + 0)
+#define SIOCPNENABLEPIPE	(SIOCPROTOPRIVATE + 13)
 #define SIOCPNADDRESOURCE	(SIOCPROTOPRIVATE + 14)
 #define SIOCPNDELRESOURCE	(SIOCPROTOPRIVATE + 15)
 
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 2ba6e9fb4cbc..9f60008740e3 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -534,6 +534,29 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
 	return pipe_handler_send_created_ind(sk);
 }
 
+static int pep_enableresp_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct pnpipehdr *hdr = pnp_hdr(skb);
+
+	if (hdr->error_code != PN_PIPE_NO_ERROR)
+		return -ECONNREFUSED;
+
+	return pep_indicate(sk, PNS_PIPE_ENABLED_IND, 0 /* sub-blocks */,
+		NULL, 0, GFP_ATOMIC);
+
+}
+
+static void pipe_start_flow_control(struct sock *sk)
+{
+	struct pep_sock *pn = pep_sk(sk);
+
+	if (!pn_flow_safe(pn->tx_fc)) {
+		atomic_set(&pn->tx_credits, 1);
+		sk->sk_write_space(sk);
+	}
+	pipe_grant_credits(sk, GFP_ATOMIC);
+}
+
 /* Queue an skb to an actively connected sock.
  * Socket lock must be held. */
 static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
@@ -579,13 +602,25 @@ static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
 			sk->sk_state = TCP_CLOSE_WAIT;
 			break;
 		}
+		if (pn->init_enable == PN_PIPE_DISABLE)
+			sk->sk_state = TCP_SYN_RECV;
+		else {
+			sk->sk_state = TCP_ESTABLISHED;
+			pipe_start_flow_control(sk);
+		}
+		break;
 
-		sk->sk_state = TCP_ESTABLISHED;
-		if (!pn_flow_safe(pn->tx_fc)) {
-			atomic_set(&pn->tx_credits, 1);
-			sk->sk_write_space(sk);
+	case PNS_PEP_ENABLE_RESP:
+		if (sk->sk_state != TCP_SYN_SENT)
+			break;
+
+		if (pep_enableresp_rcv(sk, skb)) {
+			sk->sk_state = TCP_CLOSE_WAIT;
+			break;
 		}
-		pipe_grant_credits(sk, GFP_ATOMIC);
+
+		sk->sk_state = TCP_ESTABLISHED;
+		pipe_start_flow_control(sk);
 		break;
 
 	case PNS_PEP_DISCONNECT_RESP:
@@ -864,14 +899,32 @@ static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len)
 	int err;
 	u8 data[4] = { 0 /* sub-blocks */, PAD, PAD, PAD };
 
-	pn->pipe_handle = 1; /* anything but INVALID_HANDLE */
+	if (pn->pipe_handle == PN_PIPE_INVALID_HANDLE)
+		pn->pipe_handle = 1; /* anything but INVALID_HANDLE */
+
 	err = pipe_handler_request(sk, PNS_PEP_CONNECT_REQ,
-					PN_PIPE_ENABLE, data, 4);
+				pn->init_enable, data, 4);
 	if (err) {
 		pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
 		return err;
 	}
+
 	sk->sk_state = TCP_SYN_SENT;
+
+	return 0;
+}
+
+static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len)
+{
+	int err;
+
+	err = pipe_handler_request(sk, PNS_PEP_ENABLE_REQ, PAD,
+				NULL, 0);
+	if (err)
+		return err;
+
+	sk->sk_state = TCP_SYN_SENT;
+
 	return 0;
 }
 
@@ -879,11 +932,14 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
 	struct pep_sock *pn = pep_sk(sk);
 	int answ;
+	int ret = -ENOIOCTLCMD;
 
 	switch (cmd) {
 	case SIOCINQ:
-		if (sk->sk_state == TCP_LISTEN)
-			return -EINVAL;
+		if (sk->sk_state == TCP_LISTEN) {
+			ret = -EINVAL;
+			break;
+		}
 
 		lock_sock(sk);
 		if (sock_flag(sk, SOCK_URGINLINE) &&
@@ -894,10 +950,22 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
 		else
 			answ = 0;
 		release_sock(sk);
-		return put_user(answ, (int __user *)arg);
+		ret = put_user(answ, (int __user *)arg);
+		break;
+
+	case SIOCPNENABLEPIPE:
+		lock_sock(sk);
+		if (sk->sk_state == TCP_SYN_SENT)
+			ret =  -EBUSY;
+		else if (sk->sk_state == TCP_ESTABLISHED)
+			ret = -EISCONN;
+		else
+			ret = pep_sock_enable(sk, NULL, 0);
+		release_sock(sk);
+		break;
 	}
 
-	return -ENOIOCTLCMD;
+	return ret;
 }
 
 static int pep_init(struct sock *sk)
@@ -960,6 +1028,18 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
 		}
 		goto out_norel;
 
+	case PNPIPE_HANDLE:
+		if ((sk->sk_state == TCP_CLOSE) &&
+			(val >= 0) && (val < PN_PIPE_INVALID_HANDLE))
+			pn->pipe_handle = val;
+		else
+			err = -EINVAL;
+		break;
+
+	case PNPIPE_INITSTATE:
+		pn->init_enable = !!val;
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -995,6 +1075,10 @@ static int pep_getsockopt(struct sock *sk, int level, int optname,
 			return -EINVAL;
 		break;
 
+	case PNPIPE_INITSTATE:
+		val = pn->init_enable;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
cgit v1.2.3


From 8ee4dd9f063ce59c08f3ce283ca03306131aaf3a Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@gnudd.com>
Date: Fri, 18 Nov 2011 23:53:29 +0100
Subject: debugfs: print_regs32: make regs array a const pointer

Signed-off-by: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/debugfs/file.c       | 2 +-
 include/linux/debugfs.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index f31a27c60fc6..fc98ec9e1d83 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -548,7 +548,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_blob);
  * because some peripherals have several blocks of identical registers,
  * for example configuration of dma channels
  */
-int debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs,
+int debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
 			   int nregs, void __iomem *base, char *prefix)
 {
 	int i, ret = 0;
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 5e6b01f6db4c..e8c3abc60811 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -90,7 +90,7 @@ struct dentry *debugfs_create_regset32(const char *name, mode_t mode,
 				     struct dentry *parent,
 				     struct debugfs_regset32 *regset);
 
-int debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs,
+int debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
 			 int nregs, void __iomem *base, char *prefix);
 
 bool debugfs_initialized(void);
-- 
cgit v1.2.3


From 4c691664583ef6a91f9ed0e08a75fbd30a5ffd5c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 18 Nov 2011 16:53:00 +0000
Subject: regmap: Remove indexed cache type

There should be no situation where it offers any advantage over rbtree
and there are no current users so remove the code for simplicity.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/Makefile           |  2 +-
 drivers/base/regmap/internal.h         |  3 --
 drivers/base/regmap/regcache-indexed.c | 64 ----------------------------------
 drivers/base/regmap/regcache.c         | 20 -----------
 include/linux/regmap.h                 |  1 -
 5 files changed, 1 insertion(+), 89 deletions(-)
 delete mode 100644 drivers/base/regmap/regcache-indexed.c

(limited to 'include/linux')

diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile
index 3dbe5d3ff227..defd57963c84 100644
--- a/drivers/base/regmap/Makefile
+++ b/drivers/base/regmap/Makefile
@@ -1,4 +1,4 @@
-obj-$(CONFIG_REGMAP) += regmap.o regcache.o regcache-indexed.o
+obj-$(CONFIG_REGMAP) += regmap.o regcache.o
 obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-lzo.o
 obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o
 obj-$(CONFIG_REGMAP_I2C) += regmap-i2c.o
diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 954f7b73238f..1a02b7537c8b 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -119,10 +119,7 @@ unsigned int regcache_get_val(const void *base, unsigned int idx,
 bool regcache_set_val(void *base, unsigned int idx,
 		      unsigned int val, unsigned int word_size);
 int regcache_lookup_reg(struct regmap *map, unsigned int reg);
-int regcache_insert_reg(struct regmap *map, unsigned int reg,
-			unsigned int val);
 
-extern struct regcache_ops regcache_indexed_ops;
 extern struct regcache_ops regcache_rbtree_ops;
 extern struct regcache_ops regcache_lzo_ops;
 
diff --git a/drivers/base/regmap/regcache-indexed.c b/drivers/base/regmap/regcache-indexed.c
deleted file mode 100644
index 507731ad8ec1..000000000000
--- a/drivers/base/regmap/regcache-indexed.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Register cache access API - indexed caching support
- *
- * Copyright 2011 Wolfson Microelectronics plc
- *
- * Author: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/slab.h>
-
-#include "internal.h"
-
-static int regcache_indexed_read(struct regmap *map, unsigned int reg,
-				 unsigned int *value)
-{
-	int ret;
-
-	ret = regcache_lookup_reg(map, reg);
-	if (ret >= 0)
-		*value = map->reg_defaults[ret].def;
-
-	return ret;
-}
-
-static int regcache_indexed_write(struct regmap *map, unsigned int reg,
-				  unsigned int value)
-{
-	int ret;
-
-	ret = regcache_lookup_reg(map, reg);
-	if (ret < 0)
-		return regcache_insert_reg(map, reg, value);
-	map->reg_defaults[ret].def = value;
-	return 0;
-}
-
-static int regcache_indexed_sync(struct regmap *map)
-{
-	unsigned int i;
-	int ret;
-
-	for (i = 0; i < map->num_reg_defaults; i++) {
-		ret = _regmap_write(map, map->reg_defaults[i].reg,
-				    map->reg_defaults[i].def);
-		if (ret < 0)
-			return ret;
-		dev_dbg(map->dev, "Synced register %#x, value %#x\n",
-			map->reg_defaults[i].reg,
-			map->reg_defaults[i].def);
-	}
-	return 0;
-}
-
-struct regcache_ops regcache_indexed_ops = {
-	.type = REGCACHE_INDEXED,
-	.name = "indexed",
-	.read = regcache_indexed_read,
-	.write = regcache_indexed_write,
-	.sync = regcache_indexed_sync
-};
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index e21eebd36afa..1ca2d7a1051f 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -19,7 +19,6 @@
 #include "internal.h"
 
 static const struct regcache_ops *cache_types[] = {
-	&regcache_indexed_ops,
 	&regcache_rbtree_ops,
 	&regcache_lzo_ops,
 };
@@ -420,22 +419,3 @@ int regcache_lookup_reg(struct regmap *map, unsigned int reg)
 	else
 		return -ENOENT;
 }
-
-int regcache_insert_reg(struct regmap *map, unsigned int reg,
-			unsigned int val)
-{
-	void *tmp;
-
-	tmp = krealloc(map->reg_defaults,
-		       (map->num_reg_defaults + 1) * sizeof(struct reg_default),
-		       GFP_KERNEL);
-	if (!tmp)
-		return -ENOMEM;
-	map->reg_defaults = tmp;
-	map->num_reg_defaults++;
-	map->reg_defaults[map->num_reg_defaults - 1].reg = reg;
-	map->reg_defaults[map->num_reg_defaults - 1].def = val;
-	sort(map->reg_defaults, map->num_reg_defaults,
-	     sizeof(struct reg_default), regcache_default_cmp, NULL);
-	return 0;
-}
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 458f15f4c37c..81dfe0acb20c 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -23,7 +23,6 @@ struct spi_device;
 /* An enum of all the supported cache types */
 enum regcache_type {
 	REGCACHE_NONE,
-	REGCACHE_INDEXED,
 	REGCACHE_RBTREE,
 	REGCACHE_COMPRESSED
 };
-- 
cgit v1.2.3


From 37f07023d30708b5da091fe6d6be9b60783c6d82 Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Thu, 17 Nov 2011 14:30:55 +0000
Subject: net: Change mii to ethtool advertisement function names

This patch implements advice by Ben Hutchings to change the mii side of
the function names to look more like the register whose values they
convert.  New LPA translation functions have been added as well.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2.c | 20 +++++------
 drivers/net/ethernet/broadcom/tg3.c  | 10 +++---
 drivers/net/ethernet/sun/niu.c       |  4 +--
 drivers/net/mii.c                    | 15 ++++-----
 drivers/net/phy/phy_device.c         |  4 +--
 include/linux/mii.h                  | 64 +++++++++++++++++++++++++++---------
 6 files changed, 75 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 66f6e7f654c3..83d8cefba8c0 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -2054,8 +2054,8 @@ __acquires(&bp->phy_lock)
 
 	if (bp->autoneg & AUTONEG_SPEED) {
 		u32 adv_reg, adv1000_reg;
-		u32 new_adv_reg = 0;
-		u32 new_adv1000_reg = 0;
+		u32 new_adv = 0;
+		u32 new_adv1000 = 0;
 
 		bnx2_read_phy(bp, bp->mii_adv, &adv_reg);
 		adv_reg &= (PHY_ALL_10_100_SPEED | ADVERTISE_PAUSE_CAP |
@@ -2064,18 +2064,18 @@ __acquires(&bp->phy_lock)
 		bnx2_read_phy(bp, MII_CTRL1000, &adv1000_reg);
 		adv1000_reg &= PHY_ALL_1000_SPEED;
 
-		new_adv_reg = ethtool_adv_to_mii_100bt(bp->advertising);
-		new_adv_reg |= ADVERTISE_CSMA;
-		new_adv_reg |= bnx2_phy_get_pause_adv(bp);
+		new_adv = ethtool_adv_to_mii_adv_t(bp->advertising);
+		new_adv |= ADVERTISE_CSMA;
+		new_adv |= bnx2_phy_get_pause_adv(bp);
 
-		new_adv1000_reg |= ethtool_adv_to_mii_1000T(bp->advertising);
+		new_adv1000 |= ethtool_adv_to_mii_ctrl1000_t(bp->advertising);
 
-		if ((adv1000_reg != new_adv1000_reg) ||
-			(adv_reg != new_adv_reg) ||
+		if ((adv1000_reg != new_adv1000) ||
+			(adv_reg != new_adv) ||
 			((bmcr & BMCR_ANENABLE) == 0)) {
 
-			bnx2_write_phy(bp, bp->mii_adv, new_adv_reg);
-			bnx2_write_phy(bp, MII_CTRL1000, new_adv1000_reg);
+			bnx2_write_phy(bp, bp->mii_adv, new_adv);
+			bnx2_write_phy(bp, MII_CTRL1000, new_adv1000);
 			bnx2_write_phy(bp, bp->mii_bmcr, BMCR_ANRESTART |
 				BMCR_ANENABLE);
 		}
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 024ca1d4d028..47c0e3a1f58d 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -3594,7 +3594,7 @@ static int tg3_phy_autoneg_cfg(struct tg3 *tp, u32 advertise, u32 flowctrl)
 	u32 val, new_adv;
 
 	new_adv = ADVERTISE_CSMA;
-	new_adv |= ethtool_adv_to_mii_100bt(advertise);
+	new_adv |= ethtool_adv_to_mii_adv_t(advertise);
 	new_adv |= tg3_advert_flowctrl_1000T(flowctrl);
 
 	err = tg3_writephy(tp, MII_ADVERTISE, new_adv);
@@ -3604,7 +3604,7 @@ static int tg3_phy_autoneg_cfg(struct tg3 *tp, u32 advertise, u32 flowctrl)
 	if (tp->phy_flags & TG3_PHYFLG_10_100_ONLY)
 		goto done;
 
-	new_adv = ethtool_adv_to_mii_1000T(advertise);
+	new_adv = ethtool_adv_to_mii_ctrl1000_t(advertise);
 
 	if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 ||
 	    tp->pci_chip_rev_id == CHIPREV_ID_5701_B0)
@@ -3778,7 +3778,7 @@ static int tg3_copper_is_advertising_all(struct tg3 *tp, u32 mask)
 {
 	u32 adv_reg, all_mask = 0;
 
-	all_mask = ethtool_adv_to_mii_100bt(mask);
+	all_mask = ethtool_adv_to_mii_adv_t(mask);
 
 	if (tg3_readphy(tp, MII_ADVERTISE, &adv_reg))
 		return 0;
@@ -3789,7 +3789,7 @@ static int tg3_copper_is_advertising_all(struct tg3 *tp, u32 mask)
 	if (!(tp->phy_flags & TG3_PHYFLG_10_100_ONLY)) {
 		u32 tg3_ctrl;
 
-		all_mask = ethtool_adv_to_mii_1000T(mask);
+		all_mask = ethtool_adv_to_mii_ctrl1000_t(mask);
 
 		if (tg3_readphy(tp, MII_CTRL1000, &tg3_ctrl))
 			return 0;
@@ -4889,7 +4889,7 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, int force_reset)
 				 ADVERTISE_SLCT);
 
 		newadv |= tg3_advert_flowctrl_1000X(tp->link_config.flowctrl);
-		newadv |= ethtool_adv_to_mii_1000X(tp->link_config.advertising);
+		newadv |= ethtool_adv_to_mii_adv_x(tp->link_config.advertising);
 
 		if ((newadv != adv) || !(bmcr & BMCR_ANENABLE)) {
 			tg3_writephy(tp, MII_ADVERTISE, newadv);
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 9997be525089..680b107fdabd 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -1151,8 +1151,8 @@ static int link_status_mii(struct niu *np, int *link_up_p)
 		supported |= SUPPORTED_1000baseT_Full;
 	lp->supported = supported;
 
-	advertising = mii_adv_to_ethtool_100bt(advert);
-	advertising |= mii_adv_to_ethtool_1000T(ctrl1000);
+	advertising = mii_adv_to_ethtool_adv_t(advert);
+	advertising |= mii_ctrl1000_to_ethtool_adv_t(ctrl1000);
 
 	if (bmcr & BMCR_ANENABLE) {
 		int neg, neg1000;
diff --git a/drivers/net/mii.c b/drivers/net/mii.c
index d0a296272713..c70c2332d15e 100644
--- a/drivers/net/mii.c
+++ b/drivers/net/mii.c
@@ -35,14 +35,11 @@
 
 static u32 mii_get_an(struct mii_if_info *mii, u16 addr)
 {
-	u32 result = 0;
 	int advert;
 
 	advert = mii->mdio_read(mii->dev, mii->phy_id, addr);
-	if (advert & LPA_LPACK)
-		result |= ADVERTISED_Autoneg;
 
-	return result | mii_adv_to_ethtool_100bt(advert);
+	return mii_lpa_to_ethtool_lpa_t(advert);
 }
 
 /**
@@ -93,12 +90,13 @@ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
 
 		ecmd->advertising |= mii_get_an(mii, MII_ADVERTISE);
 		if (mii->supports_gmii)
-			ecmd->advertising |= mii_adv_to_ethtool_1000T(ctrl1000);
+			ecmd->advertising |=
+					mii_ctrl1000_to_ethtool_adv_t(ctrl1000);
 
 		if (bmsr & BMSR_ANEGCOMPLETE) {
 			ecmd->lp_advertising = mii_get_an(mii, MII_LPA);
 			ecmd->lp_advertising |=
-					     mii_lpa_to_ethtool_1000T(stat1000);
+					mii_stat1000_to_ethtool_lpa_t(stat1000);
 		} else {
 			ecmd->lp_advertising = 0;
 		}
@@ -186,10 +184,11 @@ int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
 			advert2 = mii->mdio_read(dev, mii->phy_id, MII_CTRL1000);
 			tmp2 = advert2 & ~(ADVERTISE_1000HALF | ADVERTISE_1000FULL);
 		}
-		tmp |= ethtool_adv_to_mii_100bt(ecmd->advertising);
+		tmp |= ethtool_adv_to_mii_adv_t(ecmd->advertising);
 
 		if (mii->supports_gmii)
-			tmp2 |= ethtool_adv_to_mii_1000T(ecmd->advertising);
+			tmp2 |=
+			      ethtool_adv_to_mii_ctrl1000_t(ecmd->advertising);
 		if (advert != tmp) {
 			mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp);
 			mii->advertising = tmp;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index edb905f80115..f320f466f03b 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -565,7 +565,7 @@ static int genphy_config_advert(struct phy_device *phydev)
 
 	adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP |
 		 ADVERTISE_PAUSE_ASYM);
-	adv |= ethtool_adv_to_mii_100bt(advertise);
+	adv |= ethtool_adv_to_mii_adv_t(advertise);
 
 	if (adv != oldadv) {
 		err = phy_write(phydev, MII_ADVERTISE, adv);
@@ -584,7 +584,7 @@ static int genphy_config_advert(struct phy_device *phydev)
 			return adv;
 
 		adv &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF);
-		adv |= ethtool_adv_to_mii_1000T(advertise);
+		adv |= ethtool_adv_to_mii_ctrl1000_t(advertise);
 
 		if (adv != oldadv) {
 			err = phy_write(phydev, MII_CTRL1000, adv);
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 6697b9112014..2783eca629a0 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -241,14 +241,14 @@ static inline unsigned int mii_duplex (unsigned int duplex_lock,
 }
 
 /**
- * ethtool_adv_to_mii_100bt
+ * ethtool_adv_to_mii_adv_t
  * @ethadv: the ethtool advertisement settings
  *
  * A small helper function that translates ethtool advertisement
  * settings to phy autonegotiation advertisements for the
  * MII_ADVERTISE register.
  */
-static inline u32 ethtool_adv_to_mii_100bt(u32 ethadv)
+static inline u32 ethtool_adv_to_mii_adv_t(u32 ethadv)
 {
 	u32 result = 0;
 
@@ -269,13 +269,13 @@ static inline u32 ethtool_adv_to_mii_100bt(u32 ethadv)
 }
 
 /**
- * mii_adv_to_ethtool_100bt
+ * mii_adv_to_ethtool_adv_t
  * @adv: value of the MII_ADVERTISE register
  *
  * A small helper function that translates MII_ADVERTISE bits
  * to ethtool advertisement settings.
  */
-static inline u32 mii_adv_to_ethtool_100bt(u32 adv)
+static inline u32 mii_adv_to_ethtool_adv_t(u32 adv)
 {
 	u32 result = 0;
 
@@ -296,14 +296,14 @@ static inline u32 mii_adv_to_ethtool_100bt(u32 adv)
 }
 
 /**
- * ethtool_adv_to_mii_1000T
+ * ethtool_adv_to_mii_ctrl1000_t
  * @ethadv: the ethtool advertisement settings
  *
  * A small helper function that translates ethtool advertisement
  * settings to phy autonegotiation advertisements for the
  * MII_CTRL1000 register when in 1000T mode.
  */
-static inline u32 ethtool_adv_to_mii_1000T(u32 ethadv)
+static inline u32 ethtool_adv_to_mii_ctrl1000_t(u32 ethadv)
 {
 	u32 result = 0;
 
@@ -316,14 +316,14 @@ static inline u32 ethtool_adv_to_mii_1000T(u32 ethadv)
 }
 
 /**
- * mii_adv_to_ethtool_1000T
+ * mii_ctrl1000_to_ethtool_adv_t
  * @adv: value of the MII_CTRL1000 register
  *
  * A small helper function that translates MII_CTRL1000
  * bits, when in 1000Base-T mode, to ethtool
  * advertisement settings.
  */
-static inline u32 mii_adv_to_ethtool_1000T(u32 adv)
+static inline u32 mii_ctrl1000_to_ethtool_adv_t(u32 adv)
 {
 	u32 result = 0;
 
@@ -335,17 +335,33 @@ static inline u32 mii_adv_to_ethtool_1000T(u32 adv)
 	return result;
 }
 
-#define mii_lpa_to_ethtool_100bt(lpa)	mii_adv_to_ethtool_100bt(lpa)
+/**
+ * mii_lpa_to_ethtool_lpa_t
+ * @adv: value of the MII_LPA register
+ *
+ * A small helper function that translates MII_LPA
+ * bits, when in 1000Base-T mode, to ethtool
+ * LP advertisement settings.
+ */
+static inline u32 mii_lpa_to_ethtool_lpa_t(u32 lpa)
+{
+	u32 result = 0;
+
+	if (lpa & LPA_LPACK)
+		result |= ADVERTISED_Autoneg;
+
+	return result | mii_adv_to_ethtool_adv_t(lpa);
+}
 
 /**
- * mii_lpa_to_ethtool_1000T
+ * mii_stat1000_to_ethtool_lpa_t
  * @adv: value of the MII_STAT1000 register
  *
  * A small helper function that translates MII_STAT1000
  * bits, when in 1000Base-T mode, to ethtool
  * advertisement settings.
  */
-static inline u32 mii_lpa_to_ethtool_1000T(u32 lpa)
+static inline u32 mii_stat1000_to_ethtool_lpa_t(u32 lpa)
 {
 	u32 result = 0;
 
@@ -358,14 +374,14 @@ static inline u32 mii_lpa_to_ethtool_1000T(u32 lpa)
 }
 
 /**
- * ethtool_adv_to_mii_1000X
+ * ethtool_adv_to_mii_adv_x
  * @ethadv: the ethtool advertisement settings
  *
  * A small helper function that translates ethtool advertisement
  * settings to phy autonegotiation advertisements for the
  * MII_CTRL1000 register when in 1000Base-X mode.
  */
-static inline u32 ethtool_adv_to_mii_1000X(u32 ethadv)
+static inline u32 ethtool_adv_to_mii_adv_x(u32 ethadv)
 {
 	u32 result = 0;
 
@@ -382,14 +398,14 @@ static inline u32 ethtool_adv_to_mii_1000X(u32 ethadv)
 }
 
 /**
- * mii_adv_to_ethtool_1000X
+ * mii_adv_to_ethtool_adv_x
  * @adv: value of the MII_CTRL1000 register
  *
  * A small helper function that translates MII_CTRL1000
  * bits, when in 1000Base-X mode, to ethtool
  * advertisement settings.
  */
-static inline u32 mii_adv_to_ethtool_1000X(u32 adv)
+static inline u32 mii_adv_to_ethtool_adv_x(u32 adv)
 {
 	u32 result = 0;
 
@@ -405,6 +421,24 @@ static inline u32 mii_adv_to_ethtool_1000X(u32 adv)
 	return result;
 }
 
+/**
+ * mii_lpa_to_ethtool_lpa_x
+ * @adv: value of the MII_LPA register
+ *
+ * A small helper function that translates MII_LPA
+ * bits, when in 1000Base-X mode, to ethtool
+ * LP advertisement settings.
+ */
+static inline u32 mii_lpa_to_ethtool_lpa_x(u32 lpa)
+{
+	u32 result = 0;
+
+	if (lpa & LPA_LPACK)
+		result |= ADVERTISED_Autoneg;
+
+	return result | mii_adv_to_ethtool_adv_x(lpa);
+}
+
 /**
  * mii_advertise_flowctrl - get flow control advertisement flags
  * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both)
-- 
cgit v1.2.3


From a0acae0e886d44bd5ce6d2f173c1ace0fcf0d9f6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:22 -0800
Subject: freezer: unexport refrigerator() and update try_to_freeze() slightly

There is no reason to export two functions for entering the
refrigerator.  Calling refrigerator() instead of try_to_freeze()
doesn't save anything noticeable or removes any race condition.

* Rename refrigerator() to __refrigerator() and make it return bool
  indicating whether it scheduled out for freezing.

* Update try_to_freeze() to return bool and relay the return value of
  __refrigerator() if freezing().

* Convert all refrigerator() users to try_to_freeze().

* Update documentation accordingly.

* While at it, add might_sleep() to try_to_freeze().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Samuel Ortiz <samuel@sortiz.org>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jan Kara <jack@suse.cz>
Cc: KONISHI Ryusuke <konishi.ryusuke@lab.ntt.co.jp>
Cc: Christoph Hellwig <hch@infradead.org>
---
 Documentation/power/freezing-of-tasks.txt | 12 ++++++------
 drivers/net/irda/stir4200.c               |  2 +-
 fs/btrfs/async-thread.c                   |  2 +-
 fs/btrfs/disk-io.c                        |  8 ++------
 fs/ext4/super.c                           |  3 +--
 fs/gfs2/log.c                             |  4 ++--
 fs/gfs2/quota.c                           |  4 ++--
 fs/jbd/journal.c                          |  2 +-
 fs/jbd2/journal.c                         |  2 +-
 fs/jfs/jfs_logmgr.c                       |  2 +-
 fs/jfs/jfs_txnmgr.c                       |  4 ++--
 fs/nilfs2/segment.c                       |  2 +-
 fs/xfs/xfs_buf.c                          |  2 +-
 include/linux/freezer.h                   | 17 ++++++++---------
 kernel/freezer.c                          | 10 +++++++---
 15 files changed, 37 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt
index 587e0828053f..3ab9fbd2800a 100644
--- a/Documentation/power/freezing-of-tasks.txt
+++ b/Documentation/power/freezing-of-tasks.txt
@@ -21,7 +21,7 @@ freeze_processes() (defined in kernel/power/process.c) is called.  It executes
 try_to_freeze_tasks() that sets TIF_FREEZE for all of the freezable tasks and
 either wakes them up, if they are kernel threads, or sends fake signals to them,
 if they are user space processes.  A task that has TIF_FREEZE set, should react
-to it by calling the function called refrigerator() (defined in
+to it by calling the function called __refrigerator() (defined in
 kernel/freezer.c), which sets the task's PF_FROZEN flag, changes its state
 to TASK_UNINTERRUPTIBLE and makes it loop until PF_FROZEN is cleared for it.
 Then, we say that the task is 'frozen' and therefore the set of functions
@@ -29,10 +29,10 @@ handling this mechanism is referred to as 'the freezer' (these functions are
 defined in kernel/power/process.c, kernel/freezer.c & include/linux/freezer.h).
 User space processes are generally frozen before kernel threads.
 
-It is not recommended to call refrigerator() directly.  Instead, it is
-recommended to use the try_to_freeze() function (defined in
-include/linux/freezer.h), that checks the task's TIF_FREEZE flag and makes the
-task enter refrigerator() if the flag is set.
+__refrigerator() must not be called directly.  Instead, use the
+try_to_freeze() function (defined in include/linux/freezer.h), that checks
+the task's TIF_FREEZE flag and makes the task enter __refrigerator() if the
+flag is set.
 
 For user space processes try_to_freeze() is called automatically from the
 signal-handling code, but the freezable kernel threads need to call it
@@ -61,7 +61,7 @@ wait_event_freezable() and wait_event_freezable_timeout() macros.
 After the system memory state has been restored from a hibernation image and
 devices have been reinitialized, the function thaw_processes() is called in
 order to clear the PF_FROZEN flag for each frozen task.  Then, the tasks that
-have been frozen leave refrigerator() and continue running.
+have been frozen leave __refrigerator() and continue running.
 
 III. Which kernel threads are freezable?
 
diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c
index 41c96b3d8152..e880c79d7bd8 100644
--- a/drivers/net/irda/stir4200.c
+++ b/drivers/net/irda/stir4200.c
@@ -750,7 +750,7 @@ static int stir_transmit_thread(void *arg)
 
 			write_reg(stir, REG_CTRL1, CTRL1_TXPWD|CTRL1_RXPWD);
 
-			refrigerator();
+			try_to_freeze();
 
 			if (change_speed(stir, stir->speed))
 				break;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7ec14097fef1..98ab240072e5 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -340,7 +340,7 @@ again:
 		if (freezing(current)) {
 			worker->working = 0;
 			spin_unlock_irq(&worker->lock);
-			refrigerator();
+			try_to_freeze();
 		} else {
 			spin_unlock_irq(&worker->lock);
 			if (!kthread_should_stop()) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 62afe5c5694e..622654fe051f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1579,9 +1579,7 @@ static int cleaner_kthread(void *arg)
 			btrfs_run_defrag_inodes(root->fs_info);
 		}
 
-		if (freezing(current)) {
-			refrigerator();
-		} else {
+		if (!try_to_freeze()) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (!kthread_should_stop())
 				schedule();
@@ -1635,9 +1633,7 @@ sleep:
 		wake_up_process(root->fs_info->cleaner_kthread);
 		mutex_unlock(&root->fs_info->transaction_kthread_mutex);
 
-		if (freezing(current)) {
-			refrigerator();
-		} else {
+		if (!try_to_freeze()) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (!kthread_should_stop() &&
 			    !btrfs_transaction_blocked(root->fs_info))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9953d80145ad..877350ef0253 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2882,8 +2882,7 @@ cont_thread:
 		}
 		mutex_unlock(&eli->li_list_mtx);
 
-		if (freezing(current))
-			refrigerator();
+		try_to_freeze();
 
 		cur = jiffies;
 		if ((time_after_eq(cur, next_wakeup)) ||
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 598646434362..8154d42e4647 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -951,8 +951,8 @@ int gfs2_logd(void *data)
 			wake_up(&sdp->sd_log_waitq);
 
 		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
-		if (freezing(current))
-			refrigerator();
+
+		try_to_freeze();
 
 		do {
 			prepare_to_wait(&sdp->sd_logd_waitq, &wait,
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 7e528dc14f85..d49669e92652 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1427,8 +1427,8 @@ int gfs2_quotad(void *data)
 		/* Check for & recover partially truncated inodes */
 		quotad_check_trunc_list(sdp);
 
-		if (freezing(current))
-			refrigerator();
+		try_to_freeze();
+
 		t = min(quotad_timeo, statfs_timeo);
 
 		prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index fea8dd661d2b..a96cff0c5f1d 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -166,7 +166,7 @@ loop:
 		 */
 		jbd_debug(1, "Now suspending kjournald\n");
 		spin_unlock(&journal->j_state_lock);
-		refrigerator();
+		try_to_freeze();
 		spin_lock(&journal->j_state_lock);
 	} else {
 		/*
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0fa0123151d3..c0a5f9f1b127 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -173,7 +173,7 @@ loop:
 		 */
 		jbd_debug(1, "Now suspending kjournald2\n");
 		write_unlock(&journal->j_state_lock);
-		refrigerator();
+		try_to_freeze();
 		write_lock(&journal->j_state_lock);
 	} else {
 		/*
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index cc5f811ed383..2eb952c41a69 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2349,7 +2349,7 @@ int jfsIOWait(void *arg)
 
 		if (freezing(current)) {
 			spin_unlock_irq(&log_redrive_lock);
-			refrigerator();
+			try_to_freeze();
 		} else {
 			set_current_state(TASK_INTERRUPTIBLE);
 			spin_unlock_irq(&log_redrive_lock);
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index af9606057dde..bb8b661bcc50 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2800,7 +2800,7 @@ int jfs_lazycommit(void *arg)
 
 		if (freezing(current)) {
 			LAZY_UNLOCK(flags);
-			refrigerator();
+			try_to_freeze();
 		} else {
 			DECLARE_WAITQUEUE(wq, current);
 
@@ -2994,7 +2994,7 @@ int jfs_sync(void *arg)
 
 		if (freezing(current)) {
 			TXN_UNLOCK();
-			refrigerator();
+			try_to_freeze();
 		} else {
 			set_current_state(TASK_INTERRUPTIBLE);
 			TXN_UNLOCK();
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index bb24ab6c282f..0e72ad6f22aa 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2470,7 +2470,7 @@ static int nilfs_segctor_thread(void *arg)
 
 	if (freezing(current)) {
 		spin_unlock(&sci->sc_state_lock);
-		refrigerator();
+		try_to_freeze();
 		spin_lock(&sci->sc_state_lock);
 	} else {
 		DEFINE_WAIT(wait);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index cf0ac056815f..018829936d6d 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1703,7 +1703,7 @@ xfsbufd(
 
 		if (unlikely(freezing(current))) {
 			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
-			refrigerator();
+			try_to_freeze();
 		} else {
 			clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
 		}
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index a5386e3ee756..7a9427e9fe47 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -47,18 +47,17 @@ static inline bool should_send_signal(struct task_struct *p)
 /* Takes and releases task alloc lock using task_lock() */
 extern int thaw_process(struct task_struct *p);
 
-extern void refrigerator(void);
+extern bool __refrigerator(void);
 extern int freeze_processes(void);
 extern int freeze_kernel_threads(void);
 extern void thaw_processes(void);
 
-static inline int try_to_freeze(void)
+static inline bool try_to_freeze(void)
 {
-	if (freezing(current)) {
-		refrigerator();
-		return 1;
-	} else
-		return 0;
+	might_sleep();
+	if (likely(!freezing(current)))
+		return false;
+	return __refrigerator();
 }
 
 extern bool freeze_task(struct task_struct *p, bool sig_only);
@@ -181,12 +180,12 @@ static inline void set_freeze_flag(struct task_struct *p) {}
 static inline void clear_freeze_flag(struct task_struct *p) {}
 static inline int thaw_process(struct task_struct *p) { return 1; }
 
-static inline void refrigerator(void) {}
+static inline bool __refrigerator(void) { return false; }
 static inline int freeze_processes(void) { return -ENOSYS; }
 static inline int freeze_kernel_threads(void) { return -ENOSYS; }
 static inline void thaw_processes(void) {}
 
-static inline int try_to_freeze(void) { return 0; }
+static inline bool try_to_freeze(void) { return false; }
 
 static inline void freezer_do_not_count(void) {}
 static inline void freezer_count(void) {}
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 3f460104a9d6..732f14f5944f 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -23,10 +23,11 @@ static inline void frozen_process(void)
 }
 
 /* Refrigerator is place where frozen processes are stored :-). */
-void refrigerator(void)
+bool __refrigerator(void)
 {
 	/* Hmm, should we be allowed to suspend when there are realtime
 	   processes around? */
+	bool was_frozen = false;
 	long save;
 
 	task_lock(current);
@@ -35,7 +36,7 @@ void refrigerator(void)
 		task_unlock(current);
 	} else {
 		task_unlock(current);
-		return;
+		return was_frozen;
 	}
 	save = current->state;
 	pr_debug("%s entered refrigerator\n", current->comm);
@@ -51,6 +52,7 @@ void refrigerator(void)
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (!frozen(current))
 			break;
+		was_frozen = true;
 		schedule();
 	}
 
@@ -65,8 +67,10 @@ void refrigerator(void)
 	 * synchronization which depends on ordered task state change.
 	 */
 	set_current_state(save);
+
+	return was_frozen;
 }
-EXPORT_SYMBOL(refrigerator);
+EXPORT_SYMBOL(__refrigerator);
 
 static void fake_signal_wake_up(struct task_struct *p)
 {
-- 
cgit v1.2.3


From 8a32c441c1609f80e55df75422324a1151208f40 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:23 -0800
Subject: freezer: implement and use kthread_freezable_should_stop()

Writeback and thinkpad_acpi have been using thaw_process() to prevent
deadlock between the freezer and kthread_stop(); unfortunately, this
is inherently racy - nothing prevents freezing from happening between
thaw_process() and kthread_stop().

This patch implements kthread_freezable_should_stop() which enters
refrigerator if necessary but is guaranteed to return if
kthread_stop() is invoked.  Both thaw_process() users are converted to
use the new function.

Note that this deadlock condition exists for many of freezable
kthreads.  They need to be converted to use the new should_stop or
freezable workqueue.

Tested with synthetic test case.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Henrique de Moraes Holschuh <ibm-acpi@hmh.eng.br>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Oleg Nesterov <oleg@redhat.com>
---
 drivers/platform/x86/thinkpad_acpi.c | 15 ++++++---------
 fs/fs-writeback.c                    |  4 +---
 include/linux/freezer.h              |  6 +++---
 include/linux/kthread.h              |  1 +
 kernel/freezer.c                     |  6 ++++--
 kernel/kthread.c                     | 25 +++++++++++++++++++++++++
 mm/backing-dev.c                     |  8 ++------
 7 files changed, 42 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 7b828680b21d..4b11fc91fa7d 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -2456,8 +2456,9 @@ static int hotkey_kthread(void *data)
 	u32 poll_mask, event_mask;
 	unsigned int si, so;
 	unsigned long t;
-	unsigned int change_detector, must_reset;
+	unsigned int change_detector;
 	unsigned int poll_freq;
+	bool was_frozen;
 
 	mutex_lock(&hotkey_thread_mutex);
 
@@ -2488,14 +2489,14 @@ static int hotkey_kthread(void *data)
 				t = 100;	/* should never happen... */
 		}
 		t = msleep_interruptible(t);
-		if (unlikely(kthread_should_stop()))
+		if (unlikely(kthread_freezable_should_stop(&was_frozen)))
 			break;
-		must_reset = try_to_freeze();
-		if (t > 0 && !must_reset)
+
+		if (t > 0 && !was_frozen)
 			continue;
 
 		mutex_lock(&hotkey_thread_data_mutex);
-		if (must_reset || hotkey_config_change != change_detector) {
+		if (was_frozen || hotkey_config_change != change_detector) {
 			/* forget old state on thaw or config change */
 			si = so;
 			t = 0;
@@ -2528,10 +2529,6 @@ exit:
 static void hotkey_poll_stop_sync(void)
 {
 	if (tpacpi_hotkey_task) {
-		if (frozen(tpacpi_hotkey_task) ||
-		    freezing(tpacpi_hotkey_task))
-			thaw_process(tpacpi_hotkey_task);
-
 		kthread_stop(tpacpi_hotkey_task);
 		tpacpi_hotkey_task = NULL;
 		mutex_lock(&hotkey_thread_mutex);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 73c3992b2bb4..271fde50f0ee 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -947,7 +947,7 @@ int bdi_writeback_thread(void *data)
 
 	trace_writeback_thread_start(bdi);
 
-	while (!kthread_should_stop()) {
+	while (!kthread_freezable_should_stop(NULL)) {
 		/*
 		 * Remove own delayed wake-up timer, since we are already awake
 		 * and we'll take care of the preriodic write-back.
@@ -977,8 +977,6 @@ int bdi_writeback_thread(void *data)
 			 */
 			schedule();
 		}
-
-		try_to_freeze();
 	}
 
 	/* Flush any work that raced with us exiting */
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 7a9427e9fe47..d02b78448b0f 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -47,7 +47,7 @@ static inline bool should_send_signal(struct task_struct *p)
 /* Takes and releases task alloc lock using task_lock() */
 extern int thaw_process(struct task_struct *p);
 
-extern bool __refrigerator(void);
+extern bool __refrigerator(bool check_kthr_stop);
 extern int freeze_processes(void);
 extern int freeze_kernel_threads(void);
 extern void thaw_processes(void);
@@ -57,7 +57,7 @@ static inline bool try_to_freeze(void)
 	might_sleep();
 	if (likely(!freezing(current)))
 		return false;
-	return __refrigerator();
+	return __refrigerator(false);
 }
 
 extern bool freeze_task(struct task_struct *p, bool sig_only);
@@ -180,7 +180,7 @@ static inline void set_freeze_flag(struct task_struct *p) {}
 static inline void clear_freeze_flag(struct task_struct *p) {}
 static inline int thaw_process(struct task_struct *p) { return 1; }
 
-static inline bool __refrigerator(void) { return false; }
+static inline bool __refrigerator(bool check_kthr_stop) { return false; }
 static inline int freeze_processes(void) { return -ENOSYS; }
 static inline int freeze_kernel_threads(void) { return -ENOSYS; }
 static inline void thaw_processes(void) {}
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 5cac19b3a266..0714b24c0e45 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -35,6 +35,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 void kthread_bind(struct task_struct *k, unsigned int cpu);
 int kthread_stop(struct task_struct *k);
 int kthread_should_stop(void);
+bool kthread_freezable_should_stop(bool *was_frozen);
 void *kthread_data(struct task_struct *k);
 
 int kthreadd(void *unused);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 732f14f5944f..b83c30e9483a 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -9,6 +9,7 @@
 #include <linux/export.h>
 #include <linux/syscalls.h>
 #include <linux/freezer.h>
+#include <linux/kthread.h>
 
 /*
  * freezing is complete, mark current process as frozen
@@ -23,7 +24,7 @@ static inline void frozen_process(void)
 }
 
 /* Refrigerator is place where frozen processes are stored :-). */
-bool __refrigerator(void)
+bool __refrigerator(bool check_kthr_stop)
 {
 	/* Hmm, should we be allowed to suspend when there are realtime
 	   processes around? */
@@ -50,7 +51,8 @@ bool __refrigerator(void)
 
 	for (;;) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (!frozen(current))
+		if (!frozen(current) ||
+		    (check_kthr_stop && kthread_should_stop()))
 			break;
 		was_frozen = true;
 		schedule();
diff --git a/kernel/kthread.c b/kernel/kthread.c
index b6d216a92639..1c36deaae2f1 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -58,6 +58,31 @@ int kthread_should_stop(void)
 }
 EXPORT_SYMBOL(kthread_should_stop);
 
+/**
+ * kthread_freezable_should_stop - should this freezable kthread return now?
+ * @was_frozen: optional out parameter, indicates whether %current was frozen
+ *
+ * kthread_should_stop() for freezable kthreads, which will enter
+ * refrigerator if necessary.  This function is safe from kthread_stop() /
+ * freezer deadlock and freezable kthreads should use this function instead
+ * of calling try_to_freeze() directly.
+ */
+bool kthread_freezable_should_stop(bool *was_frozen)
+{
+	bool frozen = false;
+
+	might_sleep();
+
+	if (unlikely(freezing(current)))
+		frozen = __refrigerator(true);
+
+	if (was_frozen)
+		*was_frozen = frozen;
+
+	return kthread_should_stop();
+}
+EXPORT_SYMBOL_GPL(kthread_freezable_should_stop);
+
 /**
  * kthread_data - return data value specified on kthread creation
  * @task: kthread task in question
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 71034f41a2ba..7ba8feae11b8 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -600,14 +600,10 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 
 	/*
 	 * Finally, kill the kernel thread. We don't need to be RCU
-	 * safe anymore, since the bdi is gone from visibility. Force
-	 * unfreeze of the thread before calling kthread_stop(), otherwise
-	 * it would never exet if it is currently stuck in the refrigerator.
+	 * safe anymore, since the bdi is gone from visibility.
 	 */
-	if (bdi->wb.task) {
-		thaw_process(bdi->wb.task);
+	if (bdi->wb.task)
 		kthread_stop(bdi->wb.task);
-	}
 }
 
 /*
-- 
cgit v1.2.3


From a5be2d0d1a8746e7be5210e3d6b904455000443c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:23 -0800
Subject: freezer: rename thaw_process() to __thaw_task() and simplify the
 implementation

thaw_process() now has only internal users - system and cgroup
freezers.  Remove the unnecessary return value, rename, unexport and
collapse __thaw_process() into it.  This will help further updates to
the freezer code.

-v3: oom_kill grew a use of thaw_process() while this patch was
     pending.  Convert it to use __thaw_task() for now.  In the longer
     term, this should be handled by allowing tasks to die if killed
     even if it's frozen.

-v2: minor style update as suggested by Matt.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Paul Menage <menage@google.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
---
 include/linux/freezer.h |  3 +--
 kernel/cgroup_freezer.c |  7 +++----
 kernel/freezer.c        | 31 ++++++++++++-------------------
 kernel/power/process.c  |  2 +-
 mm/oom_kill.c           |  2 +-
 5 files changed, 18 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index d02b78448b0f..ba4f512d2938 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -45,7 +45,7 @@ static inline bool should_send_signal(struct task_struct *p)
 }
 
 /* Takes and releases task alloc lock using task_lock() */
-extern int thaw_process(struct task_struct *p);
+extern void __thaw_task(struct task_struct *t);
 
 extern bool __refrigerator(bool check_kthr_stop);
 extern int freeze_processes(void);
@@ -178,7 +178,6 @@ static inline int frozen(struct task_struct *p) { return 0; }
 static inline int freezing(struct task_struct *p) { return 0; }
 static inline void set_freeze_flag(struct task_struct *p) {}
 static inline void clear_freeze_flag(struct task_struct *p) {}
-static inline int thaw_process(struct task_struct *p) { return 1; }
 
 static inline bool __refrigerator(bool check_kthr_stop) { return false; }
 static inline int freeze_processes(void) { return -ENOSYS; }
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 5e828a2ca8e6..a6d405a86ee0 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -130,7 +130,7 @@ struct cgroup_subsys freezer_subsys;
  *   write_lock css_set_lock (cgroup iterator start)
  *    task->alloc_lock
  *   read_lock css_set_lock (cgroup iterator start)
- *    task->alloc_lock (inside thaw_process(), prevents race with refrigerator())
+ *    task->alloc_lock (inside __thaw_task(), prevents race with refrigerator())
  *     sighand->siglock
  */
 static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
@@ -300,9 +300,8 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 	struct task_struct *task;
 
 	cgroup_iter_start(cgroup, &it);
-	while ((task = cgroup_iter_next(cgroup, &it))) {
-		thaw_process(task);
-	}
+	while ((task = cgroup_iter_next(cgroup, &it)))
+		__thaw_task(task);
 	cgroup_iter_end(cgroup, &it);
 
 	freezer->state = CGROUP_THAWED;
diff --git a/kernel/freezer.c b/kernel/freezer.c
index b83c30e9483a..c851d588e29f 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -145,18 +145,8 @@ void cancel_freezing(struct task_struct *p)
 	}
 }
 
-static int __thaw_process(struct task_struct *p)
-{
-	if (frozen(p)) {
-		p->flags &= ~PF_FROZEN;
-		return 1;
-	}
-	clear_freeze_flag(p);
-	return 0;
-}
-
 /*
- * Wake up a frozen process
+ * Wake up a frozen task
  *
  * task_lock() is needed to prevent the race with refrigerator() which may
  * occur if the freezing of tasks fails.  Namely, without the lock, if the
@@ -164,15 +154,18 @@ static int __thaw_process(struct task_struct *p)
  * refrigerator() could call frozen_process(), in which case the task would be
  * frozen and no one would thaw it.
  */
-int thaw_process(struct task_struct *p)
+void __thaw_task(struct task_struct *p)
 {
+	bool was_frozen;
+
 	task_lock(p);
-	if (__thaw_process(p) == 1) {
-		task_unlock(p);
-		wake_up_process(p);
-		return 1;
-	}
+	was_frozen = frozen(p);
+	if (was_frozen)
+		p->flags &= ~PF_FROZEN;
+	else
+		clear_freeze_flag(p);
 	task_unlock(p);
-	return 0;
+
+	if (was_frozen)
+		wake_up_process(p);
 }
-EXPORT_SYMBOL(thaw_process);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index addbbe5531bc..fe2787207f00 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -186,7 +186,7 @@ static void thaw_tasks(bool nosig_only)
 		if (cgroup_freezing_or_frozen(p))
 			continue;
 
-		thaw_process(p);
+		__thaw_task(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 76f2c5ae908e..3134ee2fb2e8 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -328,7 +328,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 		 */
 		if (test_tsk_thread_flag(p, TIF_MEMDIE)) {
 			if (unlikely(frozen(p)))
-				thaw_process(p);
+				__thaw_task(p);
 			return ERR_PTR(-1UL);
 		}
 		if (!p->mm)
-- 
cgit v1.2.3


From 376fede80e74d98b49d1ba9ac18f23c9fd026ddd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:24 -0800
Subject: freezer: kill PF_FREEZING

With the previous changes, there's no meaningful difference between
PF_FREEZING and PF_FROZEN.  Remove PF_FREEZING and use PF_FROZEN
instead in task_contributes_to_load().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/sched.h | 3 +--
 kernel/freezer.c      | 6 ------
 2 files changed, 1 insertion(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 68daf4f27e2c..d12bd03b688f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -220,7 +220,7 @@ extern char ___assert_task_state[1 - 2*!!(
 			((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
 #define task_contributes_to_load(task)	\
 				((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
-				 (task->flags & PF_FREEZING) == 0)
+				 (task->flags & PF_FROZEN) == 0)
 
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
@@ -1773,7 +1773,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #define PF_MEMALLOC	0x00000800	/* Allocating memory */
 #define PF_NPROC_EXCEEDED 0x00001000	/* set_user noticed that RLIMIT_NPROC was exceeded */
 #define PF_USED_MATH	0x00002000	/* if unset the fpu must be initialized before use */
-#define PF_FREEZING	0x00004000	/* freeze in progress. do not account to load */
 #define PF_NOFREEZE	0x00008000	/* this thread should not be frozen */
 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
diff --git a/kernel/freezer.c b/kernel/freezer.c
index a257ecd37c48..b8b562124ba9 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -44,9 +44,6 @@ repeat:
 	recalc_sigpending(); /* We sent fake signal, clean it up */
 	spin_unlock_irq(&current->sighand->siglock);
 
-	/* prevent accounting of that task to load */
-	current->flags |= PF_FREEZING;
-
 	for (;;) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (!freezing(current) ||
@@ -56,9 +53,6 @@ repeat:
 		schedule();
 	}
 
-	/* Remove the accounting blocker */
-	current->flags &= ~PF_FREEZING;
-
 	/* leave FROZEN */
 	spin_lock_irq(&freezer_lock);
 	if (freezing(current))
-- 
cgit v1.2.3


From 03afed8bc296fa70186ba832c1126228bb992465 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:24 -0800
Subject: freezer: clean up freeze_processes() failure path

freeze_processes() failure path is rather messy.  Freezing is canceled
for workqueues and tasks which aren't frozen yet but frozen tasks are
left alone and should be thawed by the caller and of course some
callers (xen and kexec) didn't do it.

This patch updates __thaw_task() to handle cancelation correctly and
makes freeze_processes() and freeze_kernel_threads() call
thaw_processes() on failure instead so that the system is fully thawed
on failure.  Unnecessary [suspend_]thaw_processes() calls are removed
from kernel/power/hibernate.c, suspend.c and user.c.

While at it, restructure error checking if clause in suspend_prepare()
to be less weird.

-v2: Srivatsa spotted missing removal of suspend_thaw_processes() in
     suspend_prepare() and error in commit message.  Updated.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---
 include/linux/freezer.h  |  1 -
 kernel/freezer.c         | 25 +++++++++----------------
 kernel/power/hibernate.c | 15 ++-------------
 kernel/power/process.c   | 16 ++++++++--------
 kernel/power/suspend.c   |  8 +++-----
 kernel/power/user.c      |  4 +---
 6 files changed, 23 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index ba4f512d2938..93f411a52872 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -61,7 +61,6 @@ static inline bool try_to_freeze(void)
 }
 
 extern bool freeze_task(struct task_struct *p, bool sig_only);
-extern void cancel_freezing(struct task_struct *p);
 
 #ifdef CONFIG_CGROUP_FREEZER
 extern int cgroup_freezing_or_frozen(struct task_struct *task);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index b8b562124ba9..11e32d419dec 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -129,21 +129,6 @@ out_unlock:
 	return ret;
 }
 
-void cancel_freezing(struct task_struct *p)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&freezer_lock, flags);
-	if (freezing(p)) {
-		pr_debug("  clean up: %s\n", p->comm);
-		clear_freeze_flag(p);
-		spin_lock(&p->sighand->siglock);
-		recalc_sigpending_and_wake(p);
-		spin_unlock(&p->sighand->siglock);
-	}
-	spin_unlock_irqrestore(&freezer_lock, flags);
-}
-
 void __thaw_task(struct task_struct *p)
 {
 	unsigned long flags;
@@ -153,10 +138,18 @@ void __thaw_task(struct task_struct *p)
 	 * be visible to @p as waking up implies wmb.  Waking up inside
 	 * freezer_lock also prevents wakeups from leaking outside
 	 * refrigerator.
+	 *
+	 * If !FROZEN, @p hasn't reached refrigerator, recalc sigpending to
+	 * avoid leaving dangling TIF_SIGPENDING behind.
 	 */
 	spin_lock_irqsave(&freezer_lock, flags);
 	clear_freeze_flag(p);
-	if (frozen(p))
+	if (frozen(p)) {
 		wake_up_process(p);
+	} else {
+		spin_lock(&p->sighand->siglock);
+		recalc_sigpending_and_wake(p);
+		spin_unlock(&p->sighand->siglock);
+	}
 	spin_unlock_irqrestore(&freezer_lock, flags);
 }
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 196c01268ebd..ba2319ffc860 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -607,17 +607,6 @@ static void power_down(void)
 	while(1);
 }
 
-static int prepare_processes(void)
-{
-	int error = 0;
-
-	if (freeze_processes()) {
-		error = -EBUSY;
-		thaw_processes();
-	}
-	return error;
-}
-
 /**
  * hibernate - Carry out system hibernation, including saving the image.
  */
@@ -650,7 +639,7 @@ int hibernate(void)
 	sys_sync();
 	printk("done.\n");
 
-	error = prepare_processes();
+	error = freeze_processes();
 	if (error)
 		goto Finish;
 
@@ -811,7 +800,7 @@ static int software_resume(void)
 		goto close_finish;
 
 	pr_debug("PM: Preparing processes for restore.\n");
-	error = prepare_processes();
+	error = freeze_processes();
 	if (error) {
 		swsusp_close(FMODE_READ);
 		goto Done;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index e59676f5811d..ce643838a00c 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -91,11 +91,6 @@ static int try_to_freeze_tasks(bool sig_only)
 	elapsed_csecs = elapsed_csecs64;
 
 	if (todo) {
-		/* This does not unfreeze processes that are already frozen
-		 * (we have slightly ugly calling convention in that respect,
-		 * and caller must call thaw_processes() if something fails),
-		 * but it cleans up leftover PF_FREEZE requests.
-		 */
 		printk("\n");
 		printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds "
 		       "(%d tasks refusing to freeze, wq_busy=%d):\n",
@@ -103,14 +98,11 @@ static int try_to_freeze_tasks(bool sig_only)
 		       elapsed_csecs / 100, elapsed_csecs % 100,
 		       todo - wq_busy, wq_busy);
 
-		thaw_workqueues();
-
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
 			if (!wakeup && !freezer_should_skip(p) &&
 			    freezing(p) && !frozen(p))
 				sched_show_task(p);
-			cancel_freezing(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 	} else {
@@ -123,6 +115,8 @@ static int try_to_freeze_tasks(bool sig_only)
 
 /**
  * freeze_processes - Signal user space processes to enter the refrigerator.
+ *
+ * On success, returns 0.  On failure, -errno and system is fully thawed.
  */
 int freeze_processes(void)
 {
@@ -137,11 +131,15 @@ int freeze_processes(void)
 	printk("\n");
 	BUG_ON(in_atomic());
 
+	if (error)
+		thaw_processes();
 	return error;
 }
 
 /**
  * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator.
+ *
+ * On success, returns 0.  On failure, -errno and system is fully thawed.
  */
 int freeze_kernel_threads(void)
 {
@@ -155,6 +153,8 @@ int freeze_kernel_threads(void)
 	printk("\n");
 	BUG_ON(in_atomic());
 
+	if (error)
+		thaw_processes();
 	return error;
 }
 
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 4953dc054c53..d336b27d1104 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -106,13 +106,11 @@ static int suspend_prepare(void)
 		goto Finish;
 
 	error = suspend_freeze_processes();
-	if (error) {
-		suspend_stats.failed_freeze++;
-		dpm_save_failed_step(SUSPEND_FREEZE);
-	} else
+	if (!error)
 		return 0;
 
-	suspend_thaw_processes();
+	suspend_stats.failed_freeze++;
+	dpm_save_failed_step(SUSPEND_FREEZE);
 	usermodehelper_enable();
  Finish:
 	pm_notifier_call_chain(PM_POST_SUSPEND);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 6d8f535c2b88..7cc3f5bc5c24 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -257,10 +257,8 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 			break;
 
 		error = freeze_processes();
-		if (error) {
-			thaw_processes();
+		if (error)
 			usermodehelper_enable();
-		}
 		if (!error)
 			data->frozen = 1;
 		break;
-- 
cgit v1.2.3


From 22b4e111fa01a1147aa562ceaf18a752a928ef4e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:25 -0800
Subject: cgroup_freezer: prepare for removal of TIF_FREEZE

TIF_FREEZE will be removed soon and freezing() will directly test
whether any freezing condition is in effect.  Make the following
changes in preparation.

* Rename cgroup_freezing_or_frozen() to cgroup_freezing() and make it
  return bool.

* Make cgroup_freezing() access task_freezer() under rcu read lock
  instead of task_lock().  This makes the state dereferencing racy
  against task moving to another cgroup; however, it was already racy
  without this change as ->state dereference wasn't synchronized.
  This will be later dealt with using attach hooks.

* freezer->state is now set before trying to push tasks into the
  target state.

-v2: Oleg pointed out that freeze_change_state() was setting
     freeze->state incorrectly to CGROUP_FROZEN instead of
     CGROUP_FREEZING.  Fixed.

-v3: Matt pointed out that setting CGROUP_FROZEN used to always invoke
     try_to_freeze_cgroup() regardless of the current state.  Patch
     updated such that the actual freeze/thaw operations are always
     performed on invocation.  This shouldn't make any difference
     unless something is broken.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Paul Menage <paul@paulmenage.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/freezer.h |  6 +++---
 kernel/cgroup_freezer.c | 40 +++++++++++++---------------------------
 kernel/power/process.c  |  2 +-
 3 files changed, 17 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 93f411a52872..b2b4abc5a739 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -63,11 +63,11 @@ static inline bool try_to_freeze(void)
 extern bool freeze_task(struct task_struct *p, bool sig_only);
 
 #ifdef CONFIG_CGROUP_FREEZER
-extern int cgroup_freezing_or_frozen(struct task_struct *task);
+extern bool cgroup_freezing(struct task_struct *task);
 #else /* !CONFIG_CGROUP_FREEZER */
-static inline int cgroup_freezing_or_frozen(struct task_struct *task)
+static inline bool cgroup_freezing(struct task_struct *task)
 {
-	return 0;
+	return false;
 }
 #endif /* !CONFIG_CGROUP_FREEZER */
 
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index cd27b0825560..e6a1b8d1b8bc 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -48,19 +48,17 @@ static inline struct freezer *task_freezer(struct task_struct *task)
 			    struct freezer, css);
 }
 
-static inline int __cgroup_freezing_or_frozen(struct task_struct *task)
+bool cgroup_freezing(struct task_struct *task)
 {
-	enum freezer_state state = task_freezer(task)->state;
-	return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN);
-}
+	enum freezer_state state;
+	bool ret;
 
-int cgroup_freezing_or_frozen(struct task_struct *task)
-{
-	int result;
-	task_lock(task);
-	result = __cgroup_freezing_or_frozen(task);
-	task_unlock(task);
-	return result;
+	rcu_read_lock();
+	state = task_freezer(task)->state;
+	ret = state == CGROUP_FREEZING || state == CGROUP_FROZEN;
+	rcu_read_unlock();
+
+	return ret;
 }
 
 /*
@@ -102,9 +100,6 @@ struct cgroup_subsys freezer_subsys;
  * freezer_can_attach():
  * cgroup_mutex (held by caller of can_attach)
  *
- * cgroup_freezing_or_frozen():
- * task->alloc_lock (to get task's cgroup)
- *
  * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
  * freezer->lock
  *  sighand->siglock (if the cgroup is freezing)
@@ -177,13 +172,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
 
 static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
-	rcu_read_lock();
-	if (__cgroup_freezing_or_frozen(tsk)) {
-		rcu_read_unlock();
-		return -EBUSY;
-	}
-	rcu_read_unlock();
-	return 0;
+	return cgroup_freezing(tsk) ? -EBUSY : 0;
 }
 
 static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
@@ -279,7 +268,6 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 	struct task_struct *task;
 	unsigned int num_cant_freeze_now = 0;
 
-	freezer->state = CGROUP_FREEZING;
 	cgroup_iter_start(cgroup, &it);
 	while ((task = cgroup_iter_next(cgroup, &it))) {
 		if (!freeze_task(task, true))
@@ -303,8 +291,6 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 	while ((task = cgroup_iter_next(cgroup, &it)))
 		__thaw_task(task);
 	cgroup_iter_end(cgroup, &it);
-
-	freezer->state = CGROUP_THAWED;
 }
 
 static int freezer_change_state(struct cgroup *cgroup,
@@ -318,20 +304,20 @@ static int freezer_change_state(struct cgroup *cgroup,
 	spin_lock_irq(&freezer->lock);
 
 	update_if_frozen(cgroup, freezer);
-	if (goal_state == freezer->state)
-		goto out;
 
 	switch (goal_state) {
 	case CGROUP_THAWED:
+		freezer->state = CGROUP_THAWED;
 		unfreeze_cgroup(cgroup, freezer);
 		break;
 	case CGROUP_FROZEN:
+		freezer->state = CGROUP_FREEZING;
 		retval = try_to_freeze_cgroup(cgroup, freezer);
 		break;
 	default:
 		BUG();
 	}
-out:
+
 	spin_unlock_irq(&freezer->lock);
 
 	return retval;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index ce643838a00c..9f6f5c755cfa 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -170,7 +170,7 @@ void thaw_processes(void)
 
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
-		if (cgroup_freezing_or_frozen(p))
+		if (cgroup_freezing(p))
 			continue;
 
 		__thaw_task(p);
-- 
cgit v1.2.3


From a3201227f803ad7fd43180c5195dbe5a2bf998aa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:25 -0800
Subject: freezer: make freezing() test freeze conditions in effect instead of
 TIF_FREEZE

Using TIF_FREEZE for freezing worked when there was only single
freezing condition (the PM one); however, now there is also the
cgroup_freezer and single bit flag is getting clumsy.
thaw_processes() is already testing whether cgroup freezing in in
effect to avoid thawing tasks which were frozen by both PM and cgroup
freezers.

This is racy (nothing prevents race against cgroup freezing) and
fragile.  A much simpler way is to test actual freeze conditions from
freezing() - ie. directly test whether PM or cgroup freezing is in
effect.

This patch adds variables to indicate whether and what type of
freezing conditions are in effect and reimplements freezing() such
that it directly tests whether any of the two freezing conditions is
active and the task should freeze.  On fast path, freezing() is still
very cheap - it only tests system_freezing_cnt.

This makes the clumsy dancing aroung TIF_FREEZE unnecessary and
freeze/thaw operations more usual - updating state variables for the
new state and nudging target tasks so that they notice the new state
and comply.  As long as the nudging happens after state update, it's
race-free.

* This allows use of freezing() in freeze_task().  Replace the open
  coded tests with freezing().

* p != current test is added to warning printing conditions in
  try_to_freeze_tasks() failure path.  This is necessary as freezing()
  is now true for the task which initiated freezing too.

-v2: Oleg pointed out that re-freezing FROZEN cgroup could increment
     system_freezing_cnt.  Fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Paul Menage <paul@paulmenage.org>  (for the cgroup portions)
---
 include/linux/freezer.h | 33 ++++++++++----------------
 kernel/cgroup_freezer.c | 10 +++++++-
 kernel/fork.c           |  1 -
 kernel/freezer.c        | 62 +++++++++++++++++++++++++++++++------------------
 kernel/power/process.c  | 15 ++++++++----
 5 files changed, 72 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index b2b4abc5a739..8e29f2b7ce11 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -5,8 +5,13 @@
 
 #include <linux/sched.h>
 #include <linux/wait.h>
+#include <linux/atomic.h>
 
 #ifdef CONFIG_FREEZER
+extern atomic_t system_freezing_cnt;	/* nr of freezing conds in effect */
+extern bool pm_freezing;		/* PM freezing in effect */
+extern bool pm_nosig_freezing;		/* PM nosig freezing in effect */
+
 /*
  * Check if a process has been frozen
  */
@@ -15,28 +20,16 @@ static inline int frozen(struct task_struct *p)
 	return p->flags & PF_FROZEN;
 }
 
-/*
- * Check if there is a request to freeze a process
- */
-static inline int freezing(struct task_struct *p)
-{
-	return test_tsk_thread_flag(p, TIF_FREEZE);
-}
+extern bool freezing_slow_path(struct task_struct *p);
 
 /*
- * Request that a process be frozen
- */
-static inline void set_freeze_flag(struct task_struct *p)
-{
-	set_tsk_thread_flag(p, TIF_FREEZE);
-}
-
-/*
- * Sometimes we may need to cancel the previous 'freeze' request
+ * Check if there is a request to freeze a process
  */
-static inline void clear_freeze_flag(struct task_struct *p)
+static inline bool freezing(struct task_struct *p)
 {
-	clear_tsk_thread_flag(p, TIF_FREEZE);
+	if (likely(!atomic_read(&system_freezing_cnt)))
+		return false;
+	return freezing_slow_path(p);
 }
 
 static inline bool should_send_signal(struct task_struct *p)
@@ -174,9 +167,7 @@ static inline void set_freezable_with_signal(void)
 })
 #else /* !CONFIG_FREEZER */
 static inline int frozen(struct task_struct *p) { return 0; }
-static inline int freezing(struct task_struct *p) { return 0; }
-static inline void set_freeze_flag(struct task_struct *p) {}
-static inline void clear_freeze_flag(struct task_struct *p) {}
+static inline bool freezing(struct task_struct *p) { return false; }
 
 static inline bool __refrigerator(bool check_kthr_stop) { return false; }
 static inline int freeze_processes(void) { return -ENOSYS; }
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e6a1b8d1b8bc..2327ad11725f 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -145,7 +145,11 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
 static void freezer_destroy(struct cgroup_subsys *ss,
 			    struct cgroup *cgroup)
 {
-	kfree(cgroup_freezer(cgroup));
+	struct freezer *freezer = cgroup_freezer(cgroup);
+
+	if (freezer->state != CGROUP_THAWED)
+		atomic_dec(&system_freezing_cnt);
+	kfree(freezer);
 }
 
 /*
@@ -307,10 +311,14 @@ static int freezer_change_state(struct cgroup *cgroup,
 
 	switch (goal_state) {
 	case CGROUP_THAWED:
+		if (freezer->state != CGROUP_THAWED)
+			atomic_dec(&system_freezing_cnt);
 		freezer->state = CGROUP_THAWED;
 		unfreeze_cgroup(cgroup, freezer);
 		break;
 	case CGROUP_FROZEN:
+		if (freezer->state == CGROUP_THAWED)
+			atomic_inc(&system_freezing_cnt);
 		freezer->state = CGROUP_FREEZING;
 		retval = try_to_freeze_cgroup(cgroup, freezer);
 		break;
diff --git a/kernel/fork.c b/kernel/fork.c
index ba0d17261329..d53316e88d9d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -997,7 +997,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 	new_flags |= PF_FORKNOEXEC;
 	new_flags |= PF_STARTING;
 	p->flags = new_flags;
-	clear_freeze_flag(p);
 }
 
 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 11e32d419dec..f53cd5aa5b2e 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -11,9 +11,41 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 
+/* total number of freezing conditions in effect */
+atomic_t system_freezing_cnt = ATOMIC_INIT(0);
+EXPORT_SYMBOL(system_freezing_cnt);
+
+/* indicate whether PM freezing is in effect, protected by pm_mutex */
+bool pm_freezing;
+bool pm_nosig_freezing;
+
 /* protects freezing and frozen transitions */
 static DEFINE_SPINLOCK(freezer_lock);
 
+/**
+ * freezing_slow_path - slow path for testing whether a task needs to be frozen
+ * @p: task to be tested
+ *
+ * This function is called by freezing() if system_freezing_cnt isn't zero
+ * and tests whether @p needs to enter and stay in frozen state.  Can be
+ * called under any context.  The freezers are responsible for ensuring the
+ * target tasks see the updated state.
+ */
+bool freezing_slow_path(struct task_struct *p)
+{
+	if (p->flags & PF_NOFREEZE)
+		return false;
+
+	if (pm_nosig_freezing || cgroup_freezing(p))
+		return true;
+
+	if (pm_freezing && !(p->flags & PF_FREEZER_NOSIG))
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(freezing_slow_path);
+
 /* Refrigerator is place where frozen processes are stored :-). */
 bool __refrigerator(bool check_kthr_stop)
 {
@@ -23,17 +55,11 @@ bool __refrigerator(bool check_kthr_stop)
 	long save;
 
 	/*
-	 * Enter FROZEN.  If NOFREEZE, schedule immediate thawing by
-	 * clearing freezing.
+	 * No point in checking freezing() again - the caller already did.
+	 * Proceed to enter FROZEN.
 	 */
 	spin_lock_irq(&freezer_lock);
 repeat:
-	if (!freezing(current)) {
-		spin_unlock_irq(&freezer_lock);
-		return was_frozen;
-	}
-	if (current->flags & PF_NOFREEZE)
-		clear_freeze_flag(current);
 	current->flags |= PF_FROZEN;
 	spin_unlock_irq(&freezer_lock);
 
@@ -99,18 +125,12 @@ static void fake_signal_wake_up(struct task_struct *p)
 bool freeze_task(struct task_struct *p, bool sig_only)
 {
 	unsigned long flags;
-	bool ret = false;
 
 	spin_lock_irqsave(&freezer_lock, flags);
-
-	if ((p->flags & PF_NOFREEZE) ||
-	    (sig_only && !should_send_signal(p)))
-		goto out_unlock;
-
-	if (frozen(p))
-		goto out_unlock;
-
-	set_freeze_flag(p);
+	if (!freezing(p) || frozen(p)) {
+		spin_unlock_irqrestore(&freezer_lock, flags);
+		return false;
+	}
 
 	if (should_send_signal(p)) {
 		fake_signal_wake_up(p);
@@ -123,10 +143,9 @@ bool freeze_task(struct task_struct *p, bool sig_only)
 	} else {
 		wake_up_state(p, TASK_INTERRUPTIBLE);
 	}
-	ret = true;
-out_unlock:
+
 	spin_unlock_irqrestore(&freezer_lock, flags);
-	return ret;
+	return true;
 }
 
 void __thaw_task(struct task_struct *p)
@@ -143,7 +162,6 @@ void __thaw_task(struct task_struct *p)
 	 * avoid leaving dangling TIF_SIGPENDING behind.
 	 */
 	spin_lock_irqsave(&freezer_lock, flags);
-	clear_freeze_flag(p);
 	if (frozen(p)) {
 		wake_up_process(p);
 	} else {
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 9f6f5c755cfa..0beb51e1dec9 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -101,7 +101,7 @@ static int try_to_freeze_tasks(bool sig_only)
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
 			if (!wakeup && !freezer_should_skip(p) &&
-			    freezing(p) && !frozen(p))
+			    p != current && freezing(p) && !frozen(p))
 				sched_show_task(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
@@ -122,7 +122,11 @@ int freeze_processes(void)
 {
 	int error;
 
+	if (!pm_freezing)
+		atomic_inc(&system_freezing_cnt);
+
 	printk("Freezing user space processes ... ");
+	pm_freezing = true;
 	error = try_to_freeze_tasks(true);
 	if (!error) {
 		printk("done.");
@@ -146,6 +150,7 @@ int freeze_kernel_threads(void)
 	int error;
 
 	printk("Freezing remaining freezable tasks ... ");
+	pm_nosig_freezing = true;
 	error = try_to_freeze_tasks(false);
 	if (!error)
 		printk("done.");
@@ -162,6 +167,11 @@ void thaw_processes(void)
 {
 	struct task_struct *g, *p;
 
+	if (pm_freezing)
+		atomic_dec(&system_freezing_cnt);
+	pm_freezing = false;
+	pm_nosig_freezing = false;
+
 	oom_killer_enable();
 
 	printk("Restarting tasks ... ");
@@ -170,9 +180,6 @@ void thaw_processes(void)
 
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
-		if (cgroup_freezing(p))
-			continue;
-
 		__thaw_task(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
-- 
cgit v1.2.3


From 948246f70a811c872b9d93bb4a8ab5823c4c79e0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:25 -0800
Subject: freezer: remove should_send_signal() and update frozen()

should_send_signal() is only used in freezer.c.  Exporting them only
increases chance of abuse.  Open code the two users and remove it.

Update frozen() to return bool.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/freezer.h | 9 ++-------
 kernel/freezer.c        | 2 +-
 2 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 8e29f2b7ce11..3d50913d39d0 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -15,7 +15,7 @@ extern bool pm_nosig_freezing;		/* PM nosig freezing in effect */
 /*
  * Check if a process has been frozen
  */
-static inline int frozen(struct task_struct *p)
+static inline bool frozen(struct task_struct *p)
 {
 	return p->flags & PF_FROZEN;
 }
@@ -32,11 +32,6 @@ static inline bool freezing(struct task_struct *p)
 	return freezing_slow_path(p);
 }
 
-static inline bool should_send_signal(struct task_struct *p)
-{
-	return !(p->flags & PF_FREEZER_NOSIG);
-}
-
 /* Takes and releases task alloc lock using task_lock() */
 extern void __thaw_task(struct task_struct *t);
 
@@ -166,7 +161,7 @@ static inline void set_freezable_with_signal(void)
 	__retval;							\
 })
 #else /* !CONFIG_FREEZER */
-static inline int frozen(struct task_struct *p) { return 0; }
+static inline bool frozen(struct task_struct *p) { return false; }
 static inline bool freezing(struct task_struct *p) { return false; }
 
 static inline bool __refrigerator(bool check_kthr_stop) { return false; }
diff --git a/kernel/freezer.c b/kernel/freezer.c
index f53cd5aa5b2e..95a123844241 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -132,7 +132,7 @@ bool freeze_task(struct task_struct *p, bool sig_only)
 		return false;
 	}
 
-	if (should_send_signal(p)) {
+	if (!(p->flags & PF_FREEZER_NOSIG)) {
 		fake_signal_wake_up(p);
 		/*
 		 * fake_signal_wake_up() goes through p's scheduler
-- 
cgit v1.2.3


From 96ee6d8539c9fc6742908d85eb9723abb5c91854 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:25 -0800
Subject: freezer: fix set_freezable[_with_signal]() race

A kthread doing set_freezable*() may race with on-going PM freeze and
the freezer might think all tasks are frozen while the new freezable
kthread is merrily proceeding to execute code paths which aren't
supposed to be executing during PM freeze.

Reimplement set_freezable[_with_signal]() using __set_freezable() such
that freezable PF flags are modified under freezer_lock and
try_to_freeze() is called afterwards.  This eliminates race condition
against freezing.

Note: Separated out from larger patch to resolve fix order dependency
      Oleg pointed out.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/freezer.h |  9 +++++----
 kernel/freezer.c        | 25 +++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 3d50913d39d0..a0f1b3a3604f 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -49,6 +49,7 @@ static inline bool try_to_freeze(void)
 }
 
 extern bool freeze_task(struct task_struct *p, bool sig_only);
+extern bool __set_freezable(bool with_signal);
 
 #ifdef CONFIG_CGROUP_FREEZER
 extern bool cgroup_freezing(struct task_struct *task);
@@ -106,18 +107,18 @@ static inline int freezer_should_skip(struct task_struct *p)
 /*
  * Tell the freezer that the current task should be frozen by it
  */
-static inline void set_freezable(void)
+static inline bool set_freezable(void)
 {
-	current->flags &= ~PF_NOFREEZE;
+	return __set_freezable(false);
 }
 
 /*
  * Tell the freezer that the current task should be frozen by it and that it
  * should send a fake signal to the task to freeze it.
  */
-static inline void set_freezable_with_signal(void)
+static inline bool set_freezable_with_signal(void)
 {
-	current->flags &= ~(PF_NOFREEZE | PF_FREEZER_NOSIG);
+	return __set_freezable(true);
 }
 
 /*
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 95a123844241..b1e7a7b3d2cd 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -171,3 +171,28 @@ void __thaw_task(struct task_struct *p)
 	}
 	spin_unlock_irqrestore(&freezer_lock, flags);
 }
+
+/**
+ * __set_freezable - make %current freezable
+ * @with_signal: do we want %TIF_SIGPENDING for notification too?
+ *
+ * Mark %current freezable and enter refrigerator if necessary.
+ */
+bool __set_freezable(bool with_signal)
+{
+	might_sleep();
+
+	/*
+	 * Modify flags while holding freezer_lock.  This ensures the
+	 * freezer notices that we aren't frozen yet or the freezing
+	 * condition is visible to try_to_freeze() below.
+	 */
+	spin_lock_irq(&freezer_lock);
+	current->flags &= ~PF_NOFREEZE;
+	if (with_signal)
+		current->flags &= ~PF_FREEZER_NOSIG;
+	spin_unlock_irq(&freezer_lock);
+
+	return try_to_freeze();
+}
+EXPORT_SYMBOL(__set_freezable);
-- 
cgit v1.2.3


From 839e3407d90a810318d17c17ceb3d5928a910704 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:26 -0800
Subject: freezer: remove unused @sig_only from freeze_task()

After "freezer: make freezing() test freeze conditions in effect
instead of TIF_FREEZE", freezing() returns authoritative answer on
whether the current task should freeze or not and freeze_task()
doesn't need or use @sig_only.  Remove it.

While at it, rewrite function comment for freeze_task() and rename
@sig_only to @user_only in try_to_freeze_tasks().

This patch doesn't cause any functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/freezer.h |  2 +-
 kernel/cgroup_freezer.c |  4 ++--
 kernel/freezer.c        | 21 +++++++++------------
 kernel/power/process.c  |  8 ++++----
 4 files changed, 16 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index a0f1b3a3604f..a28842e588f4 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -48,7 +48,7 @@ static inline bool try_to_freeze(void)
 	return __refrigerator(false);
 }
 
-extern bool freeze_task(struct task_struct *p, bool sig_only);
+extern bool freeze_task(struct task_struct *p);
 extern bool __set_freezable(bool with_signal);
 
 #ifdef CONFIG_CGROUP_FREEZER
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 2327ad11725f..e411a60cc2c8 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -206,7 +206,7 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
 
 	/* Locking avoids race with FREEZING -> THAWED transitions. */
 	if (freezer->state == CGROUP_FREEZING)
-		freeze_task(task, true);
+		freeze_task(task);
 	spin_unlock_irq(&freezer->lock);
 }
 
@@ -274,7 +274,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 
 	cgroup_iter_start(cgroup, &it);
 	while ((task = cgroup_iter_next(cgroup, &it))) {
-		if (!freeze_task(task, true))
+		if (!freeze_task(task))
 			continue;
 		if (frozen(task))
 			continue;
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 389549f0a94e..2589a61de44c 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -100,20 +100,17 @@ static void fake_signal_wake_up(struct task_struct *p)
 }
 
 /**
- *	freeze_task - send a freeze request to given task
- *	@p: task to send the request to
- *	@sig_only: if set, the request will only be sent if the task has the
- *		PF_FREEZER_NOSIG flag unset
- *	Return value: 'false', if @sig_only is set and the task has
- *		PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
+ * freeze_task - send a freeze request to given task
+ * @p: task to send the request to
  *
- *	The freeze request is sent by setting the tasks's TIF_FREEZE flag and
- *	either sending a fake signal to it or waking it up, depending on whether
- *	or not it has PF_FREEZER_NOSIG set.  If @sig_only is set and the task
- *	has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
- *	TIF_FREEZE flag will not be set.
+ * If @p is freezing, the freeze request is sent by setting %TIF_FREEZE
+ * flag and either sending a fake signal to it or waking it up, depending
+ * on whether it has %PF_FREEZER_NOSIG set.
+ *
+ * RETURNS:
+ * %false, if @p is not freezing or already frozen; %true, otherwise
  */
-bool freeze_task(struct task_struct *p, bool sig_only)
+bool freeze_task(struct task_struct *p)
 {
 	unsigned long flags;
 
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 0beb51e1dec9..77274c9ba2f1 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -22,7 +22,7 @@
  */
 #define TIMEOUT	(20 * HZ)
 
-static int try_to_freeze_tasks(bool sig_only)
+static int try_to_freeze_tasks(bool user_only)
 {
 	struct task_struct *g, *p;
 	unsigned long end_time;
@@ -37,14 +37,14 @@ static int try_to_freeze_tasks(bool sig_only)
 
 	end_time = jiffies + TIMEOUT;
 
-	if (!sig_only)
+	if (!user_only)
 		freeze_workqueues_begin();
 
 	while (true) {
 		todo = 0;
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
-			if (p == current || !freeze_task(p, sig_only))
+			if (p == current || !freeze_task(p))
 				continue;
 
 			/*
@@ -65,7 +65,7 @@ static int try_to_freeze_tasks(bool sig_only)
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 
-		if (!sig_only) {
+		if (!user_only) {
 			wq_busy = freeze_workqueues_busy();
 			todo += wq_busy;
 		}
-- 
cgit v1.2.3


From 8b60b07805d557542160d852874fa6a1b969184e Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@qca.qualcomm.com>
Date: Tue, 11 Oct 2011 10:59:02 -0700
Subject: cfg80211: process regulatory DFS region for countries

The wireless-regdb now has support for mapping a country to
one DFS region. CRDA sends this to us now so process it
so we can provide that hint to drivers. This will later be
used by code for processing DFS in a way that meets the
criteria for the DFS region the country belongs to.

Signed-off-by: Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h  | 21 +++++++++++++++++++++
 include/net/regulatory.h |  1 +
 net/wireless/nl80211.c   | 15 +++++++++++++++
 net/wireless/reg.c       | 37 +++++++++++++++++++++++++++++++++++++
 net/wireless/reg.h       |  1 +
 5 files changed, 75 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index f9261c253735..6396819a7e41 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1170,6 +1170,10 @@ enum nl80211_commands {
  *	probe-response frame. The DA field in the 802.11 header is zero-ed out,
  *	to be filled by the FW.
  *
+ * @NL80211_ATTR_DFS_REGION: region for regulatory rules which this country
+ *    abides to when initiating radiation on DFS channels. A country maps
+ *    to one DFS region.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1408,6 +1412,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_PROBE_RESP,
 
+	NL80211_ATTR_DFS_REGION,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1916,6 +1922,21 @@ enum nl80211_reg_rule_flags {
 	NL80211_RRF_NO_IBSS		= 1<<8,
 };
 
+/**
+ * enum nl80211_dfs_regions - regulatory DFS regions
+ *
+ * @NL80211_DFS_UNSET: Country has no DFS master region specified
+ * @NL80211_DFS_FCC_: Country follows DFS master rules from FCC
+ * @NL80211_DFS_FCC_: Country follows DFS master rules from ETSI
+ * @NL80211_DFS_JP_: Country follows DFS master rules from JP/MKK/Telec
+ */
+enum nl80211_dfs_regions {
+	NL80211_DFS_UNSET	= 0,
+	NL80211_DFS_FCC		= 1,
+	NL80211_DFS_ETSI	= 2,
+	NL80211_DFS_JP		= 3,
+};
+
 /**
  * enum nl80211_survey_info - survey information
  *
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index eb7d3c2d4274..7399c93cb4bc 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -93,6 +93,7 @@ struct ieee80211_reg_rule {
 struct ieee80211_regdomain {
 	u32 n_reg_rules;
 	char alpha2[2];
+	u8 dfs_region;
 	struct ieee80211_reg_rule reg_rules[];
 };
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 6bc7c4b32fa5..50482e129263 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -199,6 +199,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_DONT_WAIT_FOR_ACK] = { .type = NLA_FLAG },
 	[NL80211_ATTR_PROBE_RESP] = { .type = NLA_BINARY,
 				      .len = IEEE80211_MAX_DATA_LEN },
+	[NL80211_ATTR_DFS_REGION] = { .type = NLA_U8 },
 };
 
 /* policy for the key attributes */
@@ -3382,6 +3383,9 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
 
 	NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2,
 		cfg80211_regdomain->alpha2);
+	if (cfg80211_regdomain->dfs_region)
+		NLA_PUT_U8(msg, NL80211_ATTR_DFS_REGION,
+			   cfg80211_regdomain->dfs_region);
 
 	nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES);
 	if (!nl_reg_rules)
@@ -3440,6 +3444,7 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
 	char *alpha2 = NULL;
 	int rem_reg_rules = 0, r = 0;
 	u32 num_rules = 0, rule_idx = 0, size_of_regd;
+	u8 dfs_region = 0;
 	struct ieee80211_regdomain *rd = NULL;
 
 	if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
@@ -3450,6 +3455,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
 
 	alpha2 = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]);
 
+	if (info->attrs[NL80211_ATTR_DFS_REGION])
+		dfs_region = nla_get_u8(info->attrs[NL80211_ATTR_DFS_REGION]);
+
 	nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
 			rem_reg_rules) {
 		num_rules++;
@@ -3477,6 +3485,13 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
 	rd->alpha2[0] = alpha2[0];
 	rd->alpha2[1] = alpha2[1];
 
+	/*
+	 * Disable DFS master mode if the DFS region was
+	 * not supported or known on this kernel.
+	 */
+	if (reg_supported_dfs_region(dfs_region))
+		rd->dfs_region = dfs_region;
+
 	nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
 			rem_reg_rules) {
 		nla_parse(tb, NL80211_REG_RULE_ATTR_MAX,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2520a1b7e7db..69141ed1f6df 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1946,6 +1946,42 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
 	}
 }
 
+bool reg_supported_dfs_region(u8 dfs_region)
+{
+	switch (dfs_region) {
+	case NL80211_DFS_UNSET:
+	case NL80211_DFS_FCC:
+	case NL80211_DFS_ETSI:
+	case NL80211_DFS_JP:
+		return true;
+	default:
+		REG_DBG_PRINT("Ignoring uknown DFS master region: %d\n",
+			      dfs_region);
+		return false;
+	}
+}
+
+static void print_dfs_region(u8 dfs_region)
+{
+	if (!dfs_region)
+		return;
+
+	switch (dfs_region) {
+	case NL80211_DFS_FCC:
+		pr_info(" DFS Master region FCC");
+		break;
+	case NL80211_DFS_ETSI:
+		pr_info(" DFS Master region ETSI");
+		break;
+	case NL80211_DFS_JP:
+		pr_info(" DFS Master region JP");
+		break;
+	default:
+		pr_info(" DFS Master region Uknown");
+		break;
+	}
+}
+
 static void print_regdomain(const struct ieee80211_regdomain *rd)
 {
 
@@ -1973,6 +2009,7 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
 			pr_info("Regulatory domain changed to country: %c%c\n",
 				rd->alpha2[0], rd->alpha2[1]);
 	}
+	print_dfs_region(rd->dfs_region);
 	print_rd_rules(rd);
 }
 
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index 4a56799d868d..786e414afd91 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -5,6 +5,7 @@ extern const struct ieee80211_regdomain *cfg80211_regdomain;
 
 bool is_world_regdom(const char *alpha2);
 bool reg_is_valid_request(const char *alpha2);
+bool reg_supported_dfs_region(u8 dfs_region);
 
 int regulatory_hint_user(const char *alpha2);
 
-- 
cgit v1.2.3


From 7e7c8926b2f4e3453b8aeb39cd814d2af3fec24f Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Fri, 18 Nov 2011 11:31:59 -0800
Subject: wireless: Support ht-capabilities over-rides.

This allows users to disable features such as HT, HT40,
and to modify the MCS, AMPDU, and AMSDU settings for
drivers that support it.

The MCS, AMPDU, and AMSDU features that may be disabled are
are reported in the phy-info netlink message as a mask.

Attemping to disable features that are not supported will
take no affect, but will not return errors.  This is to aid
backwards compatibility in user-space apps that may not be
clever enough to deal with parsing the the capabilities mask.

This patch only enables the infrastructure.  An additional
patch will enable the feature in mac80211.

Signed-off-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 14 ++++++++++++++
 include/net/cfg80211.h  | 27 +++++++++++++++++++++++++++
 net/wireless/core.h     | 10 ++++++++--
 net/wireless/mlme.c     | 37 ++++++++++++++++++++++++++++++++++---
 net/wireless/nl80211.c  | 44 +++++++++++++++++++++++++++++++++++++++++++-
 net/wireless/sme.c      |  7 ++++++-
 6 files changed, 132 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 6396819a7e41..97bfebfcce90 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1169,6 +1169,17 @@ enum nl80211_commands {
  * @NL80211_ATTR_PROBE_RESP: Probe Response template data. Contains the entire
  *	probe-response frame. The DA field in the 802.11 header is zero-ed out,
  *	to be filled by the FW.
+ * @NL80211_ATTR_DISABLE_HT:  Force HT capable interfaces to disable
+ *      this feature.  Currently, only supported in mac80211 drivers.
+ * @NL80211_ATTR_HT_CAPABILITY_MASK: Specify which bits of the
+ *      ATTR_HT_CAPABILITY to which attention should be paid.
+ *      Currently, only mac80211 NICs support this feature.
+ *      The values that may be configured are:
+ *       MCS rates, MAX-AMSDU, HT-20-40 and HT_CAP_SGI_40
+ *       AMPDU density and AMPDU factor.
+ *      All values are treated as suggestions and may be ignored
+ *      by the driver as required.  The actual values may be seen in
+ *      the station debugfs ht_caps file.
  *
  * @NL80211_ATTR_DFS_REGION: region for regulatory rules which this country
  *    abides to when initiating radiation on DFS channels. A country maps
@@ -1414,6 +1425,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_DFS_REGION,
 
+	NL80211_ATTR_DISABLE_HT,
+	NL80211_ATTR_HT_CAPABILITY_MASK,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 6a1d849c597a..d5e18913f293 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1043,6 +1043,15 @@ struct cfg80211_auth_request {
 	bool local_state_change;
 };
 
+/**
+ * enum cfg80211_assoc_req_flags - Over-ride default behaviour in association.
+ *
+ * @ASSOC_REQ_DISABLE_HT:  Disable HT (802.11n)
+ */
+enum cfg80211_assoc_req_flags {
+	ASSOC_REQ_DISABLE_HT		= BIT(0),
+};
+
 /**
  * struct cfg80211_assoc_request - (Re)Association request data
  *
@@ -1054,6 +1063,10 @@ struct cfg80211_auth_request {
  * @use_mfp: Use management frame protection (IEEE 802.11w) in this association
  * @crypto: crypto settings
  * @prev_bssid: previous BSSID, if not %NULL use reassociate frame
+ * @flags:  See &enum cfg80211_assoc_req_flags
+ * @ht_capa:  HT Capabilities over-rides.  Values set in ht_capa_mask
+ *   will be used in ht_capa.  Un-supported values will be ignored.
+ * @ht_capa_mask:  The bits of ht_capa which are to be used.
  */
 struct cfg80211_assoc_request {
 	struct cfg80211_bss *bss;
@@ -1061,6 +1074,9 @@ struct cfg80211_assoc_request {
 	size_t ie_len;
 	struct cfg80211_crypto_settings crypto;
 	bool use_mfp;
+	u32 flags;
+	struct ieee80211_ht_cap ht_capa;
+	struct ieee80211_ht_cap ht_capa_mask;
 };
 
 /**
@@ -1159,6 +1175,10 @@ struct cfg80211_ibss_params {
  * @key_len: length of WEP key for shared key authentication
  * @key_idx: index of WEP key for shared key authentication
  * @key: WEP key for shared key authentication
+ * @flags:  See &enum cfg80211_assoc_req_flags
+ * @ht_capa:  HT Capabilities over-rides.  Values set in ht_capa_mask
+ *   will be used in ht_capa.  Un-supported values will be ignored.
+ * @ht_capa_mask:  The bits of ht_capa which are to be used.
  */
 struct cfg80211_connect_params {
 	struct ieee80211_channel *channel;
@@ -1172,6 +1192,9 @@ struct cfg80211_connect_params {
 	struct cfg80211_crypto_settings crypto;
 	const u8 *key;
 	u8 key_len, key_idx;
+	u32 flags;
+	struct ieee80211_ht_cap ht_capa;
+	struct ieee80211_ht_cap ht_capa_mask;
 };
 
 /**
@@ -1938,6 +1961,8 @@ struct wiphy_wowlan_support {
  * @wowlan: WoWLAN support information
  *
  * @ap_sme_capa: AP SME capabilities, flags from &enum nl80211_ap_sme_features.
+ * @ht_capa_mod_mask:  Specify what ht_cap values can be over-ridden.
+ *	If null, then none can be over-ridden.
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -2027,6 +2052,8 @@ struct wiphy {
 	/* dir in debugfs: ieee80211/<wiphyname> */
 	struct dentry *debugfsdir;
 
+	const struct ieee80211_ht_cap *ht_capa_mod_mask;
+
 #ifdef CONFIG_NET_NS
 	/* the network namespace this phy lives in currently */
 	struct net *_net;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 1c7d4df5418c..fb08c28fc90a 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -341,13 +341,17 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			  const u8 *bssid, const u8 *prev_bssid,
 			  const u8 *ssid, int ssid_len,
 			  const u8 *ie, int ie_len, bool use_mfp,
-			  struct cfg80211_crypto_settings *crypt);
+			  struct cfg80211_crypto_settings *crypt,
+			  u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
+			  struct ieee80211_ht_cap *ht_capa_mask);
 int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			struct net_device *dev, struct ieee80211_channel *chan,
 			const u8 *bssid, const u8 *prev_bssid,
 			const u8 *ssid, int ssid_len,
 			const u8 *ie, int ie_len, bool use_mfp,
-			struct cfg80211_crypto_settings *crypt);
+			struct cfg80211_crypto_settings *crypt,
+			u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
+			struct ieee80211_ht_cap *ht_capa_mask);
 int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 			   struct net_device *dev, const u8 *bssid,
 			   const u8 *ie, int ie_len, u16 reason,
@@ -379,6 +383,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			  bool channel_type_valid, unsigned int wait,
 			  const u8 *buf, size_t len, bool no_cck,
 			  bool dont_wait_for_ack, u64 *cookie);
+void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
+			       const struct ieee80211_ht_cap *ht_capa_mask);
 
 /* SME */
 int __cfg80211_connect(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 6c1bafd508c8..438dfc105b4a 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -501,13 +501,32 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 	return err;
 }
 
+/*  Do a logical ht_capa &= ht_capa_mask.  */
+void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
+			       const struct ieee80211_ht_cap *ht_capa_mask)
+{
+	int i;
+	u8 *p1, *p2;
+	if (!ht_capa_mask) {
+		memset(ht_capa, 0, sizeof(*ht_capa));
+		return;
+	}
+
+	p1 = (u8*)(ht_capa);
+	p2 = (u8*)(ht_capa_mask);
+	for (i = 0; i<sizeof(*ht_capa); i++)
+		p1[i] &= p2[i];
+}
+
 int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev,
 			  struct ieee80211_channel *chan,
 			  const u8 *bssid, const u8 *prev_bssid,
 			  const u8 *ssid, int ssid_len,
 			  const u8 *ie, int ie_len, bool use_mfp,
-			  struct cfg80211_crypto_settings *crypt)
+			  struct cfg80211_crypto_settings *crypt,
+			  u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
+			  struct ieee80211_ht_cap *ht_capa_mask)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_assoc_request req;
@@ -537,6 +556,15 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 	memcpy(&req.crypto, crypt, sizeof(req.crypto));
 	req.use_mfp = use_mfp;
 	req.prev_bssid = prev_bssid;
+	req.flags = assoc_flags;
+	if (ht_capa)
+		memcpy(&req.ht_capa, ht_capa, sizeof(req.ht_capa));
+	if (ht_capa_mask)
+		memcpy(&req.ht_capa_mask, ht_capa_mask,
+		       sizeof(req.ht_capa_mask));
+	cfg80211_oper_and_ht_capa(&req.ht_capa_mask,
+				  rdev->wiphy.ht_capa_mod_mask);
+
 	req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
 				   WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
 	if (!req.bss) {
@@ -574,14 +602,17 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			const u8 *bssid, const u8 *prev_bssid,
 			const u8 *ssid, int ssid_len,
 			const u8 *ie, int ie_len, bool use_mfp,
-			struct cfg80211_crypto_settings *crypt)
+			struct cfg80211_crypto_settings *crypt,
+			u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
+			struct ieee80211_ht_cap *ht_capa_mask)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;
 
 	wdev_lock(wdev);
 	err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid,
-				    ssid, ssid_len, ie, ie_len, use_mfp, crypt);
+				    ssid, ssid_len, ie, ie_len, use_mfp, crypt,
+				    assoc_flags, ht_capa, ht_capa_mask);
 	wdev_unlock(wdev);
 
 	return err;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 889f06483862..a1cabde7cb5f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -200,6 +200,10 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_PROBE_RESP] = { .type = NLA_BINARY,
 				      .len = IEEE80211_MAX_DATA_LEN },
 	[NL80211_ATTR_DFS_REGION] = { .type = NLA_U8 },
+	[NL80211_ATTR_DISABLE_HT] = { .type = NLA_FLAG },
+	[NL80211_ATTR_HT_CAPABILITY_MASK] = {
+		.len = NL80211_HT_CAPABILITY_LEN
+	},
 };
 
 /* policy for the key attributes */
@@ -1032,6 +1036,11 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 
 	NLA_PUT_U32(msg, NL80211_ATTR_FEATURE_FLAGS, dev->wiphy.features);
 
+	if (dev->wiphy.ht_capa_mod_mask)
+		NLA_PUT(msg, NL80211_ATTR_HT_CAPABILITY_MASK,
+			sizeof(*dev->wiphy.ht_capa_mod_mask),
+			dev->wiphy.ht_capa_mod_mask);
+
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
@@ -4413,6 +4422,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 	const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL;
 	int err, ssid_len, ie_len = 0;
 	bool use_mfp = false;
+	u32 flags = 0;
+	struct ieee80211_ht_cap *ht_capa = NULL;
+	struct ieee80211_ht_cap *ht_capa_mask = NULL;
 
 	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
 		return -EINVAL;
@@ -4456,11 +4468,25 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_PREV_BSSID])
 		prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);
 
+	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT]))
+		flags |= ASSOC_REQ_DISABLE_HT;
+
+	if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
+		ht_capa_mask =
+			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]);
+
+	if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
+		if (!ht_capa_mask)
+			return -EINVAL;
+		ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
+	}
+
 	err = nl80211_crypto_settings(rdev, info, &crypto, 1);
 	if (!err)
 		err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid,
 					  ssid, ssid_len, ie, ie_len, use_mfp,
-					  &crypto);
+					  &crypto, flags, ht_capa,
+					  ht_capa_mask);
 
 	return err;
 }
@@ -4950,6 +4976,22 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 			return PTR_ERR(connkeys);
 	}
 
+	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT]))
+		connect.flags |= ASSOC_REQ_DISABLE_HT;
+
+	if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
+		memcpy(&connect.ht_capa_mask,
+		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]),
+		       sizeof(connect.ht_capa_mask));
+
+	if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
+		if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
+			return -EINVAL;
+		memcpy(&connect.ht_capa,
+		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]),
+		       sizeof(connect.ht_capa));
+	}
+
 	err = cfg80211_connect(rdev, dev, &connect, connkeys);
 	if (err)
 		kfree(connkeys);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 6e86d5acf145..ed9d0e6f4a06 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -189,7 +189,9 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
 					    prev_bssid,
 					    params->ssid, params->ssid_len,
 					    params->ie, params->ie_len,
-					    false, &params->crypto);
+					    false, &params->crypto,
+					    params->flags, &params->ht_capa,
+					    &params->ht_capa_mask);
 		if (err)
 			__cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
 					       NULL, 0,
@@ -773,6 +775,9 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev,
 		wdev->connect_keys = NULL;
 	}
 
+	cfg80211_oper_and_ht_capa(&connect->ht_capa_mask,
+				  rdev->wiphy.ht_capa_mod_mask);
+
 	if (connkeys && connkeys->def >= 0) {
 		int idx;
 		u32 cipher;
-- 
cgit v1.2.3


From a2d7ec58ac09f30ab726f216827f7c7095b2a98f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 18 Nov 2011 17:32:46 +0000
Subject: netfilter: use jump_label for nf_hooks

On configs where CONFIG_JUMP_LABEL=y, we can replace in fast path a
load/compare/conditional jump by a single jump with no dcache reference.

Jump target is modified as soon as nf_hooks[pf][hook] switches from
empty state to non empty states. jump_label state is kept outside of
nf_hooks array so has no cost on cpu caches.

This patch removes the test on CONFIG_NETFILTER_DEBUG : No need to call
nf_hook_slow() at all if nf_hooks[pf][hook] is empty, this didnt give
useful information, but slowed down things a lot.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
CC: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 26 +++++++++++++++++++++-----
 net/netfilter/core.c      | 13 ++++++++++++-
 2 files changed, 33 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 857f5026ced6..b809265607d0 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -162,6 +162,24 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[];
 
 extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 
+#if defined(CONFIG_JUMP_LABEL)
+#include <linux/jump_label.h>
+extern struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
+{
+	if (__builtin_constant_p(pf) &&
+	    __builtin_constant_p(hook))
+		return static_branch(&nf_hooks_needed[pf][hook]);
+
+	return !list_empty(&nf_hooks[pf][hook]);
+}
+#else
+static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
+{
+	return !list_empty(&nf_hooks[pf][hook]);
+}
+#endif
+
 int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
 		 struct net_device *indev, struct net_device *outdev,
 		 int (*okfn)(struct sk_buff *), int thresh);
@@ -179,11 +197,9 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
 				 struct net_device *outdev,
 				 int (*okfn)(struct sk_buff *), int thresh)
 {
-#ifndef CONFIG_NETFILTER_DEBUG
-	if (list_empty(&nf_hooks[pf][hook]))
-		return 1;
-#endif
-	return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
+	if (nf_hooks_active(pf, hook))
+		return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
+	return 1;
 }
 
 static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index afca6c78948c..4aa0f4b19bd8 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -54,6 +54,12 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
 
 struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
 EXPORT_SYMBOL(nf_hooks);
+
+#if defined(CONFIG_JUMP_LABEL)
+struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+EXPORT_SYMBOL(nf_hooks_needed);
+#endif
+
 static DEFINE_MUTEX(nf_hook_mutex);
 
 int nf_register_hook(struct nf_hook_ops *reg)
@@ -70,6 +76,9 @@ int nf_register_hook(struct nf_hook_ops *reg)
 	}
 	list_add_rcu(&reg->list, elem->list.prev);
 	mutex_unlock(&nf_hook_mutex);
+#if defined(CONFIG_JUMP_LABEL)
+	jump_label_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
+#endif
 	return 0;
 }
 EXPORT_SYMBOL(nf_register_hook);
@@ -79,7 +88,9 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
 	mutex_lock(&nf_hook_mutex);
 	list_del_rcu(&reg->list);
 	mutex_unlock(&nf_hook_mutex);
-
+#if defined(CONFIG_JUMP_LABEL)
+	jump_label_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
+#endif
 	synchronize_net();
 }
 EXPORT_SYMBOL(nf_unregister_hook);
-- 
cgit v1.2.3


From 8c111b3f56332a216b18cd57950bdf04ac8f2a98 Mon Sep 17 00:00:00 2001
From: Yongqiang Yang <xiaoqiangnk@gmail.com>
Date: Sat, 19 Nov 2011 17:34:29 +0800
Subject: jbd: clear revoked flag on buffers before a new transaction started

Currently, we clear revoked flag only when a block is reused.  However,
this can tigger a false journal error.  Consider a situation when a block
is used as a meta block and is deleted(revoked) in ordered mode, then the
block is allocated as a data block to a file.  At this moment, user changes
the file's journal mode from ordered to journaled and truncates the file.
The block will be considered re-revoked by journal because it has revoked
flag still pending from the last transaction and an assertion triggers.

We fix the problem by keeping the revoked status more uptodate - we clear
revoked flag when switching revoke tables to reflect there is no revoked
buffers in current transaction any more.

Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/jbd/commit.c     |  6 ++++++
 fs/jbd/revoke.c     | 34 ++++++++++++++++++++++++++++++++++
 include/linux/jbd.h |  1 +
 3 files changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 8799207df058..f2b9a571f4cf 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -391,6 +391,12 @@ void journal_commit_transaction(journal_t *journal)
 
 	jbd_debug (3, "JBD: commit phase 1\n");
 
+	/*
+	 * Clear revoked flag to reflect there is no revoked buffers
+	 * in the next transaction which is going to be started.
+	 */
+	journal_clear_buffer_revoked_flags(journal);
+
 	/*
 	 * Switch to a new revoke table.
 	 */
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 305a90763154..25c713e7071c 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -47,6 +47,10 @@
  *   overwriting the new data.  We don't even need to clear the revoke
  *   bit here.
  *
+ * We cache revoke status of a buffer in the current transaction in b_states
+ * bits.  As the name says, revokevalid flag indicates that the cached revoke
+ * status of a buffer is valid and we can rely on the cached status.
+ *
  * Revoke information on buffers is a tri-state value:
  *
  * RevokeValid clear:	no cached revoke status, need to look it up
@@ -479,6 +483,36 @@ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
 	return did_revoke;
 }
 
+/*
+ * journal_clear_revoked_flags clears revoked flag of buffers in
+ * revoke table to reflect there is no revoked buffer in the next
+ * transaction which is going to be started.
+ */
+void journal_clear_buffer_revoked_flags(journal_t *journal)
+{
+	struct jbd_revoke_table_s *revoke = journal->j_revoke;
+	int i = 0;
+
+	for (i = 0; i < revoke->hash_size; i++) {
+		struct list_head *hash_list;
+		struct list_head *list_entry;
+		hash_list = &revoke->hash_table[i];
+
+		list_for_each(list_entry, hash_list) {
+			struct jbd_revoke_record_s *record;
+			struct buffer_head *bh;
+			record = (struct jbd_revoke_record_s *)list_entry;
+			bh = __find_get_block(journal->j_fs_dev,
+					      record->blocknr,
+					      journal->j_blocksize);
+			if (bh) {
+				clear_buffer_revoked(bh);
+				__brelse(bh);
+			}
+		}
+	}
+}
+
 /* journal_switch_revoke table select j_revoke for next transaction
  * we do not want to suspend any processing until all revokes are
  * written -bzzz
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index c7acdde3243d..492cc12244fd 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -913,6 +913,7 @@ extern int	journal_set_revoke(journal_t *, unsigned int, tid_t);
 extern int	journal_test_revoke(journal_t *, unsigned int, tid_t);
 extern void	journal_clear_revoke(journal_t *);
 extern void	journal_switch_revoke_table(journal_t *journal);
+extern void	journal_clear_buffer_revoked_flags(journal_t *journal);
 
 /*
  * The log thread user interface:
-- 
cgit v1.2.3


From 1e5f9a23430e64fb56d9d5d8e1ca165ba1cfeb75 Mon Sep 17 00:00:00 2001
From: Dave Martin <dave.martin@linaro.org>
Date: Wed, 5 Oct 2011 14:40:59 +0100
Subject: ARM: amba: Move definition of struct amba_id to mod_devicetable.h

The general kernel infrastructure for adding module alises during
module post processing expects the affected device type
identification structures in a common header
<linux/mod_devicetable.h>.

This patch simple moves struct amba_id to the common header, and
adds the appropriate include in <linux/amba/bus.h>.

Signed-off-by: Dave Martin <dave.martin@linaro.org>
---
 include/linux/amba/bus.h        |  7 +------
 include/linux/mod_devicetable.h | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index fcbbe71a3cc1..724c69c40bb8 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -16,6 +16,7 @@
 
 #include <linux/clk.h>
 #include <linux/device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/err.h>
 #include <linux/resource.h>
 #include <linux/regulator/consumer.h>
@@ -35,12 +36,6 @@ struct amba_device {
 	unsigned int		irq[AMBA_NR_IRQS];
 };
 
-struct amba_id {
-	unsigned int		id;
-	unsigned int		mask;
-	void			*data;
-};
-
 struct amba_driver {
 	struct device_driver	drv;
 	int			(*probe)(struct amba_device *, const struct amba_id *);
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 468819cdde87..83ac0713ed0a 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -542,4 +542,22 @@ struct isapnp_device_id {
 	kernel_ulong_t driver_data;	/* data private to the driver */
 };
 
+/**
+ * struct amba_id - identifies a device on an AMBA bus
+ * @id: The significant bits if the hardware device ID
+ * @mask: Bitmask specifying which bits of the id field are significant when
+ *	matching.  A driver binds to a device when ((hardware device ID) & mask)
+ *	== id.
+ * @data: Private data used by the driver.
+ */
+struct amba_id {
+	unsigned int		id;
+	unsigned int		mask;
+#ifndef __KERNEL__
+	kernel_ulong_t		data;
+#else
+	void			*data;
+#endif
+};
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
-- 
cgit v1.2.3


From 84e0cdb0a262483a3618091c43dae33d36226430 Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie@jamieiles.com>
Date: Tue, 8 Mar 2011 20:17:06 +0000
Subject: macb: unify at91 and avr32 platform data

Both at91 and avr32 defines its own platform data structure for
the macb driver and both share common structures though at91
includes a currently unused phy_irq_pin.  Create a common
macb_platform_data for macb that both at91 and avr32 can use.  In
future we can use this to support other architectures that use the
same IP block with the macb driver.

v2: rename eth_platform_data to macb_platform_data and allow at91_ether
to share the platform data with macb.

Signed-off-by: Jamie Iles <jamie@jamieiles.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Tested-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
---
 arch/arm/mach-at91/at91cap9_devices.c       |  6 +++---
 arch/arm/mach-at91/at91rm9200_devices.c     |  6 +++---
 arch/arm/mach-at91/at91sam9260_devices.c    |  6 +++---
 arch/arm/mach-at91/at91sam9263_devices.c    |  6 +++---
 arch/arm/mach-at91/at91sam9g45_devices.c    |  6 +++---
 arch/arm/mach-at91/board-1arm.c             |  2 +-
 arch/arm/mach-at91/board-afeb-9260v1.c      |  2 +-
 arch/arm/mach-at91/board-cam60.c            |  2 +-
 arch/arm/mach-at91/board-cap9adk.c          |  2 +-
 arch/arm/mach-at91/board-carmeva.c          |  2 +-
 arch/arm/mach-at91/board-cpu9krea.c         |  2 +-
 arch/arm/mach-at91/board-cpuat91.c          |  2 +-
 arch/arm/mach-at91/board-csb337.c           |  2 +-
 arch/arm/mach-at91/board-csb637.c           |  2 +-
 arch/arm/mach-at91/board-eb9200.c           |  2 +-
 arch/arm/mach-at91/board-ecbat91.c          |  2 +-
 arch/arm/mach-at91/board-eco920.c           |  2 +-
 arch/arm/mach-at91/board-foxg20.c           |  2 +-
 arch/arm/mach-at91/board-gsia18s.c          |  2 +-
 arch/arm/mach-at91/board-kafa.c             |  2 +-
 arch/arm/mach-at91/board-kb9202.c           |  2 +-
 arch/arm/mach-at91/board-neocore926.c       |  2 +-
 arch/arm/mach-at91/board-pcontrol-g20.c     |  2 +-
 arch/arm/mach-at91/board-picotux200.c       |  2 +-
 arch/arm/mach-at91/board-qil-a9260.c        |  2 +-
 arch/arm/mach-at91/board-rm9200dk.c         |  2 +-
 arch/arm/mach-at91/board-rm9200ek.c         |  2 +-
 arch/arm/mach-at91/board-rsi-ews.c          |  2 +-
 arch/arm/mach-at91/board-sam9-l9260.c       |  2 +-
 arch/arm/mach-at91/board-sam9260ek.c        |  2 +-
 arch/arm/mach-at91/board-sam9263ek.c        |  2 +-
 arch/arm/mach-at91/board-sam9g20ek.c        |  2 +-
 arch/arm/mach-at91/board-sam9m10g45ek.c     |  2 +-
 arch/arm/mach-at91/board-snapper9260.c      |  2 +-
 arch/arm/mach-at91/board-stamp9g20.c        |  2 +-
 arch/arm/mach-at91/board-usb-a926x.c        |  2 +-
 arch/arm/mach-at91/board-yl-9200.c          |  2 +-
 arch/arm/mach-at91/include/mach/board.h     | 14 ++------------
 arch/avr32/boards/atngw100/setup.c          |  2 +-
 arch/avr32/boards/atstk1000/atstk1002.c     |  2 +-
 arch/avr32/boards/favr-32/setup.c           |  2 +-
 arch/avr32/boards/hammerhead/setup.c        |  2 +-
 arch/avr32/boards/merisc/setup.c            |  2 +-
 arch/avr32/boards/mimc200/setup.c           |  2 +-
 arch/avr32/mach-at32ap/at32ap700x.c         |  8 ++++----
 arch/avr32/mach-at32ap/include/mach/board.h |  7 ++-----
 drivers/net/ethernet/cadence/at91_ether.c   |  3 ++-
 drivers/net/ethernet/cadence/at91_ether.h   |  4 +++-
 drivers/net/ethernet/cadence/macb.c         | 10 ++++------
 include/linux/platform_data/macb.h          | 17 +++++++++++++++++
 50 files changed, 87 insertions(+), 82 deletions(-)
 create mode 100644 include/linux/platform_data/macb.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-at91/at91cap9_devices.c b/arch/arm/mach-at91/at91cap9_devices.c
index adad70db70eb..695aecab0a67 100644
--- a/arch/arm/mach-at91/at91cap9_devices.c
+++ b/arch/arm/mach-at91/at91cap9_devices.c
@@ -200,7 +200,7 @@ void __init at91_add_device_usba(struct usba_platform_data *data) {}
 
 #if defined(CONFIG_MACB) || defined(CONFIG_MACB_MODULE)
 static u64 eth_dmamask = DMA_BIT_MASK(32);
-static struct at91_eth_data eth_data;
+static struct macb_platform_data eth_data;
 
 static struct resource eth_resources[] = {
 	[0] = {
@@ -227,7 +227,7 @@ static struct platform_device at91cap9_eth_device = {
 	.num_resources	= ARRAY_SIZE(eth_resources),
 };
 
-void __init at91_add_device_eth(struct at91_eth_data *data)
+void __init at91_add_device_eth(struct macb_platform_data *data)
 {
 	if (!data)
 		return;
@@ -264,7 +264,7 @@ void __init at91_add_device_eth(struct at91_eth_data *data)
 	platform_device_register(&at91cap9_eth_device);
 }
 #else
-void __init at91_add_device_eth(struct at91_eth_data *data) {}
+void __init at91_add_device_eth(struct macb_platform_data *data) {}
 #endif
 
 
diff --git a/arch/arm/mach-at91/at91rm9200_devices.c b/arch/arm/mach-at91/at91rm9200_devices.c
index 66591fa53e05..5610f14e342e 100644
--- a/arch/arm/mach-at91/at91rm9200_devices.c
+++ b/arch/arm/mach-at91/at91rm9200_devices.c
@@ -135,7 +135,7 @@ void __init at91_add_device_udc(struct at91_udc_data *data) {}
 
 #if defined(CONFIG_ARM_AT91_ETHER) || defined(CONFIG_ARM_AT91_ETHER_MODULE)
 static u64 eth_dmamask = DMA_BIT_MASK(32);
-static struct at91_eth_data eth_data;
+static struct macb_platform_data eth_data;
 
 static struct resource eth_resources[] = {
 	[0] = {
@@ -162,7 +162,7 @@ static struct platform_device at91rm9200_eth_device = {
 	.num_resources	= ARRAY_SIZE(eth_resources),
 };
 
-void __init at91_add_device_eth(struct at91_eth_data *data)
+void __init at91_add_device_eth(struct macb_platform_data *data)
 {
 	if (!data)
 		return;
@@ -199,7 +199,7 @@ void __init at91_add_device_eth(struct at91_eth_data *data)
 	platform_device_register(&at91rm9200_eth_device);
 }
 #else
-void __init at91_add_device_eth(struct at91_eth_data *data) {}
+void __init at91_add_device_eth(struct macb_platform_data *data) {}
 #endif
 
 
diff --git a/arch/arm/mach-at91/at91sam9260_devices.c b/arch/arm/mach-at91/at91sam9260_devices.c
index 25e3464fb07f..ff75f7d4091b 100644
--- a/arch/arm/mach-at91/at91sam9260_devices.c
+++ b/arch/arm/mach-at91/at91sam9260_devices.c
@@ -136,7 +136,7 @@ void __init at91_add_device_udc(struct at91_udc_data *data) {}
 
 #if defined(CONFIG_MACB) || defined(CONFIG_MACB_MODULE)
 static u64 eth_dmamask = DMA_BIT_MASK(32);
-static struct at91_eth_data eth_data;
+static struct macb_platform_data eth_data;
 
 static struct resource eth_resources[] = {
 	[0] = {
@@ -163,7 +163,7 @@ static struct platform_device at91sam9260_eth_device = {
 	.num_resources	= ARRAY_SIZE(eth_resources),
 };
 
-void __init at91_add_device_eth(struct at91_eth_data *data)
+void __init at91_add_device_eth(struct macb_platform_data *data)
 {
 	if (!data)
 		return;
@@ -200,7 +200,7 @@ void __init at91_add_device_eth(struct at91_eth_data *data)
 	platform_device_register(&at91sam9260_eth_device);
 }
 #else
-void __init at91_add_device_eth(struct at91_eth_data *data) {}
+void __init at91_add_device_eth(struct macb_platform_data *data) {}
 #endif
 
 
diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c
index ad017eb1f8df..68562ce2af94 100644
--- a/arch/arm/mach-at91/at91sam9263_devices.c
+++ b/arch/arm/mach-at91/at91sam9263_devices.c
@@ -144,7 +144,7 @@ void __init at91_add_device_udc(struct at91_udc_data *data) {}
 
 #if defined(CONFIG_MACB) || defined(CONFIG_MACB_MODULE)
 static u64 eth_dmamask = DMA_BIT_MASK(32);
-static struct at91_eth_data eth_data;
+static struct macb_platform_data eth_data;
 
 static struct resource eth_resources[] = {
 	[0] = {
@@ -171,7 +171,7 @@ static struct platform_device at91sam9263_eth_device = {
 	.num_resources	= ARRAY_SIZE(eth_resources),
 };
 
-void __init at91_add_device_eth(struct at91_eth_data *data)
+void __init at91_add_device_eth(struct macb_platform_data *data)
 {
 	if (!data)
 		return;
@@ -208,7 +208,7 @@ void __init at91_add_device_eth(struct at91_eth_data *data)
 	platform_device_register(&at91sam9263_eth_device);
 }
 #else
-void __init at91_add_device_eth(struct at91_eth_data *data) {}
+void __init at91_add_device_eth(struct macb_platform_data *data) {}
 #endif
 
 
diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c
index 09a16d6bd5cd..e2cb835c4d7c 100644
--- a/arch/arm/mach-at91/at91sam9g45_devices.c
+++ b/arch/arm/mach-at91/at91sam9g45_devices.c
@@ -284,7 +284,7 @@ void __init at91_add_device_usba(struct usba_platform_data *data) {}
 
 #if defined(CONFIG_MACB) || defined(CONFIG_MACB_MODULE)
 static u64 eth_dmamask = DMA_BIT_MASK(32);
-static struct at91_eth_data eth_data;
+static struct macb_platform_data eth_data;
 
 static struct resource eth_resources[] = {
 	[0] = {
@@ -311,7 +311,7 @@ static struct platform_device at91sam9g45_eth_device = {
 	.num_resources	= ARRAY_SIZE(eth_resources),
 };
 
-void __init at91_add_device_eth(struct at91_eth_data *data)
+void __init at91_add_device_eth(struct macb_platform_data *data)
 {
 	if (!data)
 		return;
@@ -348,7 +348,7 @@ void __init at91_add_device_eth(struct at91_eth_data *data)
 	platform_device_register(&at91sam9g45_eth_device);
 }
 #else
-void __init at91_add_device_eth(struct at91_eth_data *data) {}
+void __init at91_add_device_eth(struct macb_platform_data *data) {}
 #endif
 
 
diff --git a/arch/arm/mach-at91/board-1arm.c b/arch/arm/mach-at91/board-1arm.c
index 367d5cd5e362..a60d98d7c3e2 100644
--- a/arch/arm/mach-at91/board-1arm.c
+++ b/arch/arm/mach-at91/board-1arm.c
@@ -63,7 +63,7 @@ static void __init onearm_init_early(void)
 	at91_set_serial_console(0);
 }
 
-static struct at91_eth_data __initdata onearm_eth_data = {
+static struct macb_platform_data __initdata onearm_eth_data = {
 	.phy_irq_pin	= AT91_PIN_PC4,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-afeb-9260v1.c b/arch/arm/mach-at91/board-afeb-9260v1.c
index 4282d96dffa8..17fc77925707 100644
--- a/arch/arm/mach-at91/board-afeb-9260v1.c
+++ b/arch/arm/mach-at91/board-afeb-9260v1.c
@@ -103,7 +103,7 @@ static struct spi_board_info afeb9260_spi_devices[] = {
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata afeb9260_macb_data = {
+static struct macb_platform_data __initdata afeb9260_macb_data = {
 	.phy_irq_pin	= AT91_PIN_PA9,
 	.is_rmii	= 0,
 };
diff --git a/arch/arm/mach-at91/board-cam60.c b/arch/arm/mach-at91/board-cam60.c
index f90cfb32bad2..2037d2c40eb4 100644
--- a/arch/arm/mach-at91/board-cam60.c
+++ b/arch/arm/mach-at91/board-cam60.c
@@ -115,7 +115,7 @@ static struct spi_board_info cam60_spi_devices[] __initdata = {
 /*
  * MACB Ethernet device
  */
-static struct __initdata at91_eth_data cam60_macb_data = {
+static struct __initdata macb_platform_data cam60_macb_data = {
 	.phy_irq_pin	= AT91_PIN_PB5,
 	.is_rmii	= 0,
 };
diff --git a/arch/arm/mach-at91/board-cap9adk.c b/arch/arm/mach-at91/board-cap9adk.c
index 5dffd3be62d2..af5520c366fe 100644
--- a/arch/arm/mach-at91/board-cap9adk.c
+++ b/arch/arm/mach-at91/board-cap9adk.c
@@ -153,7 +153,7 @@ static struct at91_mmc_data __initdata cap9adk_mmc_data = {
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata cap9adk_macb_data = {
+static struct macb_platform_data __initdata cap9adk_macb_data = {
 	.is_rmii	= 1,
 };
 
diff --git a/arch/arm/mach-at91/board-carmeva.c b/arch/arm/mach-at91/board-carmeva.c
index 774c87fcbd5b..529b356cdb7d 100644
--- a/arch/arm/mach-at91/board-carmeva.c
+++ b/arch/arm/mach-at91/board-carmeva.c
@@ -57,7 +57,7 @@ static void __init carmeva_init_early(void)
 	at91_set_serial_console(0);
 }
 
-static struct at91_eth_data __initdata carmeva_eth_data = {
+static struct macb_platform_data __initdata carmeva_eth_data = {
 	.phy_irq_pin	= AT91_PIN_PC4,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-cpu9krea.c b/arch/arm/mach-at91/board-cpu9krea.c
index fc885a4ce243..04d2b9b50464 100644
--- a/arch/arm/mach-at91/board-cpu9krea.c
+++ b/arch/arm/mach-at91/board-cpu9krea.c
@@ -99,7 +99,7 @@ static struct at91_udc_data __initdata cpu9krea_udc_data = {
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata cpu9krea_macb_data = {
+static struct macb_platform_data __initdata cpu9krea_macb_data = {
 	.is_rmii	= 1,
 };
 
diff --git a/arch/arm/mach-at91/board-cpuat91.c b/arch/arm/mach-at91/board-cpuat91.c
index d35e65b08ccd..7a4c82e8da51 100644
--- a/arch/arm/mach-at91/board-cpuat91.c
+++ b/arch/arm/mach-at91/board-cpuat91.c
@@ -82,7 +82,7 @@ static void __init cpuat91_init_early(void)
 	at91_set_serial_console(0);
 }
 
-static struct at91_eth_data __initdata cpuat91_eth_data = {
+static struct macb_platform_data __initdata cpuat91_eth_data = {
 	.is_rmii	= 1,
 };
 
diff --git a/arch/arm/mach-at91/board-csb337.c b/arch/arm/mach-at91/board-csb337.c
index c3936665e645..b004b20b8e42 100644
--- a/arch/arm/mach-at91/board-csb337.c
+++ b/arch/arm/mach-at91/board-csb337.c
@@ -58,7 +58,7 @@ static void __init csb337_init_early(void)
 	at91_set_serial_console(0);
 }
 
-static struct at91_eth_data __initdata csb337_eth_data = {
+static struct macb_platform_data __initdata csb337_eth_data = {
 	.phy_irq_pin	= AT91_PIN_PC2,
 	.is_rmii	= 0,
 };
diff --git a/arch/arm/mach-at91/board-csb637.c b/arch/arm/mach-at91/board-csb637.c
index 586100e2acbb..e966de5219c7 100644
--- a/arch/arm/mach-at91/board-csb637.c
+++ b/arch/arm/mach-at91/board-csb637.c
@@ -52,7 +52,7 @@ static void __init csb637_init_early(void)
 	at91_set_serial_console(0);
 }
 
-static struct at91_eth_data __initdata csb637_eth_data = {
+static struct macb_platform_data __initdata csb637_eth_data = {
 	.phy_irq_pin	= AT91_PIN_PC0,
 	.is_rmii	= 0,
 };
diff --git a/arch/arm/mach-at91/board-eb9200.c b/arch/arm/mach-at91/board-eb9200.c
index 45db7a3dbef0..3788fa527121 100644
--- a/arch/arm/mach-at91/board-eb9200.c
+++ b/arch/arm/mach-at91/board-eb9200.c
@@ -60,7 +60,7 @@ static void __init eb9200_init_early(void)
 	at91_set_serial_console(0);
 }
 
-static struct at91_eth_data __initdata eb9200_eth_data = {
+static struct macb_platform_data __initdata eb9200_eth_data = {
 	.phy_irq_pin	= AT91_PIN_PC4,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-ecbat91.c b/arch/arm/mach-at91/board-ecbat91.c
index 2f9c16d29212..af7622eae1a9 100644
--- a/arch/arm/mach-at91/board-ecbat91.c
+++ b/arch/arm/mach-at91/board-ecbat91.c
@@ -64,7 +64,7 @@ static void __init ecb_at91init_early(void)
 	at91_set_serial_console(0);
 }
 
-static struct at91_eth_data __initdata ecb_at91eth_data = {
+static struct macb_platform_data __initdata ecb_at91eth_data = {
 	.phy_irq_pin	= AT91_PIN_PC4,
 	.is_rmii	= 0,
 };
diff --git a/arch/arm/mach-at91/board-eco920.c b/arch/arm/mach-at91/board-eco920.c
index 8252c722607b..8e75867d1d18 100644
--- a/arch/arm/mach-at91/board-eco920.c
+++ b/arch/arm/mach-at91/board-eco920.c
@@ -47,7 +47,7 @@ static void __init eco920_init_early(void)
 	at91_set_serial_console(0);
 }
 
-static struct at91_eth_data __initdata eco920_eth_data = {
+static struct macb_platform_data __initdata eco920_eth_data = {
 	.phy_irq_pin	= AT91_PIN_PC2,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-foxg20.c b/arch/arm/mach-at91/board-foxg20.c
index f27d1a780cfa..de8e09642f4e 100644
--- a/arch/arm/mach-at91/board-foxg20.c
+++ b/arch/arm/mach-at91/board-foxg20.c
@@ -135,7 +135,7 @@ static struct spi_board_info foxg20_spi_devices[] = {
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata foxg20_macb_data = {
+static struct macb_platform_data __initdata foxg20_macb_data = {
 	.phy_irq_pin	= AT91_PIN_PA7,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-gsia18s.c b/arch/arm/mach-at91/board-gsia18s.c
index 2e95949737e6..51c82f151119 100644
--- a/arch/arm/mach-at91/board-gsia18s.c
+++ b/arch/arm/mach-at91/board-gsia18s.c
@@ -93,7 +93,7 @@ static struct at91_udc_data __initdata udc_data = {
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata macb_data = {
+static struct macb_platform_data __initdata macb_data = {
 	.phy_irq_pin	= AT91_PIN_PA28,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-kafa.c b/arch/arm/mach-at91/board-kafa.c
index 3bae73e63633..9628a3defcf4 100644
--- a/arch/arm/mach-at91/board-kafa.c
+++ b/arch/arm/mach-at91/board-kafa.c
@@ -61,7 +61,7 @@ static void __init kafa_init_early(void)
 	at91_set_serial_console(0);
 }
 
-static struct at91_eth_data __initdata kafa_eth_data = {
+static struct macb_platform_data __initdata kafa_eth_data = {
 	.phy_irq_pin	= AT91_PIN_PC4,
 	.is_rmii	= 0,
 };
diff --git a/arch/arm/mach-at91/board-kb9202.c b/arch/arm/mach-at91/board-kb9202.c
index e61351ffad50..5ba5244cb632 100644
--- a/arch/arm/mach-at91/board-kb9202.c
+++ b/arch/arm/mach-at91/board-kb9202.c
@@ -69,7 +69,7 @@ static void __init kb9202_init_early(void)
 	at91_set_serial_console(0);
 }
 
-static struct at91_eth_data __initdata kb9202_eth_data = {
+static struct macb_platform_data __initdata kb9202_eth_data = {
 	.phy_irq_pin	= AT91_PIN_PB29,
 	.is_rmii	= 0,
 };
diff --git a/arch/arm/mach-at91/board-neocore926.c b/arch/arm/mach-at91/board-neocore926.c
index ef816c17dc61..56e7aee11b59 100644
--- a/arch/arm/mach-at91/board-neocore926.c
+++ b/arch/arm/mach-at91/board-neocore926.c
@@ -155,7 +155,7 @@ static struct at91_mmc_data __initdata neocore926_mmc_data = {
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata neocore926_macb_data = {
+static struct macb_platform_data __initdata neocore926_macb_data = {
 	.phy_irq_pin	= AT91_PIN_PE31,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-pcontrol-g20.c b/arch/arm/mach-at91/board-pcontrol-g20.c
index 49e3f699b48e..c545a3e635a4 100644
--- a/arch/arm/mach-at91/board-pcontrol-g20.c
+++ b/arch/arm/mach-at91/board-pcontrol-g20.c
@@ -122,7 +122,7 @@ static struct at91_udc_data __initdata pcontrol_g20_udc_data = {
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata macb_data = {
+static struct macb_platform_data __initdata macb_data = {
 	.phy_irq_pin	= AT91_PIN_PA28,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-picotux200.c b/arch/arm/mach-at91/board-picotux200.c
index 0a8fe6a1b7c8..dc18759a24b4 100644
--- a/arch/arm/mach-at91/board-picotux200.c
+++ b/arch/arm/mach-at91/board-picotux200.c
@@ -60,7 +60,7 @@ static void __init picotux200_init_early(void)
 	at91_set_serial_console(0);
 }
 
-static struct at91_eth_data __initdata picotux200_eth_data = {
+static struct macb_platform_data __initdata picotux200_eth_data = {
 	.phy_irq_pin	= AT91_PIN_PC4,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-qil-a9260.c b/arch/arm/mach-at91/board-qil-a9260.c
index 07421bdb88ea..5444d6ac514a 100644
--- a/arch/arm/mach-at91/board-qil-a9260.c
+++ b/arch/arm/mach-at91/board-qil-a9260.c
@@ -104,7 +104,7 @@ static struct spi_board_info ek_spi_devices[] = {
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata ek_macb_data = {
+static struct macb_platform_data __initdata ek_macb_data = {
 	.phy_irq_pin	= AT91_PIN_PA31,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-rm9200dk.c b/arch/arm/mach-at91/board-rm9200dk.c
index 80a8c9c6e922..022d0cebda9d 100644
--- a/arch/arm/mach-at91/board-rm9200dk.c
+++ b/arch/arm/mach-at91/board-rm9200dk.c
@@ -65,7 +65,7 @@ static void __init dk_init_early(void)
 	at91_set_serial_console(0);
 }
 
-static struct at91_eth_data __initdata dk_eth_data = {
+static struct macb_platform_data __initdata dk_eth_data = {
 	.phy_irq_pin	= AT91_PIN_PC4,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-rm9200ek.c b/arch/arm/mach-at91/board-rm9200ek.c
index 99fd7f8aee0e..ed275861adef 100644
--- a/arch/arm/mach-at91/board-rm9200ek.c
+++ b/arch/arm/mach-at91/board-rm9200ek.c
@@ -65,7 +65,7 @@ static void __init ek_init_early(void)
 	at91_set_serial_console(0);
 }
 
-static struct at91_eth_data __initdata ek_eth_data = {
+static struct macb_platform_data __initdata ek_eth_data = {
 	.phy_irq_pin	= AT91_PIN_PC4,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-rsi-ews.c b/arch/arm/mach-at91/board-rsi-ews.c
index e927df0175df..ed3b21f77674 100644
--- a/arch/arm/mach-at91/board-rsi-ews.c
+++ b/arch/arm/mach-at91/board-rsi-ews.c
@@ -60,7 +60,7 @@ static void __init rsi_ews_init_early(void)
 /*
  * Ethernet
  */
-static struct at91_eth_data rsi_ews_eth_data __initdata = {
+static struct macb_platform_data rsi_ews_eth_data __initdata = {
 	.phy_irq_pin	= AT91_PIN_PC4,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-sam9-l9260.c b/arch/arm/mach-at91/board-sam9-l9260.c
index 072d53af98d9..3e4b50e6f6ab 100644
--- a/arch/arm/mach-at91/board-sam9-l9260.c
+++ b/arch/arm/mach-at91/board-sam9-l9260.c
@@ -109,7 +109,7 @@ static struct spi_board_info ek_spi_devices[] = {
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata ek_macb_data = {
+static struct macb_platform_data __initdata ek_macb_data = {
 	.phy_irq_pin	= AT91_PIN_PA7,
 	.is_rmii	= 0,
 };
diff --git a/arch/arm/mach-at91/board-sam9260ek.c b/arch/arm/mach-at91/board-sam9260ek.c
index 4f10181a0782..13478e14a543 100644
--- a/arch/arm/mach-at91/board-sam9260ek.c
+++ b/arch/arm/mach-at91/board-sam9260ek.c
@@ -151,7 +151,7 @@ static struct spi_board_info ek_spi_devices[] = {
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata ek_macb_data = {
+static struct macb_platform_data __initdata ek_macb_data = {
 	.phy_irq_pin	= AT91_PIN_PA7,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-sam9263ek.c b/arch/arm/mach-at91/board-sam9263ek.c
index bccdcf23caa1..fcf194e6e4fe 100644
--- a/arch/arm/mach-at91/board-sam9263ek.c
+++ b/arch/arm/mach-at91/board-sam9263ek.c
@@ -158,7 +158,7 @@ static struct at91_mmc_data __initdata ek_mmc_data = {
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata ek_macb_data = {
+static struct macb_platform_data __initdata ek_macb_data = {
 	.phy_irq_pin	= AT91_PIN_PE31,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-sam9g20ek.c b/arch/arm/mach-at91/board-sam9g20ek.c
index 64fc75c9d0ac..78d27cc3cc09 100644
--- a/arch/arm/mach-at91/board-sam9g20ek.c
+++ b/arch/arm/mach-at91/board-sam9g20ek.c
@@ -123,7 +123,7 @@ static struct spi_board_info ek_spi_devices[] = {
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata ek_macb_data = {
+static struct macb_platform_data __initdata ek_macb_data = {
 	.phy_irq_pin	= AT91_PIN_PA7,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-sam9m10g45ek.c b/arch/arm/mach-at91/board-sam9m10g45ek.c
index 92de9127923a..4e1ee9d87096 100644
--- a/arch/arm/mach-at91/board-sam9m10g45ek.c
+++ b/arch/arm/mach-at91/board-sam9m10g45ek.c
@@ -115,7 +115,7 @@ static struct mci_platform_data __initdata mci1_data = {
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata ek_macb_data = {
+static struct macb_platform_data __initdata ek_macb_data = {
 	.phy_irq_pin	= AT91_PIN_PD5,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-snapper9260.c b/arch/arm/mach-at91/board-snapper9260.c
index 0df01c6e2d0c..fbec934a7ce9 100644
--- a/arch/arm/mach-at91/board-snapper9260.c
+++ b/arch/arm/mach-at91/board-snapper9260.c
@@ -65,7 +65,7 @@ static struct at91_udc_data __initdata snapper9260_udc_data = {
 	.vbus_polled		= 1,
 };
 
-static struct at91_eth_data snapper9260_macb_data = {
+static struct macb_platform_data snapper9260_macb_data = {
 	.is_rmii	= 1,
 };
 
diff --git a/arch/arm/mach-at91/board-stamp9g20.c b/arch/arm/mach-at91/board-stamp9g20.c
index 936e5fd7f406..7c06c07d872b 100644
--- a/arch/arm/mach-at91/board-stamp9g20.c
+++ b/arch/arm/mach-at91/board-stamp9g20.c
@@ -157,7 +157,7 @@ static struct at91_udc_data __initdata stamp9g20evb_udc_data = {
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata macb_data = {
+static struct macb_platform_data __initdata macb_data = {
 	.phy_irq_pin	= AT91_PIN_PA28,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-usb-a926x.c b/arch/arm/mach-at91/board-usb-a926x.c
index 0a20bab21f99..3d84233f78eb 100644
--- a/arch/arm/mach-at91/board-usb-a926x.c
+++ b/arch/arm/mach-at91/board-usb-a926x.c
@@ -146,7 +146,7 @@ static void __init ek_add_device_spi(void)
 /*
  * MACB Ethernet device
  */
-static struct at91_eth_data __initdata ek_macb_data = {
+static struct macb_platform_data __initdata ek_macb_data = {
 	.phy_irq_pin	= AT91_PIN_PE31,
 	.is_rmii	= 1,
 };
diff --git a/arch/arm/mach-at91/board-yl-9200.c b/arch/arm/mach-at91/board-yl-9200.c
index 12a3f955162b..2c40a21b2794 100644
--- a/arch/arm/mach-at91/board-yl-9200.c
+++ b/arch/arm/mach-at91/board-yl-9200.c
@@ -110,7 +110,7 @@ static struct gpio_led yl9200_leds[] = {
 /*
  * Ethernet
  */
-static struct at91_eth_data __initdata yl9200_eth_data = {
+static struct macb_platform_data __initdata yl9200_eth_data = {
 	.phy_irq_pin		= AT91_PIN_PB28,
 	.is_rmii		= 1,
 };
diff --git a/arch/arm/mach-at91/include/mach/board.h b/arch/arm/mach-at91/include/mach/board.h
index eac92e995bb5..e209a2992245 100644
--- a/arch/arm/mach-at91/include/mach/board.h
+++ b/arch/arm/mach-at91/include/mach/board.h
@@ -40,6 +40,7 @@
 #include <linux/atmel-mci.h>
 #include <sound/atmel-ac97c.h>
 #include <linux/serial.h>
+#include <linux/platform_data/macb.h>
 
  /* USB Device */
 struct at91_udc_data {
@@ -81,18 +82,7 @@ extern void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data)
   /* atmel-mci platform config */
 extern void __init at91_add_device_mci(short mmc_id, struct mci_platform_data *data);
 
- /* Ethernet (EMAC & MACB) */
-struct at91_eth_data {
-	u32		phy_mask;
-	u8		phy_irq_pin;	/* PHY IRQ */
-	u8		is_rmii;	/* using RMII interface? */
-};
-extern void __init at91_add_device_eth(struct at91_eth_data *data);
-
-#if defined(CONFIG_ARCH_AT91SAM9260) || defined(CONFIG_ARCH_AT91SAM9263) || defined(CONFIG_ARCH_AT91SAM9G20) || defined(CONFIG_ARCH_AT91CAP9) \
-	|| defined(CONFIG_ARCH_AT91SAM9G45)
-#define eth_platform_data	at91_eth_data
-#endif
+extern void __init at91_add_device_eth(struct macb_platform_data *data);
 
  /* USB Host */
 struct at91_usbh_data {
diff --git a/arch/avr32/boards/atngw100/setup.c b/arch/avr32/boards/atngw100/setup.c
index 1f17bde52cd4..7c756fb189f7 100644
--- a/arch/avr32/boards/atngw100/setup.c
+++ b/arch/avr32/boards/atngw100/setup.c
@@ -109,7 +109,7 @@ struct eth_addr {
 	u8 addr[6];
 };
 static struct eth_addr __initdata hw_addr[2];
-static struct eth_platform_data __initdata eth_data[2];
+static struct macb_platform_data __initdata eth_data[2];
 
 static struct spi_board_info spi0_board_info[] __initdata = {
 	{
diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c
index 4643ff5107c9..c56ddac85d61 100644
--- a/arch/avr32/boards/atstk1000/atstk1002.c
+++ b/arch/avr32/boards/atstk1000/atstk1002.c
@@ -105,7 +105,7 @@ struct eth_addr {
 };
 
 static struct eth_addr __initdata hw_addr[2];
-static struct eth_platform_data __initdata eth_data[2] = {
+static struct macb_platform_data __initdata eth_data[2] = {
 	{
 		/*
 		 * The MDIO pullups on STK1000 are a bit too weak for
diff --git a/arch/avr32/boards/favr-32/setup.c b/arch/avr32/boards/favr-32/setup.c
index 86fab77a5a00..27bd6fbe21cb 100644
--- a/arch/avr32/boards/favr-32/setup.c
+++ b/arch/avr32/boards/favr-32/setup.c
@@ -50,7 +50,7 @@ struct eth_addr {
 	u8 addr[6];
 };
 static struct eth_addr __initdata hw_addr[1];
-static struct eth_platform_data __initdata eth_data[1] = {
+static struct macb_platform_data __initdata eth_data[1] = {
 	{
 		.phy_mask	= ~(1U << 1),
 	},
diff --git a/arch/avr32/boards/hammerhead/setup.c b/arch/avr32/boards/hammerhead/setup.c
index da14fbdd4e8e..9d1efd1cd425 100644
--- a/arch/avr32/boards/hammerhead/setup.c
+++ b/arch/avr32/boards/hammerhead/setup.c
@@ -102,7 +102,7 @@ struct eth_addr {
 };
 
 static struct eth_addr __initdata hw_addr[1];
-static struct eth_platform_data __initdata eth_data[1];
+static struct macb_platform_data __initdata eth_data[1];
 
 /*
  * The next two functions should go away as the boot loader is
diff --git a/arch/avr32/boards/merisc/setup.c b/arch/avr32/boards/merisc/setup.c
index e61bc948f959..ed137e335796 100644
--- a/arch/avr32/boards/merisc/setup.c
+++ b/arch/avr32/boards/merisc/setup.c
@@ -52,7 +52,7 @@ struct eth_addr {
 };
 
 static struct eth_addr __initdata hw_addr[2];
-static struct eth_platform_data __initdata eth_data[2];
+static struct macb_platform_data __initdata eth_data[2];
 
 static int ads7846_get_pendown_state_PB26(void)
 {
diff --git a/arch/avr32/boards/mimc200/setup.c b/arch/avr32/boards/mimc200/setup.c
index c4da5cba2dbf..05358aa5ef7d 100644
--- a/arch/avr32/boards/mimc200/setup.c
+++ b/arch/avr32/boards/mimc200/setup.c
@@ -86,7 +86,7 @@ struct eth_addr {
 	u8 addr[6];
 };
 static struct eth_addr __initdata hw_addr[2];
-static struct eth_platform_data __initdata eth_data[2];
+static struct macb_platform_data __initdata eth_data[2];
 
 static struct spi_eeprom eeprom_25lc010 = {
 		.name = "25lc010",
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index 7fbf0dcb9afe..402a7bb72669 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -1067,7 +1067,7 @@ void __init at32_setup_serial_console(unsigned int usart_id)
  * -------------------------------------------------------------------- */
 
 #ifdef CONFIG_CPU_AT32AP7000
-static struct eth_platform_data macb0_data;
+static struct macb_platform_data macb0_data;
 static struct resource macb0_resource[] = {
 	PBMEM(0xfff01800),
 	IRQ(25),
@@ -1076,7 +1076,7 @@ DEFINE_DEV_DATA(macb, 0);
 DEV_CLK(hclk, macb0, hsb, 8);
 DEV_CLK(pclk, macb0, pbb, 6);
 
-static struct eth_platform_data macb1_data;
+static struct macb_platform_data macb1_data;
 static struct resource macb1_resource[] = {
 	PBMEM(0xfff01c00),
 	IRQ(26),
@@ -1086,7 +1086,7 @@ DEV_CLK(hclk, macb1, hsb, 9);
 DEV_CLK(pclk, macb1, pbb, 7);
 
 struct platform_device *__init
-at32_add_device_eth(unsigned int id, struct eth_platform_data *data)
+at32_add_device_eth(unsigned int id, struct macb_platform_data *data)
 {
 	struct platform_device *pdev;
 	u32 pin_mask;
@@ -1163,7 +1163,7 @@ at32_add_device_eth(unsigned int id, struct eth_platform_data *data)
 		return NULL;
 	}
 
-	memcpy(pdev->dev.platform_data, data, sizeof(struct eth_platform_data));
+	memcpy(pdev->dev.platform_data, data, sizeof(struct macb_platform_data));
 	platform_device_register(pdev);
 
 	return pdev;
diff --git a/arch/avr32/mach-at32ap/include/mach/board.h b/arch/avr32/mach-at32ap/include/mach/board.h
index 5d7ffca7d69f..67b111ce332d 100644
--- a/arch/avr32/mach-at32ap/include/mach/board.h
+++ b/arch/avr32/mach-at32ap/include/mach/board.h
@@ -6,6 +6,7 @@
 
 #include <linux/types.h>
 #include <linux/serial.h>
+#include <linux/platform_data/macb.h>
 
 #define GPIO_PIN_NONE	(-1)
 
@@ -42,12 +43,8 @@ struct atmel_uart_data {
 void at32_map_usart(unsigned int hw_id, unsigned int line, int flags);
 struct platform_device *at32_add_device_usart(unsigned int id);
 
-struct eth_platform_data {
-	u32	phy_mask;
-	u8	is_rmii;
-};
 struct platform_device *
-at32_add_device_eth(unsigned int id, struct eth_platform_data *data);
+at32_add_device_eth(unsigned int id, struct macb_platform_data *data);
 
 struct spi_board_info;
 struct platform_device *
diff --git a/drivers/net/ethernet/cadence/at91_ether.c b/drivers/net/ethernet/cadence/at91_ether.c
index 56624d303487..dfeb46cb3f74 100644
--- a/drivers/net/ethernet/cadence/at91_ether.c
+++ b/drivers/net/ethernet/cadence/at91_ether.c
@@ -26,6 +26,7 @@
 #include <linux/skbuff.h>
 #include <linux/dma-mapping.h>
 #include <linux/ethtool.h>
+#include <linux/platform_data/macb.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/gfp.h>
@@ -984,7 +985,7 @@ static const struct net_device_ops at91ether_netdev_ops = {
 static int __init at91ether_setup(unsigned long phy_type, unsigned short phy_address,
 			struct platform_device *pdev, struct clk *ether_clk)
 {
-	struct at91_eth_data *board_data = pdev->dev.platform_data;
+	struct macb_platform_data *board_data = pdev->dev.platform_data;
 	struct net_device *dev;
 	struct at91_private *lp;
 	unsigned int val;
diff --git a/drivers/net/ethernet/cadence/at91_ether.h b/drivers/net/ethernet/cadence/at91_ether.h
index 353f4dab62be..3725fbb0defe 100644
--- a/drivers/net/ethernet/cadence/at91_ether.h
+++ b/drivers/net/ethernet/cadence/at91_ether.h
@@ -85,7 +85,9 @@ struct recv_desc_bufs
 struct at91_private
 {
 	struct mii_if_info mii;			/* ethtool support */
-	struct at91_eth_data board_data;	/* board-specific configuration */
+	struct macb_platform_data board_data;	/* board-specific
+						 * configuration (shared with
+						 * macb for common data */
 	struct clk *ether_clk;			/* clock */
 
 	/* PHY */
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index b0fa47870add..d97d9ce986f8 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -19,12 +19,10 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/dma-mapping.h>
+#include <linux/platform_data/macb.h>
 #include <linux/platform_device.h>
 #include <linux/phy.h>
 
-#include <mach/board.h>
-#include <mach/cpu.h>
-
 #include "macb.h"
 
 #define RX_BUFFER_SIZE		128
@@ -191,7 +189,7 @@ static int macb_mii_probe(struct net_device *dev)
 {
 	struct macb *bp = netdev_priv(dev);
 	struct phy_device *phydev;
-	struct eth_platform_data *pdata;
+	struct macb_platform_data *pdata;
 	int ret;
 
 	phydev = phy_find_first(bp->mii_bus);
@@ -228,7 +226,7 @@ static int macb_mii_probe(struct net_device *dev)
 
 static int macb_mii_init(struct macb *bp)
 {
-	struct eth_platform_data *pdata;
+	struct macb_platform_data *pdata;
 	int err = -ENXIO, i;
 
 	/* Enable management port */
@@ -1119,7 +1117,7 @@ static const struct net_device_ops macb_netdev_ops = {
 
 static int __init macb_probe(struct platform_device *pdev)
 {
-	struct eth_platform_data *pdata;
+	struct macb_platform_data *pdata;
 	struct resource *regs;
 	struct net_device *dev;
 	struct macb *bp;
diff --git a/include/linux/platform_data/macb.h b/include/linux/platform_data/macb.h
new file mode 100644
index 000000000000..e7c748fb6053
--- /dev/null
+++ b/include/linux/platform_data/macb.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __MACB_PDATA_H__
+#define __MACB_PDATA_H__
+
+struct macb_platform_data {
+	u32		phy_mask;
+	u8		phy_irq_pin;	/* PHY IRQ */
+	u8		is_rmii;	/* using RMII interface? */
+};
+
+#endif /* __MACB_PDATA_H__ */
-- 
cgit v1.2.3


From 570e57bcbcc4df5581b1e9c806ab2b16e96ea7d3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 21 Nov 2011 19:51:34 +0000
Subject: atm: use SKB_TRUESIZE() in atm_guess_pdu2truesize()

SKB_TRUESIZE() provides a better approximation of expected skb truesize.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atmdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 49a83ca900ba..43ea1b2de3ee 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -452,7 +452,7 @@ void atm_dev_release_vccs(struct atm_dev *dev);
 
 static inline int atm_guess_pdu2truesize(int size)
 {
-	return SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info);
+	return SKB_TRUESIZE(size);
 }
 
 
-- 
cgit v1.2.3


From 5bc1421e34ecfe0bd4b26dc3232b7d5e25179144 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 22 Nov 2011 05:10:51 +0000
Subject: net: add network priority cgroup infrastructure (v4)

This patch adds in the infrastructure code to create the network priority
cgroup.  The cgroup, in addition to the standard processes file creates two
control files:

1) prioidx - This is a read-only file that exports the index of this cgroup.
This is a value that is both arbitrary and unique to a cgroup in this subsystem,
and is used to index the per-device priority map

2) priomap - This is a writeable file.  On read it reports a table of 2-tuples
<name:priority> where name is the name of a network interface and priority is
indicates the priority assigned to frames egresessing on the named interface and
originating from a pid in this cgroup

This cgroup allows for skb priority to be set prior to a root qdisc getting
selected. This is benenficial for DCB enabled systems, in that it allows for any
application to use dcb configured priorities so without application modification

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
CC: Robert Love <robert.w.love@intel.com>
CC: "David S. Miller" <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cgroup_subsys.h |   8 +
 include/linux/netdevice.h     |   4 +
 include/net/netprio_cgroup.h  |  65 ++++++++
 include/net/sock.h            |   3 +
 net/Kconfig                   |   7 +
 net/core/Makefile             |   1 +
 net/core/dev.c                |  14 ++
 net/core/netprio_cgroup.c     | 344 ++++++++++++++++++++++++++++++++++++++++++
 net/core/sock.c               |  22 ++-
 net/socket.c                  |   2 +
 10 files changed, 469 insertions(+), 1 deletion(-)
 create mode 100644 include/net/netprio_cgroup.h
 create mode 100644 net/core/netprio_cgroup.c

(limited to 'include/linux')

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index ac663c18776c..0bd390ce98b2 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -59,8 +59,16 @@ SUBSYS(net_cls)
 SUBSYS(blkio)
 #endif
 
+/* */
+
 #ifdef CONFIG_CGROUP_PERF
 SUBSYS(perf)
 #endif
 
 /* */
+
+#ifdef CONFIG_NETPRIO_CGROUP
+SUBSYS(net_prio)
+#endif
+
+/* */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3eb383a9b5ed..999bb264fe27 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -50,6 +50,7 @@
 #ifdef CONFIG_DCB
 #include <net/dcbnl.h>
 #endif
+#include <net/netprio_cgroup.h>
 
 #include <linux/netdev_features.h>
 
@@ -1244,6 +1245,9 @@ struct net_device {
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	/* max exchange id for FCoE LRO by ddp */
 	unsigned int		fcoe_ddp_xid;
+#endif
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+	struct netprio_map __rcu *priomap;
 #endif
 	/* phy device may attach itself for hardware timestamping */
 	struct phy_device *phydev;
diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h
new file mode 100644
index 000000000000..c432e99942af
--- /dev/null
+++ b/include/net/netprio_cgroup.h
@@ -0,0 +1,65 @@
+/*
+ * netprio_cgroup.h			Control Group Priority set
+ *
+ *
+ * Authors:	Neil Horman <nhorman@tuxdriver.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _NETPRIO_CGROUP_H
+#define _NETPRIO_CGROUP_H
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/hardirq.h>
+#include <linux/rcupdate.h>
+
+struct cgroup_netprio_state
+{
+	struct cgroup_subsys_state css;
+	u32 prioidx;
+};
+
+struct netprio_map {
+	struct rcu_head rcu;
+	u32 priomap_len;
+	u32 priomap[];
+};
+
+#ifdef CONFIG_CGROUPS
+
+#ifndef CONFIG_NETPRIO_CGROUP
+extern int net_prio_subsys_id;
+#endif
+
+extern void sock_update_netprioidx(struct sock *sk);
+
+static inline struct cgroup_netprio_state
+		*task_netprio_state(struct task_struct *p)
+{
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+	return container_of(task_subsys_state(p, net_prio_subsys_id),
+			    struct cgroup_netprio_state, css);
+#else
+	return NULL;
+#endif
+}
+
+#else
+
+#define sock_update_netprioidx(sk)
+#define skb_update_prio(skb)
+
+static inline struct cgroup_netprio_state
+		*task_netprio_state(struct task_struct *p)
+{
+	return NULL;
+}
+
+#endif
+
+#endif  /* _NET_CLS_CGROUP_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 1c28f394d8ec..8ac338cb39ce 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -320,6 +320,9 @@ struct sock {
 	unsigned short		sk_ack_backlog;
 	unsigned short		sk_max_ack_backlog;
 	__u32			sk_priority;
+#ifdef CONFIG_CGROUPS
+	__u32			sk_cgrp_prioidx;
+#endif
 	struct pid		*sk_peer_pid;
 	const struct cred	*sk_peer_cred;
 	long			sk_rcvtimeo;
diff --git a/net/Kconfig b/net/Kconfig
index a07314844238..63d2c5dc36ff 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -232,6 +232,13 @@ config XPS
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
 	default y
 
+config NETPRIO_CGROUP
+	tristate "Network priority cgroup"
+	depends on CGROUPS
+	---help---
+	  Cgroup subsystem for use in assigning processes to network priorities on
+	  a per-interface basis
+
 config HAVE_BPF_JIT
 	bool
 
diff --git a/net/core/Makefile b/net/core/Makefile
index 0d357b1c4e57..3606d40aae62 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -19,3 +19,4 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
 obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
 obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
+obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o
diff --git a/net/core/dev.c b/net/core/dev.c
index f78959996148..8afb244b205f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2449,6 +2449,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	return rc;
 }
 
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+static void skb_update_prio(struct sk_buff *skb)
+{
+	struct netprio_map *map = rcu_dereference(skb->dev->priomap);
+
+	if ((!skb->priority) && (skb->sk) && map)
+		skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
+}
+#else
+#define skb_update_prio(skb)
+#endif
+
 static DEFINE_PER_CPU(int, xmit_recursion);
 #define RECURSION_LIMIT 10
 
@@ -2489,6 +2501,8 @@ int dev_queue_xmit(struct sk_buff *skb)
 	 */
 	rcu_read_lock_bh();
 
+	skb_update_prio(skb);
+
 	txq = dev_pick_tx(dev, skb);
 	q = rcu_dereference_bh(txq->qdisc);
 
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
new file mode 100644
index 000000000000..72ad0bc6841e
--- /dev/null
+++ b/net/core/netprio_cgroup.c
@@ -0,0 +1,344 @@
+/*
+ * net/core/netprio_cgroup.c	Priority Control Group
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Neil Horman <nhorman@tuxdriver.com>
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/cgroup.h>
+#include <linux/rcupdate.h>
+#include <linux/atomic.h>
+#include <net/rtnetlink.h>
+#include <net/pkt_cls.h>
+#include <net/sock.h>
+#include <net/netprio_cgroup.h>
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
+					       struct cgroup *cgrp);
+static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp);
+
+struct cgroup_subsys net_prio_subsys = {
+	.name		= "net_prio",
+	.create		= cgrp_create,
+	.destroy	= cgrp_destroy,
+	.populate	= cgrp_populate,
+#ifdef CONFIG_NETPRIO_CGROUP
+	.subsys_id	= net_prio_subsys_id,
+#endif
+	.module		= THIS_MODULE
+};
+
+#define PRIOIDX_SZ 128
+
+static unsigned long prioidx_map[PRIOIDX_SZ];
+static DEFINE_SPINLOCK(prioidx_map_lock);
+static atomic_t max_prioidx = ATOMIC_INIT(0);
+
+static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
+{
+	return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
+			    struct cgroup_netprio_state, css);
+}
+
+static int get_prioidx(u32 *prio)
+{
+	unsigned long flags;
+	u32 prioidx;
+
+	spin_lock_irqsave(&prioidx_map_lock, flags);
+	prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ);
+	set_bit(prioidx, prioidx_map);
+	spin_unlock_irqrestore(&prioidx_map_lock, flags);
+	if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ)
+		return -ENOSPC;
+
+	atomic_set(&max_prioidx, prioidx);
+	*prio = prioidx;
+	return 0;
+}
+
+static void put_prioidx(u32 idx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&prioidx_map_lock, flags);
+	clear_bit(idx, prioidx_map);
+	spin_unlock_irqrestore(&prioidx_map_lock, flags);
+}
+
+static void extend_netdev_table(struct net_device *dev, u32 new_len)
+{
+	size_t new_size = sizeof(struct netprio_map) +
+			   ((sizeof(u32) * new_len));
+	struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
+	struct netprio_map *old_priomap;
+	int i;
+
+	old_priomap  = rtnl_dereference(dev->priomap);
+
+	if (!new_priomap) {
+		printk(KERN_WARNING "Unable to alloc new priomap!\n");
+		return;
+	}
+
+	for (i = 0;
+	     old_priomap && (i < old_priomap->priomap_len);
+	     i++)
+		new_priomap->priomap[i] = old_priomap->priomap[i];
+
+	new_priomap->priomap_len = new_len;
+
+	rcu_assign_pointer(dev->priomap, new_priomap);
+	if (old_priomap)
+		kfree_rcu(old_priomap, rcu);
+}
+
+static void update_netdev_tables(void)
+{
+	struct net_device *dev;
+	u32 max_len = atomic_read(&max_prioidx);
+	struct netprio_map *map;
+
+	rtnl_lock();
+	for_each_netdev(&init_net, dev) {
+		map = rtnl_dereference(dev->priomap);
+		if ((!map) ||
+		    (map->priomap_len < max_len))
+			extend_netdev_table(dev, max_len);
+	}
+	rtnl_unlock();
+}
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
+						 struct cgroup *cgrp)
+{
+	struct cgroup_netprio_state *cs;
+	int ret;
+
+	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+	if (!cs)
+		return ERR_PTR(-ENOMEM);
+
+	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
+		kfree(cs);
+		return ERR_PTR(-EINVAL);
+	}
+
+	ret = get_prioidx(&cs->prioidx);
+	if (ret != 0) {
+		printk(KERN_WARNING "No space in priority index array\n");
+		kfree(cs);
+		return ERR_PTR(ret);
+	}
+
+	return &cs->css;
+}
+
+static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	struct cgroup_netprio_state *cs;
+	struct net_device *dev;
+	struct netprio_map *map;
+
+	cs = cgrp_netprio_state(cgrp);
+	rtnl_lock();
+	for_each_netdev(&init_net, dev) {
+		map = rtnl_dereference(dev->priomap);
+		if (map)
+			map->priomap[cs->prioidx] = 0;
+	}
+	rtnl_unlock();
+	put_prioidx(cs->prioidx);
+	kfree(cs);
+}
+
+static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
+{
+	return (u64)cgrp_netprio_state(cgrp)->prioidx;
+}
+
+static int read_priomap(struct cgroup *cont, struct cftype *cft,
+			struct cgroup_map_cb *cb)
+{
+	struct net_device *dev;
+	u32 prioidx = cgrp_netprio_state(cont)->prioidx;
+	u32 priority;
+	struct netprio_map *map;
+
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, dev) {
+		map = rcu_dereference(dev->priomap);
+		priority = map ? map->priomap[prioidx] : 0;
+		cb->fill(cb, dev->name, priority);
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
+			 const char *buffer)
+{
+	char *devname = kstrdup(buffer, GFP_KERNEL);
+	int ret = -EINVAL;
+	u32 prioidx = cgrp_netprio_state(cgrp)->prioidx;
+	unsigned long priority;
+	char *priostr;
+	struct net_device *dev;
+	struct netprio_map *map;
+
+	if (!devname)
+		return -ENOMEM;
+
+	/*
+	 * Minimally sized valid priomap string
+	 */
+	if (strlen(devname) < 3)
+		goto out_free_devname;
+
+	priostr = strstr(devname, " ");
+	if (!priostr)
+		goto out_free_devname;
+
+	/*
+	 *Separate the devname from the associated priority
+	 *and advance the priostr poitner to the priority value
+	 */
+	*priostr = '\0';
+	priostr++;
+
+	/*
+	 * If the priostr points to NULL, we're at the end of the passed
+	 * in string, and its not a valid write
+	 */
+	if (*priostr == '\0')
+		goto out_free_devname;
+
+	ret = kstrtoul(priostr, 10, &priority);
+	if (ret < 0)
+		goto out_free_devname;
+
+	ret = -ENODEV;
+
+	dev = dev_get_by_name(&init_net, devname);
+	if (!dev)
+		goto out_free_devname;
+
+	update_netdev_tables();
+	ret = 0;
+	rcu_read_lock();
+	map = rcu_dereference(dev->priomap);
+	if (map)
+		map->priomap[prioidx] = priority;
+	rcu_read_unlock();
+	dev_put(dev);
+
+out_free_devname:
+	kfree(devname);
+	return ret;
+}
+
+static struct cftype ss_files[] = {
+	{
+		.name = "prioidx",
+		.read_u64 = read_prioidx,
+	},
+	{
+		.name = "ifpriomap",
+		.read_map = read_priomap,
+		.write_string = write_priomap,
+	},
+};
+
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
+}
+
+static int netprio_device_event(struct notifier_block *unused,
+				unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct netprio_map *old;
+	u32 max_len = atomic_read(&max_prioidx);
+
+	/*
+	 * Note this is called with rtnl_lock held so we have update side
+	 * protection on our rcu assignments
+	 */
+
+	switch (event) {
+
+	case NETDEV_REGISTER:
+		if (max_len)
+			extend_netdev_table(dev, max_len);
+		break;
+	case NETDEV_UNREGISTER:
+		old = rtnl_dereference(dev->priomap);
+		rcu_assign_pointer(dev->priomap, NULL);
+		if (old)
+			kfree_rcu(old, rcu);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block netprio_device_notifier = {
+	.notifier_call = netprio_device_event
+};
+
+static int __init init_cgroup_netprio(void)
+{
+	int ret;
+
+	ret = cgroup_load_subsys(&net_prio_subsys);
+	if (ret)
+		goto out;
+#ifndef CONFIG_NETPRIO_CGROUP
+	smp_wmb();
+	net_prio_subsys_id = net_prio_subsys.subsys_id;
+#endif
+
+	register_netdevice_notifier(&netprio_device_notifier);
+
+out:
+	return ret;
+}
+
+static void __exit exit_cgroup_netprio(void)
+{
+	struct netprio_map *old;
+	struct net_device *dev;
+
+	unregister_netdevice_notifier(&netprio_device_notifier);
+
+	cgroup_unload_subsys(&net_prio_subsys);
+
+#ifndef CONFIG_NETPRIO_CGROUP
+	net_prio_subsys_id = -1;
+	synchronize_rcu();
+#endif
+
+	rtnl_lock();
+	for_each_netdev(&init_net, dev) {
+		old = rtnl_dereference(dev->priomap);
+		rcu_assign_pointer(dev->priomap, NULL);
+		if (old)
+			kfree_rcu(old, rcu);
+	}
+	rtnl_unlock();
+}
+
+module_init(init_cgroup_netprio);
+module_exit(exit_cgroup_netprio);
+MODULE_LICENSE("GPL v2");
diff --git a/net/core/sock.c b/net/core/sock.c
index 9a8b3fac1401..16069139797c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -125,6 +125,7 @@
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
 #include <net/cls_cgroup.h>
+#include <net/netprio_cgroup.h>
 
 #include <linux/filter.h>
 
@@ -221,10 +222,16 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
-#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
+#if defined(CONFIG_CGROUPS)
+#if !defined(CONFIG_NET_CLS_CGROUP)
 int net_cls_subsys_id = -1;
 EXPORT_SYMBOL_GPL(net_cls_subsys_id);
 #endif
+#if !defined(CONFIG_NETPRIO_CGROUP)
+int net_prio_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_prio_subsys_id);
+#endif
+#endif
 
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
@@ -1120,6 +1127,18 @@ void sock_update_classid(struct sock *sk)
 		sk->sk_classid = classid;
 }
 EXPORT_SYMBOL(sock_update_classid);
+
+void sock_update_netprioidx(struct sock *sk)
+{
+	struct cgroup_netprio_state *state;
+	if (in_interrupt())
+		return;
+	rcu_read_lock();
+	state = task_netprio_state(current);
+	sk->sk_cgrp_prioidx = state ? state->prioidx : 0;
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(sock_update_netprioidx);
 #endif
 
 /**
@@ -1147,6 +1166,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		atomic_set(&sk->sk_wmem_alloc, 1);
 
 		sock_update_classid(sk);
+		sock_update_netprioidx(sk);
 	}
 
 	return sk;
diff --git a/net/socket.c b/net/socket.c
index 425ef4270460..e62b4f055071 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -551,6 +551,8 @@ static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
 
 	sock_update_classid(sock->sk);
 
+	sock_update_netprioidx(sock->sk);
+
 	si->sock = sock;
 	si->scm = NULL;
 	si->msg = msg;
-- 
cgit v1.2.3


From 5eccdf5e06eb67779716ae26142402a1ae9b012c Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 21 Nov 2011 06:53:46 +0000
Subject: tc: comment spelling fixes

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index c5336705921f..7281d5acf2f9 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -30,7 +30,7 @@
  */
 
 struct tc_stats {
-	__u64	bytes;			/* NUmber of enqueues bytes */
+	__u64	bytes;			/* Number of enqueued bytes */
 	__u32	packets;		/* Number of enqueued packets	*/
 	__u32	drops;			/* Packets dropped because of lack of resources */
 	__u32	overlimits;		/* Number of throttle events when this
@@ -297,7 +297,7 @@ struct tc_htb_glob {
 	__u32 debug;		/* debug flags */
 
 	/* stats */
-	__u32 direct_pkts; /* count of non shapped packets */
+	__u32 direct_pkts; /* count of non shaped packets */
 };
 enum {
 	TCA_HTB_UNSPEC,
@@ -503,7 +503,7 @@ enum {
 };
 #define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1)
 
-/* State transition probablities for 4 state model */
+/* State transition probabilities for 4 state model */
 struct tc_netem_gimodel {
 	__u32	p13;
 	__u32	p31;
-- 
cgit v1.2.3


From 4e3fd7a06dc20b2d8ec6892233ad2012968fe7b6 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 21 Nov 2011 03:39:03 +0000
Subject: net: remove ipv6_addr_copy()

C assignment can handle struct in6_addr copying.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/core/addr.c         |  6 ++---
 drivers/infiniband/core/cma.c          |  8 +++---
 drivers/net/bonding/bond_ipv6.c        |  8 +++---
 drivers/net/ethernet/broadcom/cnic.c   |  2 +-
 fs/dlm/lowcomms.c                      |  2 +-
 include/linux/sunrpc/clnt.h            |  2 +-
 include/net/inetpeer.h                 |  2 +-
 include/net/ip_vs.h                    |  8 +++---
 include/net/ipv6.h                     |  5 ----
 include/net/xfrm.h                     |  4 +--
 net/bridge/br_multicast.c              | 10 ++++----
 net/core/pktgen.c                      | 15 +++++------
 net/dccp/ipv6.c                        | 42 +++++++++++++++----------------
 net/dccp/minisocks.c                   |  4 +--
 net/ipv4/inet_diag.c                   | 18 +++++--------
 net/ipv4/tcp_minisocks.c               |  4 +--
 net/ipv6/addrconf.c                    |  8 +++---
 net/ipv6/af_inet6.c                    | 14 +++++------
 net/ipv6/ah6.c                         | 12 ++++-----
 net/ipv6/anycast.c                     |  4 +--
 net/ipv6/datagram.c                    | 34 ++++++++++++-------------
 net/ipv6/exthdrs.c                     | 18 ++++++-------
 net/ipv6/fib6_rules.c                  |  2 +-
 net/ipv6/icmp.c                        | 18 ++++++-------
 net/ipv6/inet6_connection_sock.c       | 12 ++++-----
 net/ipv6/ip6_flowlabel.c               |  2 +-
 net/ipv6/ip6_output.c                  | 18 ++++++-------
 net/ipv6/ip6_tunnel.c                  | 12 ++++-----
 net/ipv6/ip6mr.c                       | 12 ++++-----
 net/ipv6/ipv6_sockglue.c               |  8 +++---
 net/ipv6/mcast.c                       |  6 ++---
 net/ipv6/mip6.c                        |  4 +--
 net/ipv6/ndisc.c                       |  6 ++---
 net/ipv6/netfilter/ip6t_REJECT.c       |  8 +++---
 net/ipv6/raw.c                         | 10 ++++----
 net/ipv6/reassembly.c                  |  4 +--
 net/ipv6/route.c                       | 42 +++++++++++++++----------------
 net/ipv6/sit.c                         |  4 +--
 net/ipv6/syncookies.c                  |  8 +++---
 net/ipv6/tcp_ipv6.c                    | 46 ++++++++++++++++------------------
 net/ipv6/udp.c                         |  7 +++---
 net/ipv6/xfrm6_mode_beet.c             |  8 +++---
 net/ipv6/xfrm6_mode_tunnel.c           |  4 +--
 net/ipv6/xfrm6_output.c                |  4 +--
 net/ipv6/xfrm6_policy.c                |  4 +--
 net/ipv6/xfrm6_state.c                 |  4 +--
 net/key/af_key.c                       |  2 +-
 net/netfilter/ipset/ip_set_hash_ip.c   |  2 +-
 net/netfilter/ipset/ip_set_hash_net.c  |  2 +-
 net/netfilter/ipvs/ip_vs_core.c        |  2 +-
 net/netfilter/ipvs/ip_vs_sync.c        |  6 ++---
 net/netfilter/ipvs/ip_vs_xmit.c        | 10 ++++----
 net/netfilter/nf_conntrack_h323_main.c |  4 +--
 net/netfilter/xt_TCPMSS.c              |  2 +-
 net/netfilter/xt_addrtype.c            |  2 +-
 net/netlabel/netlabel_kapi.c           |  4 +--
 net/netlabel/netlabel_mgmt.c           |  4 +--
 net/netlabel/netlabel_unlabeled.c      |  4 +--
 net/sctp/ipv6.c                        | 40 ++++++++++++++---------------
 net/sctp/socket.c                      |  2 +-
 net/sunrpc/svcauth_unix.c              |  6 ++---
 net/sunrpc/svcsock.c                   |  4 +--
 net/xfrm/xfrm_state.c                  | 12 +++------
 security/lsm_audit.c                   |  4 +--
 security/selinux/hooks.c               |  6 ++---
 security/selinux/netnode.c             |  2 +-
 66 files changed, 288 insertions(+), 315 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 691276bafd78..adf0757280ed 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -243,8 +243,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 	int ret;
 
 	memset(&fl6, 0, sizeof fl6);
-	ipv6_addr_copy(&fl6.daddr, &dst_in->sin6_addr);
-	ipv6_addr_copy(&fl6.saddr, &src_in->sin6_addr);
+	fl6.daddr = dst_in->sin6_addr;
+	fl6.saddr = src_in->sin6_addr;
 	fl6.flowi6_oif = addr->bound_dev_if;
 
 	dst = ip6_route_output(&init_net, NULL, &fl6);
@@ -258,7 +258,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 			goto put;
 
 		src_in->sin6_family = AF_INET6;
-		ipv6_addr_copy(&src_in->sin6_addr, &fl6.saddr);
+		src_in->sin6_addr = fl6.saddr;
 	}
 
 	if (dst->dev->flags & IFF_LOOPBACK) {
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 75ff821c0af0..09e66cce05d3 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2005,11 +2005,11 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
 	if (cma_zero_addr(src)) {
 		dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
 		if ((src->sa_family = dst->sa_family) == AF_INET) {
-			((struct sockaddr_in *) src)->sin_addr.s_addr =
-				((struct sockaddr_in *) dst)->sin_addr.s_addr;
+			((struct sockaddr_in *)src)->sin_addr =
+				((struct sockaddr_in *)dst)->sin_addr;
 		} else {
-			ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
-				       &((struct sockaddr_in6 *) dst)->sin6_addr);
+			((struct sockaddr_in6 *)src)->sin6_addr =
+				((struct sockaddr_in6 *)dst)->sin6_addr;
 		}
 	}
 
diff --git a/drivers/net/bonding/bond_ipv6.c b/drivers/net/bonding/bond_ipv6.c
index 027a0ee7d85b..7e6632221a75 100644
--- a/drivers/net/bonding/bond_ipv6.c
+++ b/drivers/net/bonding/bond_ipv6.c
@@ -50,7 +50,7 @@ static void bond_glean_dev_ipv6(struct net_device *dev, struct in6_addr *addr)
 		struct inet6_ifaddr *ifa
 			= list_first_entry(&idev->addr_list,
 					   struct inet6_ifaddr, if_list);
-		ipv6_addr_copy(addr, &ifa->addr);
+		*addr = ifa->addr;
 	} else
 		ipv6_addr_set(addr, 0, 0, 0, 0);
 
@@ -168,8 +168,7 @@ static int bond_inet6addr_event(struct notifier_block *this,
 			switch (event) {
 			case NETDEV_UP:
 				if (ipv6_addr_any(&bond->master_ipv6))
-					ipv6_addr_copy(&bond->master_ipv6,
-						       &ifa->addr);
+					bond->master_ipv6 = ifa->addr;
 				return NOTIFY_OK;
 			case NETDEV_DOWN:
 				if (ipv6_addr_equal(&bond->master_ipv6,
@@ -191,8 +190,7 @@ static int bond_inet6addr_event(struct notifier_block *this,
 				switch (event) {
 				case NETDEV_UP:
 					if (ipv6_addr_any(&vlan->vlan_ipv6))
-						ipv6_addr_copy(&vlan->vlan_ipv6,
-							       &ifa->addr);
+						vlan->vlan_ipv6 = ifa->addr;
 					return NOTIFY_OK;
 				case NETDEV_DOWN:
 					if (ipv6_addr_equal(&vlan->vlan_ipv6,
diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 6f10c6939834..099f41d99ec0 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -3475,7 +3475,7 @@ static int cnic_get_v6_route(struct sockaddr_in6 *dst_addr,
 	struct flowi6 fl6;
 
 	memset(&fl6, 0, sizeof(fl6));
-	ipv6_addr_copy(&fl6.daddr, &dst_addr->sin6_addr);
+	fl6.daddr = dst_addr->sin6_addr;
 	if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
 		fl6.flowi6_oif = dst_addr->sin6_scope_id;
 
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 990626e7da80..0b3109ee4257 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -281,7 +281,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
 	} else {
 		struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &addr;
 		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
-		ipv6_addr_copy(&ret6->sin6_addr, &in6->sin6_addr);
+		ret6->sin6_addr = in6->sin6_addr;
 	}
 
 	return 0;
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 3d8f9c44e27d..f15fd985b08a 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -237,7 +237,7 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst,
 	struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst;
 
 	dsin6->sin6_family = ssin6->sin6_family;
-	ipv6_addr_copy(&dsin6->sin6_addr, &ssin6->sin6_addr);
+	dsin6->sin6_addr = ssin6->sin6_addr;
 	return true;
 }
 #else	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 78c83e62218f..73a5c26c01ea 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -86,7 +86,7 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr,
 {
 	struct inetpeer_addr daddr;
 
-	ipv6_addr_copy((struct in6_addr *)daddr.addr.a6, v6daddr);
+	*(struct in6_addr *)daddr.addr.a6 = *v6daddr;
 	daddr.family = AF_INET6;
 	return inet_getpeer(&daddr, create);
 }
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 873d5be7926c..48fd12e9d3af 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -21,7 +21,7 @@
 #include <linux/netfilter.h>		/* for union nf_inet_addr */
 #include <linux/ip.h>
 #include <linux/ipv6.h>			/* for struct ipv6hdr */
-#include <net/ipv6.h>			/* for ipv6_addr_copy */
+#include <net/ipv6.h>
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netfilter/nf_conntrack.h>
 #endif
@@ -119,8 +119,8 @@ ip_vs_fill_iphdr(int af, const void *nh, struct ip_vs_iphdr *iphdr)
 		const struct ipv6hdr *iph = nh;
 		iphdr->len = sizeof(struct ipv6hdr);
 		iphdr->protocol = iph->nexthdr;
-		ipv6_addr_copy(&iphdr->saddr.in6, &iph->saddr);
-		ipv6_addr_copy(&iphdr->daddr.in6, &iph->daddr);
+		iphdr->saddr.in6 = iph->saddr;
+		iphdr->daddr.in6 = iph->daddr;
 	} else
 #endif
 	{
@@ -137,7 +137,7 @@ static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
 {
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6)
-		ipv6_addr_copy(&dst->in6, &src->in6);
+		dst->in6 = src->in6;
 	else
 #endif
 	dst->ip = src->ip;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 3f0258d2ef01..f35188e002d9 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -309,11 +309,6 @@ ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m,
 		  ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3]));
 }
 
-static inline void ipv6_addr_copy(struct in6_addr *a1, const struct in6_addr *a2)
-{
-	memcpy(a1, a2, sizeof(struct in6_addr));
-}
-
 static inline void ipv6_addr_prefix(struct in6_addr *pfx, 
 				    const struct in6_addr *addr,
 				    int plen)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 4de7ed9016d9..89174e29dca9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1217,8 +1217,8 @@ void xfrm_flowi_addr_get(const struct flowi *fl,
 		memcpy(&daddr->a4, &fl->u.ip4.daddr, sizeof(daddr->a4));
 		break;
 	case AF_INET6:
-		ipv6_addr_copy((struct in6_addr *)&saddr->a6, &fl->u.ip6.saddr);
-		ipv6_addr_copy((struct in6_addr *)&daddr->a6, &fl->u.ip6.daddr);
+		*(struct in6_addr *)saddr->a6 = fl->u.ip6.saddr;
+		*(struct in6_addr *)daddr->a6 = fl->u.ip6.daddr;
 		break;
 	}
 }
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index a5f4e5769809..7743e0d109ea 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -127,7 +127,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(
 {
 	struct br_ip br_dst;
 
-	ipv6_addr_copy(&br_dst.u.ip6, dst);
+	br_dst.u.ip6 = *dst;
 	br_dst.proto = htons(ETH_P_IPV6);
 
 	return br_mdb_ip_get(mdb, &br_dst);
@@ -154,7 +154,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 		break;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case htons(ETH_P_IPV6):
-		ipv6_addr_copy(&ip.u.ip6, &ipv6_hdr(skb)->daddr);
+		ip.u.ip6 = ipv6_hdr(skb)->daddr;
 		break;
 #endif
 	default:
@@ -474,7 +474,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	mldq->mld_cksum = 0;
 	mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
 	mldq->mld_reserved = 0;
-	ipv6_addr_copy(&mldq->mld_mca, group);
+	mldq->mld_mca = *group;
 
 	/* checksum */
 	mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
@@ -783,7 +783,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
 	if (!ipv6_is_transient_multicast(group))
 		return 0;
 
-	ipv6_addr_copy(&br_group.u.ip6, group);
+	br_group.u.ip6 = *group;
 	br_group.proto = htons(ETH_P_IPV6);
 
 	return br_multicast_add_group(br, port, &br_group);
@@ -1344,7 +1344,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
 	if (!ipv6_is_transient_multicast(group))
 		return;
 
-	ipv6_addr_copy(&br_group.u.ip6, group);
+	br_group.u.ip6 = *group;
 	br_group.proto = htons(ETH_P_IPV6);
 
 	br_multicast_leave_group(br, port, &br_group);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0001c243b35c..aa53a35a631b 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1304,7 +1304,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		scan_ip6(buf, pkt_dev->in6_daddr.s6_addr);
 		snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr);
 
-		ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr);
+		pkt_dev->cur_in6_daddr = pkt_dev->in6_daddr;
 
 		if (debug)
 			printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf);
@@ -1327,8 +1327,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr);
 		snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr);
 
-		ipv6_addr_copy(&pkt_dev->cur_in6_daddr,
-			       &pkt_dev->min_in6_daddr);
+		pkt_dev->cur_in6_daddr = pkt_dev->min_in6_daddr;
 		if (debug)
 			printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf);
 
@@ -1371,7 +1370,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		scan_ip6(buf, pkt_dev->in6_saddr.s6_addr);
 		snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr);
 
-		ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr);
+		pkt_dev->cur_in6_saddr = pkt_dev->in6_saddr;
 
 		if (debug)
 			printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf);
@@ -2079,9 +2078,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 				     ifp = ifp->if_next) {
 					if (ifp->scope == IFA_LINK &&
 					    !(ifp->flags & IFA_F_TENTATIVE)) {
-						ipv6_addr_copy(&pkt_dev->
-							       cur_in6_saddr,
-							       &ifp->addr);
+						pkt_dev->cur_in6_saddr = ifp->addr;
 						err = 0;
 						break;
 					}
@@ -2958,8 +2955,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	iph->payload_len = htons(sizeof(struct udphdr) + datalen);
 	iph->nexthdr = IPPROTO_UDP;
 
-	ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr);
-	ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr);
+	iph->daddr = pkt_dev->cur_in6_daddr;
+	iph->saddr = pkt_dev->cur_in6_saddr;
 
 	skb->mac_header = (skb->network_header - ETH_HLEN -
 			   pkt_dev->pkt_overhead);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 17ee85ce148d..ce903f747e64 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -150,8 +150,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			 */
 			memset(&fl6, 0, sizeof(fl6));
 			fl6.flowi6_proto = IPPROTO_DCCP;
-			ipv6_addr_copy(&fl6.daddr, &np->daddr);
-			ipv6_addr_copy(&fl6.saddr, &np->saddr);
+			fl6.daddr = np->daddr;
+			fl6.saddr = np->saddr;
 			fl6.flowi6_oif = sk->sk_bound_dev_if;
 			fl6.fl6_dport = inet->inet_dport;
 			fl6.fl6_sport = inet->inet_sport;
@@ -244,8 +244,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = IPPROTO_DCCP;
-	ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
-	ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
+	fl6.daddr = ireq6->rmt_addr;
+	fl6.saddr = ireq6->loc_addr;
 	fl6.flowlabel = 0;
 	fl6.flowi6_oif = ireq6->iif;
 	fl6.fl6_dport = inet_rsk(req)->rmt_port;
@@ -270,7 +270,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 		dh->dccph_checksum = dccp_v6_csum_finish(skb,
 							 &ireq6->loc_addr,
 							 &ireq6->rmt_addr);
-		ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
+		fl6.daddr = ireq6->rmt_addr;
 		err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
 		err = net_xmit_eval(err);
 	}
@@ -313,8 +313,8 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 							    &rxip6h->daddr);
 
 	memset(&fl6, 0, sizeof(fl6));
-	ipv6_addr_copy(&fl6.daddr, &rxip6h->saddr);
-	ipv6_addr_copy(&fl6.saddr, &rxip6h->daddr);
+	fl6.daddr = rxip6h->saddr;
+	fl6.saddr = rxip6h->daddr;
 
 	fl6.flowi6_proto = IPPROTO_DCCP;
 	fl6.flowi6_oif = inet6_iif(rxskb);
@@ -419,8 +419,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	ireq6 = inet6_rsk(req);
-	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
-	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
+	ireq6->rmt_addr = ipv6_hdr(skb)->saddr;
+	ireq6->loc_addr = ipv6_hdr(skb)->daddr;
 
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
@@ -491,7 +491,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
 
-		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
+		newnp->rcv_saddr = newnp->saddr;
 
 		inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
 		newsk->sk_backlog_rcv = dccp_v4_do_rcv;
@@ -526,9 +526,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		memset(&fl6, 0, sizeof(fl6));
 		fl6.flowi6_proto = IPPROTO_DCCP;
-		ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
+		fl6.daddr = ireq6->rmt_addr;
 		final_p = fl6_update_dst(&fl6, opt, &final);
-		ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
+		fl6.saddr = ireq6->loc_addr;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
 		fl6.fl6_dport = inet_rsk(req)->rmt_port;
 		fl6.fl6_sport = inet_rsk(req)->loc_port;
@@ -559,9 +559,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	ipv6_addr_copy(&newnp->daddr, &ireq6->rmt_addr);
-	ipv6_addr_copy(&newnp->saddr, &ireq6->loc_addr);
-	ipv6_addr_copy(&newnp->rcv_saddr, &ireq6->loc_addr);
+	newnp->daddr = ireq6->rmt_addr;
+	newnp->saddr = ireq6->loc_addr;
+	newnp->rcv_saddr = ireq6->loc_addr;
 	newsk->sk_bound_dev_if = ireq6->iif;
 
 	/* Now IPv6 options...
@@ -877,7 +877,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 			if (flowlabel == NULL)
 				return -EINVAL;
-			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+			usin->sin6_addr = flowlabel->dst;
 			fl6_sock_release(flowlabel);
 		}
 	}
@@ -910,7 +910,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			return -EINVAL;
 	}
 
-	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
+	np->daddr = usin->sin6_addr;
 	np->flow_label = fl6.flowlabel;
 
 	/*
@@ -949,8 +949,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		saddr = &np->rcv_saddr;
 
 	fl6.flowi6_proto = IPPROTO_DCCP;
-	ipv6_addr_copy(&fl6.daddr, &np->daddr);
-	ipv6_addr_copy(&fl6.saddr, saddr ? saddr : &np->saddr);
+	fl6.daddr = np->daddr;
+	fl6.saddr = saddr ? *saddr : np->saddr;
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 	fl6.fl6_dport = usin->sin6_port;
 	fl6.fl6_sport = inet->inet_sport;
@@ -966,11 +966,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	if (saddr == NULL) {
 		saddr = &fl6.saddr;
-		ipv6_addr_copy(&np->rcv_saddr, saddr);
+		np->rcv_saddr = *saddr;
 	}
 
 	/* set the source address */
-	ipv6_addr_copy(&np->saddr, saddr);
+	np->saddr = *saddr;
 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
 	__ip6_dst_store(sk, dst, NULL, NULL);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 563b7c74e49d..b50d5fd3d696 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -60,8 +60,8 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
 
 			tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
 			tw6 = inet6_twsk((struct sock *)tw);
-			ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
-			ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
+			tw6->tw_v6_daddr = np->daddr;
+			tw6->tw_v6_rcv_saddr = np->rcv_saddr;
 			tw->tw_ipv6only = np->ipv6only;
 		}
 #endif
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 68e8ac514383..bbebdecd7234 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -129,10 +129,8 @@ static int inet_csk_diag_fill(struct sock *sk,
 	if (r->idiag_family == AF_INET6) {
 		const struct ipv6_pinfo *np = inet6_sk(sk);
 
-		ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
-			       &np->rcv_saddr);
-		ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
-			       &np->daddr);
+		*(struct in6_addr *)r->id.idiag_src = np->rcv_saddr;
+		*(struct in6_addr *)r->id.idiag_dst = np->daddr;
 		if (ext & (1 << (INET_DIAG_TCLASS - 1)))
 			RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass);
 	}
@@ -224,10 +222,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 		const struct inet6_timewait_sock *tw6 =
 						inet6_twsk((struct sock *)tw);
 
-		ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
-			       &tw6->tw_v6_rcv_saddr);
-		ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
-			       &tw6->tw_v6_daddr);
+		*(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr;
+		*(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr;
 	}
 #endif
 	nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail;
@@ -603,10 +599,8 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 	r->idiag_inode = 0;
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 	if (r->idiag_family == AF_INET6) {
-		ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
-			       &inet6_rsk(req)->loc_addr);
-		ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
-			       &inet6_rsk(req)->rmt_addr);
+		*(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr;
+		*(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr;
 	}
 #endif
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0a7e3398c461..945efffdd929 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -343,8 +343,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 
 			tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
 			tw6 = inet6_twsk((struct sock *)tw);
-			ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
-			ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
+			tw6->tw_v6_daddr = np->daddr;
+			tw6->tw_v6_rcv_saddr = np->rcv_saddr;
 			tw->tw_tclass = np->tclass;
 			tw->tw_ipv6only = np->ipv6only;
 		}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cf88df82e2c2..586051726341 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -636,7 +636,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 		goto out;
 	}
 
-	ipv6_addr_copy(&ifa->addr, addr);
+	ifa->addr = *addr;
 
 	spin_lock_init(&ifa->lock);
 	spin_lock_init(&ifa->state_lock);
@@ -1228,7 +1228,7 @@ try_nextdev:
 	if (!hiscore->ifa)
 		return -EADDRNOTAVAIL;
 
-	ipv6_addr_copy(saddr, &hiscore->ifa->addr);
+	*saddr = hiscore->ifa->addr;
 	in6_ifa_put(hiscore->ifa);
 	return 0;
 }
@@ -1249,7 +1249,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
 		list_for_each_entry(ifp, &idev->addr_list, if_list) {
 			if (ifp->scope == IFA_LINK &&
 			    !(ifp->flags & banned_flags)) {
-				ipv6_addr_copy(addr, &ifp->addr);
+				*addr = ifp->addr;
 				err = 0;
 				break;
 			}
@@ -1700,7 +1700,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		.fc_protocol = RTPROT_KERNEL,
 	};
 
-	ipv6_addr_copy(&cfg.fc_dst, pfx);
+	cfg.fc_dst = *pfx;
 
 	/* Prevent useless cloning on PtP SIT.
 	   This thing is done here expecting that the whole
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ee3319487c4f..7694c82e629d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -361,10 +361,10 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	inet->inet_rcv_saddr = v4addr;
 	inet->inet_saddr = v4addr;
 
-	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
+	np->rcv_saddr = addr->sin6_addr;
 
 	if (!(addr_type & IPV6_ADDR_MULTICAST))
-		ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
+		np->saddr = addr->sin6_addr;
 
 	/* Make sure we are allowed to bind here. */
 	if (sk->sk_prot->get_port(sk, snum)) {
@@ -458,14 +458,14 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		    peer == 1)
 			return -ENOTCONN;
 		sin->sin6_port = inet->inet_dport;
-		ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
+		sin->sin6_addr = np->daddr;
 		if (np->sndflow)
 			sin->sin6_flowinfo = np->flow_label;
 	} else {
 		if (ipv6_addr_any(&np->rcv_saddr))
-			ipv6_addr_copy(&sin->sin6_addr, &np->saddr);
+			sin->sin6_addr = np->saddr;
 		else
-			ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr);
+			sin->sin6_addr = np->rcv_saddr;
 
 		sin->sin6_port = inet->inet_sport;
 	}
@@ -660,8 +660,8 @@ int inet6_sk_rebuild_header(struct sock *sk)
 
 		memset(&fl6, 0, sizeof(fl6));
 		fl6.flowi6_proto = sk->sk_protocol;
-		ipv6_addr_copy(&fl6.daddr, &np->daddr);
-		ipv6_addr_copy(&fl6.saddr, &np->saddr);
+		fl6.daddr = np->daddr;
+		fl6.saddr = np->saddr;
 		fl6.flowlabel = np->flow_label;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
 		fl6.flowi6_mark = sk->sk_mark;
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 4c0f894d0843..2ae79dbeec2f 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -193,9 +193,9 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des
 						printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length);
 					goto bad;
 				}
-				ipv6_addr_copy(&final_addr, &hao->addr);
-				ipv6_addr_copy(&hao->addr, &iph->saddr);
-				ipv6_addr_copy(&iph->saddr, &final_addr);
+				final_addr = hao->addr;
+				hao->addr = iph->saddr;
+				iph->saddr = final_addr;
 			}
 			break;
 		}
@@ -241,13 +241,13 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
 	segments = rthdr->hdrlen >> 1;
 
 	addrs = ((struct rt0_hdr *)rthdr)->addr;
-	ipv6_addr_copy(&final_addr, addrs + segments - 1);
+	final_addr = addrs[segments - 1];
 
 	addrs += segments - segments_left;
 	memmove(addrs + 1, addrs, (segments_left - 1) * sizeof(*addrs));
 
-	ipv6_addr_copy(addrs, &iph->daddr);
-	ipv6_addr_copy(&iph->daddr, &final_addr);
+	addrs[0] = iph->daddr;
+	iph->daddr = final_addr;
 }
 
 static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 674255f5e6b7..fc1cdcd7041a 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -75,7 +75,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	if (pac == NULL)
 		return -ENOMEM;
 	pac->acl_next = NULL;
-	ipv6_addr_copy(&pac->acl_addr, addr);
+	pac->acl_addr = *addr;
 
 	rcu_read_lock();
 	if (ifindex == 0) {
@@ -296,7 +296,7 @@ int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
 		goto out;
 	}
 
-	ipv6_addr_copy(&aca->aca_addr, addr);
+	aca->aca_addr = *addr;
 	aca->aca_idev = idev;
 	aca->aca_rt = rt;
 	aca->aca_users = 1;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 83037af4fa7b..ae08aee1773c 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -71,7 +71,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 			if (flowlabel == NULL)
 				return -EINVAL;
-			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+			usin->sin6_addr = flowlabel->dst;
 		}
 	}
 
@@ -143,7 +143,7 @@ ipv4_connected:
 		}
 	}
 
-	ipv6_addr_copy(&np->daddr, daddr);
+	np->daddr = *daddr;
 	np->flow_label = fl6.flowlabel;
 
 	inet->inet_dport = usin->sin6_port;
@@ -154,8 +154,8 @@ ipv4_connected:
 	 */
 
 	fl6.flowi6_proto = sk->sk_protocol;
-	ipv6_addr_copy(&fl6.daddr, &np->daddr);
-	ipv6_addr_copy(&fl6.saddr, &np->saddr);
+	fl6.daddr = np->daddr;
+	fl6.saddr = np->saddr;
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.fl6_dport = inet->inet_dport;
@@ -179,10 +179,10 @@ ipv4_connected:
 	/* source address lookup done in ip6_dst_lookup */
 
 	if (ipv6_addr_any(&np->saddr))
-		ipv6_addr_copy(&np->saddr, &fl6.saddr);
+		np->saddr = fl6.saddr;
 
 	if (ipv6_addr_any(&np->rcv_saddr)) {
-		ipv6_addr_copy(&np->rcv_saddr, &fl6.saddr);
+		np->rcv_saddr = fl6.saddr;
 		inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 		if (sk->sk_prot->rehash)
 			sk->sk_prot->rehash(sk);
@@ -257,7 +257,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
 	skb_put(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	iph = ipv6_hdr(skb);
-	ipv6_addr_copy(&iph->daddr, &fl6->daddr);
+	iph->daddr = fl6->daddr;
 
 	serr = SKB_EXT_ERR(skb);
 	serr->ee.ee_errno = err;
@@ -294,7 +294,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
 	skb_put(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	iph = ipv6_hdr(skb);
-	ipv6_addr_copy(&iph->daddr, &fl6->daddr);
+	iph->daddr = fl6->daddr;
 
 	mtu_info = IP6CBMTU(skb);
 
@@ -303,7 +303,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
 	mtu_info->ip6m_addr.sin6_port = 0;
 	mtu_info->ip6m_addr.sin6_flowinfo = 0;
 	mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif;
-	ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr);
+	mtu_info->ip6m_addr.sin6_addr = ipv6_hdr(skb)->daddr;
 
 	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
 	skb_reset_transport_header(skb);
@@ -354,8 +354,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		sin->sin6_port = serr->port;
 		sin->sin6_scope_id = 0;
 		if (skb->protocol == htons(ETH_P_IPV6)) {
-			ipv6_addr_copy(&sin->sin6_addr,
-				  (struct in6_addr *)(nh + serr->addr_offset));
+			sin->sin6_addr =
+				*(struct in6_addr *)(nh + serr->addr_offset);
 			if (np->sndflow)
 				sin->sin6_flowinfo =
 					(*(__be32 *)(nh + serr->addr_offset - 24) &
@@ -376,7 +376,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		sin->sin6_flowinfo = 0;
 		sin->sin6_scope_id = 0;
 		if (skb->protocol == htons(ETH_P_IPV6)) {
-			ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr);
+			sin->sin6_addr = ipv6_hdr(skb)->saddr;
 			if (np->rxopt.all)
 				datagram_recv_ctl(sk, msg, skb);
 			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -451,7 +451,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
 		sin->sin6_flowinfo = 0;
 		sin->sin6_port = 0;
 		sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
-		ipv6_addr_copy(&sin->sin6_addr, &mtu_info.ip6m_addr.sin6_addr);
+		sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr;
 	}
 
 	put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
@@ -475,7 +475,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		struct in6_pktinfo src_info;
 
 		src_info.ipi6_ifindex = opt->iif;
-		ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
+		src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
 		put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 	}
 
@@ -550,7 +550,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		struct in6_pktinfo src_info;
 
 		src_info.ipi6_ifindex = opt->iif;
-		ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
+		src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 	}
 	if (np->rxopt.bits.rxohlim) {
@@ -584,7 +584,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 			 */
 
 			sin6.sin6_family = AF_INET6;
-			ipv6_addr_copy(&sin6.sin6_addr, &ipv6_hdr(skb)->daddr);
+			sin6.sin6_addr = ipv6_hdr(skb)->daddr;
 			sin6.sin6_port = ports[1];
 			sin6.sin6_flowinfo = 0;
 			sin6.sin6_scope_id = 0;
@@ -659,7 +659,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
 						   strict ? dev : NULL, 0))
 					err = -EINVAL;
 				else
-					ipv6_addr_copy(&fl6->saddr, &src_info->ipi6_addr);
+					fl6->saddr = src_info->ipi6_addr;
 			}
 
 			rcu_read_unlock();
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index bf22a225f422..3d641b6e9b09 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -243,9 +243,9 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->ip_summed = CHECKSUM_NONE;
 
-	ipv6_addr_copy(&tmp_addr, &ipv6h->saddr);
-	ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
-	ipv6_addr_copy(&hao->addr, &tmp_addr);
+	tmp_addr = ipv6h->saddr;
+	ipv6h->saddr = hao->addr;
+	hao->addr = tmp_addr;
 
 	if (skb->tstamp.tv64 == 0)
 		__net_timestamp(skb);
@@ -461,9 +461,9 @@ looped_back:
 		return -1;
 	}
 
-	ipv6_addr_copy(&daddr, addr);
-	ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr);
-	ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr);
+	daddr = *addr;
+	*addr = ipv6_hdr(skb)->daddr;
+	ipv6_hdr(skb)->daddr = daddr;
 
 	skb_dst_drop(skb);
 	ip6_route_input(skb);
@@ -690,7 +690,7 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
 		memcpy(phdr->addr, ihdr->addr + 1,
 		       (hops - 1) * sizeof(struct in6_addr));
 
-	ipv6_addr_copy(phdr->addr + (hops - 1), *addr_p);
+	phdr->addr[hops - 1] = **addr_p;
 	*addr_p = ihdr->addr;
 
 	phdr->rt_hdr.nexthdr = *proto;
@@ -888,8 +888,8 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
 	if (!opt || !opt->srcrt)
 		return NULL;
 
-	ipv6_addr_copy(orig, &fl6->daddr);
-	ipv6_addr_copy(&fl6->daddr, ((struct rt0_hdr *)opt->srcrt)->addr);
+	*orig = fl6->daddr;
+	fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
 	return orig;
 }
 
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 295571576f83..b6c573152067 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -96,7 +96,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 			if (!ipv6_prefix_equal(&saddr, &r->src.addr,
 					       r->src.plen))
 				goto again;
-			ipv6_addr_copy(&flp6->saddr, &saddr);
+			flp6->saddr = saddr;
 		}
 		goto out;
 	}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 90868fb42757..9e2bdccf9143 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -290,9 +290,9 @@ static void mip6_addr_swap(struct sk_buff *skb)
 		if (likely(off >= 0)) {
 			hao = (struct ipv6_destopt_hao *)
 					(skb_network_header(skb) + off);
-			ipv6_addr_copy(&tmp, &iph->saddr);
-			ipv6_addr_copy(&iph->saddr, &hao->addr);
-			ipv6_addr_copy(&hao->addr, &tmp);
+			tmp = iph->saddr;
+			iph->saddr = hao->addr;
+			hao->addr = tmp;
 		}
 	}
 }
@@ -444,9 +444,9 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = IPPROTO_ICMPV6;
-	ipv6_addr_copy(&fl6.daddr, &hdr->saddr);
+	fl6.daddr = hdr->saddr;
 	if (saddr)
-		ipv6_addr_copy(&fl6.saddr, saddr);
+		fl6.saddr = *saddr;
 	fl6.flowi6_oif = iif;
 	fl6.fl6_icmp_type = type;
 	fl6.fl6_icmp_code = code;
@@ -538,9 +538,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = IPPROTO_ICMPV6;
-	ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
+	fl6.daddr = ipv6_hdr(skb)->saddr;
 	if (saddr)
-		ipv6_addr_copy(&fl6.saddr, saddr);
+		fl6.saddr = *saddr;
 	fl6.flowi6_oif = skb->dev->ifindex;
 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -786,8 +786,8 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
 		      int oif)
 {
 	memset(fl6, 0, sizeof(*fl6));
-	ipv6_addr_copy(&fl6->saddr, saddr);
-	ipv6_addr_copy(&fl6->daddr, daddr);
+	fl6->saddr = *saddr;
+	fl6->daddr = *daddr;
 	fl6->flowi6_proto 	= IPPROTO_ICMPV6;
 	fl6->fl6_icmp_type	= type;
 	fl6->fl6_icmp_code	= 0;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index fee46d5a2f12..4d7bfb321c75 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -65,9 +65,9 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
+	fl6.daddr = treq->rmt_addr;
 	final_p = fl6_update_dst(&fl6, np->opt, &final);
-	ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
+	fl6.saddr = treq->loc_addr;
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.fl6_dport = inet_rsk(req)->rmt_port;
@@ -157,7 +157,7 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
 
 	sin6->sin6_family = AF_INET6;
-	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
+	sin6->sin6_addr = np->daddr;
 	sin6->sin6_port	= inet_sk(sk)->inet_dport;
 	/* We do not store received flowlabel for TCP */
 	sin6->sin6_flowinfo = 0;
@@ -215,8 +215,8 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = sk->sk_protocol;
-	ipv6_addr_copy(&fl6.daddr, &np->daddr);
-	ipv6_addr_copy(&fl6.saddr, &np->saddr);
+	fl6.daddr = np->daddr;
+	fl6.saddr = np->saddr;
 	fl6.flowlabel = np->flow_label;
 	IP6_ECN_flow_xmit(sk, fl6.flowlabel);
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
@@ -246,7 +246,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
 	skb_dst_set_noref(skb, dst);
 
 	/* Restore final destination back after routing done */
-	ipv6_addr_copy(&fl6.daddr, &np->daddr);
+	fl6.daddr = np->daddr;
 
 	res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
 	rcu_read_unlock();
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 4566dbd916d3..b7867a1215b1 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -386,7 +386,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
 		err = -EINVAL;
 		goto done;
 	}
-	ipv6_addr_copy(&fl->dst, &freq->flr_dst);
+	fl->dst = freq->flr_dst;
 	atomic_set(&fl->users, 1);
 	switch (fl->share) {
 	case IPV6_FL_S_EXCL:
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 68ef97f353b6..a24e15557843 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -238,8 +238,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	hdr->nexthdr = proto;
 	hdr->hop_limit = hlimit;
 
-	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
-	ipv6_addr_copy(&hdr->daddr, first_hop);
+	hdr->saddr = fl6->saddr;
+	hdr->daddr = *first_hop;
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
@@ -290,8 +290,8 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 	hdr->nexthdr = proto;
 	hdr->hop_limit = np->hop_limit;
 
-	ipv6_addr_copy(&hdr->saddr, saddr);
-	ipv6_addr_copy(&hdr->daddr, daddr);
+	hdr->saddr = *saddr;
+	hdr->daddr = *daddr;
 
 	return 0;
 }
@@ -1063,7 +1063,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 	if (err)
 		return ERR_PTR(err);
 	if (final_dst)
-		ipv6_addr_copy(&fl6->daddr, final_dst);
+		fl6->daddr = *final_dst;
 	if (can_sleep)
 		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
 
@@ -1099,7 +1099,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 	if (err)
 		return ERR_PTR(err);
 	if (final_dst)
-		ipv6_addr_copy(&fl6->daddr, final_dst);
+		fl6->daddr = *final_dst;
 	if (can_sleep)
 		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
 
@@ -1592,7 +1592,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	if (np->pmtudisc < IPV6_PMTUDISC_DO)
 		skb->local_df = 1;
 
-	ipv6_addr_copy(final_dst, &fl6->daddr);
+	*final_dst = fl6->daddr;
 	__skb_pull(skb, skb_network_header_len(skb));
 	if (opt && opt->opt_flen)
 		ipv6_push_frag_opts(skb, opt, &proto);
@@ -1608,8 +1608,8 @@ int ip6_push_pending_frames(struct sock *sk)
 
 	hdr->hop_limit = np->cork.hop_limit;
 	hdr->nexthdr = proto;
-	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
-	ipv6_addr_copy(&hdr->daddr, final_dst);
+	hdr->saddr = fl6->saddr;
+	hdr->daddr = *final_dst;
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 83f0e31c5fbd..f5f98f558acb 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -979,8 +979,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
 	ipv6h->hop_limit = t->parms.hop_limit;
 	ipv6h->nexthdr = proto;
-	ipv6_addr_copy(&ipv6h->saddr, &fl6->saddr);
-	ipv6_addr_copy(&ipv6h->daddr, &fl6->daddr);
+	ipv6h->saddr = fl6->saddr;
+	ipv6h->daddr = fl6->daddr;
 	nf_reset(skb);
 	pkt_len = skb->len;
 	err = ip6_local_out(skb);
@@ -1155,8 +1155,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
 
 	/* Set up flowi template */
-	ipv6_addr_copy(&fl6->saddr, &p->laddr);
-	ipv6_addr_copy(&fl6->daddr, &p->raddr);
+	fl6->saddr = p->laddr;
+	fl6->daddr = p->raddr;
 	fl6->flowi6_oif = p->link;
 	fl6->flowlabel = 0;
 
@@ -1212,8 +1212,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 static int
 ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
 {
-	ipv6_addr_copy(&t->parms.laddr, &p->laddr);
-	ipv6_addr_copy(&t->parms.raddr, &p->raddr);
+	t->parms.laddr = p->laddr;
+	t->parms.raddr = p->raddr;
 	t->parms.flags = p->flags;
 	t->parms.hop_limit = p->hop_limit;
 	t->parms.encap_limit = p->encap_limit;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 449a9185b8f2..c7e95c8c579f 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1105,8 +1105,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
 		msg->im6_mif = mrt->mroute_reg_vif_num;
 		msg->im6_pad = 0;
-		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
-		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
+		msg->im6_src = ipv6_hdr(pkt)->saddr;
+		msg->im6_dst = ipv6_hdr(pkt)->daddr;
 
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	} else
@@ -1131,8 +1131,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 	msg->im6_msgtype = assert;
 	msg->im6_mif = mifi;
 	msg->im6_pad = 0;
-	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
-	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
+	msg->im6_src = ipv6_hdr(pkt)->saddr;
+	msg->im6_dst = ipv6_hdr(pkt)->daddr;
 
 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -2181,8 +2181,8 @@ int ip6mr_get_route(struct net *net,
 		iph->payload_len = 0;
 		iph->nexthdr = IPPROTO_NONE;
 		iph->hop_limit = 0;
-		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
-		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
+		iph->saddr = rt->rt6i_src.addr;
+		iph->daddr = rt->rt6i_dst.addr;
 
 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
 		read_unlock(&mrt_lock);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c99e3ee9781f..29993b7079a5 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -435,7 +435,7 @@ sticky_done:
 			goto e_inval;
 
 		np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex;
-		ipv6_addr_copy(&np->sticky_pktinfo.ipi6_addr, &pkt.ipi6_addr);
+		np->sticky_pktinfo.ipi6_addr = pkt.ipi6_addr;
 		retv = 0;
 		break;
 	}
@@ -980,8 +980,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 				struct in6_pktinfo src_info;
 				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
 					np->sticky_pktinfo.ipi6_ifindex;
-				np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) :
-					ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr));
+				src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
 				put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 			}
 			if (np->rxopt.bits.rxhlim) {
@@ -992,8 +991,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 				struct in6_pktinfo src_info;
 				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
 					np->sticky_pktinfo.ipi6_ifindex;
-				np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) :
-					ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr));
+				src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
 				put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 			}
 			if (np->rxopt.bits.rxohlim) {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 7b94bebb73b1..6cc4d1fb8c13 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -155,7 +155,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		return -ENOMEM;
 
 	mc_lst->next = NULL;
-	ipv6_addr_copy(&mc_lst->addr, addr);
+	mc_lst->addr = *addr;
 
 	rcu_read_lock();
 	if (ifindex == 0) {
@@ -858,7 +858,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
 
 	setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
 
-	ipv6_addr_copy(&mc->mca_addr, addr);
+	mc->mca_addr = *addr;
 	mc->idev = idev; /* (reference taken) */
 	mc->mca_users = 1;
 	/* mca_stamp should be updated upon changes */
@@ -1776,7 +1776,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 	hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg));
 	memset(hdr, 0, sizeof(struct mld_msg));
 	hdr->mld_type = type;
-	ipv6_addr_copy(&hdr->mld_mca, addr);
+	hdr->mld_mca = *addr;
 
 	hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len,
 					 IPPROTO_ICMPV6,
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 43242e6e6103..7e1e0fbfef21 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -195,8 +195,8 @@ static inline int mip6_report_rl_allow(struct timeval *stamp,
 		mip6_report_rl.stamp.tv_sec = stamp->tv_sec;
 		mip6_report_rl.stamp.tv_usec = stamp->tv_usec;
 		mip6_report_rl.iif = iif;
-		ipv6_addr_copy(&mip6_report_rl.src, src);
-		ipv6_addr_copy(&mip6_report_rl.dst, dst);
+		mip6_report_rl.src = *src;
+		mip6_report_rl.dst = *dst;
 		allow = 1;
 	}
 	spin_unlock_bh(&mip6_report_rl.lock);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index d699ddcad4ce..a4769881c5b5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -481,7 +481,7 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev,
 
 	opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
 	if (target) {
-		ipv6_addr_copy((struct in6_addr *)opt, target);
+		*(struct in6_addr *)opt = *target;
 		opt += sizeof(*target);
 	}
 
@@ -1622,9 +1622,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	 */
 
 	addrp = (struct in6_addr *)(icmph + 1);
-	ipv6_addr_copy(addrp, target);
+	*addrp = *target;
 	addrp++;
-	ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
+	*addrp = ipv6_hdr(skb)->daddr;
 
 	opt = (u8*) (addrp + 1);
 
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index a5a4c5dd5396..b5a2aa58a03a 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -93,8 +93,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl6.saddr, &oip6h->daddr);
-	ipv6_addr_copy(&fl6.daddr, &oip6h->saddr);
+	fl6.saddr = oip6h->daddr;
+	fl6.daddr = oip6h->saddr;
 	fl6.fl6_sport = otcph.dest;
 	fl6.fl6_dport = otcph.source;
 	security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
@@ -129,8 +129,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 	*(__be32 *)ip6h =  htonl(0x60000000 | (tclass << 20));
 	ip6h->hop_limit = ip6_dst_hoplimit(dst);
 	ip6h->nexthdr = IPPROTO_TCP;
-	ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
-	ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr);
+	ip6h->saddr = oip6h->daddr;
+	ip6h->daddr = oip6h->saddr;
 
 	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
 	/* Truncate to length (no data) */
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a1aa869a9ce7..a4894f4f1944 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -299,9 +299,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	}
 
 	inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
-	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
+	np->rcv_saddr = addr->sin6_addr;
 	if (!(addr_type & IPV6_ADDR_MULTICAST))
-		ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
+		np->saddr = addr->sin6_addr;
 	err = 0;
 out_unlock:
 	rcu_read_unlock();
@@ -495,7 +495,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (sin6) {
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_port = 0;
-		ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
+		sin6->sin6_addr = ipv6_hdr(skb)->saddr;
 		sin6->sin6_flowinfo = 0;
 		sin6->sin6_scope_id = 0;
 		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -846,11 +846,11 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		goto out;
 
 	if (!ipv6_addr_any(daddr))
-		ipv6_addr_copy(&fl6.daddr, daddr);
+		fl6.daddr = *daddr;
 	else
 		fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
 	if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
-		ipv6_addr_copy(&fl6.saddr, &np->saddr);
+		fl6.saddr = np->saddr;
 
 	final_p = fl6_update_dst(&fl6, opt, &final);
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index dfb164e9051a..b69fae76a6f1 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -153,8 +153,8 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
 
 	fq->id = arg->id;
 	fq->user = arg->user;
-	ipv6_addr_copy(&fq->saddr, arg->src);
-	ipv6_addr_copy(&fq->daddr, arg->dst);
+	fq->saddr = *arg->src;
+	fq->daddr = *arg->dst;
 }
 EXPORT_SYMBOL(ip6_frag_init);
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 05c89be04c9f..2897403fdaff 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -729,14 +729,14 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
 			if (rt->rt6i_dst.plen != 128 &&
 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 				rt->rt6i_flags |= RTF_ANYCAST;
-			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
+			rt->rt6i_gateway = *daddr;
 		}
 
 		rt->rt6i_flags |= RTF_CACHE;
 
 #ifdef CONFIG_IPV6_SUBTREES
 		if (rt->rt6i_src.plen && saddr) {
-			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
+			rt->rt6i_src.addr = *saddr;
 			rt->rt6i_src.plen = 128;
 		}
 #endif
@@ -932,7 +932,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
 			in6_dev_hold(rt->rt6i_idev);
 		rt->rt6i_expires = 0;
 
-		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
+		rt->rt6i_gateway = ort->rt6i_gateway;
 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
 		rt->rt6i_metric = 0;
 
@@ -1087,7 +1087,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	rt->dst.output  = ip6_output;
 	dst_set_neighbour(&rt->dst, neigh);
 	atomic_set(&rt->dst.__refcnt, 1);
-	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+	rt->rt6i_dst.addr = *addr;
 	rt->rt6i_dst.plen = 128;
 	rt->rt6i_idev     = idev;
 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
@@ -1324,7 +1324,7 @@ int ip6_route_add(struct fib6_config *cfg)
 		int gwa_type;
 
 		gw_addr = &cfg->fc_gateway;
-		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
+		rt->rt6i_gateway = *gw_addr;
 		gwa_type = ipv6_addr_type(gw_addr);
 
 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
@@ -1378,7 +1378,7 @@ int ip6_route_add(struct fib6_config *cfg)
 			err = -EINVAL;
 			goto out;
 		}
-		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
+		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
 		rt->rt6i_prefsrc.plen = 128;
 	} else
 		rt->rt6i_prefsrc.plen = 0;
@@ -1575,7 +1575,7 @@ static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
 		},
 	};
 
-	ipv6_addr_copy(&rdfl.gateway, gateway);
+	rdfl.gateway = *gateway;
 
 	if (rt6_need_strict(dest))
 		flags |= RT6_LOOKUP_F_IFACE;
@@ -1631,7 +1631,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
 	if (on_link)
 		nrt->rt6i_flags &= ~RTF_GATEWAY;
 
-	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
+	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
 	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
 
 	if (ip6_ins_rt(nrt))
@@ -1777,7 +1777,7 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
 		rt->dst.output = ort->dst.output;
 		rt->dst.flags |= DST_HOST;
 
-		ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
+		rt->rt6i_dst.addr = *dest;
 		rt->rt6i_dst.plen = 128;
 		dst_copy_metrics(&rt->dst, &ort->dst);
 		rt->dst.error = ort->dst.error;
@@ -1787,7 +1787,7 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
 		rt->dst.lastuse = jiffies;
 		rt->rt6i_expires = 0;
 
-		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
+		rt->rt6i_gateway = ort->rt6i_gateway;
 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
 		rt->rt6i_metric = 0;
 
@@ -1850,8 +1850,8 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 		.fc_nlinfo.nl_net = net,
 	};
 
-	ipv6_addr_copy(&cfg.fc_dst, prefix);
-	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
+	cfg.fc_dst = *prefix;
+	cfg.fc_gateway = *gwaddr;
 
 	/* We should treat it as a default route if prefix length is 0. */
 	if (!prefixlen)
@@ -1900,7 +1900,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 		.fc_nlinfo.nl_net = dev_net(dev),
 	};
 
-	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
+	cfg.fc_gateway = *gwaddr;
 
 	ip6_route_add(&cfg);
 
@@ -1946,9 +1946,9 @@ static void rtmsg_to_fib6_config(struct net *net,
 
 	cfg->fc_nlinfo.nl_net = net;
 
-	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
-	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
-	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
+	cfg->fc_dst = rtmsg->rtmsg_dst;
+	cfg->fc_src = rtmsg->rtmsg_src;
+	cfg->fc_gateway = rtmsg->rtmsg_gateway;
 }
 
 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
@@ -2082,7 +2082,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	}
 	dst_set_neighbour(&rt->dst, neigh);
 
-	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+	rt->rt6i_dst.addr = *addr;
 	rt->rt6i_dst.plen = 128;
 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
 
@@ -2100,7 +2100,7 @@ int ip6_route_get_saddr(struct net *net,
 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
 	int err = 0;
 	if (rt->rt6i_prefsrc.plen)
-		ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
+		*saddr = rt->rt6i_prefsrc.addr;
 	else
 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
 					 daddr, prefs, saddr);
@@ -2439,7 +2439,7 @@ static int rt6_fill_node(struct net *net,
 
 	if (rt->rt6i_prefsrc.plen) {
 		struct in6_addr saddr_buf;
-		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
+		saddr_buf = rt->rt6i_prefsrc.addr;
 		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
 
@@ -2513,14 +2513,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
 			goto errout;
 
-		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
+		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
 	}
 
 	if (tb[RTA_DST]) {
 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
 			goto errout;
 
-		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
+		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
 	}
 
 	if (tb[RTA_IIF])
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index cec09382282d..50968f226e75 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -914,7 +914,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 				goto done;
 #ifdef CONFIG_IPV6_SIT_6RD
 		} else {
-			ipv6_addr_copy(&ip6rd.prefix, &t->ip6rd.prefix);
+			ip6rd.prefix = t->ip6rd.prefix;
 			ip6rd.relay_prefix = t->ip6rd.relay_prefix;
 			ip6rd.prefixlen = t->ip6rd.prefixlen;
 			ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
@@ -1082,7 +1082,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			if (relay_prefix != ip6rd.relay_prefix)
 				goto done;
 
-			ipv6_addr_copy(&t->ip6rd.prefix, &prefix);
+			t->ip6rd.prefix = prefix;
 			t->ip6rd.relay_prefix = relay_prefix;
 			t->ip6rd.prefixlen = ip6rd.prefixlen;
 			t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 5a0d6648bbbc..8e951d8d3b81 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -200,8 +200,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	req->mss = mss;
 	ireq->rmt_port = th->source;
 	ireq->loc_port = th->dest;
-	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
-	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
+	ireq6->rmt_addr = ipv6_hdr(skb)->saddr;
+	ireq6->loc_addr = ipv6_hdr(skb)->daddr;
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -237,9 +237,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		struct flowi6 fl6;
 		memset(&fl6, 0, sizeof(fl6));
 		fl6.flowi6_proto = IPPROTO_TCP;
-		ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
+		fl6.daddr = ireq6->rmt_addr;
 		final_p = fl6_update_dst(&fl6, np->opt, &final);
-		ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
+		fl6.saddr = ireq6->loc_addr;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
 		fl6.flowi6_mark = sk->sk_mark;
 		fl6.fl6_dport = inet_rsk(req)->rmt_port;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 36131d122a6f..fd98dd010fcb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -153,7 +153,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 			if (flowlabel == NULL)
 				return -EINVAL;
-			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+			usin->sin6_addr = flowlabel->dst;
 			fl6_sock_release(flowlabel);
 		}
 	}
@@ -195,7 +195,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		tp->write_seq = 0;
 	}
 
-	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
+	np->daddr = usin->sin6_addr;
 	np->flow_label = fl6.flowlabel;
 
 	/*
@@ -244,9 +244,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		saddr = &np->rcv_saddr;
 
 	fl6.flowi6_proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl6.daddr, &np->daddr);
-	ipv6_addr_copy(&fl6.saddr,
-		       (saddr ? saddr : &np->saddr));
+	fl6.daddr = np->daddr;
+	fl6.saddr = saddr ? *saddr : np->saddr;
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.fl6_dport = usin->sin6_port;
@@ -264,11 +263,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	if (saddr == NULL) {
 		saddr = &fl6.saddr;
-		ipv6_addr_copy(&np->rcv_saddr, saddr);
+		np->rcv_saddr = *saddr;
 	}
 
 	/* set the source address */
-	ipv6_addr_copy(&np->saddr, saddr);
+	np->saddr = *saddr;
 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
 	sk->sk_gso_type = SKB_GSO_TCPV6;
@@ -398,8 +397,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			 */
 			memset(&fl6, 0, sizeof(fl6));
 			fl6.flowi6_proto = IPPROTO_TCP;
-			ipv6_addr_copy(&fl6.daddr, &np->daddr);
-			ipv6_addr_copy(&fl6.saddr, &np->saddr);
+			fl6.daddr = np->daddr;
+			fl6.saddr = np->saddr;
 			fl6.flowi6_oif = sk->sk_bound_dev_if;
 			fl6.flowi6_mark = sk->sk_mark;
 			fl6.fl6_dport = inet->inet_dport;
@@ -489,8 +488,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
-	ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
+	fl6.daddr = treq->rmt_addr;
+	fl6.saddr = treq->loc_addr;
 	fl6.flowlabel = 0;
 	fl6.flowi6_oif = treq->iif;
 	fl6.flowi6_mark = sk->sk_mark;
@@ -512,7 +511,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 	if (skb) {
 		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
 
-		ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
+		fl6.daddr = treq->rmt_addr;
 		err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
 		err = net_xmit_eval(err);
 	}
@@ -617,8 +616,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
 			tp->md5sig_info->alloced6++;
 		}
 
-		ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr,
-			       peer);
+		tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr = *peer;
 		tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
 		tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;
 
@@ -750,8 +748,8 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
 
 	bp = &hp->md5_blk.ip6;
 	/* 1. TCP pseudo-header (RFC2460) */
-	ipv6_addr_copy(&bp->saddr, saddr);
-	ipv6_addr_copy(&bp->daddr, daddr);
+	bp->saddr = *saddr;
+	bp->daddr = *daddr;
 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
 	bp->len = cpu_to_be32(nbytes);
 
@@ -1039,8 +1037,8 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 #endif
 
 	memset(&fl6, 0, sizeof(fl6));
-	ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
-	ipv6_addr_copy(&fl6.saddr, &ipv6_hdr(skb)->daddr);
+	fl6.daddr = ipv6_hdr(skb)->saddr;
+	fl6.saddr = ipv6_hdr(skb)->daddr;
 
 	buff->ip_summed = CHECKSUM_PARTIAL;
 	buff->csum = 0;
@@ -1250,8 +1248,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_openreq_init(req, &tmp_opt, skb);
 
 	treq = inet6_rsk(req);
-	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
-	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
+	treq->rmt_addr = ipv6_hdr(skb)->saddr;
+	treq->loc_addr = ipv6_hdr(skb)->daddr;
 	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
@@ -1380,7 +1378,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
 
-		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
+		newnp->rcv_saddr = newnp->saddr;
 
 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
@@ -1444,9 +1442,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
-	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
-	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
+	newnp->daddr = treq->rmt_addr;
+	newnp->saddr = treq->loc_addr;
+	newnp->rcv_saddr = treq->loc_addr;
 	newsk->sk_bound_dev_if = treq->iif;
 
 	/* Now IPv6 options...
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ccfb0451b1c3..84ec9db86ee0 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -417,8 +417,7 @@ try_again:
 			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
 					       &sin6->sin6_addr);
 		else {
-			ipv6_addr_copy(&sin6->sin6_addr,
-				       &ipv6_hdr(skb)->saddr);
+			sin6->sin6_addr = ipv6_hdr(skb)->saddr;
 			if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 				sin6->sin6_scope_id = IP6CB(skb)->iif;
 		}
@@ -1115,11 +1114,11 @@ do_udp_sendmsg:
 
 	fl6.flowi6_proto = sk->sk_protocol;
 	if (!ipv6_addr_any(daddr))
-		ipv6_addr_copy(&fl6.daddr, daddr);
+		fl6.daddr = *daddr;
 	else
 		fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
 	if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
-		ipv6_addr_copy(&fl6.saddr, &np->saddr);
+		fl6.saddr = np->saddr;
 	fl6.fl6_sport = inet->inet_sport;
 
 	final_p = fl6_update_dst(&fl6, opt, &final);
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 3437d7d4eed6..a81ce9450750 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -72,8 +72,8 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 		top_iph->nexthdr = IPPROTO_BEETPH;
 	}
 
-	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
-	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
+	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
 	return 0;
 }
 
@@ -99,8 +99,8 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	ip6h = ipv6_hdr(skb);
 	ip6h->payload_len = htons(skb->len - size);
-	ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
-	ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6);
+	ip6h->daddr = *(struct in6_addr *)&x->sel.daddr.a6;
+	ip6h->saddr = *(struct in6_addr *)&x->sel.saddr.a6;
 	err = 0;
 out:
 	return err;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 4d6edff0498f..261e6e6f487e 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -55,8 +55,8 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 		dsfield &= ~INET_ECN_MASK;
 	ipv6_change_dsfield(top_iph, 0, dsfield);
 	top_iph->hop_limit = ip6_dst_hoplimit(dst->child);
-	ipv6_addr_copy(&top_iph->saddr, (const struct in6_addr *)&x->props.saddr);
-	ipv6_addr_copy(&top_iph->daddr, (const struct in6_addr *)&x->id.daddr);
+	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
 	return 0;
 }
 
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index faae41737fca..4eeff89c1aaa 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -49,7 +49,7 @@ static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
 	struct sock *sk = skb->sk;
 
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
-	ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->daddr);
+	fl6.daddr = ipv6_hdr(skb)->daddr;
 
 	ipv6_local_rxpmtu(sk, &fl6, mtu);
 }
@@ -60,7 +60,7 @@ static void xfrm6_local_error(struct sk_buff *skb, u32 mtu)
 	struct sock *sk = skb->sk;
 
 	fl6.fl6_dport = inet_sk(sk)->inet_dport;
-	ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->daddr);
+	fl6.daddr = ipv6_hdr(skb)->daddr;
 
 	ipv6_local_error(sk, EMSGSIZE, &fl6, mtu);
 }
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d879f7efbd10..8ea65e032733 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -132,8 +132,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 	memset(fl6, 0, sizeof(struct flowi6));
 	fl6->flowi6_mark = skb->mark;
 
-	ipv6_addr_copy(&fl6->daddr, reverse ? &hdr->saddr : &hdr->daddr);
-	ipv6_addr_copy(&fl6->saddr, reverse ? &hdr->daddr : &hdr->saddr);
+	fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
+	fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
 
 	while (nh + offset + 1 < skb->data ||
 	       pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index f2d72b8a3faa..3f2f7c4ab721 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -27,8 +27,8 @@ __xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
 
 	/* Initialize temporary selector matching only
 	 * to current session. */
-	ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl6->daddr);
-	ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl6->saddr);
+	*(struct in6_addr *)&sel->daddr = fl6->daddr;
+	*(struct in6_addr *)&sel->saddr = fl6->saddr;
 	sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
 	sel->dport_mask = htons(0xffff);
 	sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 1e733e9073d0..bfc0bef170cb 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -712,7 +712,7 @@ static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_port = port;
 		sin6->sin6_flowinfo = 0;
-		ipv6_addr_copy(&sin6->sin6_addr, (const struct in6_addr *)xaddr->a6);
+		sin6->sin6_addr = *(struct in6_addr *)xaddr->a6;
 		sin6->sin6_scope_id = 0;
 		return 128;
 	    }
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index f2d576e6b769..4015fcaf87bc 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -241,7 +241,7 @@ hash_ip6_data_isnull(const struct hash_ip6_elem *elem)
 static inline void
 hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src)
 {
-	ipv6_addr_copy(&dst->ip.in6, &src->ip.in6);
+	dst->ip.in6 = src->ip.in6;
 }
 
 static inline void
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 60d016541c58..28988196775e 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -267,7 +267,7 @@ static inline void
 hash_net6_data_copy(struct hash_net6_elem *dst,
 		    const struct hash_net6_elem *src)
 {
-	ipv6_addr_copy(&dst->ip.in6, &src->ip.in6);
+	dst->ip.in6 = src->ip.in6;
 	dst->cidr = src->cidr;
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 093cc327020f..611c3359b94d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -983,7 +983,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
 	if (!cp)
 		return NF_ACCEPT;
 
-	ipv6_addr_copy(&snet.in6, &iph->saddr);
+	snet.in6 = iph->saddr;
 	return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,
 				    pp, offset, sizeof(struct ipv6hdr));
 }
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 3cdd479f9b5d..bcf5563e4837 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -603,9 +603,9 @@ sloop:
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6) {
 		p += sizeof(struct ip_vs_sync_v6);
-		ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
-		ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
-		ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
+		s->v6.caddr = cp->caddr.in6;
+		s->v6.vaddr = cp->vaddr.in6;
+		s->v6.daddr = cp->daddr.in6;
 	} else
 #endif
 	{
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index aa2d7206ee8a..38a576d05b4b 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -235,7 +235,7 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
 			goto out_err;
 		}
 	}
-	ipv6_addr_copy(ret_saddr, &fl6.saddr);
+	*ret_saddr = fl6.saddr;
 	return dst;
 
 out_err:
@@ -279,7 +279,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
 				  atomic_read(&rt->dst.__refcnt));
 		}
 		if (ret_saddr)
-			ipv6_addr_copy(ret_saddr, &dest->dst_saddr.in6);
+			*ret_saddr = dest->dst_saddr.in6;
 		spin_unlock(&dest->dst_lock);
 	} else {
 		dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
@@ -705,7 +705,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
 		goto tx_error;
-	ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6);
+	ipv6_hdr(skb)->daddr = cp->daddr.in6;
 
 	if (!local || !skb->dev) {
 		/* drop the old route when skb is not shared */
@@ -967,8 +967,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
 	iph->priority		=	old_iph->priority;
 	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
-	ipv6_addr_copy(&iph->daddr, &cp->daddr.in6);
-	ipv6_addr_copy(&iph->saddr, &saddr);
+	iph->daddr = cp->daddr.in6;
+	iph->saddr = saddr;
 	iph->hop_limit		=	old_iph->hop_limit;
 
 	/* Another hack: avoid icmp_send in ip_fragment */
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index f03c2d4539f6..f9368f33e7af 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -750,10 +750,10 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 		struct rt6_info *rt1, *rt2;
 
 		memset(&fl1, 0, sizeof(fl1));
-		ipv6_addr_copy(&fl1.daddr, &src->in6);
+		fl1.daddr = src->in6;
 
 		memset(&fl2, 0, sizeof(fl2));
-		ipv6_addr_copy(&fl2.daddr, &dst->in6);
+		fl2.daddr = dst->in6;
 		if (!afinfo->route(&init_net, (struct dst_entry **)&rt1,
 				   flowi6_to_flowi(&fl1), false)) {
 			if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 9e63b43faeed..3ecade3966d5 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -161,7 +161,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
 		struct flowi6 *fl6 = &fl.u.ip6;
 
 		memset(fl6, 0, sizeof(*fl6));
-		ipv6_addr_copy(&fl6->daddr, &ipv6_hdr(skb)->saddr);
+		fl6->daddr = ipv6_hdr(skb)->saddr;
 	}
 	rcu_read_lock();
 	ai = nf_get_afinfo(family);
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index b77d383cec78..c047de2046ad 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -42,7 +42,7 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
 	int route_err;
 
 	memset(&flow, 0, sizeof(flow));
-	ipv6_addr_copy(&flow.daddr, addr);
+	flow.daddr = *addr;
 	if (dev)
 		flow.flowi6_oif = dev->ifindex;
 
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 9c24de10a657..8ed67dccf11d 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -155,12 +155,12 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
 			if (map6 == NULL)
 				goto cfg_unlbl_map_add_failure;
 			map6->type = NETLBL_NLTYPE_UNLABELED;
-			ipv6_addr_copy(&map6->list.addr, addr6);
+			map6->list.addr = *addr6;
 			map6->list.addr.s6_addr32[0] &= mask6->s6_addr32[0];
 			map6->list.addr.s6_addr32[1] &= mask6->s6_addr32[1];
 			map6->list.addr.s6_addr32[2] &= mask6->s6_addr32[2];
 			map6->list.addr.s6_addr32[3] &= mask6->s6_addr32[3];
-			ipv6_addr_copy(&map6->list.mask, mask6);
+			map6->list.mask = *mask6;
 			map6->list.valid = 1;
 			ret_val = netlbl_af4list_add(&map4->list,
 						     &addrmap->list4);
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index bfa555869775..9879300beefd 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -216,12 +216,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
 			ret_val = -ENOMEM;
 			goto add_failure;
 		}
-		ipv6_addr_copy(&map->list.addr, addr);
+		map->list.addr = *addr;
 		map->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
 		map->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
 		map->list.addr.s6_addr32[2] &= mask->s6_addr32[2];
 		map->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
-		ipv6_addr_copy(&map->list.mask, mask);
+		map->list.mask = *mask;
 		map->list.valid = 1;
 		map->type = entry->type;
 
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index e251c2c88521..049ccd2447d7 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -300,12 +300,12 @@ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface,
 	if (entry == NULL)
 		return -ENOMEM;
 
-	ipv6_addr_copy(&entry->list.addr, addr);
+	entry->list.addr = *addr;
 	entry->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
 	entry->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
 	entry->list.addr.s6_addr32[2] &= mask->s6_addr32[2];
 	entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
-	ipv6_addr_copy(&entry->list.mask, mask);
+	entry->list.mask = *mask;
 	entry->list.valid = 1;
 	entry->secid = secid;
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 810427833bcd..91f479121c55 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -107,7 +107,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
 		if (addr) {
 			addr->a.v6.sin6_family = AF_INET6;
 			addr->a.v6.sin6_port = 0;
-			ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifa->addr);
+			addr->a.v6.sin6_addr = ifa->addr;
 			addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
 			addr->valid = 1;
 			spin_lock_bh(&sctp_local_addr_lock);
@@ -219,8 +219,8 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 	/* Fill in the dest address from the route entry passed with the skb
 	 * and the source address from the transport.
 	 */
-	ipv6_addr_copy(&fl6.daddr, &transport->ipaddr.v6.sin6_addr);
-	ipv6_addr_copy(&fl6.saddr, &transport->saddr.v6.sin6_addr);
+	fl6.daddr = transport->ipaddr.v6.sin6_addr;
+	fl6.saddr = transport->saddr.v6.sin6_addr;
 
 	fl6.flowlabel = np->flow_label;
 	IP6_ECN_flow_xmit(sk, fl6.flowlabel);
@@ -231,7 +231,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 
 	if (np->opt && np->opt->srcrt) {
 		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-		ipv6_addr_copy(&fl6.daddr, rt0->addr);
+		fl6.daddr = *rt0->addr;
 	}
 
 	SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n",
@@ -265,7 +265,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	sctp_scope_t scope;
 
 	memset(fl6, 0, sizeof(struct flowi6));
-	ipv6_addr_copy(&fl6->daddr, &daddr->v6.sin6_addr);
+	fl6->daddr = daddr->v6.sin6_addr;
 	fl6->fl6_dport = daddr->v6.sin6_port;
 	fl6->flowi6_proto = IPPROTO_SCTP;
 	if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -277,7 +277,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 		fl6->fl6_sport = htons(asoc->base.bind_addr.port);
 
 	if (saddr) {
-		ipv6_addr_copy(&fl6->saddr, &saddr->v6.sin6_addr);
+		fl6->saddr = saddr->v6.sin6_addr;
 		fl6->fl6_sport = saddr->v6.sin6_port;
 		SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6->saddr);
 	}
@@ -334,7 +334,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	}
 	rcu_read_unlock();
 	if (baddr) {
-		ipv6_addr_copy(&fl6->saddr, &baddr->v6.sin6_addr);
+		fl6->saddr = baddr->v6.sin6_addr;
 		fl6->fl6_sport = baddr->v6.sin6_port;
 		dst = ip6_dst_lookup_flow(sk, fl6, NULL, false);
 	}
@@ -375,7 +375,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
 
 	if (t->dst) {
 		saddr->v6.sin6_family = AF_INET6;
-		ipv6_addr_copy(&saddr->v6.sin6_addr, &fl6->saddr);
+		saddr->v6.sin6_addr = fl6->saddr;
 	}
 }
 
@@ -400,7 +400,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
 		if (addr) {
 			addr->a.v6.sin6_family = AF_INET6;
 			addr->a.v6.sin6_port = 0;
-			ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifp->addr);
+			addr->a.v6.sin6_addr = ifp->addr;
 			addr->a.v6.sin6_scope_id = dev->ifindex;
 			addr->valid = 1;
 			INIT_LIST_HEAD(&addr->list);
@@ -416,7 +416,6 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
 static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
 			     int is_saddr)
 {
-	void *from;
 	__be16 *port;
 	struct sctphdr *sh;
 
@@ -428,12 +427,11 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
 	sh = sctp_hdr(skb);
 	if (is_saddr) {
 		*port  = sh->source;
-		from = &ipv6_hdr(skb)->saddr;
+		addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
 	} else {
 		*port = sh->dest;
-		from = &ipv6_hdr(skb)->daddr;
+		addr->v6.sin6_addr = ipv6_hdr(skb)->daddr;
 	}
-	ipv6_addr_copy(&addr->v6.sin6_addr, from);
 }
 
 /* Initialize an sctp_addr from a socket. */
@@ -441,7 +439,7 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk)
 {
 	addr->v6.sin6_family = AF_INET6;
 	addr->v6.sin6_port = 0;
-	ipv6_addr_copy(&addr->v6.sin6_addr, &inet6_sk(sk)->rcv_saddr);
+	addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr;
 }
 
 /* Initialize sk->sk_rcv_saddr from sctp_addr. */
@@ -454,7 +452,7 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
 		inet6_sk(sk)->rcv_saddr.s6_addr32[3] =
 			addr->v4.sin_addr.s_addr;
 	} else {
-		ipv6_addr_copy(&inet6_sk(sk)->rcv_saddr, &addr->v6.sin6_addr);
+		inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr;
 	}
 }
 
@@ -467,7 +465,7 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
 		inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff);
 		inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
 	} else {
-		ipv6_addr_copy(&inet6_sk(sk)->daddr, &addr->v6.sin6_addr);
+		inet6_sk(sk)->daddr = addr->v6.sin6_addr;
 	}
 }
 
@@ -479,7 +477,7 @@ static void sctp_v6_from_addr_param(union sctp_addr *addr,
 	addr->v6.sin6_family = AF_INET6;
 	addr->v6.sin6_port = port;
 	addr->v6.sin6_flowinfo = 0; /* BUG */
-	ipv6_addr_copy(&addr->v6.sin6_addr, &param->v6.addr);
+	addr->v6.sin6_addr = param->v6.addr;
 	addr->v6.sin6_scope_id = iif;
 }
 
@@ -493,7 +491,7 @@ static int sctp_v6_to_addr_param(const union sctp_addr *addr,
 
 	param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS;
 	param->v6.param_hdr.length = htons(length);
-	ipv6_addr_copy(&param->v6.addr, &addr->v6.sin6_addr);
+	param->v6.addr = addr->v6.sin6_addr;
 
 	return length;
 }
@@ -504,7 +502,7 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr,
 {
 	addr->sa.sa_family = AF_INET6;
 	addr->v6.sin6_port = port;
-	ipv6_addr_copy(&addr->v6.sin6_addr, saddr);
+	addr->v6.sin6_addr = *saddr;
 }
 
 /* Compare addresses exactly.
@@ -759,7 +757,7 @@ static void sctp_inet6_event_msgname(struct sctp_ulpevent *event,
 		}
 
 		sin6from = &asoc->peer.primary_addr.v6;
-		ipv6_addr_copy(&sin6->sin6_addr, &sin6from->sin6_addr);
+		sin6->sin6_addr = sin6from->sin6_addr;
 		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 			sin6->sin6_scope_id = sin6from->sin6_scope_id;
 	}
@@ -787,7 +785,7 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
 		}
 
 		/* Otherwise, just copy the v6 address. */
-		ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
+		sin6->sin6_addr = ipv6_hdr(skb)->saddr;
 		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) {
 			struct sctp_ulpevent *ev = sctp_skb2event(skb);
 			sin6->sin6_scope_id = ev->iif;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 13bf5fcdbff1..d56c07a3d435 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -804,7 +804,7 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
 				struct sockaddr_in6 *sin6;
 
 				sin6 = (struct sockaddr_in6 *)addrs;
-				ipv6_addr_copy(&asoc->asconf_addr_del_pending->v6.sin6_addr, &sin6->sin6_addr);
+				asoc->asconf_addr_del_pending->v6.sin6_addr = sin6->sin6_addr;
 			}
 			SCTP_DEBUG_PRINTK_IPADDR("send_asconf_del_ip: keep the last address asoc: %p ",
 			    " at %p\n", asoc, asoc->asconf_addr_del_pending,
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index ce136323da8b..fe258fc37f50 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -134,7 +134,7 @@ static void ip_map_init(struct cache_head *cnew, struct cache_head *citem)
 	struct ip_map *item = container_of(citem, struct ip_map, h);
 
 	strcpy(new->m_class, item->m_class);
-	ipv6_addr_copy(&new->m_addr, &item->m_addr);
+	new->m_addr = item->m_addr;
 }
 static void update(struct cache_head *cnew, struct cache_head *citem)
 {
@@ -274,7 +274,7 @@ static int ip_map_show(struct seq_file *m,
 	}
 	im = container_of(h, struct ip_map, h);
 	/* class addr domain */
-	ipv6_addr_copy(&addr, &im->m_addr);
+	addr = im->m_addr;
 
 	if (test_bit(CACHE_VALID, &h->flags) &&
 	    !test_bit(CACHE_NEGATIVE, &h->flags))
@@ -297,7 +297,7 @@ static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
 	struct cache_head *ch;
 
 	strcpy(ip.m_class, class);
-	ipv6_addr_copy(&ip.m_addr, addr);
+	ip.m_addr = *addr;
 	ch = sunrpc_cache_lookup(cd, &ip.h,
 				 hash_str(class, IP_HASHBITS) ^
 				 hash_ip6(*addr));
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 71bed1c1c77a..4653286fcc9e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -157,7 +157,7 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
 			cmh->cmsg_level = SOL_IPV6;
 			cmh->cmsg_type = IPV6_PKTINFO;
 			pki->ipi6_ifindex = daddr->sin6_scope_id;
-			ipv6_addr_copy(&pki->ipi6_addr,	&daddr->sin6_addr);
+			pki->ipi6_addr = daddr->sin6_addr;
 			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
 		}
 		break;
@@ -523,7 +523,7 @@ static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,
 		return 0;
 
 	daddr->sin6_family = AF_INET6;
-	ipv6_addr_copy(&daddr->sin6_addr, &pki->ipi6_addr);
+	daddr->sin6_addr = pki->ipi6_addr;
 	daddr->sin6_scope_id = pki->ipi6_ifindex;
 	return 1;
 }
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9414b9c5b1e4..5b228f97d4b3 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1035,16 +1035,12 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,
 			break;
 
 		case AF_INET6:
-			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
-				       (const struct in6_addr *)daddr);
-			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
-				       (const struct in6_addr *)saddr);
+			*(struct in6_addr *)x->sel.daddr.a6 = *(struct in6_addr *)daddr;
+			*(struct in6_addr *)x->sel.saddr.a6 = *(struct in6_addr *)saddr;
 			x->sel.prefixlen_d = 128;
 			x->sel.prefixlen_s = 128;
-			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
-				       (const struct in6_addr *)saddr);
-			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
-				       (const struct in6_addr *)daddr);
+			*(struct in6_addr *)x->props.saddr.a6 = *(struct in6_addr *)saddr;
+			*(struct in6_addr *)x->id.daddr.a6 = *(struct in6_addr *)daddr;
 			break;
 		}
 
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 893af8a2fa1e..199616bb68d3 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -118,8 +118,8 @@ int ipv6_skb_to_auditdata(struct sk_buff *skb,
 	ip6 = ipv6_hdr(skb);
 	if (ip6 == NULL)
 		return -EINVAL;
-	ipv6_addr_copy(&ad->u.net.v6info.saddr, &ip6->saddr);
-	ipv6_addr_copy(&ad->u.net.v6info.daddr, &ip6->daddr);
+	ad->u.net.v6info.saddr = ip6->saddr;
+	ad->u.net.v6info.daddr = ip6->daddr;
 	ret = 0;
 	/* IPv6 can have several extension header before the Transport header
 	 * skip them */
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 1126c10a5e82..7e6c2564e741 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3567,8 +3567,8 @@ static int selinux_parse_skb_ipv6(struct sk_buff *skb,
 	if (ip6 == NULL)
 		goto out;
 
-	ipv6_addr_copy(&ad->u.net.v6info.saddr, &ip6->saddr);
-	ipv6_addr_copy(&ad->u.net.v6info.daddr, &ip6->daddr);
+	ad->u.net.v6info.saddr = ip6->saddr;
+	ad->u.net.v6info.daddr = ip6->daddr;
 	ret = 0;
 
 	nexthdr = ip6->nexthdr;
@@ -3871,7 +3871,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
 		if (family == PF_INET)
 			ad.u.net.v4info.saddr = addr4->sin_addr.s_addr;
 		else
-			ipv6_addr_copy(&ad.u.net.v6info.saddr, &addr6->sin6_addr);
+			ad.u.net.v6info.saddr = addr6->sin6_addr;
 
 		err = avc_has_perm(sksec->sid, sid,
 				   sksec->sclass, node_perm, &ad);
diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c
index 3bf46abaa688..86365857c088 100644
--- a/security/selinux/netnode.c
+++ b/security/selinux/netnode.c
@@ -220,7 +220,7 @@ static int sel_netnode_sid_slow(void *addr, u16 family, u32 *sid)
 	case PF_INET6:
 		ret = security_node_sid(PF_INET6,
 					addr, sizeof(struct in6_addr), sid);
-		ipv6_addr_copy(&new->nsec.addr.ipv6, addr);
+		new->nsec.addr.ipv6 = *(struct in6_addr *)addr;
 		break;
 	default:
 		BUG();
-- 
cgit v1.2.3


From 1f2149c1df50c8c712950872675f46e6e44629f0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 22 Nov 2011 10:57:41 +0000
Subject: net: remove netdev_alloc_page and use __GFP_COLD

Given we dont use anymore the struct net_device *dev argument, and this
interface brings litle benefit, remove netdev_{alloc|free}_page(), to
debloat include/linux/skbuff.h a bit.

(Some drivers used a mix of these interfaces and alloc_pages())

When allocating a page given to device for DMA transfer (device to
memory), it makes sense to use a cold one (__GFP_COLD)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
CC: Dimitris Michailidis <dm@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/sge.c          |  6 ++---
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c        |  5 ++--
 drivers/net/ethernet/intel/igb/igb_main.c         |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     |  2 +-
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |  2 +-
 drivers/net/usb/cdc-phonet.c                      | 10 +++----
 drivers/usb/gadget/f_phonet.c                     | 11 ++++----
 include/linux/skbuff.h                            | 32 -----------------------
 8 files changed, 18 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 140254c7cba9..2dae7959f000 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -491,7 +491,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 	__be64 *d = &q->desc[q->pidx];
 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 
-	gfp |= __GFP_NOWARN;         /* failures are expected */
+	gfp |= __GFP_NOWARN | __GFP_COLD;
 
 #if FL_PG_ORDER > 0
 	/*
@@ -528,7 +528,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 #endif
 
 	while (n--) {
-		pg = __netdev_alloc_page(adap->port[0], gfp);
+		pg = alloc_page(gfp);
 		if (unlikely(!pg)) {
 			q->alloc_failed++;
 			break;
@@ -537,7 +537,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 		mapping = dma_map_page(adap->pdev_dev, pg, 0, PAGE_SIZE,
 				       PCI_DMA_FROMDEVICE);
 		if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
-			netdev_free_page(adap->port[0], pg);
+			put_page(pg);
 			goto out;
 		}
 		*d++ = cpu_to_be64(mapping);
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 8d5d55ad102d..c381db23e713 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -653,8 +653,7 @@ static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl,
 
 alloc_small_pages:
 	while (n--) {
-		page = __netdev_alloc_page(adapter->port[0],
-					   gfp | __GFP_NOWARN);
+		page = alloc_page(gfp | __GFP_NOWARN | __GFP_COLD);
 		if (unlikely(!page)) {
 			fl->alloc_failed++;
 			break;
@@ -664,7 +663,7 @@ alloc_small_pages:
 		dma_addr = dma_map_page(adapter->pdev_dev, page, 0, PAGE_SIZE,
 				       PCI_DMA_FROMDEVICE);
 		if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) {
-			netdev_free_page(adapter->port[0], page);
+			put_page(page);
 			break;
 		}
 		*d++ = cpu_to_be64(dma_addr);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index bd9b30e6ae9d..b66b8aa751e7 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6135,7 +6135,7 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
 		return true;
 
 	if (!page) {
-		page = netdev_alloc_page(rx_ring->netdev);
+		page = alloc_page(GFP_ATOMIC | __GFP_COLD);
 		bi->page = page;
 		if (unlikely(!page)) {
 			rx_ring->rx_stats.alloc_failed++;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 820fc040c241..1b28ed9d8cc1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1140,7 +1140,7 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count)
 
 		if (ring_is_ps_enabled(rx_ring)) {
 			if (!bi->page) {
-				bi->page = netdev_alloc_page(rx_ring->netdev);
+				bi->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
 				if (!bi->page) {
 					rx_ring->rx_stats.alloc_rx_page_failed++;
 					goto no_buffers;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 0c39bb1ac3bb..5d1a64398169 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -366,7 +366,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter,
 		if (!bi->page_dma &&
 		    (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED)) {
 			if (!bi->page) {
-				bi->page = netdev_alloc_page(adapter->netdev);
+				bi->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
 				if (!bi->page) {
 					adapter->alloc_rx_page_failed++;
 					goto no_buffers;
diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c
index a60d0069cc45..331e44056f5a 100644
--- a/drivers/net/usb/cdc-phonet.c
+++ b/drivers/net/usb/cdc-phonet.c
@@ -130,7 +130,7 @@ static int rx_submit(struct usbpn_dev *pnd, struct urb *req, gfp_t gfp_flags)
 	struct page *page;
 	int err;
 
-	page = __netdev_alloc_page(dev, gfp_flags);
+	page = alloc_page(gfp_flags);
 	if (!page)
 		return -ENOMEM;
 
@@ -140,7 +140,7 @@ static int rx_submit(struct usbpn_dev *pnd, struct urb *req, gfp_t gfp_flags)
 	err = usb_submit_urb(req, gfp_flags);
 	if (unlikely(err)) {
 		dev_dbg(&dev->dev, "RX submit error (%d)\n", err);
-		netdev_free_page(dev, page);
+		put_page(page);
 	}
 	return err;
 }
@@ -208,9 +208,9 @@ static void rx_complete(struct urb *req)
 	dev->stats.rx_errors++;
 resubmit:
 	if (page)
-		netdev_free_page(dev, page);
+		put_page(page);
 	if (req)
-		rx_submit(pnd, req, GFP_ATOMIC);
+		rx_submit(pnd, req, GFP_ATOMIC | __GFP_COLD);
 }
 
 static int usbpn_close(struct net_device *dev);
@@ -229,7 +229,7 @@ static int usbpn_open(struct net_device *dev)
 	for (i = 0; i < rxq_size; i++) {
 		struct urb *req = usb_alloc_urb(0, GFP_KERNEL);
 
-		if (!req || rx_submit(pnd, req, GFP_KERNEL)) {
+		if (!req || rx_submit(pnd, req, GFP_KERNEL | __GFP_COLD)) {
 			usbpn_close(dev);
 			return -ENOMEM;
 		}
diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c
index 16a509ae517b..7cdcb63b21ff 100644
--- a/drivers/usb/gadget/f_phonet.c
+++ b/drivers/usb/gadget/f_phonet.c
@@ -298,11 +298,10 @@ static void pn_net_setup(struct net_device *dev)
 static int
 pn_rx_submit(struct f_phonet *fp, struct usb_request *req, gfp_t gfp_flags)
 {
-	struct net_device *dev = fp->dev;
 	struct page *page;
 	int err;
 
-	page = __netdev_alloc_page(dev, gfp_flags);
+	page = alloc_page(gfp_flags);
 	if (!page)
 		return -ENOMEM;
 
@@ -312,7 +311,7 @@ pn_rx_submit(struct f_phonet *fp, struct usb_request *req, gfp_t gfp_flags)
 
 	err = usb_ep_queue(fp->out_ep, req, gfp_flags);
 	if (unlikely(err))
-		netdev_free_page(dev, page);
+		put_page(page);
 	return err;
 }
 
@@ -374,9 +373,9 @@ static void pn_rx_complete(struct usb_ep *ep, struct usb_request *req)
 	}
 
 	if (page)
-		netdev_free_page(dev, page);
+		put_page(page);
 	if (req)
-		pn_rx_submit(fp, req, GFP_ATOMIC);
+		pn_rx_submit(fp, req, GFP_ATOMIC | __GFP_COLD);
 }
 
 /*-------------------------------------------------------------------------*/
@@ -436,7 +435,7 @@ static int pn_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
 
 			netif_carrier_on(dev);
 			for (i = 0; i < phonet_rxq_size; i++)
-				pn_rx_submit(fp, fp->out_reqv[i], GFP_ATOMIC);
+				pn_rx_submit(fp, fp->out_reqv[i], GFP_ATOMIC | __GFP_COLD);
 		}
 		spin_unlock(&port->lock);
 		return 0;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 09b7ea566d66..cec0657d0d32 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1668,38 +1668,6 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
 	return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
 }
 
-/**
- *	__netdev_alloc_page - allocate a page for ps-rx on a specific device
- *	@dev: network device to receive on
- *	@gfp_mask: alloc_pages_node mask
- *
- * 	Allocate a new page. dev currently unused.
- *
- * 	%NULL is returned if there is no free memory.
- */
-static inline struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
-{
-	return alloc_pages_node(NUMA_NO_NODE, gfp_mask, 0);
-}
-
-/**
- *	netdev_alloc_page - allocate a page for ps-rx on a specific device
- *	@dev: network device to receive on
- *
- * 	Allocate a new page. dev currently unused.
- *
- * 	%NULL is returned if there is no free memory.
- */
-static inline struct page *netdev_alloc_page(struct net_device *dev)
-{
-	return __netdev_alloc_page(dev, GFP_ATOMIC);
-}
-
-static inline void netdev_free_page(struct net_device *dev, struct page *page)
-{
-	__free_page(page);
-}
-
 /**
  * skb_frag_page - retrieve the page refered to by a paged fragment
  * @frag: the paged fragment
-- 
cgit v1.2.3


From 5151412dd4338b273afdb107c3772528e9e67d92 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Wed, 23 Nov 2011 10:59:13 +0100
Subject: block: initialize request_queue's numa node during

struct request_queue is allocated with __GFP_ZERO so its "node" field is
zero before initialization.  This causes an oops if node 0 is offline in
the page allocator because its zonelists are not initialized.  From Dave
Young's dmesg:

	SRAT: Node 1 PXM 2 0-d0000000
	SRAT: Node 1 PXM 2 100000000-330000000
	SRAT: Node 0 PXM 1 330000000-630000000
	Initmem setup node 1 0000000000000000-000000000affb000
	...
	Built 1 zonelists in Node order, mobility grouping on.
	...
	BUG: unable to handle kernel paging request at 0000000000001c08
	IP: [<ffffffff8111c355>] __alloc_pages_nodemask+0xb5/0x870

and __alloc_pages_nodemask+0xb5 translates to a NULL pointer on
zonelist->_zonerefs.

The fix is to initialize q->node at the time of allocation so the correct
node is passed to the slab allocator later.

Since blk_init_allocated_queue_node() is no longer needed, merge it with
blk_init_allocated_queue().

[rientjes@google.com: changelog, initializing q->node]
Cc: stable@vger.kernel.org [2.6.37+]
Reported-by: Dave Young <dyoung@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Tested-by: Dave Young <dyoung@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 14 +++-----------
 include/linux/blkdev.h |  3 ---
 2 files changed, 3 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index ea70e6c80cd3..20d69f6beb6b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -467,6 +467,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
+	q->node = node_id;
 
 	err = bdi_init(&q->backing_dev_info);
 	if (err) {
@@ -551,7 +552,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 	if (!uninit_q)
 		return NULL;
 
-	q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);
+	q = blk_init_allocated_queue(uninit_q, rfn, lock);
 	if (!q)
 		blk_cleanup_queue(uninit_q);
 
@@ -562,19 +563,10 @@ EXPORT_SYMBOL(blk_init_queue_node);
 struct request_queue *
 blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 			 spinlock_t *lock)
-{
-	return blk_init_allocated_queue_node(q, rfn, lock, -1);
-}
-EXPORT_SYMBOL(blk_init_allocated_queue);
-
-struct request_queue *
-blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
-			      spinlock_t *lock, int node_id)
 {
 	if (!q)
 		return NULL;
 
-	q->node = node_id;
 	if (blk_init_free_list(q))
 		return NULL;
 
@@ -604,7 +596,7 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
 
 	return NULL;
 }
-EXPORT_SYMBOL(blk_init_allocated_queue_node);
+EXPORT_SYMBOL(blk_init_allocated_queue);
 
 int blk_get_queue(struct request_queue *q)
 {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c7a6d3b5bc7b..94acd8172b5b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -805,9 +805,6 @@ extern void blk_unprep_request(struct request *);
  */
 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
 					spinlock_t *lock, int node_id);
-extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *,
-							   request_fn_proc *,
-							   spinlock_t *, int node_id);
 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
 extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
 						      request_fn_proc *, spinlock_t *);
-- 
cgit v1.2.3


From 72c108cc4947db2fcdd3f3e8a2b60bd65e74a1cc Mon Sep 17 00:00:00 2001
From: Kyle Manna <kyle.manna@fuel7.com>
Date: Thu, 3 Nov 2011 12:08:05 -0500
Subject: regulator: TPS65910: Move regulator defs to header

Move the regulator defintions to the header so that platform board file
can use them to configure specific regulators.

Signed-off-by: Kyle Manna <kyle.manna@fuel7.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/tps65910-regulator.c | 24 ------------------------
 include/linux/mfd/tps65910.h           | 25 +++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c
index 66d2d60b436a..44b4f22a7f9f 100644
--- a/drivers/regulator/tps65910-regulator.c
+++ b/drivers/regulator/tps65910-regulator.c
@@ -25,30 +25,6 @@
 #include <linux/gpio.h>
 #include <linux/mfd/tps65910.h>
 
-#define TPS65910_REG_VRTC		0
-#define TPS65910_REG_VIO		1
-#define TPS65910_REG_VDD1		2
-#define TPS65910_REG_VDD2		3
-#define TPS65910_REG_VDD3		4
-#define TPS65910_REG_VDIG1		5
-#define TPS65910_REG_VDIG2		6
-#define TPS65910_REG_VPLL		7
-#define TPS65910_REG_VDAC		8
-#define TPS65910_REG_VAUX1		9
-#define TPS65910_REG_VAUX2		10
-#define TPS65910_REG_VAUX33		11
-#define TPS65910_REG_VMMC		12
-
-#define TPS65911_REG_VDDCTRL		4
-#define TPS65911_REG_LDO1		5
-#define TPS65911_REG_LDO2		6
-#define TPS65911_REG_LDO3		7
-#define TPS65911_REG_LDO4		8
-#define TPS65911_REG_LDO5		9
-#define TPS65911_REG_LDO6		10
-#define TPS65911_REG_LDO7		11
-#define TPS65911_REG_LDO8		12
-
 #define TPS65910_SUPPLY_STATE_ENABLED	0x1
 
 /* supported VIO voltages in milivolts */
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 82b4c8801a4f..fc5205d0bd14 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -739,6 +739,31 @@
 #define TPS65910_GPIO_STS				BIT(1)
 #define TPS65910_GPIO_SET				BIT(0)
 
+/* Regulator Index Definitions */
+#define TPS65910_REG_VRTC				0
+#define TPS65910_REG_VIO				1
+#define TPS65910_REG_VDD1				2
+#define TPS65910_REG_VDD2				3
+#define TPS65910_REG_VDD3				4
+#define TPS65910_REG_VDIG1				5
+#define TPS65910_REG_VDIG2				6
+#define TPS65910_REG_VPLL				7
+#define TPS65910_REG_VDAC				8
+#define TPS65910_REG_VAUX1				9
+#define TPS65910_REG_VAUX2				10
+#define TPS65910_REG_VAUX33				11
+#define TPS65910_REG_VMMC				12
+
+#define TPS65911_REG_VDDCTRL				4
+#define TPS65911_REG_LDO1				5
+#define TPS65911_REG_LDO2				6
+#define TPS65911_REG_LDO3				7
+#define TPS65911_REG_LDO4				8
+#define TPS65911_REG_LDO5				9
+#define TPS65911_REG_LDO6				10
+#define TPS65911_REG_LDO7				11
+#define TPS65911_REG_LDO8				12
+
 /**
  * struct tps65910_board
  * Board platform data may be used to initialize regulators.
-- 
cgit v1.2.3


From c1fc1480249dfe059254779a4bb7ca27cf5f8038 Mon Sep 17 00:00:00 2001
From: Kyle Manna <kyle.manna@fuel7.com>
Date: Thu, 3 Nov 2011 12:08:06 -0500
Subject: regulator: TPS65910: Create an array for init data

Create an array of fixed size for the platform to pass regulator
initalization data through.

Passing an array of pointers to init data also allows more flexible
definition of init data as well as prevents reading past the end of the
array should the platform define an incorrectly sized array.

Signed-off-by: Kyle Manna <kyle.manna@fuel7.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/tps65910-regulator.c | 13 ++++++++++---
 include/linux/mfd/tps65910.h           |  5 ++++-
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c
index 44b4f22a7f9f..a620e25c85c4 100644
--- a/drivers/regulator/tps65910-regulator.c
+++ b/drivers/regulator/tps65910-regulator.c
@@ -861,8 +861,6 @@ static __devinit int tps65910_probe(struct platform_device *pdev)
 	if (!pmic_plat_data)
 		return -EINVAL;
 
-	reg_data = pmic_plat_data->tps65910_pmic_init_data;
-
 	pmic = kzalloc(sizeof(*pmic), GFP_KERNEL);
 	if (!pmic)
 		return -ENOMEM;
@@ -913,7 +911,16 @@ static __devinit int tps65910_probe(struct platform_device *pdev)
 		goto err_free_info;
 	}
 
-	for (i = 0; i < pmic->num_regulators; i++, info++, reg_data++) {
+	for (i = 0; i < pmic->num_regulators && i < TPS65910_NUM_REGS;
+			i++, info++) {
+
+		reg_data = pmic_plat_data->tps65910_pmic_init_data[i];
+
+		/* Regulator API handles empty constraints but not NULL
+		 * constraints */
+		if (!reg_data)
+			continue;
+
 		/* Register the regulators */
 		pmic->info[i] = info;
 
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index fc5205d0bd14..03a035d2236b 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -764,6 +764,9 @@
 #define TPS65911_REG_LDO7				11
 #define TPS65911_REG_LDO8				12
 
+/* Max number of TPS65910/11 regulators */
+#define TPS65910_NUM_REGS				13
+
 /**
  * struct tps65910_board
  * Board platform data may be used to initialize regulators.
@@ -775,7 +778,7 @@ struct tps65910_board {
 	int irq_base;
 	int vmbch_threshold;
 	int vmbch2_threshold;
-	struct regulator_init_data *tps65910_pmic_init_data;
+	struct regulator_init_data *tps65910_pmic_init_data[TPS65910_NUM_REGS];
 };
 
 /**
-- 
cgit v1.2.3


From 34b087e48367c252e343c2f8de65676a78af1e4a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Nov 2011 09:28:17 -0800
Subject: freezer: kill unused set_freezable_with_signal()

There's no in-kernel user of set_freezable_with_signal() left.  Mixing
TIF_SIGPENDING with kernel threads can lead to nasty corner cases as
kernel threads never travel signal delivery path on their own.

e.g. the current implementation is buggy in the cancelation path of
__thaw_task().  It calls recalc_sigpending_and_wake() in an attempt to
clear TIF_SIGPENDING but the function never clears it regardless of
sigpending state.  This means that signallable freezable kthreads may
continue executing with !freezing() && stuck TIF_SIGPENDING, which can
be troublesome.

This patch removes set_freezable_with_signal() along with
PF_FREEZER_NOSIG and recalc_sigpending*() calls in freezer.  User
tasks get TIF_SIGPENDING, kernel tasks get woken up and the spurious
sigpending is dealt with in the usual signal delivery path.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/freezer.h | 20 +-------------------
 include/linux/sched.h   |  1 -
 kernel/freezer.c        | 27 ++++++---------------------
 kernel/kthread.c        |  2 +-
 4 files changed, 8 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index a28842e588f4..a33550fc05c5 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -49,7 +49,7 @@ static inline bool try_to_freeze(void)
 }
 
 extern bool freeze_task(struct task_struct *p);
-extern bool __set_freezable(bool with_signal);
+extern bool set_freezable(void);
 
 #ifdef CONFIG_CGROUP_FREEZER
 extern bool cgroup_freezing(struct task_struct *task);
@@ -104,23 +104,6 @@ static inline int freezer_should_skip(struct task_struct *p)
 	return !!(p->flags & PF_FREEZER_SKIP);
 }
 
-/*
- * Tell the freezer that the current task should be frozen by it
- */
-static inline bool set_freezable(void)
-{
-	return __set_freezable(false);
-}
-
-/*
- * Tell the freezer that the current task should be frozen by it and that it
- * should send a fake signal to the task to freeze it.
- */
-static inline bool set_freezable_with_signal(void)
-{
-	return __set_freezable(true);
-}
-
 /*
  * Freezer-friendly wrappers around wait_event_interruptible(),
  * wait_event_killable() and wait_event_interruptible_timeout(), originally
@@ -176,7 +159,6 @@ static inline void freezer_do_not_count(void) {}
 static inline void freezer_count(void) {}
 static inline int freezer_should_skip(struct task_struct *p) { return 0; }
 static inline void set_freezable(void) {}
-static inline void set_freezable_with_signal(void) {}
 
 #define wait_event_freezable(wq, condition)				\
 		wait_event_interruptible(wq, condition)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d12bd03b688f..2f90470ad843 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1788,7 +1788,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
 #define PF_FREEZER_SKIP	0x40000000	/* Freezer should not count it as freezable */
-#define PF_FREEZER_NOSIG 0x80000000	/* Freezer won't send signals to it */
 
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 2589a61de44c..9815b8d1eed5 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -39,7 +39,7 @@ bool freezing_slow_path(struct task_struct *p)
 	if (pm_nosig_freezing || cgroup_freezing(p))
 		return true;
 
-	if (pm_freezing && !(p->flags & PF_FREEZER_NOSIG))
+	if (pm_freezing && !(p->flags & PF_KTHREAD))
 		return true;
 
 	return false;
@@ -72,10 +72,6 @@ bool __refrigerator(bool check_kthr_stop)
 		schedule();
 	}
 
-	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending(); /* We sent fake signal, clean it up */
-	spin_unlock_irq(&current->sighand->siglock);
-
 	pr_debug("%s left refrigerator\n", current->comm);
 
 	/*
@@ -120,7 +116,7 @@ bool freeze_task(struct task_struct *p)
 		return false;
 	}
 
-	if (!(p->flags & PF_FREEZER_NOSIG)) {
+	if (!(p->flags & PF_KTHREAD)) {
 		fake_signal_wake_up(p);
 		/*
 		 * fake_signal_wake_up() goes through p's scheduler
@@ -145,28 +141,19 @@ void __thaw_task(struct task_struct *p)
 	 * be visible to @p as waking up implies wmb.  Waking up inside
 	 * freezer_lock also prevents wakeups from leaking outside
 	 * refrigerator.
-	 *
-	 * If !FROZEN, @p hasn't reached refrigerator, recalc sigpending to
-	 * avoid leaving dangling TIF_SIGPENDING behind.
 	 */
 	spin_lock_irqsave(&freezer_lock, flags);
-	if (frozen(p)) {
+	if (frozen(p))
 		wake_up_process(p);
-	} else {
-		spin_lock(&p->sighand->siglock);
-		recalc_sigpending_and_wake(p);
-		spin_unlock(&p->sighand->siglock);
-	}
 	spin_unlock_irqrestore(&freezer_lock, flags);
 }
 
 /**
- * __set_freezable - make %current freezable
- * @with_signal: do we want %TIF_SIGPENDING for notification too?
+ * set_freezable - make %current freezable
  *
  * Mark %current freezable and enter refrigerator if necessary.
  */
-bool __set_freezable(bool with_signal)
+bool set_freezable(void)
 {
 	might_sleep();
 
@@ -177,10 +164,8 @@ bool __set_freezable(bool with_signal)
 	 */
 	spin_lock_irq(&freezer_lock);
 	current->flags &= ~PF_NOFREEZE;
-	if (with_signal)
-		current->flags &= ~PF_FREEZER_NOSIG;
 	spin_unlock_irq(&freezer_lock);
 
 	return try_to_freeze();
 }
-EXPORT_SYMBOL(__set_freezable);
+EXPORT_SYMBOL(set_freezable);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 1c36deaae2f1..3d3de633702e 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -282,7 +282,7 @@ int kthreadd(void *unused)
 	set_cpus_allowed_ptr(tsk, cpu_all_mask);
 	set_mems_allowed(node_states[N_HIGH_MEMORY]);
 
-	current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
+	current->flags |= PF_NOFREEZE;
 
 	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE);
-- 
cgit v1.2.3


From 24b7ead3fb0bae267c2ee50898eb4c13aedd1e9f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 23 Nov 2011 09:28:17 -0800
Subject: freezer: fix wait_event_freezable/__thaw_task races

wait_event_freezable() and friends stop the waiting if try_to_freeze()
fails. This is not right, we can race with __thaw_task() and in this
case

	- wait_event_freezable() returns the wrong ERESTARTSYS

	- wait_event_freezable_timeout() can return the positive
	  value while condition == F

Change the code to always check __retval/condition before return.

Note: with or without this patch the timeout logic looks strange,
probably we should recalc timeout if try_to_freeze() returns T.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
---
 include/linux/freezer.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index a33550fc05c5..09570ac22be6 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -122,28 +122,30 @@ static inline int freezer_should_skip(struct task_struct *p)
 #define wait_event_freezable(wq, condition)				\
 ({									\
 	int __retval;							\
-	do {								\
+	for (;;) {							\
 		__retval = wait_event_interruptible(wq, 		\
 				(condition) || freezing(current));	\
-		if (__retval && !freezing(current))			\
+		if (__retval || (condition))				\
 			break;						\
-		else if (!(condition))					\
-			__retval = -ERESTARTSYS;			\
-	} while (try_to_freeze());					\
+		try_to_freeze();					\
+	}								\
 	__retval;							\
 })
 
-
 #define wait_event_freezable_timeout(wq, condition, timeout)		\
 ({									\
 	long __retval = timeout;					\
-	do {								\
+	for (;;) {							\
 		__retval = wait_event_interruptible_timeout(wq,		\
 				(condition) || freezing(current),	\
 				__retval); 				\
-	} while (try_to_freeze());					\
+		if (__retval <= 0 || (condition))			\
+			break;						\
+		try_to_freeze();					\
+	}								\
 	__retval;							\
 })
+
 #else /* !CONFIG_FREEZER */
 static inline bool frozen(struct task_struct *p) { return false; }
 static inline bool freezing(struct task_struct *p) { return false; }
-- 
cgit v1.2.3


From b84d435cc228e87951f3bbabf6cc4a5f25d5fb16 Mon Sep 17 00:00:00 2001
From: Christine Chan <cschan@codeaurora.org>
Date: Mon, 7 Nov 2011 19:48:27 -0800
Subject: debugobjects: Extend to assert that an object is initialized

Calling del_timer_sync() on an uninitialized timer leads to a
never ending loop in lock_timer_base() that spins checking for a
non-NULL timer base. Add an assertion to debugobjects to catch
usage of uninitialized objects so that we can initialize timers
in the del_timer_sync() path before it calls lock_timer_base().

[ sboyd@codeaurora.org: Clarify commit message ]

Signed-off-by: Christine Chan <cschan@codeaurora.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/1320724108-20788-3-git-send-email-sboyd@codeaurora.org
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/DocBook/debugobjects.tmpl | 50 +++++++++++++++++++++++++++++++++
 include/linux/debugobjects.h            |  6 ++++
 lib/debugobjects.c                      | 38 +++++++++++++++++++++++++
 3 files changed, 94 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl
index 08ff908aa7a2..24979f691e3e 100644
--- a/Documentation/DocBook/debugobjects.tmpl
+++ b/Documentation/DocBook/debugobjects.tmpl
@@ -96,6 +96,7 @@
 	<listitem><para>debug_object_deactivate</para></listitem>
 	<listitem><para>debug_object_destroy</para></listitem>
 	<listitem><para>debug_object_free</para></listitem>
+	<listitem><para>debug_object_assert_init</para></listitem>
       </itemizedlist>
       Each of these functions takes the address of the real object and
       a pointer to the object type specific debug description
@@ -273,6 +274,26 @@
 	debug checks.
       </para>
     </sect1>
+
+    <sect1 id="debug_object_assert_init">
+      <title>debug_object_assert_init</title>
+      <para>
+	This function is called to assert that an object has been
+	initialized.
+      </para>
+      <para>
+	When the real object is not tracked by debugobjects, it calls
+	fixup_assert_init of the object type description structure
+	provided by the caller, with the hardcoded object state
+	ODEBUG_NOT_AVAILABLE. The fixup function can correct the problem
+	by calling debug_object_init and other specific initializing
+	functions.
+      </para>
+      <para>
+	When the real object is already tracked by debugobjects it is
+	ignored.
+      </para>
+    </sect1>
   </chapter>
   <chapter id="fixupfunctions">
     <title>Fixup functions</title>
@@ -381,6 +402,35 @@
 	statistics.
       </para>
     </sect1>
+    <sect1 id="fixup_assert_init">
+      <title>fixup_assert_init</title>
+      <para>
+	This function is called from the debug code whenever a problem
+	in debug_object_assert_init is detected.
+      </para>
+      <para>
+	Called from debug_object_assert_init() with a hardcoded state
+	ODEBUG_STATE_NOTAVAILABLE when the object is not found in the
+	debug bucket.
+      </para>
+      <para>
+	The function returns 1 when the fixup was successful,
+	otherwise 0. The return value is used to update the
+	statistics.
+      </para>
+      <para>
+	Note, this function should make sure debug_object_init() is
+	called before returning.
+      </para>
+      <para>
+	The handling of statically initialized objects is a special
+	case. The fixup function should check if this is a legitimate
+	case of a statically initialized object or not. In this case only
+	debug_object_init() should be called to make the object known to
+	the tracker. Then the function should return 0 because this is not
+	a real fixup.
+      </para>
+    </sect1>
   </chapter>
   <chapter id="bugs">
     <title>Known Bugs And Assumptions</title>
diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index 65970b811e22..0e5f5785d9f2 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -46,6 +46,8 @@ struct debug_obj {
  *			fails
  * @fixup_free:		fixup function, which is called when the free check
  *			fails
+ * @fixup_assert_init:  fixup function, which is called when the assert_init
+ *			check fails
  */
 struct debug_obj_descr {
 	const char		*name;
@@ -54,6 +56,7 @@ struct debug_obj_descr {
 	int (*fixup_activate)	(void *addr, enum debug_obj_state state);
 	int (*fixup_destroy)	(void *addr, enum debug_obj_state state);
 	int (*fixup_free)	(void *addr, enum debug_obj_state state);
+	int (*fixup_assert_init)(void *addr, enum debug_obj_state state);
 };
 
 #ifdef CONFIG_DEBUG_OBJECTS
@@ -64,6 +67,7 @@ extern void debug_object_activate  (void *addr, struct debug_obj_descr *descr);
 extern void debug_object_deactivate(void *addr, struct debug_obj_descr *descr);
 extern void debug_object_destroy   (void *addr, struct debug_obj_descr *descr);
 extern void debug_object_free      (void *addr, struct debug_obj_descr *descr);
+extern void debug_object_assert_init(void *addr, struct debug_obj_descr *descr);
 
 /*
  * Active state:
@@ -89,6 +93,8 @@ static inline void
 debug_object_destroy   (void *addr, struct debug_obj_descr *descr) { }
 static inline void
 debug_object_free      (void *addr, struct debug_obj_descr *descr) { }
+static inline void
+debug_object_assert_init(void *addr, struct debug_obj_descr *descr) { }
 
 static inline void debug_objects_early_init(void) { }
 static inline void debug_objects_mem_init(void) { }
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index b7a530504b38..77cb245f8e7b 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -570,6 +570,44 @@ out_unlock:
 	raw_spin_unlock_irqrestore(&db->lock, flags);
 }
 
+/**
+ * debug_object_assert_init - debug checks when object should be init-ed
+ * @addr:	address of the object
+ * @descr:	pointer to an object specific debug description structure
+ */
+void debug_object_assert_init(void *addr, struct debug_obj_descr *descr)
+{
+	struct debug_bucket *db;
+	struct debug_obj *obj;
+	unsigned long flags;
+
+	if (!debug_objects_enabled)
+		return;
+
+	db = get_bucket((unsigned long) addr);
+
+	raw_spin_lock_irqsave(&db->lock, flags);
+
+	obj = lookup_object(addr, db);
+	if (!obj) {
+		struct debug_obj o = { .object = addr,
+				       .state = ODEBUG_STATE_NOTAVAILABLE,
+				       .descr = descr };
+
+		raw_spin_unlock_irqrestore(&db->lock, flags);
+		/*
+		 * Maybe the object is static.  Let the type specific
+		 * code decide what to do.
+		 */
+		if (debug_object_fixup(descr->fixup_assert_init, addr,
+				       ODEBUG_STATE_NOTAVAILABLE))
+			debug_print_object(&o, "assert_init");
+		return;
+	}
+
+	raw_spin_unlock_irqrestore(&db->lock, flags);
+}
+
 /**
  * debug_object_active_state - debug checks object usage state machine
  * @addr:	address of the object
-- 
cgit v1.2.3


From 8f446e6fa1d506be2cb80f91c214f1705327c7f9 Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak@ti.com>
Date: Fri, 18 Nov 2011 16:47:17 +0530
Subject: regulator: helper routine to extract regulator_init_data

The helper routine is meant to be used by the regulator drivers
to extract the regulator_init_data structure from the data
that is passed from device tree.
'consumer_supplies' which is part of regulator_init_data is not extracted
as the regulator consumer mappings are passed through DT differently,
implemented in subsequent patches.
Similarly the regulator<-->parent/supply mapping is handled in
subsequent patches.

Also add documentation for regulator bindings to be used to pass
regulator_init_data struct information from device tree.

Some of the regulator properties which are linux and board specific,
are left out since its not clear if they can
be in someway embedded into the kernel or passed in from DT.
They will be revisited later.

Signed-off-by: Rajendra Nayak <rnayak@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 .../devicetree/bindings/regulator/regulator.txt    | 54 +++++++++++++++
 drivers/regulator/Makefile                         |  1 +
 drivers/regulator/of_regulator.c                   | 81 ++++++++++++++++++++++
 include/linux/regulator/of_regulator.h             | 20 ++++++
 4 files changed, 156 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/regulator.txt
 create mode 100644 drivers/regulator/of_regulator.c
 create mode 100644 include/linux/regulator/of_regulator.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt
new file mode 100644
index 000000000000..82bef20d4c49
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/regulator.txt
@@ -0,0 +1,54 @@
+Voltage/Current Regulators
+
+Optional properties:
+- regulator-name: A string used as a descriptive name for regulator outputs
+- regulator-min-microvolt: smallest voltage consumers may set
+- regulator-max-microvolt: largest voltage consumers may set
+- regulator-microvolt-offset: Offset applied to voltages to compensate for voltage drops
+- regulator-min-microamp: smallest current consumers may set
+- regulator-max-microamp: largest current consumers may set
+- regulator-always-on: boolean, regulator should never be disabled
+- regulator-boot-on: bootloader/firmware enabled regulator
+- <name>-supply: phandle to the parent supply/regulator node
+
+Example:
+
+	xyzreg: regulator@0 {
+		regulator-min-microvolt = <1000000>;
+		regulator-max-microvolt = <2500000>;
+		regulator-always-on;
+		vin-supply = <&vin>;
+	};
+
+Regulator Consumers:
+Consumer nodes can reference one or more of its supplies/
+regulators using the below bindings.
+
+- <name>-supply: phandle to the regulator node
+
+These are the same bindings that a regulator in the above
+example used to reference its own supply, in which case
+its just seen as a special case of a regulator being a
+consumer itself.
+
+Example of a consumer device node (mmc) referencing two
+regulators (twl-reg1 and twl-reg2),
+
+	twl-reg1: regulator@0 {
+		...
+		...
+		...
+	};
+
+	twl-reg2: regulator@1 {
+		...
+		...
+		...
+	};
+
+	mmc: mmc@0x0 {
+		...
+		...
+		vmmc-supply = <&twl-reg1>;
+		vmmcaux-supply = <&twl-reg2>;
+	};
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 93a6318f5328..c75a5229cb27 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -4,6 +4,7 @@
 
 
 obj-$(CONFIG_REGULATOR) += core.o dummy.o
+obj-$(CONFIG_OF) += of_regulator.o
 obj-$(CONFIG_REGULATOR_FIXED_VOLTAGE) += fixed.o
 obj-$(CONFIG_REGULATOR_VIRTUAL_CONSUMER) += virtual.o
 obj-$(CONFIG_REGULATOR_USERSPACE_CONSUMER) += userspace-consumer.o
diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
new file mode 100644
index 000000000000..76673c784ab8
--- /dev/null
+++ b/drivers/regulator/of_regulator.c
@@ -0,0 +1,81 @@
+/*
+ * OF helpers for regulator framework
+ *
+ * Copyright (C) 2011 Texas Instruments, Inc.
+ * Rajendra Nayak <rnayak@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/regulator/machine.h>
+
+static void of_get_regulation_constraints(struct device_node *np,
+					struct regulator_init_data **init_data)
+{
+	const __be32 *min_uV, *max_uV, *uV_offset;
+	const __be32 *min_uA, *max_uA;
+	struct regulation_constraints *constraints = &(*init_data)->constraints;
+
+	constraints->name = of_get_property(np, "regulator-name", NULL);
+
+	min_uV = of_get_property(np, "regulator-min-microvolt", NULL);
+	if (min_uV)
+		constraints->min_uV = be32_to_cpu(*min_uV);
+	max_uV = of_get_property(np, "regulator-max-microvolt", NULL);
+	if (max_uV)
+		constraints->max_uV = be32_to_cpu(*max_uV);
+
+	/* Voltage change possible? */
+	if (constraints->min_uV != constraints->max_uV)
+		constraints->valid_ops_mask |= REGULATOR_CHANGE_VOLTAGE;
+
+	uV_offset = of_get_property(np, "regulator-microvolt-offset", NULL);
+	if (uV_offset)
+		constraints->uV_offset = be32_to_cpu(*uV_offset);
+	min_uA = of_get_property(np, "regulator-min-microamp", NULL);
+	if (min_uA)
+		constraints->min_uA = be32_to_cpu(*min_uA);
+	max_uA = of_get_property(np, "regulator-max-microamp", NULL);
+	if (max_uA)
+		constraints->max_uA = be32_to_cpu(*max_uA);
+
+	/* Current change possible? */
+	if (constraints->min_uA != constraints->max_uA)
+		constraints->valid_ops_mask |= REGULATOR_CHANGE_CURRENT;
+
+	if (of_find_property(np, "regulator-boot-on", NULL))
+		constraints->boot_on = true;
+
+	if (of_find_property(np, "regulator-always-on", NULL))
+		constraints->always_on = true;
+	else /* status change should be possible if not always on. */
+		constraints->valid_ops_mask |= REGULATOR_CHANGE_STATUS;
+}
+
+/**
+ * of_get_regulator_init_data - extract regulator_init_data structure info
+ * @dev: device requesting for regulator_init_data
+ *
+ * Populates regulator_init_data structure by extracting data from device
+ * tree node, returns a pointer to the populated struture or NULL if memory
+ * alloc fails.
+ */
+struct regulator_init_data *of_get_regulator_init_data(struct device *dev)
+{
+	struct regulator_init_data *init_data;
+
+	if (!dev->of_node)
+		return NULL;
+
+	init_data = devm_kzalloc(dev, sizeof(*init_data), GFP_KERNEL);
+	if (!init_data)
+		return NULL; /* Out of memory? */
+
+	of_get_regulation_constraints(dev->of_node, &init_data);
+	return init_data;
+}
diff --git a/include/linux/regulator/of_regulator.h b/include/linux/regulator/of_regulator.h
new file mode 100644
index 000000000000..d83a98d3e3fd
--- /dev/null
+++ b/include/linux/regulator/of_regulator.h
@@ -0,0 +1,20 @@
+/*
+ * OpenFirmware regulator support routines
+ *
+ */
+
+#ifndef __LINUX_OF_REG_H
+#define __LINUX_OF_REG_H
+
+#if defined(CONFIG_OF)
+extern struct regulator_init_data
+	*of_get_regulator_init_data(struct device *dev);
+#else
+static inline struct regulator_init_data
+	*of_get_regulator_init_data(struct device *dev)
+{
+	return NULL;
+}
+#endif /* CONFIG_OF */
+
+#endif /* __LINUX_OF_REG_H */
-- 
cgit v1.2.3


From 2c043bcbf287dc69848054d5c02c55c20f7a7bc5 Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak@ti.com>
Date: Fri, 18 Nov 2011 16:47:19 +0530
Subject: regulator: pass additional of_node to regulator_register()

With device tree support for regulators, its needed that the
regulator_dev->dev device has the right of_node attached.
To be able to do this add an additional parameter to the
regulator_register() api, wherein the dt-adapted driver can
then pass this additional info onto the regulator core.

Signed-off-by: Rajendra Nayak <rnayak@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/88pm8607.c           | 2 +-
 drivers/regulator/aat2870-regulator.c  | 2 +-
 drivers/regulator/ab3100.c             | 2 +-
 drivers/regulator/ab8500.c             | 2 +-
 drivers/regulator/ad5398.c             | 2 +-
 drivers/regulator/bq24022.c            | 2 +-
 drivers/regulator/core.c               | 3 ++-
 drivers/regulator/da903x.c             | 2 +-
 drivers/regulator/db8500-prcmu.c       | 2 +-
 drivers/regulator/dummy.c              | 2 +-
 drivers/regulator/fixed.c              | 2 +-
 drivers/regulator/isl6271a-regulator.c | 2 +-
 drivers/regulator/lp3971.c             | 2 +-
 drivers/regulator/lp3972.c             | 2 +-
 drivers/regulator/max1586.c            | 2 +-
 drivers/regulator/max8649.c            | 2 +-
 drivers/regulator/max8660.c            | 2 +-
 drivers/regulator/max8925-regulator.c  | 2 +-
 drivers/regulator/max8952.c            | 2 +-
 drivers/regulator/max8997.c            | 2 +-
 drivers/regulator/max8998.c            | 2 +-
 drivers/regulator/mc13783-regulator.c  | 2 +-
 drivers/regulator/mc13892-regulator.c  | 2 +-
 drivers/regulator/pcap-regulator.c     | 2 +-
 drivers/regulator/pcf50633-regulator.c | 2 +-
 drivers/regulator/tps6105x-regulator.c | 3 ++-
 drivers/regulator/tps65023-regulator.c | 2 +-
 drivers/regulator/tps6507x-regulator.c | 2 +-
 drivers/regulator/tps6524x-regulator.c | 2 +-
 drivers/regulator/tps6586x-regulator.c | 2 +-
 drivers/regulator/tps65910-regulator.c | 2 +-
 drivers/regulator/tps65912-regulator.c | 2 +-
 drivers/regulator/twl-regulator.c      | 2 +-
 drivers/regulator/wm831x-dcdc.c        | 8 ++++----
 drivers/regulator/wm831x-isink.c       | 2 +-
 drivers/regulator/wm831x-ldo.c         | 6 +++---
 drivers/regulator/wm8350-regulator.c   | 2 +-
 drivers/regulator/wm8400-regulator.c   | 2 +-
 drivers/regulator/wm8994-regulator.c   | 2 +-
 include/linux/regulator/driver.h       | 2 +-
 sound/soc/codecs/sgtl5000.c            | 2 +-
 41 files changed, 48 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/88pm8607.c b/drivers/regulator/88pm8607.c
index ca0d608f8248..df33530cec4a 100644
--- a/drivers/regulator/88pm8607.c
+++ b/drivers/regulator/88pm8607.c
@@ -427,7 +427,7 @@ static int __devinit pm8607_regulator_probe(struct platform_device *pdev)
 
 	/* replace driver_data with info */
 	info->regulator = regulator_register(&info->desc, &pdev->dev,
-					     pdata, info);
+					     pdata, info, NULL);
 	if (IS_ERR(info->regulator)) {
 		dev_err(&pdev->dev, "failed to register regulator %s\n",
 			info->desc.name);
diff --git a/drivers/regulator/aat2870-regulator.c b/drivers/regulator/aat2870-regulator.c
index 5abeb3ac3e8d..07e98ec6a324 100644
--- a/drivers/regulator/aat2870-regulator.c
+++ b/drivers/regulator/aat2870-regulator.c
@@ -188,7 +188,7 @@ static int aat2870_regulator_probe(struct platform_device *pdev)
 	ri->pdev = pdev;
 
 	rdev = regulator_register(&ri->desc, &pdev->dev,
-				  pdev->dev.platform_data, ri);
+				  pdev->dev.platform_data, ri, NULL);
 	if (IS_ERR(rdev)) {
 		dev_err(&pdev->dev, "Failed to register regulator %s\n",
 			ri->desc.name);
diff --git a/drivers/regulator/ab3100.c b/drivers/regulator/ab3100.c
index 585e4946fe0a..042271aace6a 100644
--- a/drivers/regulator/ab3100.c
+++ b/drivers/regulator/ab3100.c
@@ -634,7 +634,7 @@ static int __devinit ab3100_regulators_probe(struct platform_device *pdev)
 		rdev = regulator_register(&ab3100_regulator_desc[i],
 					  &pdev->dev,
 					  &plfdata->reg_constraints[i],
-					  reg);
+					  reg, NULL);
 
 		if (IS_ERR(rdev)) {
 			err = PTR_ERR(rdev);
diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c
index 6e1ae69646b3..e91b8ddc2793 100644
--- a/drivers/regulator/ab8500.c
+++ b/drivers/regulator/ab8500.c
@@ -822,7 +822,7 @@ static __devinit int ab8500_regulator_probe(struct platform_device *pdev)
 
 		/* register regulator with framework */
 		info->regulator = regulator_register(&info->desc, &pdev->dev,
-				&pdata->regulator[i], info);
+				&pdata->regulator[i], info, NULL);
 		if (IS_ERR(info->regulator)) {
 			err = PTR_ERR(info->regulator);
 			dev_err(&pdev->dev, "failed to register regulator %s\n",
diff --git a/drivers/regulator/ad5398.c b/drivers/regulator/ad5398.c
index a4be41614eeb..483c80930852 100644
--- a/drivers/regulator/ad5398.c
+++ b/drivers/regulator/ad5398.c
@@ -233,7 +233,7 @@ static int __devinit ad5398_probe(struct i2c_client *client,
 	chip->current_mask = (chip->current_level - 1) << chip->current_offset;
 
 	chip->rdev = regulator_register(&ad5398_reg, &client->dev,
-					init_data, chip);
+					init_data, chip, NULL);
 	if (IS_ERR(chip->rdev)) {
 		ret = PTR_ERR(chip->rdev);
 		dev_err(&client->dev, "failed to register %s %s\n",
diff --git a/drivers/regulator/bq24022.c b/drivers/regulator/bq24022.c
index e24d1b7d97a8..9fab6d1bbe80 100644
--- a/drivers/regulator/bq24022.c
+++ b/drivers/regulator/bq24022.c
@@ -107,7 +107,7 @@ static int __init bq24022_probe(struct platform_device *pdev)
 	ret = gpio_direction_output(pdata->gpio_nce, 1);
 
 	bq24022 = regulator_register(&bq24022_desc, &pdev->dev,
-				     pdata->init_data, pdata);
+				     pdata->init_data, pdata, NULL);
 	if (IS_ERR(bq24022)) {
 		dev_dbg(&pdev->dev, "couldn't register regulator\n");
 		ret = PTR_ERR(bq24022);
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 669d02160221..8b01eb06ba64 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -2637,7 +2637,7 @@ static void rdev_init_debugfs(struct regulator_dev *rdev)
  */
 struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
 	struct device *dev, const struct regulator_init_data *init_data,
-	void *driver_data)
+	void *driver_data, struct device_node *of_node)
 {
 	static atomic_t regulator_no = ATOMIC_INIT(0);
 	struct regulator_dev *rdev;
@@ -2696,6 +2696,7 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
 
 	/* register with sysfs */
 	rdev->dev.class = &regulator_class;
+	rdev->dev.of_node = of_node;
 	rdev->dev.parent = dev;
 	dev_set_name(&rdev->dev, "regulator.%d",
 		     atomic_inc_return(&regulator_no) - 1);
diff --git a/drivers/regulator/da903x.c b/drivers/regulator/da903x.c
index e23ddfa8b2c6..8dbc54da7d70 100644
--- a/drivers/regulator/da903x.c
+++ b/drivers/regulator/da903x.c
@@ -537,7 +537,7 @@ static int __devinit da903x_regulator_probe(struct platform_device *pdev)
 		ri->desc.ops = &da9030_regulator_ldo1_15_ops;
 
 	rdev = regulator_register(&ri->desc, &pdev->dev,
-				  pdev->dev.platform_data, ri);
+				  pdev->dev.platform_data, ri, NULL);
 	if (IS_ERR(rdev)) {
 		dev_err(&pdev->dev, "failed to register regulator %s\n",
 				ri->desc.name);
diff --git a/drivers/regulator/db8500-prcmu.c b/drivers/regulator/db8500-prcmu.c
index 78329751af54..515443fcd26b 100644
--- a/drivers/regulator/db8500-prcmu.c
+++ b/drivers/regulator/db8500-prcmu.c
@@ -486,7 +486,7 @@ static int __devinit db8500_regulator_probe(struct platform_device *pdev)
 
 		/* register with the regulator framework */
 		info->rdev = regulator_register(&info->desc, &pdev->dev,
-				init_data, info);
+				init_data, info, NULL);
 		if (IS_ERR(info->rdev)) {
 			err = PTR_ERR(info->rdev);
 			dev_err(&pdev->dev, "failed to register %s: err %i\n",
diff --git a/drivers/regulator/dummy.c b/drivers/regulator/dummy.c
index b8f520513ce7..0ee00de4be72 100644
--- a/drivers/regulator/dummy.c
+++ b/drivers/regulator/dummy.c
@@ -42,7 +42,7 @@ static int __devinit dummy_regulator_probe(struct platform_device *pdev)
 	int ret;
 
 	dummy_regulator_rdev = regulator_register(&dummy_desc, NULL,
-						  &dummy_initdata, NULL);
+						  &dummy_initdata, NULL, NULL);
 	if (IS_ERR(dummy_regulator_rdev)) {
 		ret = PTR_ERR(dummy_regulator_rdev);
 		pr_err("Failed to register regulator: %d\n", ret);
diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index 0650856b93cb..db90919f99c2 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -234,7 +234,7 @@ static int __devinit reg_fixed_voltage_probe(struct platform_device *pdev)
 	}
 
 	drvdata->dev = regulator_register(&drvdata->desc, &pdev->dev,
-					  config->init_data, drvdata);
+					  config->init_data, drvdata, NULL);
 	if (IS_ERR(drvdata->dev)) {
 		ret = PTR_ERR(drvdata->dev);
 		dev_err(&pdev->dev, "Failed to register regulator: %d\n", ret);
diff --git a/drivers/regulator/isl6271a-regulator.c b/drivers/regulator/isl6271a-regulator.c
index e4b3592e8176..c1a456c4257c 100644
--- a/drivers/regulator/isl6271a-regulator.c
+++ b/drivers/regulator/isl6271a-regulator.c
@@ -170,7 +170,7 @@ static int __devinit isl6271a_probe(struct i2c_client *i2c,
 
 	for (i = 0; i < 3; i++) {
 		pmic->rdev[i] = regulator_register(&isl_rd[i], &i2c->dev,
-						init_data, pmic);
+						init_data, pmic, NULL);
 		if (IS_ERR(pmic->rdev[i])) {
 			dev_err(&i2c->dev, "failed to register %s\n", id->name);
 			err = PTR_ERR(pmic->rdev[i]);
diff --git a/drivers/regulator/lp3971.c b/drivers/regulator/lp3971.c
index 72b16b5f3db6..0cfabd318a59 100644
--- a/drivers/regulator/lp3971.c
+++ b/drivers/regulator/lp3971.c
@@ -451,7 +451,7 @@ static int __devinit setup_regulators(struct lp3971 *lp3971,
 	for (i = 0; i < pdata->num_regulators; i++) {
 		struct lp3971_regulator_subdev *reg = &pdata->regulators[i];
 		lp3971->rdev[i] = regulator_register(&regulators[reg->id],
-					lp3971->dev, reg->initdata, lp3971);
+				lp3971->dev, reg->initdata, lp3971, NULL);
 
 		if (IS_ERR(lp3971->rdev[i])) {
 			err = PTR_ERR(lp3971->rdev[i]);
diff --git a/drivers/regulator/lp3972.c b/drivers/regulator/lp3972.c
index fbc5e3741bef..49a15eefe5fe 100644
--- a/drivers/regulator/lp3972.c
+++ b/drivers/regulator/lp3972.c
@@ -555,7 +555,7 @@ static int __devinit setup_regulators(struct lp3972 *lp3972,
 	for (i = 0; i < pdata->num_regulators; i++) {
 		struct lp3972_regulator_subdev *reg = &pdata->regulators[i];
 		lp3972->rdev[i] = regulator_register(&regulators[reg->id],
-					lp3972->dev, reg->initdata, lp3972);
+				lp3972->dev, reg->initdata, lp3972, NULL);
 
 		if (IS_ERR(lp3972->rdev[i])) {
 			err = PTR_ERR(lp3972->rdev[i]);
diff --git a/drivers/regulator/max1586.c b/drivers/regulator/max1586.c
index 3f49512c5134..40e7a4db2853 100644
--- a/drivers/regulator/max1586.c
+++ b/drivers/regulator/max1586.c
@@ -214,7 +214,7 @@ static int __devinit max1586_pmic_probe(struct i2c_client *client,
 		}
 		rdev[i] = regulator_register(&max1586_reg[id], &client->dev,
 					     pdata->subdevs[i].platform_data,
-					     max1586);
+					     max1586, NULL);
 		if (IS_ERR(rdev[i])) {
 			ret = PTR_ERR(rdev[i]);
 			dev_err(&client->dev, "failed to register %s\n",
diff --git a/drivers/regulator/max8649.c b/drivers/regulator/max8649.c
index 1062cf9f02dc..524a5da23c75 100644
--- a/drivers/regulator/max8649.c
+++ b/drivers/regulator/max8649.c
@@ -347,7 +347,7 @@ static int __devinit max8649_regulator_probe(struct i2c_client *client,
 	}
 
 	info->regulator = regulator_register(&dcdc_desc, &client->dev,
-					     pdata->regulator, info);
+					     pdata->regulator, info, NULL);
 	if (IS_ERR(info->regulator)) {
 		dev_err(info->dev, "failed to register regulator %s\n",
 			dcdc_desc.name);
diff --git a/drivers/regulator/max8660.c b/drivers/regulator/max8660.c
index 33f5d9a492ef..a838e664569f 100644
--- a/drivers/regulator/max8660.c
+++ b/drivers/regulator/max8660.c
@@ -449,7 +449,7 @@ static int __devinit max8660_probe(struct i2c_client *client,
 
 		rdev[i] = regulator_register(&max8660_reg[id], &client->dev,
 					     pdata->subdevs[i].platform_data,
-					     max8660);
+					     max8660, NULL);
 		if (IS_ERR(rdev[i])) {
 			ret = PTR_ERR(rdev[i]);
 			dev_err(&client->dev, "failed to register %s\n",
diff --git a/drivers/regulator/max8925-regulator.c b/drivers/regulator/max8925-regulator.c
index cc9ec0e03271..f976e5d0867e 100644
--- a/drivers/regulator/max8925-regulator.c
+++ b/drivers/regulator/max8925-regulator.c
@@ -266,7 +266,7 @@ static int __devinit max8925_regulator_probe(struct platform_device *pdev)
 	ri->chip = chip;
 
 	rdev = regulator_register(&ri->desc, &pdev->dev,
-				  pdata->regulator[pdev->id], ri);
+				  pdata->regulator[pdev->id], ri, NULL);
 	if (IS_ERR(rdev)) {
 		dev_err(&pdev->dev, "failed to register regulator %s\n",
 				ri->desc.name);
diff --git a/drivers/regulator/max8952.c b/drivers/regulator/max8952.c
index 3883d85c5b88..75d89400c123 100644
--- a/drivers/regulator/max8952.c
+++ b/drivers/regulator/max8952.c
@@ -208,7 +208,7 @@ static int __devinit max8952_pmic_probe(struct i2c_client *client,
 	max8952->pdata = pdata;
 
 	max8952->rdev = regulator_register(&regulator, max8952->dev,
-			&pdata->reg_data, max8952);
+			&pdata->reg_data, max8952, NULL);
 
 	if (IS_ERR(max8952->rdev)) {
 		ret = PTR_ERR(max8952->rdev);
diff --git a/drivers/regulator/max8997.c b/drivers/regulator/max8997.c
index 6176129a27e5..d26e8646277b 100644
--- a/drivers/regulator/max8997.c
+++ b/drivers/regulator/max8997.c
@@ -1146,7 +1146,7 @@ static __devinit int max8997_pmic_probe(struct platform_device *pdev)
 			regulators[id].n_voltages = 16;
 
 		rdev[i] = regulator_register(&regulators[id], max8997->dev,
-				pdata->regulators[i].initdata, max8997);
+				pdata->regulators[i].initdata, max8997, NULL);
 		if (IS_ERR(rdev[i])) {
 			ret = PTR_ERR(rdev[i]);
 			dev_err(max8997->dev, "regulator init failed for %d\n",
diff --git a/drivers/regulator/max8998.c b/drivers/regulator/max8998.c
index 41a1495eec2b..2d38c2493a07 100644
--- a/drivers/regulator/max8998.c
+++ b/drivers/regulator/max8998.c
@@ -847,7 +847,7 @@ static __devinit int max8998_pmic_probe(struct platform_device *pdev)
 			regulators[index].n_voltages = count;
 		}
 		rdev[i] = regulator_register(&regulators[index], max8998->dev,
-				pdata->regulators[i].initdata, max8998);
+				pdata->regulators[i].initdata, max8998, NULL);
 		if (IS_ERR(rdev[i])) {
 			ret = PTR_ERR(rdev[i]);
 			dev_err(max8998->dev, "regulator init failed\n");
diff --git a/drivers/regulator/mc13783-regulator.c b/drivers/regulator/mc13783-regulator.c
index 8479082e1aea..56d4a677c404 100644
--- a/drivers/regulator/mc13783-regulator.c
+++ b/drivers/regulator/mc13783-regulator.c
@@ -357,7 +357,7 @@ static int __devinit mc13783_regulator_probe(struct platform_device *pdev)
 		init_data = &pdata->regulators[i];
 		priv->regulators[i] = regulator_register(
 				&mc13783_regulators[init_data->id].desc,
-				&pdev->dev, init_data->init_data, priv);
+				&pdev->dev, init_data->init_data, priv, NULL);
 
 		if (IS_ERR(priv->regulators[i])) {
 			dev_err(&pdev->dev, "failed to register regulator %s\n",
diff --git a/drivers/regulator/mc13892-regulator.c b/drivers/regulator/mc13892-regulator.c
index 023d17d022cf..2824804a2892 100644
--- a/drivers/regulator/mc13892-regulator.c
+++ b/drivers/regulator/mc13892-regulator.c
@@ -573,7 +573,7 @@ static int __devinit mc13892_regulator_probe(struct platform_device *pdev)
 		init_data = &pdata->regulators[i];
 		priv->regulators[i] = regulator_register(
 			&mc13892_regulators[init_data->id].desc,
-			&pdev->dev, init_data->init_data, priv);
+			&pdev->dev, init_data->init_data, priv, NULL);
 
 		if (IS_ERR(priv->regulators[i])) {
 			dev_err(&pdev->dev, "failed to register regulator %s\n",
diff --git a/drivers/regulator/pcap-regulator.c b/drivers/regulator/pcap-regulator.c
index 31f6e11a7f16..a5aab1b08bcf 100644
--- a/drivers/regulator/pcap-regulator.c
+++ b/drivers/regulator/pcap-regulator.c
@@ -277,7 +277,7 @@ static int __devinit pcap_regulator_probe(struct platform_device *pdev)
 	void *pcap = dev_get_drvdata(pdev->dev.parent);
 
 	rdev = regulator_register(&pcap_regulators[pdev->id], &pdev->dev,
-				pdev->dev.platform_data, pcap);
+				pdev->dev.platform_data, pcap, NULL);
 	if (IS_ERR(rdev))
 		return PTR_ERR(rdev);
 
diff --git a/drivers/regulator/pcf50633-regulator.c b/drivers/regulator/pcf50633-regulator.c
index 69a11d9dd87f..1d1c31056297 100644
--- a/drivers/regulator/pcf50633-regulator.c
+++ b/drivers/regulator/pcf50633-regulator.c
@@ -320,7 +320,7 @@ static int __devinit pcf50633_regulator_probe(struct platform_device *pdev)
 	pcf = dev_to_pcf50633(pdev->dev.parent);
 
 	rdev = regulator_register(&regulators[pdev->id], &pdev->dev,
-				  pdev->dev.platform_data, pcf);
+				  pdev->dev.platform_data, pcf, NULL);
 	if (IS_ERR(rdev))
 		return PTR_ERR(rdev);
 
diff --git a/drivers/regulator/tps6105x-regulator.c b/drivers/regulator/tps6105x-regulator.c
index 1011873896dc..d9278da18a9e 100644
--- a/drivers/regulator/tps6105x-regulator.c
+++ b/drivers/regulator/tps6105x-regulator.c
@@ -151,7 +151,8 @@ static int __devinit tps6105x_regulator_probe(struct platform_device *pdev)
 	/* Register regulator with framework */
 	tps6105x->regulator = regulator_register(&tps6105x_regulator_desc,
 					     &tps6105x->client->dev,
-					     pdata->regulator_data, tps6105x);
+					     pdata->regulator_data, tps6105x,
+					     NULL);
 	if (IS_ERR(tps6105x->regulator)) {
 		ret = PTR_ERR(tps6105x->regulator);
 		dev_err(&tps6105x->client->dev,
diff --git a/drivers/regulator/tps65023-regulator.c b/drivers/regulator/tps65023-regulator.c
index 9fb4c7b81753..7fd3b9092d1b 100644
--- a/drivers/regulator/tps65023-regulator.c
+++ b/drivers/regulator/tps65023-regulator.c
@@ -496,7 +496,7 @@ static int __devinit tps_65023_probe(struct i2c_client *client,
 
 		/* Register the regulators */
 		rdev = regulator_register(&tps->desc[i], &client->dev,
-					  init_data, tps);
+					  init_data, tps, NULL);
 		if (IS_ERR(rdev)) {
 			dev_err(&client->dev, "failed to register %s\n",
 				id->name);
diff --git a/drivers/regulator/tps6507x-regulator.c b/drivers/regulator/tps6507x-regulator.c
index bdef70365f52..0b63ef71a5fe 100644
--- a/drivers/regulator/tps6507x-regulator.c
+++ b/drivers/regulator/tps6507x-regulator.c
@@ -599,7 +599,7 @@ int tps6507x_pmic_probe(struct platform_device *pdev)
 		tps->desc[i].owner = THIS_MODULE;
 
 		rdev = regulator_register(&tps->desc[i],
-					tps6507x_dev->dev, init_data, tps);
+					tps6507x_dev->dev, init_data, tps, NULL);
 		if (IS_ERR(rdev)) {
 			dev_err(tps6507x_dev->dev,
 				"failed to register %s regulator\n",
diff --git a/drivers/regulator/tps6524x-regulator.c b/drivers/regulator/tps6524x-regulator.c
index 9166aa0a9df7..70b7b1f4f000 100644
--- a/drivers/regulator/tps6524x-regulator.c
+++ b/drivers/regulator/tps6524x-regulator.c
@@ -651,7 +651,7 @@ static int __devinit pmic_probe(struct spi_device *spi)
 			hw->desc[i].n_voltages = 1;
 
 		hw->rdev[i] = regulator_register(&hw->desc[i], dev,
-						 init_data, hw);
+						 init_data, hw, NULL);
 		if (IS_ERR(hw->rdev[i])) {
 			ret = PTR_ERR(hw->rdev[i]);
 			hw->rdev[i] = NULL;
diff --git a/drivers/regulator/tps6586x-regulator.c b/drivers/regulator/tps6586x-regulator.c
index 14b9389dd52a..c75fb20faa57 100644
--- a/drivers/regulator/tps6586x-regulator.c
+++ b/drivers/regulator/tps6586x-regulator.c
@@ -396,7 +396,7 @@ static int __devinit tps6586x_regulator_probe(struct platform_device *pdev)
 		return err;
 
 	rdev = regulator_register(&ri->desc, &pdev->dev,
-				  pdev->dev.platform_data, ri);
+				  pdev->dev.platform_data, ri, NULL);
 	if (IS_ERR(rdev)) {
 		dev_err(&pdev->dev, "failed to register regulator %s\n",
 				ri->desc.name);
diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c
index 66d2d60b436a..eaea9b4a09d0 100644
--- a/drivers/regulator/tps65910-regulator.c
+++ b/drivers/regulator/tps65910-regulator.c
@@ -963,7 +963,7 @@ static __devinit int tps65910_probe(struct platform_device *pdev)
 		pmic->desc[i].owner = THIS_MODULE;
 
 		rdev = regulator_register(&pmic->desc[i],
-				tps65910->dev, reg_data, pmic);
+				tps65910->dev, reg_data, pmic, NULL);
 		if (IS_ERR(rdev)) {
 			dev_err(tps65910->dev,
 				"failed to register %s regulator\n",
diff --git a/drivers/regulator/tps65912-regulator.c b/drivers/regulator/tps65912-regulator.c
index 39d4a1749e71..da00d88f94b7 100644
--- a/drivers/regulator/tps65912-regulator.c
+++ b/drivers/regulator/tps65912-regulator.c
@@ -727,7 +727,7 @@ static __devinit int tps65912_probe(struct platform_device *pdev)
 		pmic->desc[i].owner = THIS_MODULE;
 		range = tps65912_get_range(pmic, i);
 		rdev = regulator_register(&pmic->desc[i],
-					tps65912->dev, reg_data, pmic);
+					tps65912->dev, reg_data, pmic, NULL);
 		if (IS_ERR(rdev)) {
 			dev_err(tps65912->dev,
 				"failed to register %s regulator\n",
diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c
index ee8747f4fa08..9a2e07a094b3 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -1070,7 +1070,7 @@ static int __devinit twlreg_probe(struct platform_device *pdev)
 		break;
 	}
 
-	rdev = regulator_register(&info->desc, &pdev->dev, initdata, info);
+	rdev = regulator_register(&info->desc, &pdev->dev, initdata, info, NULL);
 	if (IS_ERR(rdev)) {
 		dev_err(&pdev->dev, "can't register %s, %ld\n",
 				info->desc.name, PTR_ERR(rdev));
diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c
index bd3531d8b2ac..7558a9666a50 100644
--- a/drivers/regulator/wm831x-dcdc.c
+++ b/drivers/regulator/wm831x-dcdc.c
@@ -553,7 +553,7 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
 		wm831x_buckv_dvs_init(dcdc, pdata->dcdc[id]->driver_data);
 
 	dcdc->regulator = regulator_register(&dcdc->desc, &pdev->dev,
-					     pdata->dcdc[id], dcdc);
+					     pdata->dcdc[id], dcdc, NULL);
 	if (IS_ERR(dcdc->regulator)) {
 		ret = PTR_ERR(dcdc->regulator);
 		dev_err(wm831x->dev, "Failed to register DCDC%d: %d\n",
@@ -747,7 +747,7 @@ static __devinit int wm831x_buckp_probe(struct platform_device *pdev)
 	dcdc->desc.owner = THIS_MODULE;
 
 	dcdc->regulator = regulator_register(&dcdc->desc, &pdev->dev,
-					     pdata->dcdc[id], dcdc);
+					     pdata->dcdc[id], dcdc, NULL);
 	if (IS_ERR(dcdc->regulator)) {
 		ret = PTR_ERR(dcdc->regulator);
 		dev_err(wm831x->dev, "Failed to register DCDC%d: %d\n",
@@ -874,7 +874,7 @@ static __devinit int wm831x_boostp_probe(struct platform_device *pdev)
 	dcdc->desc.owner = THIS_MODULE;
 
 	dcdc->regulator = regulator_register(&dcdc->desc, &pdev->dev,
-					     pdata->dcdc[id], dcdc);
+					     pdata->dcdc[id], dcdc, NULL);
 	if (IS_ERR(dcdc->regulator)) {
 		ret = PTR_ERR(dcdc->regulator);
 		dev_err(wm831x->dev, "Failed to register DCDC%d: %d\n",
@@ -973,7 +973,7 @@ static __devinit int wm831x_epe_probe(struct platform_device *pdev)
 	dcdc->desc.owner = THIS_MODULE;
 
 	dcdc->regulator = regulator_register(&dcdc->desc, &pdev->dev,
-					     pdata->epe[id], dcdc);
+					     pdata->epe[id], dcdc, NULL);
 	if (IS_ERR(dcdc->regulator)) {
 		ret = PTR_ERR(dcdc->regulator);
 		dev_err(wm831x->dev, "Failed to register EPE%d: %d\n",
diff --git a/drivers/regulator/wm831x-isink.c b/drivers/regulator/wm831x-isink.c
index 01f27c7f4236..d3ad3f5cff46 100644
--- a/drivers/regulator/wm831x-isink.c
+++ b/drivers/regulator/wm831x-isink.c
@@ -189,7 +189,7 @@ static __devinit int wm831x_isink_probe(struct platform_device *pdev)
 	isink->desc.owner = THIS_MODULE;
 
 	isink->regulator = regulator_register(&isink->desc, &pdev->dev,
-					     pdata->isink[id], isink);
+					     pdata->isink[id], isink, NULL);
 	if (IS_ERR(isink->regulator)) {
 		ret = PTR_ERR(isink->regulator);
 		dev_err(wm831x->dev, "Failed to register ISINK%d: %d\n",
diff --git a/drivers/regulator/wm831x-ldo.c b/drivers/regulator/wm831x-ldo.c
index 6709710a059e..5e96a2386b1d 100644
--- a/drivers/regulator/wm831x-ldo.c
+++ b/drivers/regulator/wm831x-ldo.c
@@ -351,7 +351,7 @@ static __devinit int wm831x_gp_ldo_probe(struct platform_device *pdev)
 	ldo->desc.owner = THIS_MODULE;
 
 	ldo->regulator = regulator_register(&ldo->desc, &pdev->dev,
-					     pdata->ldo[id], ldo);
+					     pdata->ldo[id], ldo, NULL);
 	if (IS_ERR(ldo->regulator)) {
 		ret = PTR_ERR(ldo->regulator);
 		dev_err(wm831x->dev, "Failed to register LDO%d: %d\n",
@@ -621,7 +621,7 @@ static __devinit int wm831x_aldo_probe(struct platform_device *pdev)
 	ldo->desc.owner = THIS_MODULE;
 
 	ldo->regulator = regulator_register(&ldo->desc, &pdev->dev,
-					     pdata->ldo[id], ldo);
+					     pdata->ldo[id], ldo, NULL);
 	if (IS_ERR(ldo->regulator)) {
 		ret = PTR_ERR(ldo->regulator);
 		dev_err(wm831x->dev, "Failed to register LDO%d: %d\n",
@@ -818,7 +818,7 @@ static __devinit int wm831x_alive_ldo_probe(struct platform_device *pdev)
 	ldo->desc.owner = THIS_MODULE;
 
 	ldo->regulator = regulator_register(&ldo->desc, &pdev->dev,
-					     pdata->ldo[id], ldo);
+					     pdata->ldo[id], ldo, NULL);
 	if (IS_ERR(ldo->regulator)) {
 		ret = PTR_ERR(ldo->regulator);
 		dev_err(wm831x->dev, "Failed to register LDO%d: %d\n",
diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index 1bcb22c44095..6894009d815a 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -1428,7 +1428,7 @@ static int wm8350_regulator_probe(struct platform_device *pdev)
 	/* register regulator */
 	rdev = regulator_register(&wm8350_reg[pdev->id], &pdev->dev,
 				  pdev->dev.platform_data,
-				  dev_get_drvdata(&pdev->dev));
+				  dev_get_drvdata(&pdev->dev), NULL);
 	if (IS_ERR(rdev)) {
 		dev_err(&pdev->dev, "failed to register %s\n",
 			wm8350_reg[pdev->id].name);
diff --git a/drivers/regulator/wm8400-regulator.c b/drivers/regulator/wm8400-regulator.c
index 71632ddc3781..706f39563a7b 100644
--- a/drivers/regulator/wm8400-regulator.c
+++ b/drivers/regulator/wm8400-regulator.c
@@ -326,7 +326,7 @@ static int __devinit wm8400_regulator_probe(struct platform_device *pdev)
 	struct regulator_dev *rdev;
 
 	rdev = regulator_register(&regulators[pdev->id], &pdev->dev,
-				  pdev->dev.platform_data, wm8400);
+				  pdev->dev.platform_data, wm8400, NULL);
 
 	if (IS_ERR(rdev))
 		return PTR_ERR(rdev);
diff --git a/drivers/regulator/wm8994-regulator.c b/drivers/regulator/wm8994-regulator.c
index b87bf5c841f8..435e335d6e67 100644
--- a/drivers/regulator/wm8994-regulator.c
+++ b/drivers/regulator/wm8994-regulator.c
@@ -269,7 +269,7 @@ static __devinit int wm8994_ldo_probe(struct platform_device *pdev)
 		ldo->is_enabled = true;
 
 	ldo->regulator = regulator_register(&wm8994_ldo_desc[id], &pdev->dev,
-					     pdata->ldo[id].init_data, ldo);
+					     pdata->ldo[id].init_data, ldo, NULL);
 	if (IS_ERR(ldo->regulator)) {
 		ret = PTR_ERR(ldo->regulator);
 		dev_err(wm8994->dev, "Failed to register LDO%d: %d\n",
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 52c89ae32f64..8fbb6964bb7e 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -212,7 +212,7 @@ struct regulator_dev {
 
 struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
 	struct device *dev, const struct regulator_init_data *init_data,
-	void *driver_data);
+	void *driver_data, struct device_node *of_node);
 void regulator_unregister(struct regulator_dev *rdev);
 
 int regulator_notifier_call_chain(struct regulator_dev *rdev,
diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c
index d15695d1c273..fc7ab30572d0 100644
--- a/sound/soc/codecs/sgtl5000.c
+++ b/sound/soc/codecs/sgtl5000.c
@@ -833,7 +833,7 @@ static int ldo_regulator_register(struct snd_soc_codec *codec,
 	ldo->voltage = voltage;
 
 	ldo->dev = regulator_register(&ldo->desc, codec->dev,
-					  init_data, ldo);
+					  init_data, ldo, NULL);
 	if (IS_ERR(ldo->dev)) {
 		int ret = PTR_ERR(ldo->dev);
 
-- 
cgit v1.2.3


From 69511a452e6dc6b74fe4f3671a51b1b44b9c57e3 Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak@ti.com>
Date: Fri, 18 Nov 2011 16:47:20 +0530
Subject: regulator: map consumer regulator based on device tree

Device nodes in DT can associate themselves with one or more
regulators/supply by providing a list of phandles (to regulator nodes)
and corresponding supply names.

For Example:
	devicenode: node@0x0 {
		...
		...
		vmmc-supply = <&regulator1>;
		vpll-supply = <&regulator2>;
	};

The driver would then do a regulator_get(dev, "vmmc"); to get
regulator1 and do a regulator_get(dev, "vpll"); to get
regulator2.

of_get_regulator() extracts the regulator node for a given
device, based on the supply name.

Use it to look up the regulator for a given consumer from device tree, during
a regulator_get(). If not found fallback and lookup through
the regulator_map_list instead.

Also, since the regulator dt nodes can use the same binding to
associate with a parent regulator/supply, allow the drivers to
specify a supply_name, which can then be used to lookup dt
to find the parent phandle.

Signed-off-by: Rajendra Nayak <rnayak@ti.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/core.c         | 79 ++++++++++++++++++++++++++++++++++------
 include/linux/regulator/driver.h |  2 +
 2 files changed, 69 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 8b01eb06ba64..9867ebc00bed 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -25,6 +25,8 @@
 #include <linux/mutex.h>
 #include <linux/suspend.h>
 #include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/regulator/of_regulator.h>
 #include <linux/regulator/consumer.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
@@ -132,6 +134,33 @@ static struct regulator *get_device_regulator(struct device *dev)
 	return NULL;
 }
 
+/**
+ * of_get_regulator - get a regulator device node based on supply name
+ * @dev: Device pointer for the consumer (of regulator) device
+ * @supply: regulator supply name
+ *
+ * Extract the regulator device node corresponding to the supply name.
+ * retruns the device node corresponding to the regulator if found, else
+ * returns NULL.
+ */
+static struct device_node *of_get_regulator(struct device *dev, const char *supply)
+{
+	struct device_node *regnode = NULL;
+	char prop_name[32]; /* 32 is max size of property name */
+
+	dev_dbg(dev, "Looking up %s-supply from device tree\n", supply);
+
+	snprintf(prop_name, 32, "%s-supply", supply);
+	regnode = of_parse_phandle(dev->of_node, prop_name, 0);
+
+	if (!regnode) {
+		dev_warn(dev, "%s property in node %s references invalid phandle",
+				prop_name, dev->of_node->full_name);
+		return NULL;
+	}
+	return regnode;
+}
+
 /* Platform voltage constraint check */
 static int regulator_check_voltage(struct regulator_dev *rdev,
 				   int *min_uV, int *max_uV)
@@ -1148,6 +1177,30 @@ static int _regulator_get_enable_time(struct regulator_dev *rdev)
 	return rdev->desc->ops->enable_time(rdev);
 }
 
+static struct regulator_dev *regulator_dev_lookup(struct device *dev,
+							 const char *supply)
+{
+	struct regulator_dev *r;
+	struct device_node *node;
+
+	/* first do a dt based lookup */
+	if (dev && dev->of_node) {
+		node = of_get_regulator(dev, supply);
+		if (node)
+			list_for_each_entry(r, &regulator_list, list)
+				if (r->dev.parent &&
+					node == r->dev.of_node)
+					return r;
+	}
+
+	/* if not found, try doing it non-dt way */
+	list_for_each_entry(r, &regulator_list, list)
+		if (strcmp(rdev_get_name(r), supply) == 0)
+			return r;
+
+	return NULL;
+}
+
 /* Internal regulator request function */
 static struct regulator *_regulator_get(struct device *dev, const char *id,
 					int exclusive)
@@ -1168,6 +1221,10 @@ static struct regulator *_regulator_get(struct device *dev, const char *id,
 
 	mutex_lock(&regulator_list_mutex);
 
+	rdev = regulator_dev_lookup(dev, id);
+	if (rdev)
+		goto found;
+
 	list_for_each_entry(map, &regulator_map_list, list) {
 		/* If the mapping has a device set up it must match */
 		if (map->dev_name &&
@@ -2642,6 +2699,7 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
 	static atomic_t regulator_no = ATOMIC_INIT(0);
 	struct regulator_dev *rdev;
 	int ret, i;
+	const char *supply = NULL;
 
 	if (regulator_desc == NULL)
 		return ERR_PTR(-EINVAL);
@@ -2718,21 +2776,18 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
 	if (ret < 0)
 		goto scrub;
 
-	if (init_data->supply_regulator) {
+	if (init_data->supply_regulator)
+		supply = init_data->supply_regulator;
+	else if (regulator_desc->supply_name)
+		supply = regulator_desc->supply_name;
+
+	if (supply) {
 		struct regulator_dev *r;
-		int found = 0;
 
-		list_for_each_entry(r, &regulator_list, list) {
-			if (strcmp(rdev_get_name(r),
-				   init_data->supply_regulator) == 0) {
-				found = 1;
-				break;
-			}
-		}
+		r = regulator_dev_lookup(dev, supply);
 
-		if (!found) {
-			dev_err(dev, "Failed to find supply %s\n",
-				init_data->supply_regulator);
+		if (!r) {
+			dev_err(dev, "Failed to find supply %s\n", supply);
 			ret = -ENODEV;
 			goto scrub;
 		}
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 8fbb6964bb7e..4214b9a9d1c9 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -154,6 +154,7 @@ enum regulator_type {
  * this type.
  *
  * @name: Identifying name for the regulator.
+ * @supply_name: Identifying the regulator supply
  * @id: Numerical identifier for the regulator.
  * @n_voltages: Number of selectors available for ops.list_voltage().
  * @ops: Regulator operations table.
@@ -163,6 +164,7 @@ enum regulator_type {
  */
 struct regulator_desc {
 	const char *name;
+	const char *supply_name;
 	int id;
 	unsigned n_voltages;
 	struct regulator_ops *ops;
-- 
cgit v1.2.3


From 6a76b7a9cc93dec6ae58d70f1257d234291908e0 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Mon, 21 Nov 2011 23:32:35 +0100
Subject: PM / Memory-hotplug: Avoid task freezing failures

The lock_system_sleep() function is used in the memory hotplug code at
several places in order to implement mutual exclusion with hibernation.
However, this function tries to acquire the 'pm_mutex' lock using
mutex_lock() and hence blocks in TASK_UNINTERRUPTIBLE state if it doesn't
get the lock. This would lead to task freezing failures and hence
hibernation failure as a consequence, even though the hibernation call path
successfully acquired the lock.

But it is to be noted that, since this task tries to acquire pm_mutex, if it
blocks due to this, we are *100% sure* that this task is not going to run
as long as hibernation sequence is in progress, since hibernation releases
'pm_mutex' only at the very end, when everything is done.
And this means, this task is going to be anyway blocked for much more longer
than what the freezer intends to achieve; which means, freezing and thawing
doesn't really make any difference to this task!

So, to fix freezing failures, we just ask the freezer to skip freezing this
task, since it is already "frozen enough".

But instead of calling freezer_do_not_count() and freezer_count() as it is,
we use only the relevant parts of those functions, because restrictions
such as 'the task should be a userspace one' etc., might not be relevant in
this scenario.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/suspend.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 57a692432f8a..1f7fff47cfac 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -380,12 +380,16 @@ static inline void unlock_system_sleep(void) {}
 
 static inline void lock_system_sleep(void)
 {
+	/* simplified freezer_do_not_count() */
+	current->flags |= PF_FREEZER_SKIP;
 	mutex_lock(&pm_mutex);
 }
 
 static inline void unlock_system_sleep(void)
 {
 	mutex_unlock(&pm_mutex);
+	/* simplified freezer_count() */
+	current->flags &= ~PF_FREEZER_SKIP;
 }
 #endif
 
-- 
cgit v1.2.3


From 56242a1fc595d158eddefbb4d6d76e82c2535f55 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 21 Nov 2011 21:33:18 -0800
Subject: sh: clkfwk: setup clock parent from current register value

Some clocks can select its parent clock by CPG register.
But it might have been modified by boot-loader or something.
This patch removed fixed initial parent clock,
and setup it from their current register settings.
It works on div6 reparent clocks for now.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/arm/mach-shmobile/clock-sh7372.c  |  6 +++---
 arch/sh/kernel/cpu/sh4a/clock-sh7724.c |  4 ++--
 drivers/sh/clk/cpg.c                   | 35 ++++++++++++++++++++++++++++++++++
 include/linux/sh_clk.h                 |  9 ++++++---
 4 files changed, 46 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c
index 995a9c3aec8f..e349c22a0d71 100644
--- a/arch/arm/mach-shmobile/clock-sh7372.c
+++ b/arch/arm/mach-shmobile/clock-sh7372.c
@@ -411,11 +411,11 @@ static struct clk *fsibckcr_parent[] = {
 };
 
 static struct clk div6_reparent_clks[DIV6_REPARENT_NR] = {
-	[DIV6_HDMI] = SH_CLK_DIV6_EXT(&pllc1_div2_clk, HDMICKCR, 0,
+	[DIV6_HDMI] = SH_CLK_DIV6_EXT(HDMICKCR, 0,
 				      hdmi_parent, ARRAY_SIZE(hdmi_parent), 6, 2),
-	[DIV6_FSIA] = SH_CLK_DIV6_EXT(&pllc1_div2_clk, FSIACKCR, 0,
+	[DIV6_FSIA] = SH_CLK_DIV6_EXT(FSIACKCR, 0,
 				      fsiackcr_parent, ARRAY_SIZE(fsiackcr_parent), 6, 2),
-	[DIV6_FSIB] = SH_CLK_DIV6_EXT(&pllc1_div2_clk, FSIBCKCR, 0,
+	[DIV6_FSIB] = SH_CLK_DIV6_EXT(FSIBCKCR, 0,
 				      fsibckcr_parent, ARRAY_SIZE(fsibckcr_parent), 6, 2),
 };
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
index 8668f557e0ac..77118387f1cf 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
@@ -189,9 +189,9 @@ static struct clk *fclkbcr_parent[] = {
 };
 
 static struct clk div6_reparent_clks[DIV6_REPARENT_NR] = {
-	[DIV6_FA] = SH_CLK_DIV6_EXT(&div3_clk, FCLKACR, 0,
+	[DIV6_FA] = SH_CLK_DIV6_EXT(FCLKACR, 0,
 				      fclkacr_parent, ARRAY_SIZE(fclkacr_parent), 6, 2),
-	[DIV6_FB] = SH_CLK_DIV6_EXT(&div3_clk, FCLKBCR, 0,
+	[DIV6_FB] = SH_CLK_DIV6_EXT(FCLKBCR, 0,
 				      fclkbcr_parent, ARRAY_SIZE(fclkbcr_parent), 6, 2),
 };
 
diff --git a/drivers/sh/clk/cpg.c b/drivers/sh/clk/cpg.c
index 82dd6fb17838..5e4301b936e7 100644
--- a/drivers/sh/clk/cpg.c
+++ b/drivers/sh/clk/cpg.c
@@ -167,6 +167,38 @@ static struct clk_ops sh_clk_div6_reparent_clk_ops = {
 	.set_parent	= sh_clk_div6_set_parent,
 };
 
+static int __init sh_clk_init_parent(struct clk *clk)
+{
+	u32 val;
+
+	if (clk->parent)
+		return 0;
+
+	if (!clk->parent_table || !clk->parent_num)
+		return 0;
+
+	if (!clk->src_width) {
+		pr_err("sh_clk_init_parent: cannot select parent clock\n");
+		return -EINVAL;
+	}
+
+	val  = (__raw_readl(clk->enable_reg) >> clk->src_shift);
+	val &= (1 << clk->src_width) - 1;
+
+	if (val >= clk->parent_num) {
+		pr_err("sh_clk_init_parent: parent table size failed\n");
+		return -EINVAL;
+	}
+
+	clk->parent = clk->parent_table[val];
+	if (!clk->parent) {
+		pr_err("sh_clk_init_parent: unable to set parent");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int __init sh_clk_div6_register_ops(struct clk *clks, int nr,
 					   struct clk_ops *ops)
 {
@@ -190,6 +222,9 @@ static int __init sh_clk_div6_register_ops(struct clk *clks, int nr,
 		clkp->ops = ops;
 		clkp->freq_table = freq_table + (k * freq_table_size);
 		clkp->freq_table[nr_divs].frequency = CPUFREQ_TABLE_END;
+		ret = sh_clk_init_parent(clkp);
+		if (ret < 0)
+			break;
 
 		ret = clk_register(clkp);
 	}
diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h
index a20831cf336a..e834304c0b6a 100644
--- a/include/linux/sh_clk.h
+++ b/include/linux/sh_clk.h
@@ -131,10 +131,9 @@ int sh_clk_div4_enable_register(struct clk *clks, int nr,
 int sh_clk_div4_reparent_register(struct clk *clks, int nr,
 			 struct clk_div4_table *table);
 
-#define SH_CLK_DIV6_EXT(_parent, _reg, _flags, _parents,	\
+#define SH_CLK_DIV6_EXT(_reg, _flags, _parents,			\
 			_num_parents, _src_shift, _src_width)	\
 {								\
-	.parent = _parent,					\
 	.enable_reg = (void __iomem *)_reg,			\
 	.flags = _flags,					\
 	.parent_table = _parents,				\
@@ -144,7 +143,11 @@ int sh_clk_div4_reparent_register(struct clk *clks, int nr,
 }
 
 #define SH_CLK_DIV6(_parent, _reg, _flags)			\
-	SH_CLK_DIV6_EXT(_parent, _reg, _flags, NULL, 0, 0, 0)
+{								\
+	.parent		= _parent,				\
+	.enable_reg	= (void __iomem *)_reg,			\
+	.flags		= _flags,				\
+}
 
 int sh_clk_div6_register(struct clk *clks, int nr);
 int sh_clk_div6_reparent_register(struct clk *clks, int nr);
-- 
cgit v1.2.3


From 62c9ea6b120688d800b4d892eaf737c20a73e86b Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Fri, 25 Nov 2011 00:44:55 +0100
Subject: Freezer: fix more fallout from the thaw_process rename

Commit 944e192db53c "freezer: rename thaw_process() to __thaw_task()
and simplify the implementation" did not create a !CONFIG_FREEZER version
of __thaw_task().

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/freezer.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 09570ac22be6..c1ee2833655e 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -149,6 +149,7 @@ static inline int freezer_should_skip(struct task_struct *p)
 #else /* !CONFIG_FREEZER */
 static inline bool frozen(struct task_struct *p) { return false; }
 static inline bool freezing(struct task_struct *p) { return false; }
+static inline void __thaw_task(struct task_struct *t) {}
 
 static inline bool __refrigerator(bool check_kthr_stop) { return false; }
 static inline int freeze_processes(void) { return -ENOSYS; }
-- 
cgit v1.2.3


From 92de378b739115c8afaae5cd3f25159406bb9914 Mon Sep 17 00:00:00 2001
From: Philip Rakity <prakity@marvell.com>
Date: Fri, 25 Nov 2011 23:19:37 +0400
Subject: max8925_power: No temperature interrupts if temperature not connected

Brownstone does not have temperature reading circuit hooked up.
This leads to spurious interrupts.

Allow the platform layer to indicate no temperature circuit
and do not activate interrupts if no temperature control is set

Signed-off-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/max8925_power.c | 12 ++++++++----
 include/linux/mfd/max8925.h   |  1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/max8925_power.c b/drivers/power/max8925_power.c
index b16bd71f24fb..83b827cf004b 100644
--- a/drivers/power/max8925_power.c
+++ b/drivers/power/max8925_power.c
@@ -78,6 +78,7 @@ struct max8925_power_info {
 	unsigned		batt_detect:1;	/* detecing MB by ID pin */
 	unsigned		topoff_threshold:2;
 	unsigned		fast_charge:3;
+	unsigned		no_temp_support:1;
 
 	int (*set_charger) (int);
 };
@@ -116,7 +117,7 @@ static irqreturn_t max8925_charger_handler(int irq, void *data)
 	case MAX8925_IRQ_VCHG_DC_F:
 		info->ac_online = 0;
 		__set_charger(info, 0);
-		dev_dbg(chip->dev, "Adapter is removal\n");
+		dev_dbg(chip->dev, "Adapter removed\n");
 		break;
 	case MAX8925_IRQ_VCHG_USB_R:
 		info->usb_online = 1;
@@ -126,7 +127,7 @@ static irqreturn_t max8925_charger_handler(int irq, void *data)
 	case MAX8925_IRQ_VCHG_USB_F:
 		info->usb_online = 0;
 		__set_charger(info, 0);
-		dev_dbg(chip->dev, "USB is removal\n");
+		dev_dbg(chip->dev, "USB removed\n");
 		break;
 	case MAX8925_IRQ_VCHG_THM_OK_F:
 		/* Battery is not ready yet */
@@ -369,8 +370,10 @@ static __devinit int max8925_init_charger(struct max8925_chip *chip,
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_OVP, "usb-ovp");
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_F, "usb-remove");
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_R, "usb-insert");
-	REQUEST_IRQ(MAX8925_IRQ_VCHG_THM_OK_R, "batt-temp-in-range");
-	REQUEST_IRQ(MAX8925_IRQ_VCHG_THM_OK_F, "batt-temp-out-range");
+	if (!info->no_temp_support) {
+		REQUEST_IRQ(MAX8925_IRQ_VCHG_THM_OK_R, "batt-temp-in-range");
+		REQUEST_IRQ(MAX8925_IRQ_VCHG_THM_OK_F, "batt-temp-out-range");
+	}
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_SYSLOW_F, "vsys-high");
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_SYSLOW_R, "vsys-low");
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_RST, "charger-reset");
@@ -477,6 +480,7 @@ static __devinit int max8925_power_probe(struct platform_device *pdev)
 	info->topoff_threshold = pdata->topoff_threshold;
 	info->fast_charge = pdata->fast_charge;
 	info->set_charger = pdata->set_charger;
+	info->no_temp_support = pdata->no_temp_support;
 
 	max8925_init_charger(chip, info);
 	return 0;
diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index 5259dfe8c585..69ec8f0bd490 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h
@@ -223,6 +223,7 @@ struct max8925_power_pdata {
 	unsigned	batt_detect:1;
 	unsigned	topoff_threshold:2;
 	unsigned	fast_charge:3;	/* charge current */
+	unsigned	no_temp_support:1; /* set if no temperature detect */
 };
 
 /*
-- 
cgit v1.2.3


From 5ba1fa0ae288e93179d54e3c59b2241eb1709f0c Mon Sep 17 00:00:00 2001
From: Philip Rakity <prakity@marvell.com>
Date: Fri, 25 Nov 2011 23:24:03 +0400
Subject: max8925_power: Do not detect ac insert if handled by other code

On brownstone rev 4 ac-insert detect is handled by vbus.

allow the platform code to configure the disabling of insert
by setting no_insert_detect.

Signed-off-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/max8925_power.c | 8 ++++++--
 include/linux/mfd/max8925.h   | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/max8925_power.c b/drivers/power/max8925_power.c
index 83b827cf004b..be2d563cb315 100644
--- a/drivers/power/max8925_power.c
+++ b/drivers/power/max8925_power.c
@@ -79,6 +79,7 @@ struct max8925_power_info {
 	unsigned		topoff_threshold:2;
 	unsigned		fast_charge:3;
 	unsigned		no_temp_support:1;
+	unsigned		no_insert_detect:1;
 
 	int (*set_charger) (int);
 };
@@ -365,8 +366,10 @@ static __devinit int max8925_init_charger(struct max8925_chip *chip,
 	int ret;
 
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_DC_OVP, "ac-ovp");
-	REQUEST_IRQ(MAX8925_IRQ_VCHG_DC_F, "ac-remove");
-	REQUEST_IRQ(MAX8925_IRQ_VCHG_DC_R, "ac-insert");
+	if (!info->no_insert_detect) {
+		REQUEST_IRQ(MAX8925_IRQ_VCHG_DC_F, "ac-remove");
+		REQUEST_IRQ(MAX8925_IRQ_VCHG_DC_R, "ac-insert");
+	}
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_OVP, "usb-ovp");
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_F, "usb-remove");
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_R, "usb-insert");
@@ -481,6 +484,7 @@ static __devinit int max8925_power_probe(struct platform_device *pdev)
 	info->fast_charge = pdata->fast_charge;
 	info->set_charger = pdata->set_charger;
 	info->no_temp_support = pdata->no_temp_support;
+	info->no_insert_detect = pdata->no_insert_detect;
 
 	max8925_init_charger(chip, info);
 	return 0;
diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index 69ec8f0bd490..e742e044e2eb 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h
@@ -224,6 +224,7 @@ struct max8925_power_pdata {
 	unsigned	topoff_threshold:2;
 	unsigned	fast_charge:3;	/* charge current */
 	unsigned	no_temp_support:1; /* set if no temperature detect */
+	unsigned	no_insert_detect:1; /* set if no ac insert detect */
 };
 
 /*
-- 
cgit v1.2.3


From 72af5a4b9cc9c4527f2967e0283bee632237c26e Mon Sep 17 00:00:00 2001
From: Philip Rakity <prakity@marvell.com>
Date: Fri, 25 Nov 2011 23:11:06 +0400
Subject: max8925_power: Remove support for irq bits that do not exist
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The max8925 cannot return usb status.  The bits

       [MAX8925_IRQ_VCHG_USB_OVP] = {
               .reg            = MAX8925_CHG_IRQ1,
               .mask_reg       = MAX8925_CHG_IRQ1_MASK,
               .offs           = 1 << 3,
       },
       [MAX8925_IRQ_VCHG_USB_F] =  {
               .reg            = MAX8925_CHG_IRQ1,
               .mask_reg       = MAX8925_CHG_IRQ1_MASK,
               .offs           = 1 << 4,
       },
       [MAX8925_IRQ_VCHG_USB_R] = {
               .reg            = MAX8925_CHG_IRQ1,
               .mask_reg       = MAX8925_CHG_IRQ1_MASK,
               .offs           = 1 << 5,
       },

do not exist in the irq register.

Signed-off-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/mfd/max8925-core.c    | 15 ---------------
 drivers/power/max8925_power.c | 13 -------------
 include/linux/mfd/max8925.h   |  3 ---
 3 files changed, 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c
index e1e59c92f758..ca881efedf75 100644
--- a/drivers/mfd/max8925-core.c
+++ b/drivers/mfd/max8925-core.c
@@ -210,21 +210,6 @@ static struct max8925_irq_data max8925_irqs[] = {
 		.mask_reg	= MAX8925_CHG_IRQ1_MASK,
 		.offs		= 1 << 2,
 	},
-	[MAX8925_IRQ_VCHG_USB_OVP] = {
-		.reg		= MAX8925_CHG_IRQ1,
-		.mask_reg	= MAX8925_CHG_IRQ1_MASK,
-		.offs		= 1 << 3,
-	},
-	[MAX8925_IRQ_VCHG_USB_F] =  {
-		.reg		= MAX8925_CHG_IRQ1,
-		.mask_reg	= MAX8925_CHG_IRQ1_MASK,
-		.offs		= 1 << 4,
-	},
-	[MAX8925_IRQ_VCHG_USB_R] = {
-		.reg		= MAX8925_CHG_IRQ1,
-		.mask_reg	= MAX8925_CHG_IRQ1_MASK,
-		.offs		= 1 << 5,
-	},
 	[MAX8925_IRQ_VCHG_THM_OK_R] = {
 		.reg		= MAX8925_CHG_IRQ2,
 		.mask_reg	= MAX8925_CHG_IRQ2_MASK,
diff --git a/drivers/power/max8925_power.c b/drivers/power/max8925_power.c
index be2d563cb315..cbc7a0b6da52 100644
--- a/drivers/power/max8925_power.c
+++ b/drivers/power/max8925_power.c
@@ -120,16 +120,6 @@ static irqreturn_t max8925_charger_handler(int irq, void *data)
 		__set_charger(info, 0);
 		dev_dbg(chip->dev, "Adapter removed\n");
 		break;
-	case MAX8925_IRQ_VCHG_USB_R:
-		info->usb_online = 1;
-		__set_charger(info, 1);
-		dev_dbg(chip->dev, "USB inserted\n");
-		break;
-	case MAX8925_IRQ_VCHG_USB_F:
-		info->usb_online = 0;
-		__set_charger(info, 0);
-		dev_dbg(chip->dev, "USB removed\n");
-		break;
 	case MAX8925_IRQ_VCHG_THM_OK_F:
 		/* Battery is not ready yet */
 		dev_dbg(chip->dev, "Battery temperature is out of range\n");
@@ -370,9 +360,6 @@ static __devinit int max8925_init_charger(struct max8925_chip *chip,
 		REQUEST_IRQ(MAX8925_IRQ_VCHG_DC_F, "ac-remove");
 		REQUEST_IRQ(MAX8925_IRQ_VCHG_DC_R, "ac-insert");
 	}
-	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_OVP, "usb-ovp");
-	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_F, "usb-remove");
-	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_R, "usb-insert");
 	if (!info->no_temp_support) {
 		REQUEST_IRQ(MAX8925_IRQ_VCHG_THM_OK_R, "batt-temp-in-range");
 		REQUEST_IRQ(MAX8925_IRQ_VCHG_THM_OK_F, "batt-temp-out-range");
diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index e742e044e2eb..10aeaf8bfb94 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h
@@ -167,9 +167,6 @@ enum {
 	MAX8925_IRQ_VCHG_DC_OVP,
 	MAX8925_IRQ_VCHG_DC_F,
 	MAX8925_IRQ_VCHG_DC_R,
-	MAX8925_IRQ_VCHG_USB_OVP,
-	MAX8925_IRQ_VCHG_USB_F,
-	MAX8925_IRQ_VCHG_USB_R,
 	MAX8925_IRQ_VCHG_THM_OK_R,
 	MAX8925_IRQ_VCHG_THM_OK_F,
 	MAX8925_IRQ_VCHG_SYSLOW_F,
-- 
cgit v1.2.3


From e7a5f6d55991fb3b3214f435681ee2db96320395 Mon Sep 17 00:00:00 2001
From: Philip Rakity <prakity@marvell.com>
Date: Mon, 29 Aug 2011 09:32:04 -0700
Subject: max8925_power: Enable power change notifications

The power core infrastructure allow external power change
events to be passed to drivers what are listed in the
supplied_to call back field.  Enable this feature by
allowing the supplied_to field to be passed to the driver.

This feature will enable drivers named in the supplied_to
field that have a external_power_changed callback to be
notified when power was been turned on or off.

Signed-off-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/max8925_power.c | 5 +++++
 include/linux/mfd/max8925.h   | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/power/max8925_power.c b/drivers/power/max8925_power.c
index cbc7a0b6da52..377d1e633b4a 100644
--- a/drivers/power/max8925_power.c
+++ b/drivers/power/max8925_power.c
@@ -441,6 +441,8 @@ static __devinit int max8925_power_probe(struct platform_device *pdev)
 	info->ac.properties = max8925_ac_props;
 	info->ac.num_properties = ARRAY_SIZE(max8925_ac_props);
 	info->ac.get_property = max8925_ac_get_prop;
+	info->ac.supplied_to = pdata->supplied_to;
+	info->ac.num_supplicants = pdata->num_supplicants;
 	ret = power_supply_register(&pdev->dev, &info->ac);
 	if (ret)
 		goto out;
@@ -451,6 +453,9 @@ static __devinit int max8925_power_probe(struct platform_device *pdev)
 	info->usb.properties = max8925_usb_props;
 	info->usb.num_properties = ARRAY_SIZE(max8925_usb_props);
 	info->usb.get_property = max8925_usb_get_prop;
+	info->usb.supplied_to = pdata->supplied_to;
+	info->usb.num_supplicants = pdata->num_supplicants;
+
 	ret = power_supply_register(&pdev->dev, &info->usb);
 	if (ret)
 		goto out_usb;
diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index 10aeaf8bfb94..b8e6d9449086 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h
@@ -222,6 +222,8 @@ struct max8925_power_pdata {
 	unsigned	fast_charge:3;	/* charge current */
 	unsigned	no_temp_support:1; /* set if no temperature detect */
 	unsigned	no_insert_detect:1; /* set if no ac insert detect */
+	char		**supplied_to;
+	int		num_supplicants;
 };
 
 /*
-- 
cgit v1.2.3


From cf50dcc24f82a6dc2bce523eec2a979eb1b106e2 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Fri, 25 Nov 2011 14:32:52 +0000
Subject: dsa: Change dsa_uses_{dsa, trailer}_tags() into inline functions

eth_type_trans() will use these functions if DSA is enabled, which
blocks building DSA as a module.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 +-
 include/net/dsa.h         | 53 +++++++++++++++++++++++++++++++++++++++++++++--
 net/dsa/dsa.c             | 23 --------------------
 net/dsa/dsa_priv.h        | 33 -----------------------------
 4 files changed, 52 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 999bb264fe27..63721a69804c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1080,7 +1080,7 @@ struct net_device {
 	struct vlan_group __rcu	*vlgrp;		/* VLAN group */
 #endif
 #ifdef CONFIG_NET_DSA
-	void			*dsa_ptr;	/* dsa specific data */
+	struct dsa_switch_tree	*dsa_ptr;	/* dsa specific data */
 #endif
 	void 			*atalk_ptr;	/* AppleTalk link 	*/
 	struct in_device __rcu	*ip_ptr;	/* IPv4 specific data	*/
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 839f768f9e35..32a1b49e8a8c 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -11,6 +11,9 @@
 #ifndef __LINUX_NET_DSA_H
 #define __LINUX_NET_DSA_H
 
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+
 #define DSA_MAX_SWITCHES	4
 #define DSA_MAX_PORTS		12
 
@@ -54,8 +57,54 @@ struct dsa_platform_data {
 	struct dsa_chip_data	*chip;
 };
 
-extern bool dsa_uses_dsa_tags(void *dsa_ptr);
-extern bool dsa_uses_trailer_tags(void *dsa_ptr);
+struct dsa_switch_tree {
+	/*
+	 * Configuration data for the platform device that owns
+	 * this dsa switch tree instance.
+	 */
+	struct dsa_platform_data	*pd;
+
+	/*
+	 * Reference to network device to use, and which tagging
+	 * protocol to use.
+	 */
+	struct net_device	*master_netdev;
+	__be16			tag_protocol;
+
+	/*
+	 * The switch and port to which the CPU is attached.
+	 */
+	s8			cpu_switch;
+	s8			cpu_port;
+
+	/*
+	 * Link state polling.
+	 */
+	int			link_poll_needed;
+	struct work_struct	link_poll_work;
+	struct timer_list	link_poll_timer;
+
+	/*
+	 * Data for the individual switch chips.
+	 */
+	struct dsa_switch	*ds[DSA_MAX_SWITCHES];
+};
+
+/*
+ * The original DSA tag format and some other tag formats have no
+ * ethertype, which means that we need to add a little hack to the
+ * networking receive path to make sure that received frames get
+ * the right ->protocol assigned to them when one of those tag
+ * formats is in use.
+ */
+static inline bool dsa_uses_dsa_tags(struct dsa_switch_tree *dst)
+{
+	return !!(dst->tag_protocol == htons(ETH_P_DSA));
+}
 
+static inline bool dsa_uses_trailer_tags(struct dsa_switch_tree *dst)
+{
+	return !!(dst->tag_protocol == htons(ETH_P_TRAILER));
+}
 
 #endif
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 0dc1589343c3..66f5c0460cd3 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -199,29 +199,6 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
 }
 
 
-/* hooks for ethertype-less tagging formats *********************************/
-/*
- * The original DSA tag format and some other tag formats have no
- * ethertype, which means that we need to add a little hack to the
- * networking receive path to make sure that received frames get
- * the right ->protocol assigned to them when one of those tag
- * formats is in use.
- */
-bool dsa_uses_dsa_tags(void *dsa_ptr)
-{
-	struct dsa_switch_tree *dst = dsa_ptr;
-
-	return !!(dst->tag_protocol == htons(ETH_P_DSA));
-}
-
-bool dsa_uses_trailer_tags(void *dsa_ptr)
-{
-	struct dsa_switch_tree *dst = dsa_ptr;
-
-	return !!(dst->tag_protocol == htons(ETH_P_TRAILER));
-}
-
-
 /* link polling *************************************************************/
 static void dsa_link_poll_work(struct work_struct *ugly)
 {
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 4b0ea0540442..a45186cb6daf 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -48,39 +48,6 @@ struct dsa_switch {
 	struct net_device	*ports[DSA_MAX_PORTS];
 };
 
-struct dsa_switch_tree {
-	/*
-	 * Configuration data for the platform device that owns
-	 * this dsa switch tree instance.
-	 */
-	struct dsa_platform_data	*pd;
-
-	/*
-	 * Reference to network device to use, and which tagging
-	 * protocol to use.
-	 */
-	struct net_device	*master_netdev;
-	__be16			tag_protocol;
-
-	/*
-	 * The switch and port to which the CPU is attached.
-	 */
-	s8			cpu_switch;
-	s8			cpu_port;
-
-	/*
-	 * Link state polling.
-	 */
-	int			link_poll_needed;
-	struct work_struct	link_poll_work;
-	struct timer_list	link_poll_timer;
-
-	/*
-	 * Data for the individual switch chips.
-	 */
-	struct dsa_switch	*ds[DSA_MAX_SWITCHES];
-};
-
 static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
 {
 	return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port);
-- 
cgit v1.2.3


From 34a430d7bd26b35ca3a7d3fc83663de8ea6e33f6 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Fri, 25 Nov 2011 14:38:38 +0000
Subject: dsa: Allow core and drivers to be built as modules

Change the kconfig types to tristate and adjust the condition for
declaring net_device::dsa_ptr to allow for this.

Adjust the makefile so that if NET_DSA_MV88E6123_61_65=y and
NET_DSA_MV88E6131=m or vice versa then both drivers are built-in.  We
could leave these options as bool and make NET_DSA_MV88E6XXX a
user-selected option, but that would break existing configurations.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 +-
 net/dsa/Kconfig           | 10 +++++-----
 net/dsa/Makefile          |  8 ++++++--
 3 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 63721a69804c..87f7353a2407 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1079,7 +1079,7 @@ struct net_device {
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 	struct vlan_group __rcu	*vlgrp;		/* VLAN group */
 #endif
-#ifdef CONFIG_NET_DSA
+#if IS_ENABLED(CONFIG_NET_DSA)
 	struct dsa_switch_tree	*dsa_ptr;	/* dsa specific data */
 #endif
 	void 			*atalk_ptr;	/* AppleTalk link 	*/
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index c53ded2a98df..7e12303827e8 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,5 +1,5 @@
 menuconfig NET_DSA
-	bool "Distributed Switch Architecture support"
+	tristate "Distributed Switch Architecture support"
 	default n
 	depends on EXPERIMENTAL && NETDEVICES && !S390
 	select PHYLIB
@@ -26,11 +26,11 @@ config NET_DSA_TAG_TRAILER
 
 # switch drivers
 config NET_DSA_MV88E6XXX
-	bool
+	tristate
 	default n
 
 config NET_DSA_MV88E6060
-	bool "Marvell 88E6060 ethernet switch chip support"
+	tristate "Marvell 88E6060 ethernet switch chip support"
 	select NET_DSA_TAG_TRAILER
 	---help---
 	  This enables support for the Marvell 88E6060 ethernet switch
@@ -41,7 +41,7 @@ config NET_DSA_MV88E6XXX_NEED_PPU
 	default n
 
 config NET_DSA_MV88E6131
-	bool "Marvell 88E6085/6095/6095F/6131 ethernet switch chip support"
+	tristate "Marvell 88E6085/6095/6095F/6131 ethernet switch chip support"
 	select NET_DSA_MV88E6XXX
 	select NET_DSA_MV88E6XXX_NEED_PPU
 	select NET_DSA_TAG_DSA
@@ -50,7 +50,7 @@ config NET_DSA_MV88E6131
 	  ethernet switch chips.
 
 config NET_DSA_MV88E6123_61_65
-	bool "Marvell 88E6123/6161/6165 ethernet switch chip support"
+	tristate "Marvell 88E6123/6161/6165 ethernet switch chip support"
 	select NET_DSA_MV88E6XXX
 	select NET_DSA_TAG_EDSA
 	---help---
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 5c48ac556997..191dd482e557 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -11,5 +11,9 @@ dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
 obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
 obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx_drv.o
 mv88e6xxx_drv-y += mv88e6xxx.o
-mv88e6xxx_drv-$(CONFIG_NET_DSA_MV88E6123_61_65) += mv88e6123_61_65.o
-mv88e6xxx_drv-$(CONFIG_NET_DSA_MV88E6131) += mv88e6131.o
+ifdef CONFIG_NET_DSA_MV88E6123_61_65
+mv88e6xxx_drv-y += mv88e6123_61_65.o
+endif
+ifdef CONFIG_NET_DSA_MV88E6131
+mv88e6xxx_drv-y += mv88e6131.o
+endif
-- 
cgit v1.2.3


From d11ead75672d655652dbfc1b1a2c359e5b65536d Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Fri, 25 Nov 2011 14:40:26 +0000
Subject: net: Use IS_ENABLED() in netdevice.h as appropriate

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 87f7353a2407..ac9a4b9344ca 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -144,22 +144,20 @@ static inline bool dev_xmit_complete(int rc)
  *	used.
  */
 
-#if defined(CONFIG_WLAN) || defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25)
 # if defined(CONFIG_MAC80211_MESH)
 #  define LL_MAX_HEADER 128
 # else
 #  define LL_MAX_HEADER 96
 # endif
-#elif defined(CONFIG_TR) || defined(CONFIG_TR_MODULE)
+#elif IS_ENABLED(CONFIG_TR)
 # define LL_MAX_HEADER 48
 #else
 # define LL_MAX_HEADER 32
 #endif
 
-#if !defined(CONFIG_NET_IPIP) && !defined(CONFIG_NET_IPIP_MODULE) && \
-    !defined(CONFIG_NET_IPGRE) &&  !defined(CONFIG_NET_IPGRE_MODULE) && \
-    !defined(CONFIG_IPV6_SIT) && !defined(CONFIG_IPV6_SIT_MODULE) && \
-    !defined(CONFIG_IPV6_TUNNEL) && !defined(CONFIG_IPV6_TUNNEL_MODULE)
+#if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \
+    !IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL)
 #define MAX_HEADER LL_MAX_HEADER
 #else
 #define MAX_HEADER (LL_MAX_HEADER + 48)
@@ -922,7 +920,7 @@ struct net_device_ops {
 	int			(*ndo_get_vf_port)(struct net_device *dev,
 						   int vf, struct sk_buff *skb);
 	int			(*ndo_setup_tc)(struct net_device *dev, u8 tc);
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#if IS_ENABLED(CONFIG_FCOE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
 	int			(*ndo_fcoe_disable)(struct net_device *dev);
 	int			(*ndo_fcoe_ddp_setup)(struct net_device *dev,
@@ -937,7 +935,7 @@ struct net_device_ops {
 						       unsigned int sgc);
 #endif
 
-#if defined(CONFIG_LIBFCOE) || defined(CONFIG_LIBFCOE_MODULE)
+#if IS_ENABLED(CONFIG_LIBFCOE)
 #define NETDEV_FCOE_WWNN 0
 #define NETDEV_FCOE_WWPN 1
 	int			(*ndo_fcoe_get_wwn)(struct net_device *dev,
@@ -1076,7 +1074,7 @@ struct net_device {
 
 	/* Protocol specific pointers */
 
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 	struct vlan_group __rcu	*vlgrp;		/* VLAN group */
 #endif
 #if IS_ENABLED(CONFIG_NET_DSA)
@@ -1242,7 +1240,7 @@ struct net_device {
 	struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
 	u8 prio_tc_map[TC_BITMASK + 1];
 
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#if IS_ENABLED(CONFIG_FCOE)
 	/* max exchange id for FCoE LRO by ddp */
 	unsigned int		fcoe_ddp_xid;
 #endif
-- 
cgit v1.2.3


From b30f8bdcfa7dd05f4268348f3388ff903132f28e Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Mon, 21 Nov 2011 08:57:56 +0000
Subject: eeprom_93cx6: Add data direction control.

Some devices need to know if the data is to be output or read, so add a
data direction into the eeprom structure to tell the driver whether the
data line should be driven.

The user in this case is the Micrel KS8851 which has a direction
control for the EEPROM data line and thus needs to know whether
to drive it (writing) or to tristate it for receiving.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Cc: Wolfram Sang <w.sang@pengutronix.de>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/misc/eeprom/eeprom_93cx6.c | 3 +++
 include/linux/eeprom_93cx6.h       | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/misc/eeprom/eeprom_93cx6.c b/drivers/misc/eeprom/eeprom_93cx6.c
index 7b33de95c4bf..a6037af6f076 100644
--- a/drivers/misc/eeprom/eeprom_93cx6.c
+++ b/drivers/misc/eeprom/eeprom_93cx6.c
@@ -63,6 +63,7 @@ static void eeprom_93cx6_startup(struct eeprom_93cx6 *eeprom)
 	eeprom->reg_data_out = 0;
 	eeprom->reg_data_clock = 0;
 	eeprom->reg_chip_select = 1;
+	eeprom->drive_data = 1;
 	eeprom->register_write(eeprom);
 
 	/*
@@ -101,6 +102,7 @@ static void eeprom_93cx6_write_bits(struct eeprom_93cx6 *eeprom,
 	 */
 	eeprom->reg_data_in = 0;
 	eeprom->reg_data_out = 0;
+	eeprom->drive_data = 1;
 
 	/*
 	 * Start writing all bits.
@@ -140,6 +142,7 @@ static void eeprom_93cx6_read_bits(struct eeprom_93cx6 *eeprom,
 	 */
 	eeprom->reg_data_in = 0;
 	eeprom->reg_data_out = 0;
+	eeprom->drive_data = 0;
 
 	/*
 	 * Start reading all bits.
diff --git a/include/linux/eeprom_93cx6.h b/include/linux/eeprom_93cx6.h
index c4627cbdb8e0..e04546e9c592 100644
--- a/include/linux/eeprom_93cx6.h
+++ b/include/linux/eeprom_93cx6.h
@@ -46,6 +46,7 @@
  * @register_write(struct eeprom_93cx6 *eeprom): handler to
  * write to the eeprom register by using all reg_* fields.
  * @width: eeprom width, should be one of the PCI_EEPROM_WIDTH_* defines
+ * @drive_data: Set if we're driving the data line.
  * @reg_data_in: register field to indicate data input
  * @reg_data_out: register field to indicate data output
  * @reg_data_clock: register field to set the data clock
@@ -62,6 +63,7 @@ struct eeprom_93cx6 {
 
 	int width;
 
+	char drive_data;
 	char reg_data_in;
 	char reg_data_out;
 	char reg_data_clock;
-- 
cgit v1.2.3


From 072bc80156729f853e8bcafe1b17c48c74462887 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Mon, 21 Nov 2011 08:57:57 +0000
Subject: eeprom_93cx6: Add write support

Add support for writing data to EEPROM.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Cc: Wolfram Sang <w.sang@pengutronix.de>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Linux Kernel <linux-kernel@vger.kernel.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/misc/eeprom/eeprom_93cx6.c | 85 ++++++++++++++++++++++++++++++++++++++
 include/linux/eeprom_93cx6.h       |  6 +++
 2 files changed, 91 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/misc/eeprom/eeprom_93cx6.c b/drivers/misc/eeprom/eeprom_93cx6.c
index a6037af6f076..0ff4b02177be 100644
--- a/drivers/misc/eeprom/eeprom_93cx6.c
+++ b/drivers/misc/eeprom/eeprom_93cx6.c
@@ -234,3 +234,88 @@ void eeprom_93cx6_multiread(struct eeprom_93cx6 *eeprom, const u8 word,
 }
 EXPORT_SYMBOL_GPL(eeprom_93cx6_multiread);
 
+/**
+ * eeprom_93cx6_wren - set the write enable state
+ * @eeprom: Pointer to eeprom structure
+ * @enable: true to enable writes, otherwise disable writes
+ *
+ * Set the EEPROM write enable state to either allow or deny
+ * writes depending on the @enable value.
+ */
+void eeprom_93cx6_wren(struct eeprom_93cx6 *eeprom, bool enable)
+{
+	u16 command;
+
+	/* start the command */
+	eeprom_93cx6_startup(eeprom);
+
+	/* create command to enable/disable */
+
+	command = enable ? PCI_EEPROM_EWEN_OPCODE : PCI_EEPROM_EWDS_OPCODE;
+	command <<= (eeprom->width - 2);
+
+	eeprom_93cx6_write_bits(eeprom, command,
+				PCI_EEPROM_WIDTH_OPCODE + eeprom->width);
+
+	eeprom_93cx6_cleanup(eeprom);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_wren);
+
+/**
+ * eeprom_93cx6_write - write data to the EEPROM
+ * @eeprom: Pointer to eeprom structure
+ * @addr: Address to write data to.
+ * @data: The data to write to address @addr.
+ *
+ * Write the @data to the specified @addr in the EEPROM and
+ * waiting for the device to finish writing.
+ *
+ * Note, since we do not expect large number of write operations
+ * we delay in between parts of the operation to avoid using excessive
+ * amounts of CPU time busy waiting.
+ */
+void eeprom_93cx6_write(struct eeprom_93cx6 *eeprom, u8 addr, u16 data)
+{
+	int timeout = 100;
+	u16 command;
+
+	/* start the command */
+	eeprom_93cx6_startup(eeprom);
+
+	command = PCI_EEPROM_WRITE_OPCODE << eeprom->width;
+	command |= addr;
+
+	/* send write command */
+	eeprom_93cx6_write_bits(eeprom, command,
+				PCI_EEPROM_WIDTH_OPCODE + eeprom->width);
+
+	/* send data */
+	eeprom_93cx6_write_bits(eeprom, data, 16);
+
+	/* get ready to check for busy */
+	eeprom->drive_data = 0;
+	eeprom->reg_chip_select = 1;
+	eeprom->register_write(eeprom);
+
+	/* wait at-least 250ns to get DO to be the busy signal */
+	usleep_range(1000, 2000);
+
+	/* wait for DO to go high to signify finish */
+
+	while (true) {
+		eeprom->register_read(eeprom);
+
+		if (eeprom->reg_data_out)
+			break;
+
+		usleep_range(1000, 2000);
+
+		if (--timeout <= 0) {
+			printk(KERN_ERR "%s: timeout\n", __func__);
+			break;
+		}
+	}
+
+	eeprom_93cx6_cleanup(eeprom);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_write);
diff --git a/include/linux/eeprom_93cx6.h b/include/linux/eeprom_93cx6.h
index e04546e9c592..e50f98b0297a 100644
--- a/include/linux/eeprom_93cx6.h
+++ b/include/linux/eeprom_93cx6.h
@@ -33,6 +33,7 @@
 #define PCI_EEPROM_WIDTH_93C86	8
 #define PCI_EEPROM_WIDTH_OPCODE	3
 #define PCI_EEPROM_WRITE_OPCODE	0x05
+#define PCI_EEPROM_ERASE_OPCODE 0x07
 #define PCI_EEPROM_READ_OPCODE	0x06
 #define PCI_EEPROM_EWDS_OPCODE	0x10
 #define PCI_EEPROM_EWEN_OPCODE	0x13
@@ -74,3 +75,8 @@ extern void eeprom_93cx6_read(struct eeprom_93cx6 *eeprom,
 	const u8 word, u16 *data);
 extern void eeprom_93cx6_multiread(struct eeprom_93cx6 *eeprom,
 	const u8 word, __le16 *data, const u16 words);
+
+extern void eeprom_93cx6_wren(struct eeprom_93cx6 *eeprom, bool enable);
+
+extern void eeprom_93cx6_write(struct eeprom_93cx6 *eeprom,
+			       u8 addr, u16 data);
-- 
cgit v1.2.3


From 49f5ed4250c757cb19d953fcac2737a35ca14d76 Mon Sep 17 00:00:00 2001
From: chas williams - CONTRACTOR <chas@cmf.nrl.navy.mil>
Date: Tue, 22 Nov 2011 12:51:56 +0000
Subject: atm: eliminate atm_guess_pdu2truesize()

Signed-off-by: Chas Williams - CONTRACTOR <chas@cmf.nrl.navy.mil>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/iphase.c   |  4 ++--
 include/linux/atmdev.h | 10 ----------
 net/atm/atm_misc.c     |  2 +-
 3 files changed, 3 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index 3d0c2b0fed9c..9e373ba20308 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -1320,8 +1320,8 @@ static void rx_dle_intr(struct atm_dev *dev)
           if (ia_vcc == NULL)
           {
              atomic_inc(&vcc->stats->rx_err);
+             atm_return(vcc, skb->truesize);
              dev_kfree_skb_any(skb);
-             atm_return(vcc, atm_guess_pdu2truesize(len));
              goto INCR_DLE;
            }
           // get real pkt length  pwang_test
@@ -1334,8 +1334,8 @@ static void rx_dle_intr(struct atm_dev *dev)
              atomic_inc(&vcc->stats->rx_err);
              IF_ERR(printk("rx_dle_intr: Bad  AAL5 trailer %d (skb len %d)", 
                                                             length, skb->len);)
+             atm_return(vcc, skb->truesize);
              dev_kfree_skb_any(skb);
-             atm_return(vcc, atm_guess_pdu2truesize(len));
              goto INCR_DLE;
           }
           skb_trim(skb, length);
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 43ea1b2de3ee..f4ff882cb2da 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -445,16 +445,6 @@ void vcc_insert_socket(struct sock *sk);
 
 void atm_dev_release_vccs(struct atm_dev *dev);
 
-/*
- * This is approximately the algorithm used by alloc_skb.
- *
- */
-
-static inline int atm_guess_pdu2truesize(int size)
-{
-	return SKB_TRUESIZE(size);
-}
-
 
 static inline void atm_force_charge(struct atm_vcc *vcc,int truesize)
 {
diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c
index f41f02656ff4..876fbe83e2e4 100644
--- a/net/atm/atm_misc.c
+++ b/net/atm/atm_misc.c
@@ -26,7 +26,7 @@ struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc, int pdu_size,
 				 gfp_t gfp_flags)
 {
 	struct sock *sk = sk_atm(vcc);
-	int guess = atm_guess_pdu2truesize(pdu_size);
+	int guess = SKB_TRUESIZE(pdu_size);
 
 	atm_force_charge(vcc, guess);
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
-- 
cgit v1.2.3


From c8421147926fcacf53081a36438a0bed394da9f5 Mon Sep 17 00:00:00 2001
From: Aman Deep <amandeep3986@gmail.com>
Date: Tue, 22 Nov 2011 19:33:36 +0530
Subject: xHCI: Adding #define values used for hub descriptor

xhci-hub used some numerical values for initialisation of root hub
descriptors. #define values are addded in usb 2.0 hub specification
file and these values are used for root hub characteristics
initialisation.

Also use some #defines in places where magic numbers are being used.

Signed-off-by: Aman Deep <amandeep3986@gmail.com>
Acked-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/host/xhci-hub.c | 18 ++++++++----------
 include/linux/usb/ch11.h    | 19 ++++++++++++++-----
 2 files changed, 22 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 430e88fd3f6c..35e257f79c7b 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -57,17 +57,15 @@ static void xhci_common_hub_descriptor(struct xhci_hcd *xhci,
 	desc->bHubContrCurrent = 0;
 
 	desc->bNbrPorts = ports;
-	/* Ugh, these should be #defines, FIXME */
-	/* Using table 11-13 in USB 2.0 spec. */
 	temp = 0;
-	/* Bits 1:0 - support port power switching, or power always on */
+	/* Bits 1:0 - support per-port power switching, or power always on */
 	if (HCC_PPC(xhci->hcc_params))
-		temp |= 0x0001;
+		temp |= HUB_CHAR_INDV_PORT_LPSM;
 	else
-		temp |= 0x0002;
+		temp |= HUB_CHAR_NO_LPSM;
 	/* Bit  2 - root hubs are not part of a compound device */
 	/* Bits 4:3 - individual port over current protection */
-	temp |= 0x0008;
+	temp |= HUB_CHAR_INDV_PORT_OCPM;
 	/* Bits 6:5 - no TTs in root ports */
 	/* Bit  7 - no port indicators */
 	desc->wHubCharacteristics = cpu_to_le16(temp);
@@ -86,9 +84,9 @@ static void xhci_usb2_hub_descriptor(struct usb_hcd *hcd, struct xhci_hcd *xhci,
 	ports = xhci->num_usb2_ports;
 
 	xhci_common_hub_descriptor(xhci, desc, ports);
-	desc->bDescriptorType = 0x29;
+	desc->bDescriptorType = USB_DT_HUB;
 	temp = 1 + (ports / 8);
-	desc->bDescLength = 7 + 2 * temp;
+	desc->bDescLength = USB_DT_HUB_NONVAR_SIZE + 2 * temp;
 
 	/* The Device Removable bits are reported on a byte granularity.
 	 * If the port doesn't exist within that byte, the bit is set to 0.
@@ -137,8 +135,8 @@ static void xhci_usb3_hub_descriptor(struct usb_hcd *hcd, struct xhci_hcd *xhci,
 
 	ports = xhci->num_usb3_ports;
 	xhci_common_hub_descriptor(xhci, desc, ports);
-	desc->bDescriptorType = 0x2a;
-	desc->bDescLength = 12;
+	desc->bDescriptorType = USB_DT_SS_HUB;
+	desc->bDescLength = USB_DT_SS_HUB_SIZE;
 
 	/* header decode latency should be zero for roothubs,
 	 * see section 4.23.5.2.
diff --git a/include/linux/usb/ch11.h b/include/linux/usb/ch11.h
index 4ebaf0824179..55e7325926c1 100644
--- a/include/linux/usb/ch11.h
+++ b/include/linux/usb/ch11.h
@@ -165,11 +165,20 @@ struct usb_port_status {
  * wHubCharacteristics (masks)
  * See USB 2.0 spec Table 11-13, offset 3
  */
-#define HUB_CHAR_LPSM		0x0003 /* D1 .. D0 */
-#define HUB_CHAR_COMPOUND	0x0004 /* D2       */
-#define HUB_CHAR_OCPM		0x0018 /* D4 .. D3 */
-#define HUB_CHAR_TTTT           0x0060 /* D6 .. D5 */
-#define HUB_CHAR_PORTIND        0x0080 /* D7       */
+#define HUB_CHAR_LPSM		0x0003 /* Logical Power Switching Mode mask */
+#define HUB_CHAR_COMMON_LPSM	0x0000 /* All ports power control at once */
+#define HUB_CHAR_INDV_PORT_LPSM	0x0001 /* per-port power control */
+#define HUB_CHAR_NO_LPSM	0x0002 /* no power switching */
+
+#define HUB_CHAR_COMPOUND	0x0004 /* hub is part of a compound device */
+
+#define HUB_CHAR_OCPM		0x0018 /* Over-Current Protection Mode mask */
+#define HUB_CHAR_COMMON_OCPM	0x0000 /* All ports Over-Current reporting */
+#define HUB_CHAR_INDV_PORT_OCPM	0x0008 /* per-port Over-current reporting */
+#define HUB_CHAR_NO_OCPM	0x0010 /* No Over-current Protection support */
+
+#define HUB_CHAR_TTTT		0x0060 /* TT Think Time mask */
+#define HUB_CHAR_PORTIND	0x0080 /* per-port indicators (LEDs) */
 
 struct usb_hub_status {
 	__le16 wHubStatus;
-- 
cgit v1.2.3


From 448ac154c957c4580531fa0c8f2045816fe2f0e7 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 22 Nov 2011 13:41:24 -0800
Subject: serial/8250_pci: setup-quirk workaround for the kt serial controller

Workaround dropped notifications in the iir register.  Prevent reads
coincident with new interrupt notifications by reading the iir at most
once per interrupt.

Reported-by: Nhan H Mai <nhan.h.mai@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/8250.c     |  4 +++-
 drivers/tty/serial/8250_pci.c | 17 ++++++++++++++++-
 include/linux/serial_core.h   |  1 +
 3 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250.c b/drivers/tty/serial/8250.c
index eeadf1b8e093..3a8e5bfe17e2 100644
--- a/drivers/tty/serial/8250.c
+++ b/drivers/tty/serial/8250.c
@@ -1619,11 +1619,13 @@ static irqreturn_t serial8250_interrupt(int irq, void *dev_id)
 	do {
 		struct uart_8250_port *up;
 		struct uart_port *port;
+		bool skip;
 
 		up = list_entry(l, struct uart_8250_port, list);
 		port = &up->port;
+		skip = pass_counter && up->port.flags & UPF_IIR_ONCE;
 
-		if (port->handle_irq(port)) {
+		if (!skip && port->handle_irq(port)) {
 			handled = 1;
 			end = NULL;
 		} else if (end == NULL)
diff --git a/drivers/tty/serial/8250_pci.c b/drivers/tty/serial/8250_pci.c
index 825937a5f210..8742ef5be6ba 100644
--- a/drivers/tty/serial/8250_pci.c
+++ b/drivers/tty/serial/8250_pci.c
@@ -1092,6 +1092,14 @@ static int skip_tx_en_setup(struct serial_private *priv,
 	return pci_default_setup(priv, board, port, idx);
 }
 
+static int kt_serial_setup(struct serial_private *priv,
+			   const struct pciserial_board *board,
+			   struct uart_port *port, int idx)
+{
+	port->flags |= UPF_IIR_ONCE;
+	return skip_tx_en_setup(priv, board, port, idx);
+}
+
 static int pci_eg20t_init(struct pci_dev *dev)
 {
 #if defined(CONFIG_SERIAL_PCH_UART) || defined(CONFIG_SERIAL_PCH_UART_MODULE)
@@ -1110,7 +1118,6 @@ pci_xr17c154_setup(struct serial_private *priv,
 	return pci_default_setup(priv, board, port, idx);
 }
 
-/* This should be in linux/pci_ids.h */
 #define PCI_VENDOR_ID_SBSMODULARIO	0x124B
 #define PCI_SUBVENDOR_ID_SBSMODULARIO	0x124B
 #define PCI_DEVICE_ID_OCTPRO		0x0001
@@ -1136,6 +1143,7 @@ pci_xr17c154_setup(struct serial_private *priv,
 #define PCI_DEVICE_ID_OXSEMI_16PCI958	0x9538
 #define PCIE_DEVICE_ID_NEO_2_OX_IBM	0x00F6
 #define PCI_DEVICE_ID_PLX_CRONYX_OMEGA	0xc001
+#define PCI_DEVICE_ID_INTEL_PATSBURG_KT 0x1d3d
 
 /* Unknown vendors/cards - this should not be in linux/pci_ids.h */
 #define PCI_SUBDEVICE_ID_UNKNOWN_0x1584	0x1584
@@ -1220,6 +1228,13 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
 		.subdevice	= PCI_ANY_ID,
 		.setup		= ce4100_serial_setup,
 	},
+	{
+		.vendor		= PCI_VENDOR_ID_INTEL,
+		.device		= PCI_DEVICE_ID_INTEL_PATSBURG_KT,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.setup		= kt_serial_setup,
+	},
 	/*
 	 * ITE
 	 */
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 945e02cae614..b67305e3ad57 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -351,6 +351,7 @@ struct uart_port {
 #define UPF_CONS_FLOW		((__force upf_t) (1 << 23))
 #define UPF_SHARE_IRQ		((__force upf_t) (1 << 24))
 #define UPF_EXAR_EFR		((__force upf_t) (1 << 25))
+#define UPF_IIR_ONCE		((__force upf_t) (1 << 26))
 /* The exact UART type is known and should not be probed.  */
 #define UPF_FIXED_TYPE		((__force upf_t) (1 << 27))
 #define UPF_BOOT_AUTOCONF	((__force upf_t) (1 << 28))
-- 
cgit v1.2.3


From 876f6e67d1c617c098c67934a8d00b065bb9688b Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Sat, 26 Nov 2011 19:54:58 +0000
Subject: net/mlx4: move RSS related definitions to be global

Towards adding RSS support for IB drivers/application who use
the mlx4 HW, make the RSS related definitions global and change
the mlx4_en driver to use them.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Shlomo Pongratz <shlomop@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_resources.c |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_rx.c        | 10 +++++----
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h      | 10 ---------
 include/linux/mlx4/qp.h                           | 27 +++++++++++++++++++++++
 4 files changed, 34 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index 0dfb4ec8a9dd..bcbc54c16947 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -44,7 +44,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
 	struct mlx4_en_dev *mdev = priv->mdev;
 
 	memset(context, 0, sizeof *context);
-	context->flags = cpu_to_be32(7 << 16 | rss << 13);
+	context->flags = cpu_to_be32(7 << 16 | rss << MLX4_RSS_QPC_FLAG_OFFSET);
 	context->pd = cpu_to_be32(mdev->priv_pdn);
 	context->mtu_msgmax = 0xff;
 	if (!is_tx && !rss)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index c2df6c358603..d4bad5d57fb7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -837,9 +837,10 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
 	struct mlx4_qp_context context;
-	struct mlx4_en_rss_context *rss_context;
+	struct mlx4_rss_context *rss_context;
 	void *ptr;
-	u8 rss_mask = 0x3f;
+	u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 |
+		MLX4_RSS_TCP_IPV6 | MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6);
 	int i, qpn;
 	int err = 0;
 	int good_qps = 0;
@@ -877,13 +878,14 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
 	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
 				priv->rx_ring[0].cqn, &context);
 
-	ptr = ((void *) &context) + 0x3c;
+	ptr = ((void *) &context) + offsetof(struct mlx4_qp_context, pri_path)
+					+ MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
 	rss_context = ptr;
 	rss_context->base_qpn = cpu_to_be32(ilog2(priv->rx_ring_num) << 24 |
 					    (rss_map->base_qpn));
 	rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
 	rss_context->flags = rss_mask;
-	rss_context->hash_fn = 1;
+	rss_context->hash_fn = MLX4_RSS_HASH_TOP;
 	for (i = 0; i < 10; i++)
 		rss_context->rss_key[i] = rsskey[i];
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 207b5add3ca8..ef7dfcff588d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -366,16 +366,6 @@ struct mlx4_en_rss_map {
 	enum mlx4_qp_state indir_state;
 };
 
-struct mlx4_en_rss_context {
-	__be32 base_qpn;
-	__be32 default_qpn;
-	u16 reserved;
-	u8 hash_fn;
-	u8 flags;
-	__be32 rss_key[10];
-	__be32 base_qpn_udp;
-};
-
 struct mlx4_en_port_state {
 	int link_state;
 	int link_speed;
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 48cc4cb97858..6562ff6aa9d6 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -97,6 +97,33 @@ enum {
 	MLX4_QP_BIT_RIC				= 1 <<	4,
 };
 
+enum {
+	MLX4_RSS_HASH_XOR			= 0,
+	MLX4_RSS_HASH_TOP			= 1,
+
+	MLX4_RSS_UDP_IPV6			= 1 << 0,
+	MLX4_RSS_UDP_IPV4			= 1 << 1,
+	MLX4_RSS_TCP_IPV6			= 1 << 2,
+	MLX4_RSS_IPV6				= 1 << 3,
+	MLX4_RSS_TCP_IPV4			= 1 << 4,
+	MLX4_RSS_IPV4				= 1 << 5,
+
+	/* offset of mlx4_rss_context within mlx4_qp_context.pri_path */
+	MLX4_RSS_OFFSET_IN_QPC_PRI_PATH		= 0x24,
+	/* offset of being RSS indirection QP within mlx4_qp_context.flags */
+	MLX4_RSS_QPC_FLAG_OFFSET		= 13,
+};
+
+struct mlx4_rss_context {
+	__be32			base_qpn;
+	__be32			default_qpn;
+	u16			reserved;
+	u8			hash_fn;
+	u8			flags;
+	__be32			rss_key[10];
+	__be32			base_qpn_udp;
+};
+
 struct mlx4_qp_path {
 	u8			fl;
 	u8			reserved1[2];
-- 
cgit v1.2.3


From 559a9f1d354b577af28f84181751820ff7d29feb Mon Sep 17 00:00:00 2001
From: Oren Duer <oren@mellanox.co.il>
Date: Sat, 26 Nov 2011 19:55:15 +0000
Subject: net/mlx4_en: fix WOL handlers were always looking at port2 capability
 bit

There are 2 capability bits for WOL, one for each port.
WOL handlers were looking only on the second bit, regardless of the port.

Signed-off-by: Oren Duer <oren@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 20 ++++++++++++++++++--
 include/linux/mlx4/device.h                     |  3 ++-
 2 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index ee637a200915..7dbc6a230779 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -106,8 +106,17 @@ static void mlx4_en_get_wol(struct net_device *netdev,
 	struct mlx4_en_priv *priv = netdev_priv(netdev);
 	int err = 0;
 	u64 config = 0;
+	u64 mask;
 
-	if (!(priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_WOL)) {
+	if ((priv->port < 1) || (priv->port > 2)) {
+		en_err(priv, "Failed to get WoL information\n");
+		return;
+	}
+
+	mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 :
+		MLX4_DEV_CAP_FLAG_WOL_PORT2;
+
+	if (!(priv->mdev->dev->caps.flags & mask)) {
 		wol->supported = 0;
 		wol->wolopts = 0;
 		return;
@@ -136,8 +145,15 @@ static int mlx4_en_set_wol(struct net_device *netdev,
 	struct mlx4_en_priv *priv = netdev_priv(netdev);
 	u64 config = 0;
 	int err = 0;
+	u64 mask;
+
+	if ((priv->port < 1) || (priv->port > 2))
+		return -EOPNOTSUPP;
+
+	mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 :
+		MLX4_DEV_CAP_FLAG_WOL_PORT2;
 
-	if (!(priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_WOL))
+	if (!(priv->mdev->dev->caps.flags & mask))
 		return -EOPNOTSUPP;
 
 	if (wol->supported & ~WAKE_MAGIC)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 84b0b1848f17..ca2c39771c38 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -77,7 +77,8 @@ enum {
 	MLX4_DEV_CAP_FLAG_IBOE		= 1LL << 30,
 	MLX4_DEV_CAP_FLAG_UC_LOOPBACK	= 1LL << 32,
 	MLX4_DEV_CAP_FLAG_FCS_KEEP	= 1LL << 34,
-	MLX4_DEV_CAP_FLAG_WOL		= 1LL << 38,
+	MLX4_DEV_CAP_FLAG_WOL_PORT1	= 1LL << 37,
+	MLX4_DEV_CAP_FLAG_WOL_PORT2	= 1LL << 38,
 	MLX4_DEV_CAP_FLAG_UDP_RSS	= 1LL << 40,
 	MLX4_DEV_CAP_FLAG_VEP_UC_STEER	= 1LL << 41,
 	MLX4_DEV_CAP_FLAG_VEP_MC_STEER	= 1LL << 42,
-- 
cgit v1.2.3


From 60d6fe99e4a507f77b63c090eb8aacb67e21687a Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.co.il>
Date: Sat, 26 Nov 2011 19:55:19 +0000
Subject: net/mlx4_en: adding loopback support

Device must be in promiscuous mode or DMAC must be same as the host MAC, or
else packet will be dropped by the HW rx filtering.

Signed-off-by: Amir Vadai <amirv@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 19 +++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/en_tx.c     |  3 +--
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  1 +
 include/linux/mlx4/qp.h                        |  1 +
 4 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 78d776bc355c..4c5bbb3aad31 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -974,6 +974,21 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
+static int mlx4_en_set_features(struct net_device *netdev,
+		netdev_features_t features)
+{
+	struct mlx4_en_priv *priv = netdev_priv(netdev);
+
+	if (features & NETIF_F_LOOPBACK)
+		priv->ctrl_flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
+	else
+		priv->ctrl_flags &=
+			cpu_to_be32(~MLX4_WQE_CTRL_FORCE_LOOPBACK);
+
+	return 0;
+
+}
+
 static const struct net_device_ops mlx4_netdev_ops = {
 	.ndo_open		= mlx4_en_open,
 	.ndo_stop		= mlx4_en_close,
@@ -990,6 +1005,7 @@ static const struct net_device_ops mlx4_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= mlx4_en_netpoll,
 #endif
+	.ndo_set_features	= mlx4_en_set_features,
 };
 
 int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
@@ -1022,6 +1038,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	priv->port = port;
 	priv->port_up = false;
 	priv->flags = prof->flags;
+	priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
+			MLX4_WQE_CTRL_SOLICITED);
 	priv->tx_ring_num = prof->tx_ring_num;
 	priv->rx_ring_num = prof->rx_ring_num;
 	priv->mac_index = -1;
@@ -1088,6 +1106,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	dev->features = dev->hw_features | NETIF_F_HIGHDMA |
 			NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX |
 			NETIF_F_HW_VLAN_FILTER;
+	dev->hw_features |= NETIF_F_LOOPBACK;
 
 	mdev->pndev[port] = dev;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 3094f940b928..807c2186548c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -679,8 +679,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag);
 	tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!vlan_tag;
 	tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
-	tx_desc->ctrl.srcrb_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
-						MLX4_WQE_CTRL_SOLICITED);
+	tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
 							 MLX4_WQE_CTRL_TCP_UDP_CSUM);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index d2dd97fa091f..ea2ba6899e9a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -453,6 +453,7 @@ struct mlx4_en_priv {
 	int base_qpn;
 
 	struct mlx4_en_rss_map rss_map;
+	u32 ctrl_flags;
 	u32 flags;
 #define MLX4_EN_FLAG_PROMISC	0x1
 #define MLX4_EN_FLAG_MC_PROMISC	0x2
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 6562ff6aa9d6..bee8fa231276 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -210,6 +210,7 @@ struct mlx4_wqe_ctrl_seg {
 	 * [4]   IP checksum
 	 * [3:2] C (generate completion queue entry)
 	 * [1]   SE (solicited event)
+	 * [0]   FL (force loopback)
 	 */
 	__be32			srcrb_flags;
 	/*
-- 
cgit v1.2.3


From 4f5ca836bef3dd3eb602152d5d712a513998264e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 23 Nov 2011 00:49:14 -0800
Subject: HID: hid-input: add support for HID devices reporting Battery
 Strength

Some HID devices, such as my Bluetooth mouse, report their battery
strength as an event.  Rather than passing it through as a strange
absolute input event, this patch registers it with the power_supply
subsystem as a battery, so that the device's Battery Strength can be
reported to usermode.

The battery appears in sysfs names
/sys/class/power_supply/hid-<UNIQ>-battery, and it is a child of the
battery-containing device, so it should be clear what it's the battery of.

Unfortunately on my current Fedora 16 system, while the battery does
appear in the UI, it is listed as a Laptop Battery with 0% charge (since
it ignores the "capacity" property of the battery and instead computes
it from the "energy*" fields, which we can't supply given the limited
information contained within the HID Report).

Still, this patch is the first step.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/Kconfig     |   5 +++
 drivers/hid/hid-input.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/hid.h     |  16 +++++++
 3 files changed, 131 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 22a4a051f221..3a97f1fab243 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -31,6 +31,11 @@ config HID
 
 	  If unsure, say Y.
 
+config HID_BATTERY_STRENGTH
+	bool
+	depends on POWER_SUPPLY
+	default y
+
 config HIDRAW
 	bool "/dev/hidraw raw HID device support"
 	depends on HID
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 6e3252651ce3..2d96b782b203 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -271,6 +271,97 @@ static __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code)
 	return logical_extents / physical_extents;
 }
 
+#ifdef CONFIG_HID_BATTERY_STRENGTH
+static enum power_supply_property hidinput_battery_props[] = {
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_MODEL_NAME,
+};
+
+static int hidinput_get_battery_property(struct power_supply *psy,
+					 enum power_supply_property prop,
+					 union power_supply_propval *val)
+{
+	struct hid_device *dev = container_of(psy, struct hid_device, battery);
+	int ret = 0;
+
+	switch (prop) {
+	case POWER_SUPPLY_PROP_PRESENT:
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = 1;
+		break;
+
+	case POWER_SUPPLY_PROP_CAPACITY:
+		if (dev->battery_min < dev->battery_max &&
+		    dev->battery_val >= dev->battery_min &&
+		    dev->battery_val <= dev->battery_max)
+			val->intval = (100 * (dev->battery_val - dev->battery_min)) /
+				(dev->battery_max - dev->battery_min);
+		else
+			ret = -EINVAL;
+		break;
+
+	case POWER_SUPPLY_PROP_MODEL_NAME:
+		val->strval = dev->name;
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static void hidinput_setup_battery(struct hid_device *dev, s32 min, s32 max)
+{
+	struct power_supply *battery = &dev->battery;
+	int ret;
+
+	if (battery->name != NULL)
+		return;		/* already initialized? */
+
+	battery->name = kasprintf(GFP_KERNEL, "hid-%s-battery", dev->uniq);
+	if (battery->name == NULL)
+		return;
+
+	battery->type = POWER_SUPPLY_TYPE_BATTERY;
+	battery->properties = hidinput_battery_props;
+	battery->num_properties = ARRAY_SIZE(hidinput_battery_props);
+	battery->use_for_apm = 0;
+	battery->get_property = hidinput_get_battery_property;
+
+	dev->battery_min = min;
+	dev->battery_max = max;
+
+	ret = power_supply_register(&dev->dev, battery);
+	if (ret != 0) {
+		hid_warn(dev, "can't register power supply: %d\n", ret);
+		kfree(battery->name);
+		battery->name = NULL;
+	}
+}
+
+static void hidinput_cleanup_battery(struct hid_device *dev)
+{
+	if (!dev->battery.name)
+		return;
+
+	power_supply_unregister(&dev->battery);
+	kfree(dev->battery.name);
+	dev->battery.name = NULL;
+}
+#else  /* !CONFIG_HID_BATTERY_STRENGTH */
+static void hidinput_setup_battery(struct hid_device *dev, s32 min, s32 max)
+{
+}
+
+static void hidinput_cleanup_battery(struct hid_device *dev)
+{
+}
+#endif	/* CONFIG_HID_BATTERY_STRENGTH */
+
 static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_field *field,
 				     struct hid_usage *usage)
 {
@@ -629,6 +720,16 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 		}
 		break;
 
+	case HID_UP_GENDEVCTRLS:
+		if ((usage->hid & HID_USAGE) == 0x20) {	/* Battery Strength */
+			hidinput_setup_battery(device,
+					       field->logical_minimum,
+					       field->logical_maximum);
+			goto ignore;
+		} else
+			goto unknown;
+		break;
+
 	case HID_UP_HPVENDOR:	/* Reported on a Dutch layout HP5308 */
 		set_bit(EV_REP, input->evbit);
 		switch (usage->hid & HID_USAGE) {
@@ -760,6 +861,13 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct
 
 	input = field->hidinput->input;
 
+	if (usage->hid == HID_DC_BATTERYSTRENGTH) {
+		hid->battery_val = value;
+		hid_dbg(hid, "battery value is %d (range %d-%d)\n",
+			value, hid->battery_min, hid->battery_max);
+		return;
+	}
+
 	if (!usage->type)
 		return;
 
@@ -1016,6 +1124,8 @@ void hidinput_disconnect(struct hid_device *hid)
 {
 	struct hid_input *hidinput, *next;
 
+	hidinput_cleanup_battery(hid);
+
 	list_for_each_entry_safe(hidinput, next, &hid->inputs, list) {
 		list_del(&hidinput->list);
 		input_unregister_device(hidinput->input);
diff --git a/include/linux/hid.h b/include/linux/hid.h
index deed5f9a1e1c..7f344c3da767 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -72,6 +72,7 @@
 #include <linux/workqueue.h>
 #include <linux/input.h>
 #include <linux/semaphore.h>
+#include <linux/power_supply.h>
 
 /*
  * We parse each description item into this structure. Short items data
@@ -190,6 +191,7 @@ struct hid_item {
 #define HID_UP_UNDEFINED	0x00000000
 #define HID_UP_GENDESK		0x00010000
 #define HID_UP_SIMULATION	0x00020000
+#define HID_UP_GENDEVCTRLS	0x00060000
 #define HID_UP_KEYBOARD		0x00070000
 #define HID_UP_LED		0x00080000
 #define HID_UP_BUTTON		0x00090000
@@ -239,6 +241,8 @@ struct hid_item {
 #define HID_GD_RIGHT		0x00010092
 #define HID_GD_LEFT		0x00010093
 
+#define HID_DC_BATTERYSTRENGTH	0x00060020
+
 #define HID_DG_DIGITIZER	0x000d0001
 #define HID_DG_PEN		0x000d0002
 #define HID_DG_LIGHTPEN		0x000d0003
@@ -482,6 +486,18 @@ struct hid_device {							/* device report descriptor */
 	struct hid_driver *driver;
 	struct hid_ll_driver *ll_driver;
 
+#ifdef CONFIG_HID_BATTERY_STRENGTH
+	/*
+	 * Power supply information for HID devices which report
+	 * battery strength. power_supply is registered iff
+	 * battery.name is non-NULL.
+	 */
+	struct power_supply battery;
+	__s32 battery_min;
+	__s32 battery_max;
+	__s32 battery_val;
+#endif
+
 	unsigned int status;						/* see STAT flags above */
 	unsigned claimed;						/* Claimed by hidinput, hiddev? */
 	unsigned quirks;						/* Various quirks the device can pull on us */
-- 
cgit v1.2.3


From 1d9d9213d526f2f4ef9a3aa198a29a0b1a670fa1 Mon Sep 17 00:00:00 2001
From: Simon Wunderlich <simon.wunderlich@s2003.tu-chemnitz.de>
Date: Fri, 18 Nov 2011 14:20:43 +0100
Subject: wireless: Add NoAck per tid support

This patch contains the configuration changes in nl80211/cfg80211.

Signed-off-by: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
Signed-off-by: Mathias Kretschmer <mathias.kretschmer@fokus.fraunhofer.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 10 ++++++++++
 include/net/cfg80211.h  |  6 ++++++
 net/wireless/nl80211.c  | 28 ++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 97bfebfcce90..1fc04853ec95 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -538,6 +538,9 @@
  *	OLBC handling in hostapd. Beacons are reported in %NL80211_CMD_FRAME
  *	messages. Note that per PHY only one application may register.
  *
+ * @NL80211_CMD_SET_NOACK_MAP: sets a bitmap for the individual TIDs whether
+ *      No Acknowledgement Policy should be applied.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -675,6 +678,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
 
+	NL80211_CMD_SET_NOACK_MAP,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -1185,6 +1190,9 @@ enum nl80211_commands {
  *    abides to when initiating radiation on DFS channels. A country maps
  *    to one DFS region.
  *
+ * @NL80211_ATTR_NOACK_MAP: This u16 bitmap contains the No Ack Policy of
+ *      up to 16 TIDs.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1428,6 +1436,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_DISABLE_HT,
 	NL80211_ATTR_HT_CAPABILITY_MASK,
 
+	NL80211_ATTR_NOACK_MAP,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d5e18913f293..38ce452da20f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1465,6 +1465,8 @@ struct cfg80211_gtk_rekey_data {
  *
  * @probe_client: probe an associated client, must return a cookie that it
  *	later passes to cfg80211_probe_status().
+ *
+ * @set_noack_map: Set the NoAck Map for the TIDs.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -1658,6 +1660,10 @@ struct cfg80211_ops {
 	int	(*probe_client)(struct wiphy *wiphy, struct net_device *dev,
 				const u8 *peer, u64 *cookie);
 
+	int	(*set_noack_map)(struct wiphy *wiphy,
+				  struct net_device *dev,
+				  u16 noack_map);
+
 	struct ieee80211_channel *(*get_channel)(struct wiphy *wiphy);
 };
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a1cabde7cb5f..6026c29c338d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -204,6 +204,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_HT_CAPABILITY_MASK] = {
 		.len = NL80211_HT_CAPABILITY_LEN
 	},
+	[NL80211_ATTR_NOACK_MAP] = { .type = NLA_U16 },
 };
 
 /* policy for the key attributes */
@@ -904,6 +905,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
 		CMD(sched_scan_start, START_SCHED_SCAN);
 	CMD(probe_client, PROBE_CLIENT);
+	CMD(set_noack_map, SET_NOACK_MAP);
 	if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
 		i++;
 		NLA_PUT_U32(msg, i, NL80211_CMD_REGISTER_BEACONS);
@@ -1759,6 +1761,23 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
 	return rdev->ops->del_virtual_intf(&rdev->wiphy, dev);
 }
 
+static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	u16 noack_map;
+
+	if (!info->attrs[NL80211_ATTR_NOACK_MAP])
+		return -EINVAL;
+
+	if (!rdev->ops->set_noack_map)
+		return -EOPNOTSUPP;
+
+	noack_map = nla_get_u16(info->attrs[NL80211_ATTR_NOACK_MAP]);
+
+	return rdev->ops->set_noack_map(&rdev->wiphy, dev, noack_map);
+}
+
 struct get_key_cookie {
 	struct sk_buff *msg;
 	int error;
@@ -6604,6 +6623,15 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_WIPHY |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_SET_NOACK_MAP,
+		.doit = nl80211_set_noack_map,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
-- 
cgit v1.2.3


From dca7e9430cb3e492437a5ce891b8b3e315c147ca Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@cozybit.com>
Date: Thu, 24 Nov 2011 17:15:24 -0800
Subject: {nl,cfg,mac}80211: implement dot11MeshHWMPperrMinInterval

As per 802.11mb 13.9.11.3

Signed-off-by: Thomas Pedersen <thomas@cozybit.com>
Signed-off-by: Javier Cardona <javier@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h       | 5 +++++
 include/net/cfg80211.h        | 1 +
 net/mac80211/cfg.c            | 3 +++
 net/mac80211/debugfs_netdev.c | 3 +++
 net/mac80211/ieee80211_i.h    | 4 +++-
 net/mac80211/mesh.c           | 1 +
 net/mac80211/mesh_hwmp.c      | 6 ++++++
 net/wireless/mesh.c           | 2 ++
 net/wireless/nl80211.c        | 6 ++++++
 9 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 1fc04853ec95..f51e3bf93a96 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -2094,6 +2094,10 @@ enum nl80211_mntr_flags {
  * access to a broader network beyond the MBSS.  This is done via Root
  * Announcement frames.
  *
+ * @NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL: The minimum interval of time (in
+ * TUs) during which a mesh STA can send only one Action frame containing a
+ * PERR element.
+ *
  * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute
  *
  * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use
@@ -2117,6 +2121,7 @@ enum nl80211_meshconf_params {
 	NL80211_MESHCONF_ELEMENT_TTL,
 	NL80211_MESHCONF_HWMP_RANN_INTERVAL,
 	NL80211_MESHCONF_GATE_ANNOUNCEMENTS,
+	NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL,
 
 	/* keep last */
 	__NL80211_MESHCONF_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 232d1a5c5672..ce6236b5473d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -782,6 +782,7 @@ struct mesh_config {
 	u16 min_discovery_timeout;
 	u32 dot11MeshHWMPactivePathTimeout;
 	u16 dot11MeshHWMPpreqMinInterval;
+	u16 dot11MeshHWMPperrMinInterval;
 	u16 dot11MeshHWMPnetDiameterTraversalTime;
 	u8  dot11MeshHWMPRootMode;
 	u16 dot11MeshHWMPRannInterval;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 7ccba83dc8c8..393b2a4445b8 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1272,6 +1272,9 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
 	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, mask))
 		conf->dot11MeshHWMPpreqMinInterval =
 			nconf->dot11MeshHWMPpreqMinInterval;
+	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, mask))
+		conf->dot11MeshHWMPperrMinInterval =
+			nconf->dot11MeshHWMPperrMinInterval;
 	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
 			   mask))
 		conf->dot11MeshHWMPnetDiameterTraversalTime =
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 9352819a986b..8df28910b8ee 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -405,6 +405,8 @@ IEEE80211_IF_FILE(dot11MeshHWMPactivePathTimeout,
 		u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout, DEC);
 IEEE80211_IF_FILE(dot11MeshHWMPpreqMinInterval,
 		u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval, DEC);
+IEEE80211_IF_FILE(dot11MeshHWMPperrMinInterval,
+		u.mesh.mshcfg.dot11MeshHWMPperrMinInterval, DEC);
 IEEE80211_IF_FILE(dot11MeshHWMPnetDiameterTraversalTime,
 		u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC);
 IEEE80211_IF_FILE(dot11MeshHWMPmaxPREQretries,
@@ -534,6 +536,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
 	MESHPARAMS_ADD(dot11MeshMaxPeerLinks);
 	MESHPARAMS_ADD(dot11MeshHWMPactivePathTimeout);
 	MESHPARAMS_ADD(dot11MeshHWMPpreqMinInterval);
+	MESHPARAMS_ADD(dot11MeshHWMPperrMinInterval);
 	MESHPARAMS_ADD(dot11MeshHWMPnetDiameterTraversalTime);
 	MESHPARAMS_ADD(dot11MeshHWMPmaxPREQretries);
 	MESHPARAMS_ADD(path_refresh_time);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 7a757a97ba37..a785d61defe1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -514,7 +514,9 @@ struct ieee80211_if_mesh {
 	atomic_t mpaths;
 	/* Timestamp of last SN update */
 	unsigned long last_sn_update;
-	/* Timestamp of last SN sent */
+	/* Time when it's ok to send next PERR */
+	unsigned long next_perr;
+	/* Timestamp of last PREQ sent */
 	unsigned long last_preq;
 	struct mesh_rmc *rmc;
 	spinlock_t mesh_preq_queue_lock;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index ee82d2f7f114..c707c8bf6d2c 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -749,6 +749,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 	atomic_set(&ifmsh->mpaths, 0);
 	mesh_rmc_init(sdata);
 	ifmsh->last_preq = jiffies;
+	ifmsh->next_perr = jiffies;
 	/* Allocate all mesh structures when creating the first mesh interface. */
 	if (!mesh_allocated)
 		ieee80211s_init();
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 208ba35661f9..fe93386d6aa9 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -241,11 +241,15 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn,
 {
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos, ie_len;
 	int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) +
 		      sizeof(mgmt->u.action.u.mesh_action);
 
+	if (time_before(jiffies, ifmsh->next_perr))
+		return -EAGAIN;
+
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
 			    hdr_len +
 			    2 + 15 /* PERR IE */);
@@ -290,6 +294,8 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn,
 
 	/* see note in function header */
 	prepare_frame_for_deferred_tx(sdata, skb);
+	ifmsh->next_perr = TU_TO_EXP_TIME(
+				   ifmsh->mshcfg.dot11MeshHWMPperrMinInterval);
 	ieee80211_add_pending_skb(local, skb);
 	return 0;
 }
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index b7b7868f4128..8c550df13037 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -20,6 +20,7 @@
  * interface
  */
 #define MESH_PREQ_MIN_INT	10
+#define MESH_PERR_MIN_INT	100
 #define MESH_DIAM_TRAVERSAL_TIME 50
 
 /*
@@ -47,6 +48,7 @@ const struct mesh_config default_mesh_config = {
 	.dot11MeshMaxPeerLinks = MESH_MAX_ESTAB_PLINKS,
 	.dot11MeshHWMPactivePathTimeout = MESH_PATH_TIMEOUT,
 	.dot11MeshHWMPpreqMinInterval = MESH_PREQ_MIN_INT,
+	.dot11MeshHWMPperrMinInterval = MESH_PERR_MIN_INT,
 	.dot11MeshHWMPnetDiameterTraversalTime = MESH_DIAM_TRAVERSAL_TIME,
 	.dot11MeshHWMPmaxPREQretries = MESH_MAX_PREQ_RETRIES,
 	.path_refresh_time = MESH_PATH_REFRESH_TIME,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5699c3b1aba4..0ee512b85a1f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3195,6 +3195,8 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
 			cur_params.dot11MeshHWMPactivePathTimeout);
 	NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
 			cur_params.dot11MeshHWMPpreqMinInterval);
+	NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL,
+			cur_params.dot11MeshHWMPperrMinInterval);
 	NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
 			cur_params.dot11MeshHWMPnetDiameterTraversalTime);
 	NLA_PUT_U8(msg, NL80211_MESHCONF_HWMP_ROOTMODE,
@@ -3229,6 +3231,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A
 	[NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT] = { .type = NLA_U16 },
 	[NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT] = { .type = NLA_U32 },
 	[NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL] = { .type = NLA_U16 },
+	[NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL] = { .type = NLA_U16 },
 	[NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 },
 	[NL80211_MESHCONF_HWMP_ROOTMODE] = { .type = NLA_U8 },
 	[NL80211_MESHCONF_HWMP_RANN_INTERVAL] = { .type = NLA_U16 },
@@ -3303,6 +3306,9 @@ do {\
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval,
 			mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
 			nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval,
+			mask, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL,
+			nla_get_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
 			dot11MeshHWMPnetDiameterTraversalTime,
 			mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
-- 
cgit v1.2.3


From f7bc83d87d242917ca0ee041ed509f57f361dd56 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 23 Nov 2011 21:20:32 +0100
Subject: PM: Update comments describing device power management callbacks

The comments describing device power management callbacks in
include/pm.h are outdated and somewhat confusing, so make them
reflect the reality more accurately.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/pm.h | 229 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 134 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 5c4c8b18c8b7..3f3ed83a9aa5 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -54,118 +54,145 @@ typedef struct pm_message {
 /**
  * struct dev_pm_ops - device PM callbacks
  *
- * Several driver power state transitions are externally visible, affecting
+ * Several device power state transitions are externally visible, affecting
  * the state of pending I/O queues and (for drivers that touch hardware)
  * interrupts, wakeups, DMA, and other hardware state.  There may also be
- * internal transitions to various low power modes, which are transparent
+ * internal transitions to various low-power modes which are transparent
  * to the rest of the driver stack (such as a driver that's ON gating off
  * clocks which are not in active use).
  *
- * The externally visible transitions are handled with the help of the following
- * callbacks included in this structure:
- *
- * @prepare: Prepare the device for the upcoming transition, but do NOT change
- *	its hardware state.  Prevent new children of the device from being
- *	registered after @prepare() returns (the driver's subsystem and
- *	generally the rest of the kernel is supposed to prevent new calls to the
- *	probe method from being made too once @prepare() has succeeded).  If
- *	@prepare() detects a situation it cannot handle (e.g. registration of a
- *	child already in progress), it may return -EAGAIN, so that the PM core
- *	can execute it once again (e.g. after the new child has been registered)
- *	to recover from the race condition.  This method is executed for all
- *	kinds of suspend transitions and is followed by one of the suspend
- *	callbacks: @suspend(), @freeze(), or @poweroff().
- *	The PM core executes @prepare() for all devices before starting to
- *	execute suspend callbacks for any of them, so drivers may assume all of
- *	the other devices to be present and functional while @prepare() is being
- *	executed.  In particular, it is safe to make GFP_KERNEL memory
- *	allocations from within @prepare().  However, drivers may NOT assume
- *	anything about the availability of the user space at that time and it
- *	is not correct to request firmware from within @prepare() (it's too
- *	late to do that).  [To work around this limitation, drivers may
- *	register suspend and hibernation notifiers that are executed before the
- *	freezing of tasks.]
+ * The externally visible transitions are handled with the help of callbacks
+ * included in this structure in such a way that two levels of callbacks are
+ * involved.  First, the PM core executes callbacks provided by PM domains,
+ * device types, classes and bus types.  They are the subsystem-level callbacks
+ * supposed to execute callbacks provided by device drivers, although they may
+ * choose not to do that.  If the driver callbacks are executed, they have to
+ * collaborate with the subsystem-level callbacks to achieve the goals
+ * appropriate for the given system transition, given transition phase and the
+ * subsystem the device belongs to.
+ *
+ * @prepare: The principal role of this callback is to prevent new children of
+ *	the device from being registered after it has returned (the driver's
+ *	subsystem and generally the rest of the kernel is supposed to prevent
+ *	new calls to the probe method from being made too once @prepare() has
+ *	succeeded).  If @prepare() detects a situation it cannot handle (e.g.
+ *	registration of a child already in progress), it may return -EAGAIN, so
+ *	that the PM core can execute it once again (e.g. after a new child has
+ *	been registered) to recover from the race condition.
+ *	This method is executed for all kinds of suspend transitions and is
+ *	followed by one of the suspend callbacks: @suspend(), @freeze(), or
+ *	@poweroff().  The PM core executes subsystem-level @prepare() for all
+ *	devices before starting to invoke suspend callbacks for any of them, so
+ *	generally devices may be assumed to be functional or to respond to
+ *	runtime resume requests while @prepare() is being executed.  However,
+ *	device drivers may NOT assume anything about the availability of user
+ *	space at that time and it is NOT valid to request firmware from within
+ *	@prepare() (it's too late to do that).  It also is NOT valid to allocate
+ *	substantial amounts of memory from @prepare() in the GFP_KERNEL mode.
+ *	[To work around these limitations, drivers may register suspend and
+ *	hibernation notifiers to be executed before the freezing of tasks.]
  *
  * @complete: Undo the changes made by @prepare().  This method is executed for
  *	all kinds of resume transitions, following one of the resume callbacks:
  *	@resume(), @thaw(), @restore().  Also called if the state transition
- *	fails before the driver's suspend callback (@suspend(), @freeze(),
- *	@poweroff()) can be executed (e.g. if the suspend callback fails for one
+ *	fails before the driver's suspend callback: @suspend(), @freeze() or
+ *	@poweroff(), can be executed (e.g. if the suspend callback fails for one
  *	of the other devices that the PM core has unsuccessfully attempted to
  *	suspend earlier).
- *	The PM core executes @complete() after it has executed the appropriate
- *	resume callback for all devices.
+ *	The PM core executes subsystem-level @complete() after it has executed
+ *	the appropriate resume callbacks for all devices.
  *
  * @suspend: Executed before putting the system into a sleep state in which the
- *	contents of main memory are preserved.  Quiesce the device, put it into
- *	a low power state appropriate for the upcoming system state (such as
- *	PCI_D3hot), and enable wakeup events as appropriate.
+ *	contents of main memory are preserved.  The exact action to perform
+ *	depends on the device's subsystem (PM domain, device type, class or bus
+ *	type), but generally the device must be quiescent after subsystem-level
+ *	@suspend() has returned, so that it doesn't do any I/O or DMA.
+ *	Subsystem-level @suspend() is executed for all devices after invoking
+ *	subsystem-level @prepare() for all of them.
  *
  * @resume: Executed after waking the system up from a sleep state in which the
- *	contents of main memory were preserved.  Put the device into the
- *	appropriate state, according to the information saved in memory by the
- *	preceding @suspend().  The driver starts working again, responding to
- *	hardware events and software requests.  The hardware may have gone
- *	through a power-off reset, or it may have maintained state from the
- *	previous suspend() which the driver may rely on while resuming.  On most
- *	platforms, there are no restrictions on availability of resources like
- *	clocks during @resume().
+ *	contents of main memory were preserved.  The exact action to perform
+ *	depends on the device's subsystem, but generally the driver is expected
+ *	to start working again, responding to hardware events and software
+ *	requests (the device itself may be left in a low-power state, waiting
+ *	for a runtime resume to occur).  The state of the device at the time its
+ *	driver's @resume() callback is run depends on the platform and subsystem
+ *	the device belongs to.  On most platforms, there are no restrictions on
+ *	availability of resources like clocks during @resume().
+ *	Subsystem-level @resume() is executed for all devices after invoking
+ *	subsystem-level @resume_noirq() for all of them.
  *
  * @freeze: Hibernation-specific, executed before creating a hibernation image.
- *	Quiesce operations so that a consistent image can be created, but do NOT
- *	otherwise put the device into a low power device state and do NOT emit
- *	system wakeup events.  Save in main memory the device settings to be
- *	used by @restore() during the subsequent resume from hibernation or by
- *	the subsequent @thaw(), if the creation of the image or the restoration
- *	of main memory contents from it fails.
+ *	Analogous to @suspend(), but it should not enable the device to signal
+ *	wakeup events or change its power state.  The majority of subsystems
+ *	(with the notable exception of the PCI bus type) expect the driver-level
+ *	@freeze() to save the device settings in memory to be used by @restore()
+ *	during the subsequent resume from hibernation.
+ *	Subsystem-level @freeze() is executed for all devices after invoking
+ *	subsystem-level @prepare() for all of them.
  *
  * @thaw: Hibernation-specific, executed after creating a hibernation image OR
- *	if the creation of the image fails.  Also executed after a failing
+ *	if the creation of an image has failed.  Also executed after a failing
  *	attempt to restore the contents of main memory from such an image.
  *	Undo the changes made by the preceding @freeze(), so the device can be
  *	operated in the same way as immediately before the call to @freeze().
+ *	Subsystem-level @thaw() is executed for all devices after invoking
+ *	subsystem-level @thaw_noirq() for all of them.  It also may be executed
+ *	directly after @freeze() in case of a transition error.
  *
  * @poweroff: Hibernation-specific, executed after saving a hibernation image.
- *	Quiesce the device, put it into a low power state appropriate for the
- *	upcoming system state (such as PCI_D3hot), and enable wakeup events as
- *	appropriate.
+ *	Analogous to @suspend(), but it need not save the device's settings in
+ *	memory.
+ *	Subsystem-level @poweroff() is executed for all devices after invoking
+ *	subsystem-level @prepare() for all of them.
  *
  * @restore: Hibernation-specific, executed after restoring the contents of main
- *	memory from a hibernation image.  Driver starts working again,
- *	responding to hardware events and software requests.  Drivers may NOT
- *	make ANY assumptions about the hardware state right prior to @restore().
- *	On most platforms, there are no restrictions on availability of
- *	resources like clocks during @restore().
- *
- * @suspend_noirq: Complete the operations of ->suspend() by carrying out any
- *	actions required for suspending the device that need interrupts to be
- *	disabled
- *
- * @resume_noirq: Prepare for the execution of ->resume() by carrying out any
- *	actions required for resuming the device that need interrupts to be
- *	disabled
- *
- * @freeze_noirq: Complete the operations of ->freeze() by carrying out any
- *	actions required for freezing the device that need interrupts to be
- *	disabled
- *
- * @thaw_noirq: Prepare for the execution of ->thaw() by carrying out any
- *	actions required for thawing the device that need interrupts to be
- *	disabled
- *
- * @poweroff_noirq: Complete the operations of ->poweroff() by carrying out any
- *	actions required for handling the device that need interrupts to be
- *	disabled
- *
- * @restore_noirq: Prepare for the execution of ->restore() by carrying out any
- *	actions required for restoring the operations of the device that need
- *	interrupts to be disabled
+ *	memory from a hibernation image, analogous to @resume().
+ *
+ * @suspend_noirq: Complete the actions started by @suspend().  Carry out any
+ *	additional operations required for suspending the device that might be
+ *	racing with its driver's interrupt handler, which is guaranteed not to
+ *	run while @suspend_noirq() is being executed.
+ *	It generally is expected that the device will be in a low-power state
+ *	(appropriate for the target system sleep state) after subsystem-level
+ *	@suspend_noirq() has returned successfully.  If the device can generate
+ *	system wakeup signals and is enabled to wake up the system, it should be
+ *	configured to do so at that time.  However, depending on the platform
+ *	and device's subsystem, @suspend() may be allowed to put the device into
+ *	the low-power state and configure it to generate wakeup signals, in
+ *	which case it generally is not necessary to define @suspend_noirq().
+ *
+ * @resume_noirq: Prepare for the execution of @resume() by carrying out any
+ *	operations required for resuming the device that might be racing with
+ *	its driver's interrupt handler, which is guaranteed not to run while
+ *	@resume_noirq() is being executed.
+ *
+ * @freeze_noirq: Complete the actions started by @freeze().  Carry out any
+ *	additional operations required for freezing the device that might be
+ *	racing with its driver's interrupt handler, which is guaranteed not to
+ *	run while @freeze_noirq() is being executed.
+ *	The power state of the device should not be changed by either @freeze()
+ *	or @freeze_noirq() and it should not be configured to signal system
+ *	wakeup by any of these callbacks.
+ *
+ * @thaw_noirq: Prepare for the execution of @thaw() by carrying out any
+ *	operations required for thawing the device that might be racing with its
+ *	driver's interrupt handler, which is guaranteed not to run while
+ *	@thaw_noirq() is being executed.
+ *
+ * @poweroff_noirq: Complete the actions started by @poweroff().  Analogous to
+ *	@suspend_noirq(), but it need not save the device's settings in memory.
+ *
+ * @restore_noirq: Prepare for the execution of @restore() by carrying out any
+ *	operations required for thawing the device that might be racing with its
+ *	driver's interrupt handler, which is guaranteed not to run while
+ *	@restore_noirq() is being executed.  Analogous to @resume_noirq().
  *
  * All of the above callbacks, except for @complete(), return error codes.
  * However, the error codes returned by the resume operations, @resume(),
- * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq() do
+ * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq(), do
  * not cause the PM core to abort the resume transition during which they are
- * returned.  The error codes returned in that cases are only printed by the PM
+ * returned.  The error codes returned in those cases are only printed by the PM
  * core to the system logs for debugging purposes.  Still, it is recommended
  * that drivers only return error codes from their resume methods in case of an
  * unrecoverable failure (i.e. when the device being handled refuses to resume
@@ -174,31 +201,43 @@ typedef struct pm_message {
  * their children.
  *
  * It is allowed to unregister devices while the above callbacks are being
- * executed.  However, it is not allowed to unregister a device from within any
- * of its own callbacks.
+ * executed.  However, a callback routine must NOT try to unregister the device
+ * it was called for, although it may unregister children of that device (for
+ * example, if it detects that a child was unplugged while the system was
+ * asleep).
+ *
+ * Refer to Documentation/power/devices.txt for more information about the role
+ * of the above callbacks in the system suspend process.
  *
- * There also are the following callbacks related to run-time power management
- * of devices:
+ * There also are callbacks related to runtime power management of devices.
+ * Again, these callbacks are executed by the PM core only for subsystems
+ * (PM domains, device types, classes and bus types) and the subsystem-level
+ * callbacks are supposed to invoke the driver callbacks.  Moreover, the exact
+ * actions to be performed by a device driver's callbacks generally depend on
+ * the platform and subsystem the device belongs to.
  *
  * @runtime_suspend: Prepare the device for a condition in which it won't be
  *	able to communicate with the CPU(s) and RAM due to power management.
- *	This need not mean that the device should be put into a low power state.
+ *	This need not mean that the device should be put into a low-power state.
  *	For example, if the device is behind a link which is about to be turned
  *	off, the device may remain at full power.  If the device does go to low
- *	power and is capable of generating run-time wake-up events, remote
- *	wake-up (i.e., a hardware mechanism allowing the device to request a
- *	change of its power state via a wake-up event, such as PCI PME) should
- *	be enabled for it.
+ *	power and is capable of generating runtime wakeup events, remote wakeup
+ *	(i.e., a hardware mechanism allowing the device to request a change of
+ *	its power state via an interrupt) should be enabled for it.
  *
  * @runtime_resume: Put the device into the fully active state in response to a
- *	wake-up event generated by hardware or at the request of software.  If
- *	necessary, put the device into the full power state and restore its
+ *	wakeup event generated by hardware or at the request of software.  If
+ *	necessary, put the device into the full-power state and restore its
  *	registers, so that it is fully operational.
  *
- * @runtime_idle: Device appears to be inactive and it might be put into a low
- *	power state if all of the necessary conditions are satisfied.  Check
+ * @runtime_idle: Device appears to be inactive and it might be put into a
+ *	low-power state if all of the necessary conditions are satisfied.  Check
  *	these conditions and handle the device as appropriate, possibly queueing
  *	a suspend request for it.  The return value is ignored by the PM core.
+ *
+ * Refer to Documentation/power/runtime_pm.txt for more information about the
+ * role of the above callbacks in device runtime power management.
+ *
  */
 
 struct dev_pm_ops {
-- 
cgit v1.2.3


From 5cac98dd06bc43a7baab3523184f70fd359e9f35 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 27 Nov 2011 21:14:46 +0000
Subject: net: Fix corruption in /proc/*/net/dev_mcast

I just hit this during my testing. Isn't there another bug lurking?

BUG kmalloc-8: Redzone overwritten

INFO: 0xc0000000de9dec48-0xc0000000de9dec4b. First byte 0x0 instead of 0xcc
INFO: Allocated in .__seq_open_private+0x30/0xa0 age=0 cpu=5 pid=3896
	.__kmalloc+0x1e0/0x2d0
	.__seq_open_private+0x30/0xa0
	.seq_open_net+0x60/0xe0
	.dev_mc_seq_open+0x4c/0x70
	.proc_reg_open+0xd8/0x260
	.__dentry_open.clone.11+0x2b8/0x400
	.do_last+0xf4/0x950
	.path_openat+0xf8/0x480
	.do_filp_open+0x48/0xc0
	.do_sys_open+0x140/0x250
	syscall_exit+0x0/0x40

dev_mc_seq_ops uses dev_seq_start/next/stop but only allocates
sizeof(struct seq_net_private) of private data, whereas it expects
sizeof(struct dev_iter_state):

struct dev_iter_state {
	struct seq_net_private p;
	unsigned int pos; /* bucket << BUCKET_SPACE + offset */
};

Create dev_seq_open_ops and use it so we don't have to expose
struct dev_iter_state.

[ Problem added by commit f04565ddf52e4 (dev: use name hash for
  dev_seq_ops) -Eric ]

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 net/core/dev.c            | 6 ++++++
 net/core/dev_addr_lists.c | 3 +--
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cbeb5867cff7..a82ad4dd306a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2536,6 +2536,8 @@ extern void		net_disable_timestamp(void);
 extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
 extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
 extern void dev_seq_stop(struct seq_file *seq, void *v);
+extern int dev_seq_open_ops(struct inode *inode, struct file *file,
+			    const struct seq_operations *ops);
 #endif
 
 extern int netdev_class_create_file(struct class_attribute *class_attr);
diff --git a/net/core/dev.c b/net/core/dev.c
index 6ba50a1e404c..1482eea0bbf0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4282,6 +4282,12 @@ static int dev_seq_open(struct inode *inode, struct file *file)
 			    sizeof(struct dev_iter_state));
 }
 
+int dev_seq_open_ops(struct inode *inode, struct file *file,
+		     const struct seq_operations *ops)
+{
+	return seq_open_net(inode, file, ops, sizeof(struct dev_iter_state));
+}
+
 static const struct file_operations dev_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = dev_seq_open,
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 277faef9148d..febba516db62 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -696,8 +696,7 @@ static const struct seq_operations dev_mc_seq_ops = {
 
 static int dev_mc_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_net(inode, file, &dev_mc_seq_ops,
-			    sizeof(struct seq_net_private));
+	return dev_seq_open_ops(inode, file, &dev_mc_seq_ops);
 }
 
 static const struct file_operations dev_mc_seq_fops = {
-- 
cgit v1.2.3


From 4f718a29fe4908c2cea782f751e9805319684e2b Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Mon, 28 Nov 2011 09:44:14 +0100
Subject: firmware: Sigma: Prevent out of bounds memory access

The SigmaDSP firmware loader currently does not perform enough boundary size
checks when processing the firmware. As a result it is possible that a
malformed firmware can cause an out of bounds memory access.

This patch adds checks which ensure that both the action header and the payload
are completely inside the firmware data boundaries before processing them.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: stable@kernel.org
---
 drivers/firmware/sigma.c | 76 +++++++++++++++++++++++++++++++++++-------------
 include/linux/sigma.h    |  5 ----
 2 files changed, 55 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/sigma.c b/drivers/firmware/sigma.c
index f10fc521951b..c780baa59ed9 100644
--- a/drivers/firmware/sigma.c
+++ b/drivers/firmware/sigma.c
@@ -14,13 +14,34 @@
 #include <linux/module.h>
 #include <linux/sigma.h>
 
-/* Return: 0==OK, <0==error, =1 ==no more actions */
+static size_t sigma_action_size(struct sigma_action *sa)
+{
+	size_t payload = 0;
+
+	switch (sa->instr) {
+	case SIGMA_ACTION_WRITEXBYTES:
+	case SIGMA_ACTION_WRITESINGLE:
+	case SIGMA_ACTION_WRITESAFELOAD:
+		payload = sigma_action_len(sa);
+		break;
+	default:
+		break;
+	}
+
+	payload = ALIGN(payload, 2);
+
+	return payload + sizeof(struct sigma_action);
+}
+
+/*
+ * Returns a negative error value in case of an error, 0 if processing of
+ * the firmware should be stopped after this action, 1 otherwise.
+ */
 static int
-process_sigma_action(struct i2c_client *client, struct sigma_firmware *ssfw)
+process_sigma_action(struct i2c_client *client, struct sigma_action *sa)
 {
-	struct sigma_action *sa = (void *)(ssfw->fw->data + ssfw->pos);
 	size_t len = sigma_action_len(sa);
-	int ret = 0;
+	int ret;
 
 	pr_debug("%s: instr:%i addr:%#x len:%zu\n", __func__,
 		sa->instr, sa->addr, len);
@@ -29,44 +50,50 @@ process_sigma_action(struct i2c_client *client, struct sigma_firmware *ssfw)
 	case SIGMA_ACTION_WRITEXBYTES:
 	case SIGMA_ACTION_WRITESINGLE:
 	case SIGMA_ACTION_WRITESAFELOAD:
-		if (ssfw->fw->size < ssfw->pos + len)
-			return -EINVAL;
 		ret = i2c_master_send(client, (void *)&sa->addr, len);
 		if (ret < 0)
 			return -EINVAL;
 		break;
-
 	case SIGMA_ACTION_DELAY:
-		ret = 0;
 		udelay(len);
 		len = 0;
 		break;
-
 	case SIGMA_ACTION_END:
-		return 1;
-
+		return 0;
 	default:
 		return -EINVAL;
 	}
 
-	/* when arrive here ret=0 or sent data */
-	ssfw->pos += sigma_action_size(sa, len);
-	return ssfw->pos == ssfw->fw->size;
+	return 1;
 }
 
 static int
 process_sigma_actions(struct i2c_client *client, struct sigma_firmware *ssfw)
 {
-	pr_debug("%s: processing %p\n", __func__, ssfw);
+	struct sigma_action *sa;
+	size_t size;
+	int ret;
+
+	while (ssfw->pos + sizeof(*sa) <= ssfw->fw->size) {
+		sa = (struct sigma_action *)(ssfw->fw->data + ssfw->pos);
+
+		size = sigma_action_size(sa);
+		ssfw->pos += size;
+		if (ssfw->pos > ssfw->fw->size || size == 0)
+			break;
+
+		ret = process_sigma_action(client, sa);
 
-	while (1) {
-		int ret = process_sigma_action(client, ssfw);
 		pr_debug("%s: action returned %i\n", __func__, ret);
-		if (ret == 1)
-			return 0;
-		else if (ret)
+
+		if (ret <= 0)
 			return ret;
 	}
+
+	if (ssfw->pos != ssfw->fw->size)
+		return -EINVAL;
+
+	return 0;
 }
 
 int process_sigma_firmware(struct i2c_client *client, const char *name)
@@ -89,7 +116,14 @@ int process_sigma_firmware(struct i2c_client *client, const char *name)
 
 	/* then verify the header */
 	ret = -EINVAL;
-	if (fw->size < sizeof(*ssfw_head))
+
+	/*
+	 * Reject too small or unreasonable large files. The upper limit has been
+	 * chosen a bit arbitrarily, but it should be enough for all practical
+	 * purposes and having the limit makes it easier to avoid integer
+	 * overflows later in the loading process.
+	 */
+	if (fw->size < sizeof(*ssfw_head) || fw->size >= 0x4000000)
 		goto done;
 
 	ssfw_head = (void *)fw->data;
diff --git a/include/linux/sigma.h b/include/linux/sigma.h
index e2accb3164d8..9a138c2946bb 100644
--- a/include/linux/sigma.h
+++ b/include/linux/sigma.h
@@ -50,11 +50,6 @@ static inline u32 sigma_action_len(struct sigma_action *sa)
 	return (sa->len_hi << 16) | sa->len;
 }
 
-static inline size_t sigma_action_size(struct sigma_action *sa, u32 payload_len)
-{
-	return sizeof(*sa) + payload_len + (payload_len % 2);
-}
-
 extern int process_sigma_firmware(struct i2c_client *client, const char *name);
 
 #endif
-- 
cgit v1.2.3


From bda63586bc5929e97288cdb371bb6456504867ed Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Mon, 28 Nov 2011 09:44:16 +0100
Subject: firmware: Sigma: Fix endianess issues

Currently the SigmaDSP firmware loader only works correctly on little-endian
systems. Fix this by using the proper endianess conversion functions.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: stable@kernel.org
---
 drivers/firmware/sigma.c | 2 +-
 include/linux/sigma.h    | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/sigma.c b/drivers/firmware/sigma.c
index 36265de0a9e8..1eedb6f7fdab 100644
--- a/drivers/firmware/sigma.c
+++ b/drivers/firmware/sigma.c
@@ -133,7 +133,7 @@ int process_sigma_firmware(struct i2c_client *client, const char *name)
 	crc = crc32(0, fw->data + sizeof(*ssfw_head),
 			fw->size - sizeof(*ssfw_head));
 	pr_debug("%s: crc=%x\n", __func__, crc);
-	if (crc != ssfw_head->crc)
+	if (crc != le32_to_cpu(ssfw_head->crc))
 		goto done;
 
 	ssfw.pos = sizeof(*ssfw_head);
diff --git a/include/linux/sigma.h b/include/linux/sigma.h
index 9a138c2946bb..d0de882c0d96 100644
--- a/include/linux/sigma.h
+++ b/include/linux/sigma.h
@@ -24,7 +24,7 @@ struct sigma_firmware {
 struct sigma_firmware_header {
 	unsigned char magic[7];
 	u8 version;
-	u32 crc;
+	__le32 crc;
 };
 
 enum {
@@ -40,14 +40,14 @@ enum {
 struct sigma_action {
 	u8 instr;
 	u8 len_hi;
-	u16 len;
-	u16 addr;
+	__le16 len;
+	__be16 addr;
 	unsigned char payload[];
 };
 
 static inline u32 sigma_action_len(struct sigma_action *sa)
 {
-	return (sa->len_hi << 16) | sa->len;
+	return (sa->len_hi << 16) | le16_to_cpu(sa->len);
 }
 
 extern int process_sigma_firmware(struct i2c_client *client, const char *name);
-- 
cgit v1.2.3


From 40216ce7aa88c2e70869723a0f5929fdbd4a91c5 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Mon, 28 Nov 2011 09:44:17 +0100
Subject: ASoC: Move SigmaDSP firmware loader to ASoC

It has been pointed out previously, that the firmware subsystem is not the right
place for the SigmaDSP firmware loader. Furthermore the SigmaDSP is currently
only used in audio products and we are aiming for better integration into the
ASoC framework in the future, with support for ALSA controls for firmware
parameters and support dynamic power management as well. So the natural choice
for the SigmaDSP firmware loader is the ASoC subsystem.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 MAINTAINERS                 |   1 +
 drivers/firmware/Kconfig    |  12 ----
 drivers/firmware/Makefile   |   1 -
 drivers/firmware/sigma.c    | 153 -------------------------------------------
 include/linux/sigma.h       |  55 ----------------
 sound/soc/codecs/Kconfig    |   6 +-
 sound/soc/codecs/Makefile   |   2 +
 sound/soc/codecs/adau1701.c |   2 +-
 sound/soc/codecs/sigmadsp.c | 154 ++++++++++++++++++++++++++++++++++++++++++++
 sound/soc/codecs/sigmadsp.h |  55 ++++++++++++++++
 10 files changed, 218 insertions(+), 223 deletions(-)
 delete mode 100644 drivers/firmware/sigma.c
 delete mode 100644 include/linux/sigma.h
 create mode 100644 sound/soc/codecs/sigmadsp.c
 create mode 100644 sound/soc/codecs/sigmadsp.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index fd7e441b5ea7..6a93a930ec66 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -542,6 +542,7 @@ F:	sound/soc/codecs/adau*
 F:	sound/soc/codecs/adav*
 F:	sound/soc/codecs/ad1*
 F:	sound/soc/codecs/ssm*
+F:	sound/soc/codecs/sigmadsp.*
 
 ANALOG DEVICES INC ASOC DRIVERS
 L:	uclinux-dist-devel@blackfin.uclinux.org
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index efba163595db..9b00072a020f 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -145,18 +145,6 @@ config ISCSI_IBFT
 	  detect iSCSI boot parameters dynamically during system boot, say Y.
 	  Otherwise, say N.
 
-config SIGMA
-	tristate "SigmaStudio firmware loader"
-	depends on I2C
-	select CRC32
-	default n
-	help
-	  Enable helper functions for working with Analog Devices SigmaDSP
-	  parts and binary firmwares produced by Analog Devices SigmaStudio.
-
-	  If unsure, say N here.  Drivers that need these helpers will select
-	  this option automatically.
-
 source "drivers/firmware/google/Kconfig"
 
 endmenu
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 47338c979126..5a7e27399729 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -12,6 +12,5 @@ obj-$(CONFIG_DMIID)		+= dmi-id.o
 obj-$(CONFIG_ISCSI_IBFT_FIND)	+= iscsi_ibft_find.o
 obj-$(CONFIG_ISCSI_IBFT)	+= iscsi_ibft.o
 obj-$(CONFIG_FIRMWARE_MEMMAP)	+= memmap.o
-obj-$(CONFIG_SIGMA)		+= sigma.o
 
 obj-$(CONFIG_GOOGLE_FIRMWARE)	+= google/
diff --git a/drivers/firmware/sigma.c b/drivers/firmware/sigma.c
deleted file mode 100644
index 1eedb6f7fdab..000000000000
--- a/drivers/firmware/sigma.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Load Analog Devices SigmaStudio firmware files
- *
- * Copyright 2009-2011 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#include <linux/crc32.h>
-#include <linux/delay.h>
-#include <linux/firmware.h>
-#include <linux/kernel.h>
-#include <linux/i2c.h>
-#include <linux/module.h>
-#include <linux/sigma.h>
-
-static size_t sigma_action_size(struct sigma_action *sa)
-{
-	size_t payload = 0;
-
-	switch (sa->instr) {
-	case SIGMA_ACTION_WRITEXBYTES:
-	case SIGMA_ACTION_WRITESINGLE:
-	case SIGMA_ACTION_WRITESAFELOAD:
-		payload = sigma_action_len(sa);
-		break;
-	default:
-		break;
-	}
-
-	payload = ALIGN(payload, 2);
-
-	return payload + sizeof(struct sigma_action);
-}
-
-/*
- * Returns a negative error value in case of an error, 0 if processing of
- * the firmware should be stopped after this action, 1 otherwise.
- */
-static int
-process_sigma_action(struct i2c_client *client, struct sigma_action *sa)
-{
-	size_t len = sigma_action_len(sa);
-	int ret;
-
-	pr_debug("%s: instr:%i addr:%#x len:%zu\n", __func__,
-		sa->instr, sa->addr, len);
-
-	switch (sa->instr) {
-	case SIGMA_ACTION_WRITEXBYTES:
-	case SIGMA_ACTION_WRITESINGLE:
-	case SIGMA_ACTION_WRITESAFELOAD:
-		ret = i2c_master_send(client, (void *)&sa->addr, len);
-		if (ret < 0)
-			return -EINVAL;
-		break;
-	case SIGMA_ACTION_DELAY:
-		udelay(len);
-		len = 0;
-		break;
-	case SIGMA_ACTION_END:
-		return 0;
-	default:
-		return -EINVAL;
-	}
-
-	return 1;
-}
-
-static int
-process_sigma_actions(struct i2c_client *client, struct sigma_firmware *ssfw)
-{
-	struct sigma_action *sa;
-	size_t size;
-	int ret;
-
-	while (ssfw->pos + sizeof(*sa) <= ssfw->fw->size) {
-		sa = (struct sigma_action *)(ssfw->fw->data + ssfw->pos);
-
-		size = sigma_action_size(sa);
-		ssfw->pos += size;
-		if (ssfw->pos > ssfw->fw->size || size == 0)
-			break;
-
-		ret = process_sigma_action(client, sa);
-
-		pr_debug("%s: action returned %i\n", __func__, ret);
-
-		if (ret <= 0)
-			return ret;
-	}
-
-	if (ssfw->pos != ssfw->fw->size)
-		return -EINVAL;
-
-	return 0;
-}
-
-int process_sigma_firmware(struct i2c_client *client, const char *name)
-{
-	int ret;
-	struct sigma_firmware_header *ssfw_head;
-	struct sigma_firmware ssfw;
-	const struct firmware *fw;
-	u32 crc;
-
-	pr_debug("%s: loading firmware %s\n", __func__, name);
-
-	/* first load the blob */
-	ret = request_firmware(&fw, name, &client->dev);
-	if (ret) {
-		pr_debug("%s: request_firmware() failed with %i\n", __func__, ret);
-		return ret;
-	}
-	ssfw.fw = fw;
-
-	/* then verify the header */
-	ret = -EINVAL;
-
-	/*
-	 * Reject too small or unreasonable large files. The upper limit has been
-	 * chosen a bit arbitrarily, but it should be enough for all practical
-	 * purposes and having the limit makes it easier to avoid integer
-	 * overflows later in the loading process.
-	 */
-	if (fw->size < sizeof(*ssfw_head) || fw->size >= 0x4000000)
-		goto done;
-
-	ssfw_head = (void *)fw->data;
-	if (memcmp(ssfw_head->magic, SIGMA_MAGIC, ARRAY_SIZE(ssfw_head->magic)))
-		goto done;
-
-	crc = crc32(0, fw->data + sizeof(*ssfw_head),
-			fw->size - sizeof(*ssfw_head));
-	pr_debug("%s: crc=%x\n", __func__, crc);
-	if (crc != le32_to_cpu(ssfw_head->crc))
-		goto done;
-
-	ssfw.pos = sizeof(*ssfw_head);
-
-	/* finally process all of the actions */
-	ret = process_sigma_actions(client, &ssfw);
-
- done:
-	release_firmware(fw);
-
-	pr_debug("%s: loaded %s\n", __func__, name);
-
-	return ret;
-}
-EXPORT_SYMBOL(process_sigma_firmware);
-
-MODULE_LICENSE("GPL");
diff --git a/include/linux/sigma.h b/include/linux/sigma.h
deleted file mode 100644
index d0de882c0d96..000000000000
--- a/include/linux/sigma.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Load firmware files from Analog Devices SigmaStudio
- *
- * Copyright 2009-2011 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#ifndef __SIGMA_FIRMWARE_H__
-#define __SIGMA_FIRMWARE_H__
-
-#include <linux/firmware.h>
-#include <linux/types.h>
-
-struct i2c_client;
-
-#define SIGMA_MAGIC "ADISIGM"
-
-struct sigma_firmware {
-	const struct firmware *fw;
-	size_t pos;
-};
-
-struct sigma_firmware_header {
-	unsigned char magic[7];
-	u8 version;
-	__le32 crc;
-};
-
-enum {
-	SIGMA_ACTION_WRITEXBYTES = 0,
-	SIGMA_ACTION_WRITESINGLE,
-	SIGMA_ACTION_WRITESAFELOAD,
-	SIGMA_ACTION_DELAY,
-	SIGMA_ACTION_PLLWAIT,
-	SIGMA_ACTION_NOOP,
-	SIGMA_ACTION_END,
-};
-
-struct sigma_action {
-	u8 instr;
-	u8 len_hi;
-	__le16 len;
-	__be16 addr;
-	unsigned char payload[];
-};
-
-static inline u32 sigma_action_len(struct sigma_action *sa)
-{
-	return (sa->len_hi << 16) | le16_to_cpu(sa->len);
-}
-
-extern int process_sigma_firmware(struct i2c_client *client, const char *name);
-
-#endif
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 686f45a07f34..593174c78d7b 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -141,7 +141,7 @@ config SND_SOC_AD73311
 	tristate
 
 config SND_SOC_ADAU1701
-	select SIGMA
+	select SND_SOC_SIGMADSP
 	tristate
 
 config SND_SOC_ADAU1373
@@ -234,6 +234,10 @@ config SND_SOC_RT5631
 config SND_SOC_SGTL5000
 	tristate
 
+config SND_SOC_SIGMADSP
+	tristate
+	select CRC32
+
 config SND_SOC_SN95031
 	tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index 62b01e4e7983..fa15006fcac5 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -33,6 +33,7 @@ snd-soc-rt5631-objs := rt5631.o
 snd-soc-sgtl5000-objs := sgtl5000.o
 snd-soc-alc5623-objs := alc5623.o
 snd-soc-alc5632-objs := alc5632.o
+snd-soc-sigmadsp-objs := sigmadsp.o
 snd-soc-sn95031-objs := sn95031.o
 snd-soc-spdif-objs := spdif_transciever.o
 snd-soc-ssm2602-objs := ssm2602.o
@@ -134,6 +135,7 @@ obj-$(CONFIG_SND_SOC_MAX9850)	+= snd-soc-max9850.o
 obj-$(CONFIG_SND_SOC_PCM3008)	+= snd-soc-pcm3008.o
 obj-$(CONFIG_SND_SOC_RT5631)	+= snd-soc-rt5631.o
 obj-$(CONFIG_SND_SOC_SGTL5000)  += snd-soc-sgtl5000.o
+obj-$(CONFIG_SND_SOC_SIGMADSP)	+= snd-soc-sigmadsp.o
 obj-$(CONFIG_SND_SOC_SN95031)	+=snd-soc-sn95031.o
 obj-$(CONFIG_SND_SOC_SPDIF)	+= snd-soc-spdif.o
 obj-$(CONFIG_SND_SOC_SSM2602)	+= snd-soc-ssm2602.o
diff --git a/sound/soc/codecs/adau1701.c b/sound/soc/codecs/adau1701.c
index 8b7e1c50d6e9..6a6af567f02a 100644
--- a/sound/soc/codecs/adau1701.c
+++ b/sound/soc/codecs/adau1701.c
@@ -12,13 +12,13 @@
 #include <linux/init.h>
 #include <linux/i2c.h>
 #include <linux/delay.h>
-#include <linux/sigma.h>
 #include <linux/slab.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
 #include <sound/soc.h>
 
+#include "sigmadsp.h"
 #include "adau1701.h"
 
 #define ADAU1701_DSPCTRL	0x1c
diff --git a/sound/soc/codecs/sigmadsp.c b/sound/soc/codecs/sigmadsp.c
new file mode 100644
index 000000000000..acb97a9834aa
--- /dev/null
+++ b/sound/soc/codecs/sigmadsp.c
@@ -0,0 +1,154 @@
+/*
+ * Load Analog Devices SigmaStudio firmware files
+ *
+ * Copyright 2009-2011 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/crc32.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/kernel.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+
+#include "sigmadsp.h"
+
+static size_t sigma_action_size(struct sigma_action *sa)
+{
+	size_t payload = 0;
+
+	switch (sa->instr) {
+	case SIGMA_ACTION_WRITEXBYTES:
+	case SIGMA_ACTION_WRITESINGLE:
+	case SIGMA_ACTION_WRITESAFELOAD:
+		payload = sigma_action_len(sa);
+		break;
+	default:
+		break;
+	}
+
+	payload = ALIGN(payload, 2);
+
+	return payload + sizeof(struct sigma_action);
+}
+
+/*
+ * Returns a negative error value in case of an error, 0 if processing of
+ * the firmware should be stopped after this action, 1 otherwise.
+ */
+static int
+process_sigma_action(struct i2c_client *client, struct sigma_action *sa)
+{
+	size_t len = sigma_action_len(sa);
+	int ret;
+
+	pr_debug("%s: instr:%i addr:%#x len:%zu\n", __func__,
+		sa->instr, sa->addr, len);
+
+	switch (sa->instr) {
+	case SIGMA_ACTION_WRITEXBYTES:
+	case SIGMA_ACTION_WRITESINGLE:
+	case SIGMA_ACTION_WRITESAFELOAD:
+		ret = i2c_master_send(client, (void *)&sa->addr, len);
+		if (ret < 0)
+			return -EINVAL;
+		break;
+	case SIGMA_ACTION_DELAY:
+		udelay(len);
+		len = 0;
+		break;
+	case SIGMA_ACTION_END:
+		return 0;
+	default:
+		return -EINVAL;
+	}
+
+	return 1;
+}
+
+static int
+process_sigma_actions(struct i2c_client *client, struct sigma_firmware *ssfw)
+{
+	struct sigma_action *sa;
+	size_t size;
+	int ret;
+
+	while (ssfw->pos + sizeof(*sa) <= ssfw->fw->size) {
+		sa = (struct sigma_action *)(ssfw->fw->data + ssfw->pos);
+
+		size = sigma_action_size(sa);
+		ssfw->pos += size;
+		if (ssfw->pos > ssfw->fw->size || size == 0)
+			break;
+
+		ret = process_sigma_action(client, sa);
+
+		pr_debug("%s: action returned %i\n", __func__, ret);
+
+		if (ret <= 0)
+			return ret;
+	}
+
+	if (ssfw->pos != ssfw->fw->size)
+		return -EINVAL;
+
+	return 0;
+}
+
+int process_sigma_firmware(struct i2c_client *client, const char *name)
+{
+	int ret;
+	struct sigma_firmware_header *ssfw_head;
+	struct sigma_firmware ssfw;
+	const struct firmware *fw;
+	u32 crc;
+
+	pr_debug("%s: loading firmware %s\n", __func__, name);
+
+	/* first load the blob */
+	ret = request_firmware(&fw, name, &client->dev);
+	if (ret) {
+		pr_debug("%s: request_firmware() failed with %i\n", __func__, ret);
+		return ret;
+	}
+	ssfw.fw = fw;
+
+	/* then verify the header */
+	ret = -EINVAL;
+
+	/*
+	 * Reject too small or unreasonable large files. The upper limit has been
+	 * chosen a bit arbitrarily, but it should be enough for all practical
+	 * purposes and having the limit makes it easier to avoid integer
+	 * overflows later in the loading process.
+	 */
+	if (fw->size < sizeof(*ssfw_head) || fw->size >= 0x4000000)
+		goto done;
+
+	ssfw_head = (void *)fw->data;
+	if (memcmp(ssfw_head->magic, SIGMA_MAGIC, ARRAY_SIZE(ssfw_head->magic)))
+		goto done;
+
+	crc = crc32(0, fw->data + sizeof(*ssfw_head),
+			fw->size - sizeof(*ssfw_head));
+	pr_debug("%s: crc=%x\n", __func__, crc);
+	if (crc != le32_to_cpu(ssfw_head->crc))
+		goto done;
+
+	ssfw.pos = sizeof(*ssfw_head);
+
+	/* finally process all of the actions */
+	ret = process_sigma_actions(client, &ssfw);
+
+ done:
+	release_firmware(fw);
+
+	pr_debug("%s: loaded %s\n", __func__, name);
+
+	return ret;
+}
+EXPORT_SYMBOL(process_sigma_firmware);
+
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/sigmadsp.h b/sound/soc/codecs/sigmadsp.h
new file mode 100644
index 000000000000..d0de882c0d96
--- /dev/null
+++ b/sound/soc/codecs/sigmadsp.h
@@ -0,0 +1,55 @@
+/*
+ * Load firmware files from Analog Devices SigmaStudio
+ *
+ * Copyright 2009-2011 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef __SIGMA_FIRMWARE_H__
+#define __SIGMA_FIRMWARE_H__
+
+#include <linux/firmware.h>
+#include <linux/types.h>
+
+struct i2c_client;
+
+#define SIGMA_MAGIC "ADISIGM"
+
+struct sigma_firmware {
+	const struct firmware *fw;
+	size_t pos;
+};
+
+struct sigma_firmware_header {
+	unsigned char magic[7];
+	u8 version;
+	__le32 crc;
+};
+
+enum {
+	SIGMA_ACTION_WRITEXBYTES = 0,
+	SIGMA_ACTION_WRITESINGLE,
+	SIGMA_ACTION_WRITESAFELOAD,
+	SIGMA_ACTION_DELAY,
+	SIGMA_ACTION_PLLWAIT,
+	SIGMA_ACTION_NOOP,
+	SIGMA_ACTION_END,
+};
+
+struct sigma_action {
+	u8 instr;
+	u8 len_hi;
+	__le16 len;
+	__be16 addr;
+	unsigned char payload[];
+};
+
+static inline u32 sigma_action_len(struct sigma_action *sa)
+{
+	return (sa->len_hi << 16) | le16_to_cpu(sa->len);
+}
+
+extern int process_sigma_firmware(struct i2c_client *client, const char *name);
+
+#endif
-- 
cgit v1.2.3


From 63b4c2967850033de0a488d2ba7cd09052199d99 Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Date: Fri, 25 Nov 2011 01:51:06 +0800
Subject: ARM: at91/boards: use -EINVAL for invalid gpio

this will allow to use gpio_is_valid

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 arch/arm/mach-at91/board-1arm.c         |  2 ++
 arch/arm/mach-at91/board-afeb-9260v1.c  |  8 +++++++-
 arch/arm/mach-at91/board-cam60.c        |  4 +++-
 arch/arm/mach-at91/board-cap9adk.c      | 15 +++++++++------
 arch/arm/mach-at91/board-carmeva.c      |  7 +++++--
 arch/arm/mach-at91/board-cpu9krea.c     |  8 +++++++-
 arch/arm/mach-at91/board-cpuat91.c      |  5 +++++
 arch/arm/mach-at91/board-csb337.c       |  5 ++++-
 arch/arm/mach-at91/board-csb637.c       |  2 ++
 arch/arm/mach-at91/board-dt.c           |  1 +
 arch/arm/mach-at91/board-eb9200.c       |  9 +++++++--
 arch/arm/mach-at91/board-ecbat91.c      |  5 +++++
 arch/arm/mach-at91/board-eco920.c       |  5 +++++
 arch/arm/mach-at91/board-flexibity.c    |  5 ++++-
 arch/arm/mach-at91/board-foxg20.c       |  7 ++++++-
 arch/arm/mach-at91/board-gsia18s.c      |  5 ++++-
 arch/arm/mach-at91/board-kafa.c         |  2 ++
 arch/arm/mach-at91/board-kb9202.c       |  6 +++++-
 arch/arm/mach-at91/board-neocore926.c   |  5 ++++-
 arch/arm/mach-at91/board-pcontrol-g20.c |  2 ++
 arch/arm/mach-at91/board-picotux200.c   |  3 +++
 arch/arm/mach-at91/board-qil-a9260.c    | 12 +++++++-----
 arch/arm/mach-at91/board-rm9200dk.c     | 11 ++++++++---
 arch/arm/mach-at91/board-rm9200ek.c     |  3 +++
 arch/arm/mach-at91/board-rsi-ews.c      |  2 ++
 arch/arm/mach-at91/board-sam9-l9260.c   |  8 +++++---
 arch/arm/mach-at91/board-sam9260ek.c    | 12 +++++++-----
 arch/arm/mach-at91/board-sam9261ek.c    |  9 +++++++--
 arch/arm/mach-at91/board-sam9263ek.c    |  8 +++++---
 arch/arm/mach-at91/board-sam9g20ek.c    |  9 ++++++++-
 arch/arm/mach-at91/board-sam9m10g45ek.c |  4 ++++
 arch/arm/mach-at91/board-sam9rlek.c     |  7 ++++---
 arch/arm/mach-at91/board-snapper9260.c  |  6 ++++++
 arch/arm/mach-at91/board-stamp9g20.c    | 12 ++++++++++--
 arch/arm/mach-at91/board-usb-a926x.c    |  6 ++++--
 arch/arm/mach-at91/board-yl-9200.c      |  7 +++++--
 arch/arm/mach-at91/include/mach/board.h | 28 ++++++++++++++--------------
 include/linux/platform_data/macb.h      |  2 +-
 38 files changed, 192 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-at91/board-1arm.c b/arch/arm/mach-at91/board-1arm.c
index a60d98d7c3e2..2628384aaae1 100644
--- a/arch/arm/mach-at91/board-1arm.c
+++ b/arch/arm/mach-at91/board-1arm.c
@@ -70,6 +70,8 @@ static struct macb_platform_data __initdata onearm_eth_data = {
 
 static struct at91_usbh_data __initdata onearm_usbh_data = {
 	.ports		= 1,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 static struct at91_udc_data __initdata onearm_udc_data = {
diff --git a/arch/arm/mach-at91/board-afeb-9260v1.c b/arch/arm/mach-at91/board-afeb-9260v1.c
index 17fc77925707..3bb40694b02d 100644
--- a/arch/arm/mach-at91/board-afeb-9260v1.c
+++ b/arch/arm/mach-at91/board-afeb-9260v1.c
@@ -75,6 +75,8 @@ static void __init afeb9260_init_early(void)
  */
 static struct at91_usbh_data __initdata afeb9260_usbh_data = {
 	.ports		= 1,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /*
@@ -82,7 +84,7 @@ static struct at91_usbh_data __initdata afeb9260_usbh_data = {
  */
 static struct at91_udc_data __initdata afeb9260_udc_data = {
 	.vbus_pin	= AT91_PIN_PC5,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 
@@ -138,6 +140,7 @@ static struct atmel_nand_data __initdata afeb9260_nand_data = {
 	.bus_width_16	= 0,
 	.parts		= afeb9260_nand_partition,
 	.num_parts	= ARRAY_SIZE(afeb9260_nand_partition),
+	.det_pin	= -EINVAL,
 };
 
 
@@ -149,6 +152,7 @@ static struct at91_mmc_data __initdata afeb9260_mmc_data = {
 	.wp_pin 	= AT91_PIN_PC4,
 	.slot_b		= 1,
 	.wire4		= 1,
+	.vcc_pin	= -EINVAL,
 };
 
 
@@ -169,6 +173,8 @@ static struct i2c_board_info __initdata afeb9260_i2c_devices[] = {
 static struct at91_cf_data afeb9260_cf_data = {
 	.chipselect = 4,
 	.irq_pin    = AT91_PIN_PA6,
+	.det_pin	= -EINVAL,
+	.vcc_pin	= -EINVAL,
 	.rst_pin    = AT91_PIN_PA7,
 	.flags      = AT91_CF_TRUE_IDE,
 };
diff --git a/arch/arm/mach-at91/board-cam60.c b/arch/arm/mach-at91/board-cam60.c
index 18c90c454079..8510e9e54988 100644
--- a/arch/arm/mach-at91/board-cam60.c
+++ b/arch/arm/mach-at91/board-cam60.c
@@ -62,6 +62,8 @@ static void __init cam60_init_early(void)
  */
 static struct at91_usbh_data __initdata cam60_usbh_data = {
 	.ports		= 1,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 
@@ -135,7 +137,7 @@ static struct mtd_partition __initdata cam60_nand_partition[] = {
 static struct atmel_nand_data __initdata cam60_nand_data = {
 	.ale		= 21,
 	.cle		= 22,
-	// .det_pin	= ... not there
+	.det_pin	= -EINVAL,
 	.rdy_pin	= AT91_PIN_PA9,
 	.enable_pin	= AT91_PIN_PA7,
 	.parts		= cam60_nand_partition,
diff --git a/arch/arm/mach-at91/board-cap9adk.c b/arch/arm/mach-at91/board-cap9adk.c
index 14d1d0556638..ac3de4f7c31d 100644
--- a/arch/arm/mach-at91/board-cap9adk.c
+++ b/arch/arm/mach-at91/board-cap9adk.c
@@ -70,6 +70,8 @@ static void __init cap9adk_init_early(void)
  */
 static struct at91_usbh_data __initdata cap9adk_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /*
@@ -144,9 +146,9 @@ static struct spi_board_info cap9adk_spi_devices[] = {
  */
 static struct at91_mmc_data __initdata cap9adk_mmc_data = {
 	.wire4		= 1,
-//	.det_pin	= ... not connected
-//	.wp_pin		= ... not connected
-//	.vcc_pin	= ... not connected
+	.det_pin	= -EINVAL,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 
 
@@ -154,6 +156,7 @@ static struct at91_mmc_data __initdata cap9adk_mmc_data = {
  * MACB Ethernet device
  */
 static struct macb_platform_data __initdata cap9adk_macb_data = {
+	.phy_irq_pin	= -EINVAL,
 	.is_rmii	= 1,
 };
 
@@ -172,8 +175,8 @@ static struct mtd_partition __initdata cap9adk_nand_partitions[] = {
 static struct atmel_nand_data __initdata cap9adk_nand_data = {
 	.ale		= 21,
 	.cle		= 22,
-//	.det_pin	= ... not connected
-//	.rdy_pin	= ... not connected
+	.det_pin	= -EINVAL,
+	.rdy_pin	= -EINVAL,
 	.enable_pin	= AT91_PIN_PD15,
 	.parts		= cap9adk_nand_partitions,
 	.num_parts	= ARRAY_SIZE(cap9adk_nand_partitions),
@@ -351,7 +354,7 @@ static struct atmel_lcdfb_info __initdata cap9adk_lcdc_data;
  * AC97
  */
 static struct ac97c_platform_data cap9adk_ac97_data = {
-//	.reset_pin	= ... not connected
+	.reset_pin	= -EINVAL,
 };
 
 
diff --git a/arch/arm/mach-at91/board-carmeva.c b/arch/arm/mach-at91/board-carmeva.c
index 529b356cdb7d..59d9cf997537 100644
--- a/arch/arm/mach-at91/board-carmeva.c
+++ b/arch/arm/mach-at91/board-carmeva.c
@@ -64,6 +64,8 @@ static struct macb_platform_data __initdata carmeva_eth_data = {
 
 static struct at91_usbh_data __initdata carmeva_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 static struct at91_udc_data __initdata carmeva_udc_data = {
@@ -75,8 +77,8 @@ static struct at91_udc_data __initdata carmeva_udc_data = {
 // static struct at91_cf_data __initdata carmeva_cf_data = {
 //	.det_pin	= AT91_PIN_PB0,
 //	.rst_pin	= AT91_PIN_PC5,
-	// .irq_pin	= ... not connected
-	// .vcc_pin	= ... always powered
+	// .irq_pin	= -EINVAL,
+	// .vcc_pin	= -EINVAL,
 // };
 
 static struct at91_mmc_data __initdata carmeva_mmc_data = {
@@ -84,6 +86,7 @@ static struct at91_mmc_data __initdata carmeva_mmc_data = {
 	.wire4		= 1,
 	.det_pin	= AT91_PIN_PB10,
 	.wp_pin		= AT91_PIN_PC14,
+	.vcc_pin	= -EINVAL,
 };
 
 static struct spi_board_info carmeva_spi_devices[] = {
diff --git a/arch/arm/mach-at91/board-cpu9krea.c b/arch/arm/mach-at91/board-cpu9krea.c
index 5b55525e2966..9ab3d1ea326d 100644
--- a/arch/arm/mach-at91/board-cpu9krea.c
+++ b/arch/arm/mach-at91/board-cpu9krea.c
@@ -86,6 +86,8 @@ static void __init cpu9krea_init_early(void)
  */
 static struct at91_usbh_data __initdata cpu9krea_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /*
@@ -93,13 +95,14 @@ static struct at91_usbh_data __initdata cpu9krea_usbh_data = {
  */
 static struct at91_udc_data __initdata cpu9krea_udc_data = {
 	.vbus_pin	= AT91_PIN_PC8,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 /*
  * MACB Ethernet device
  */
 static struct macb_platform_data __initdata cpu9krea_macb_data = {
+	.phy_irq_pin	= -EINVAL,
 	.is_rmii	= 1,
 };
 
@@ -112,6 +115,7 @@ static struct atmel_nand_data __initdata cpu9krea_nand_data = {
 	.rdy_pin	= AT91_PIN_PC13,
 	.enable_pin	= AT91_PIN_PC14,
 	.bus_width_16	= 0,
+	.det_pin	= -EINVAL,
 };
 
 #ifdef CONFIG_MACH_CPU9260
@@ -337,6 +341,8 @@ static struct at91_mmc_data __initdata cpu9krea_mmc_data = {
 	.slot_b		= 0,
 	.wire4		= 1,
 	.det_pin	= AT91_PIN_PA29,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 
 static void __init cpu9krea_board_init(void)
diff --git a/arch/arm/mach-at91/board-cpuat91.c b/arch/arm/mach-at91/board-cpuat91.c
index 7a4c82e8da51..368e1427ad99 100644
--- a/arch/arm/mach-at91/board-cpuat91.c
+++ b/arch/arm/mach-at91/board-cpuat91.c
@@ -83,11 +83,14 @@ static void __init cpuat91_init_early(void)
 }
 
 static struct macb_platform_data __initdata cpuat91_eth_data = {
+	.phy_irq_pin	= -EINVAL,
 	.is_rmii	= 1,
 };
 
 static struct at91_usbh_data __initdata cpuat91_usbh_data = {
 	.ports		= 1,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 static struct at91_udc_data __initdata cpuat91_udc_data = {
@@ -98,6 +101,8 @@ static struct at91_udc_data __initdata cpuat91_udc_data = {
 static struct at91_mmc_data __initdata cpuat91_mmc_data = {
 	.det_pin	= AT91_PIN_PC2,
 	.wire4		= 1,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 
 static struct physmap_flash_data cpuat91_flash_data = {
diff --git a/arch/arm/mach-at91/board-csb337.c b/arch/arm/mach-at91/board-csb337.c
index b004b20b8e42..1a1547b1ce4e 100644
--- a/arch/arm/mach-at91/board-csb337.c
+++ b/arch/arm/mach-at91/board-csb337.c
@@ -65,11 +65,13 @@ static struct macb_platform_data __initdata csb337_eth_data = {
 
 static struct at91_usbh_data __initdata csb337_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 static struct at91_udc_data __initdata csb337_udc_data = {
-	// this has no VBUS sensing pin
 	.pullup_pin	= AT91_PIN_PA24,
+	.vbus_pin	= -EINVAL,
 };
 
 static struct i2c_board_info __initdata csb337_i2c_devices[] = {
@@ -98,6 +100,7 @@ static struct at91_mmc_data __initdata csb337_mmc_data = {
 	.slot_b		= 0,
 	.wire4		= 1,
 	.wp_pin		= AT91_PIN_PD6,
+	.vcc_pin	= -EINVAL,
 };
 
 static struct spi_board_info csb337_spi_devices[] = {
diff --git a/arch/arm/mach-at91/board-csb637.c b/arch/arm/mach-at91/board-csb637.c
index e966de5219c7..f650bf39455d 100644
--- a/arch/arm/mach-at91/board-csb637.c
+++ b/arch/arm/mach-at91/board-csb637.c
@@ -59,6 +59,8 @@ static struct macb_platform_data __initdata csb637_eth_data = {
 
 static struct at91_usbh_data __initdata csb637_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 static struct at91_udc_data __initdata csb637_udc_data = {
diff --git a/arch/arm/mach-at91/board-dt.c b/arch/arm/mach-at91/board-dt.c
index 41d84d9a2316..bb6b434ec0c1 100644
--- a/arch/arm/mach-at91/board-dt.c
+++ b/arch/arm/mach-at91/board-dt.c
@@ -50,6 +50,7 @@ static void __init ek_init_early(void)
 static struct atmel_nand_data __initdata ek_nand_data = {
 	.ale		= 21,
 	.cle		= 22,
+	.det_pin	= -EINVAL,
 	.rdy_pin	= AT91_PIN_PC8,
 	.enable_pin	= AT91_PIN_PC14,
 };
diff --git a/arch/arm/mach-at91/board-eb9200.c b/arch/arm/mach-at91/board-eb9200.c
index 3788fa527121..d302ca3eeb64 100644
--- a/arch/arm/mach-at91/board-eb9200.c
+++ b/arch/arm/mach-at91/board-eb9200.c
@@ -67,6 +67,8 @@ static struct macb_platform_data __initdata eb9200_eth_data = {
 
 static struct at91_usbh_data __initdata eb9200_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 static struct at91_udc_data __initdata eb9200_udc_data = {
@@ -75,15 +77,18 @@ static struct at91_udc_data __initdata eb9200_udc_data = {
 };
 
 static struct at91_cf_data __initdata eb9200_cf_data = {
+	.irq_pin	= -EINVAL,
 	.det_pin	= AT91_PIN_PB0,
+	.vcc_pin	= -EINVAL,
 	.rst_pin	= AT91_PIN_PC5,
-	// .irq_pin	= ... not connected
-	// .vcc_pin	= ... always powered
 };
 
 static struct at91_mmc_data __initdata eb9200_mmc_data = {
 	.slot_b		= 0,
 	.wire4		= 1,
+	.det_pin	= -EINVAL,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 
 static struct i2c_board_info __initdata eb9200_i2c_devices[] = {
diff --git a/arch/arm/mach-at91/board-ecbat91.c b/arch/arm/mach-at91/board-ecbat91.c
index af7622eae1a9..69966ce4d776 100644
--- a/arch/arm/mach-at91/board-ecbat91.c
+++ b/arch/arm/mach-at91/board-ecbat91.c
@@ -71,11 +71,16 @@ static struct macb_platform_data __initdata ecb_at91eth_data = {
 
 static struct at91_usbh_data __initdata ecb_at91usbh_data = {
 	.ports		= 1,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 static struct at91_mmc_data __initdata ecb_at91mmc_data = {
 	.slot_b		= 0,
 	.wire4		= 1,
+	.det_pin	= -EINVAL,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 
 
diff --git a/arch/arm/mach-at91/board-eco920.c b/arch/arm/mach-at91/board-eco920.c
index 8e75867d1d18..07ef35b0ec2c 100644
--- a/arch/arm/mach-at91/board-eco920.c
+++ b/arch/arm/mach-at91/board-eco920.c
@@ -54,6 +54,8 @@ static struct macb_platform_data __initdata eco920_eth_data = {
 
 static struct at91_usbh_data __initdata eco920_usbh_data = {
 	.ports		= 1,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 static struct at91_udc_data __initdata eco920_udc_data = {
@@ -64,6 +66,9 @@ static struct at91_udc_data __initdata eco920_udc_data = {
 static struct at91_mmc_data __initdata eco920_mmc_data = {
 	.slot_b		= 0,
 	.wire4		= 0,
+	.det_pin	= -EINVAL,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 
 static struct physmap_flash_data eco920_flash_data = {
diff --git a/arch/arm/mach-at91/board-flexibity.c b/arch/arm/mach-at91/board-flexibity.c
index 4c3f65d9c59b..eec02cd57ced 100644
--- a/arch/arm/mach-at91/board-flexibity.c
+++ b/arch/arm/mach-at91/board-flexibity.c
@@ -52,12 +52,14 @@ static void __init flexibity_init_early(void)
 /* USB Host port */
 static struct at91_usbh_data __initdata flexibity_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /* USB Device port */
 static struct at91_udc_data __initdata flexibity_udc_data = {
 	.vbus_pin	= AT91_PIN_PC5,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 /* SPI devices */
@@ -76,6 +78,7 @@ static struct at91_mmc_data __initdata flexibity_mmc_data = {
 	.wire4		= 1,
 	.det_pin	= AT91_PIN_PC9,
 	.wp_pin		= AT91_PIN_PC4,
+	.vcc_pin	= -EINVAL,
 };
 
 /* LEDs */
diff --git a/arch/arm/mach-at91/board-foxg20.c b/arch/arm/mach-at91/board-foxg20.c
index de8e09642f4e..caf017f0f4ee 100644
--- a/arch/arm/mach-at91/board-foxg20.c
+++ b/arch/arm/mach-at91/board-foxg20.c
@@ -106,6 +106,8 @@ static void __init foxg20_init_early(void)
  */
 static struct at91_usbh_data __initdata foxg20_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /*
@@ -113,7 +115,7 @@ static struct at91_usbh_data __initdata foxg20_usbh_data = {
  */
 static struct at91_udc_data __initdata foxg20_udc_data = {
 	.vbus_pin	= AT91_PIN_PC6,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 
@@ -147,6 +149,9 @@ static struct macb_platform_data __initdata foxg20_macb_data = {
 static struct at91_mmc_data __initdata foxg20_mmc_data = {
 	.slot_b		= 1,
 	.wire4		= 1,
+	.det_pin	= -EINVAL,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 
 
diff --git a/arch/arm/mach-at91/board-gsia18s.c b/arch/arm/mach-at91/board-gsia18s.c
index 51c82f151119..230e71969fb7 100644
--- a/arch/arm/mach-at91/board-gsia18s.c
+++ b/arch/arm/mach-at91/board-gsia18s.c
@@ -80,6 +80,8 @@ static void __init gsia18s_init_early(void)
  */
 static struct at91_usbh_data __initdata usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /*
@@ -87,7 +89,7 @@ static struct at91_usbh_data __initdata usbh_data = {
  */
 static struct at91_udc_data __initdata udc_data = {
 	.vbus_pin	= AT91_PIN_PA22,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 /*
@@ -530,6 +532,7 @@ static struct i2c_board_info __initdata gsia18s_i2c_devices[] = {
 static struct at91_cf_data __initdata gsia18s_cf1_data = {
 	.irq_pin	= AT91_PIN_PA27,
 	.det_pin	= AT91_PIN_PB30,
+	.vcc_pin	= -EINVAL,
 	.rst_pin	= AT91_PIN_PB31,
 	.chipselect	= 5,
 	.flags		= AT91_CF_TRUE_IDE,
diff --git a/arch/arm/mach-at91/board-kafa.c b/arch/arm/mach-at91/board-kafa.c
index 9628a3defcf4..efde1b2327c8 100644
--- a/arch/arm/mach-at91/board-kafa.c
+++ b/arch/arm/mach-at91/board-kafa.c
@@ -68,6 +68,8 @@ static struct macb_platform_data __initdata kafa_eth_data = {
 
 static struct at91_usbh_data __initdata kafa_usbh_data = {
 	.ports		= 1,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 static struct at91_udc_data __initdata kafa_udc_data = {
diff --git a/arch/arm/mach-at91/board-kb9202.c b/arch/arm/mach-at91/board-kb9202.c
index 5ba5244cb632..d75a4a2ad9c2 100644
--- a/arch/arm/mach-at91/board-kb9202.c
+++ b/arch/arm/mach-at91/board-kb9202.c
@@ -76,6 +76,8 @@ static struct macb_platform_data __initdata kb9202_eth_data = {
 
 static struct at91_usbh_data __initdata kb9202_usbh_data = {
 	.ports		= 1,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 static struct at91_udc_data __initdata kb9202_udc_data = {
@@ -87,6 +89,8 @@ static struct at91_mmc_data __initdata kb9202_mmc_data = {
 	.det_pin	= AT91_PIN_PB2,
 	.slot_b		= 0,
 	.wire4		= 1,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 
 static struct mtd_partition __initdata kb9202_nand_partition[] = {
@@ -100,7 +104,7 @@ static struct mtd_partition __initdata kb9202_nand_partition[] = {
 static struct atmel_nand_data __initdata kb9202_nand_data = {
 	.ale		= 22,
 	.cle		= 21,
-	// .det_pin	= ... not there
+	.det_pin	= -EINVAL,
 	.rdy_pin	= AT91_PIN_PC29,
 	.enable_pin	= AT91_PIN_PC28,
 	.parts		= kb9202_nand_partition,
diff --git a/arch/arm/mach-at91/board-neocore926.c b/arch/arm/mach-at91/board-neocore926.c
index 7d24d84f5d50..3f8617c0e04e 100644
--- a/arch/arm/mach-at91/board-neocore926.c
+++ b/arch/arm/mach-at91/board-neocore926.c
@@ -72,6 +72,7 @@ static void __init neocore926_init_early(void)
 static struct at91_usbh_data __initdata neocore926_usbh_data = {
 	.ports		= 2,
 	.vbus_pin	= { AT91_PIN_PA24, AT91_PIN_PA21 },
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /*
@@ -79,7 +80,7 @@ static struct at91_usbh_data __initdata neocore926_usbh_data = {
  */
 static struct at91_udc_data __initdata neocore926_udc_data = {
 	.vbus_pin	= AT91_PIN_PA25,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 
@@ -149,6 +150,7 @@ static struct at91_mmc_data __initdata neocore926_mmc_data = {
 	.wire4		= 1,
 	.det_pin	= AT91_PIN_PE18,
 	.wp_pin		= AT91_PIN_PE19,
+	.vcc_pin	= -EINVAL,
 };
 
 
@@ -190,6 +192,7 @@ static struct atmel_nand_data __initdata neocore926_nand_data = {
 	.enable_pin		= AT91_PIN_PD15,
 	.parts			= neocore926_nand_partition,
 	.num_parts		= ARRAY_SIZE(neocore926_nand_partition),
+	.det_pin		= -EINVAL,
 };
 
 static struct sam9_smc_config __initdata neocore926_nand_smc_config = {
diff --git a/arch/arm/mach-at91/board-pcontrol-g20.c b/arch/arm/mach-at91/board-pcontrol-g20.c
index 9a6e02163950..b4a12fc184c8 100644
--- a/arch/arm/mach-at91/board-pcontrol-g20.c
+++ b/arch/arm/mach-at91/board-pcontrol-g20.c
@@ -107,6 +107,8 @@ static void __init add_device_pcontrol(void)
  */
 static struct at91_usbh_data __initdata usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 
diff --git a/arch/arm/mach-at91/board-picotux200.c b/arch/arm/mach-at91/board-picotux200.c
index dc18759a24b4..ab024fa11d5c 100644
--- a/arch/arm/mach-at91/board-picotux200.c
+++ b/arch/arm/mach-at91/board-picotux200.c
@@ -67,6 +67,8 @@ static struct macb_platform_data __initdata picotux200_eth_data = {
 
 static struct at91_usbh_data __initdata picotux200_usbh_data = {
 	.ports		= 1,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 static struct at91_mmc_data __initdata picotux200_mmc_data = {
@@ -74,6 +76,7 @@ static struct at91_mmc_data __initdata picotux200_mmc_data = {
 	.slot_b		= 0,
 	.wire4		= 1,
 	.wp_pin		= AT91_PIN_PA17,
+	.vcc_pin	= -EINVAL,
 };
 
 #define PICOTUX200_FLASH_BASE	AT91_CHIPSELECT_0
diff --git a/arch/arm/mach-at91/board-qil-a9260.c b/arch/arm/mach-at91/board-qil-a9260.c
index 04be42798128..e029d220cb84 100644
--- a/arch/arm/mach-at91/board-qil-a9260.c
+++ b/arch/arm/mach-at91/board-qil-a9260.c
@@ -77,6 +77,8 @@ static void __init ek_init_early(void)
  */
 static struct at91_usbh_data __initdata ek_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /*
@@ -84,7 +86,7 @@ static struct at91_usbh_data __initdata ek_usbh_data = {
  */
 static struct at91_udc_data __initdata ek_udc_data = {
 	.vbus_pin	= AT91_PIN_PC5,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 /*
@@ -133,7 +135,7 @@ static struct mtd_partition __initdata ek_nand_partition[] = {
 static struct atmel_nand_data __initdata ek_nand_data = {
 	.ale		= 21,
 	.cle		= 22,
-//	.det_pin	= ... not connected
+	.det_pin	= -EINVAL,
 	.rdy_pin	= AT91_PIN_PC13,
 	.enable_pin	= AT91_PIN_PC14,
 	.parts		= ek_nand_partition,
@@ -172,9 +174,9 @@ static void __init ek_add_device_nand(void)
 static struct at91_mmc_data __initdata ek_mmc_data = {
 	.slot_b		= 0,
 	.wire4		= 1,
-//	.det_pin	= ... not connected
-//	.wp_pin		= ... not connected
-//	.vcc_pin	= ... not connected
+	.det_pin	= -EINVAL,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 
 /*
diff --git a/arch/arm/mach-at91/board-rm9200dk.c b/arch/arm/mach-at91/board-rm9200dk.c
index 022d0cebda9d..782f37946af5 100644
--- a/arch/arm/mach-at91/board-rm9200dk.c
+++ b/arch/arm/mach-at91/board-rm9200dk.c
@@ -72,6 +72,8 @@ static struct macb_platform_data __initdata dk_eth_data = {
 
 static struct at91_usbh_data __initdata dk_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 static struct at91_udc_data __initdata dk_udc_data = {
@@ -80,16 +82,19 @@ static struct at91_udc_data __initdata dk_udc_data = {
 };
 
 static struct at91_cf_data __initdata dk_cf_data = {
+	.irq_pin	= -EINVAL,
 	.det_pin	= AT91_PIN_PB0,
+	.vcc_pin	= -EINVAL,
 	.rst_pin	= AT91_PIN_PC5,
-	// .irq_pin	= ... not connected
-	// .vcc_pin	= ... always powered
 };
 
 #ifndef CONFIG_MTD_AT91_DATAFLASH_CARD
 static struct at91_mmc_data __initdata dk_mmc_data = {
 	.slot_b		= 0,
 	.wire4		= 1,
+	.det_pin	= -EINVAL,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 #endif
 
@@ -143,7 +148,7 @@ static struct atmel_nand_data __initdata dk_nand_data = {
 	.cle		= 21,
 	.det_pin	= AT91_PIN_PB1,
 	.rdy_pin	= AT91_PIN_PC2,
-	// .enable_pin	= ... not there
+	.enable_pin	= -EINVAL,
 	.parts		= dk_nand_partition,
 	.num_parts	= ARRAY_SIZE(dk_nand_partition),
 };
diff --git a/arch/arm/mach-at91/board-rm9200ek.c b/arch/arm/mach-at91/board-rm9200ek.c
index ed275861adef..ef7c12a92246 100644
--- a/arch/arm/mach-at91/board-rm9200ek.c
+++ b/arch/arm/mach-at91/board-rm9200ek.c
@@ -72,6 +72,8 @@ static struct macb_platform_data __initdata ek_eth_data = {
 
 static struct at91_usbh_data __initdata ek_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 static struct at91_udc_data __initdata ek_udc_data = {
@@ -85,6 +87,7 @@ static struct at91_mmc_data __initdata ek_mmc_data = {
 	.slot_b		= 0,
 	.wire4		= 1,
 	.wp_pin		= AT91_PIN_PA17,
+	.vcc_pin	= -EINVAL,
 };
 #endif
 
diff --git a/arch/arm/mach-at91/board-rsi-ews.c b/arch/arm/mach-at91/board-rsi-ews.c
index ed3b21f77674..af0750fafa29 100644
--- a/arch/arm/mach-at91/board-rsi-ews.c
+++ b/arch/arm/mach-at91/board-rsi-ews.c
@@ -70,6 +70,8 @@ static struct macb_platform_data rsi_ews_eth_data __initdata = {
  */
 static struct at91_usbh_data rsi_ews_usbh_data __initdata = {
 	.ports		= 1,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /*
diff --git a/arch/arm/mach-at91/board-sam9-l9260.c b/arch/arm/mach-at91/board-sam9-l9260.c
index 1e1834b2b4c3..84bce587735f 100644
--- a/arch/arm/mach-at91/board-sam9-l9260.c
+++ b/arch/arm/mach-at91/board-sam9-l9260.c
@@ -72,6 +72,8 @@ static void __init ek_init_early(void)
  */
 static struct at91_usbh_data __initdata ek_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /*
@@ -79,7 +81,7 @@ static struct at91_usbh_data __initdata ek_usbh_data = {
  */
 static struct at91_udc_data __initdata ek_udc_data = {
 	.vbus_pin	= AT91_PIN_PC5,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 
@@ -134,7 +136,7 @@ static struct mtd_partition __initdata ek_nand_partition[] = {
 static struct atmel_nand_data __initdata ek_nand_data = {
 	.ale		= 21,
 	.cle		= 22,
-//	.det_pin	= ... not connected
+	.det_pin	= -EINVAL,
 	.rdy_pin	= AT91_PIN_PC13,
 	.enable_pin	= AT91_PIN_PC14,
 	.parts		= ek_nand_partition,
@@ -176,7 +178,7 @@ static struct at91_mmc_data __initdata ek_mmc_data = {
 	.wire4		= 1,
 	.det_pin	= AT91_PIN_PC8,
 	.wp_pin		= AT91_PIN_PC4,
-//	.vcc_pin	= ... not connected
+	.vcc_pin	= -EINVAL,
 };
 
 static void __init ek_board_init(void)
diff --git a/arch/arm/mach-at91/board-sam9260ek.c b/arch/arm/mach-at91/board-sam9260ek.c
index e1b250ed0ca8..be8233bcabdc 100644
--- a/arch/arm/mach-at91/board-sam9260ek.c
+++ b/arch/arm/mach-at91/board-sam9260ek.c
@@ -75,6 +75,8 @@ static void __init ek_init_early(void)
  */
 static struct at91_usbh_data __initdata ek_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /*
@@ -82,7 +84,7 @@ static struct at91_usbh_data __initdata ek_usbh_data = {
  */
 static struct at91_udc_data __initdata ek_udc_data = {
 	.vbus_pin	= AT91_PIN_PC5,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 
@@ -176,7 +178,7 @@ static struct mtd_partition __initdata ek_nand_partition[] = {
 static struct atmel_nand_data __initdata ek_nand_data = {
 	.ale		= 21,
 	.cle		= 22,
-//	.det_pin	= ... not connected
+	.det_pin	= -EINVAL,
 	.rdy_pin	= AT91_PIN_PC13,
 	.enable_pin	= AT91_PIN_PC14,
 	.parts		= ek_nand_partition,
@@ -223,9 +225,9 @@ static void __init ek_add_device_nand(void)
 static struct at91_mmc_data __initdata ek_mmc_data = {
 	.slot_b		= 1,
 	.wire4		= 1,
-//	.det_pin	= ... not connected
-//	.wp_pin		= ... not connected
-//	.vcc_pin	= ... not connected
+	.det_pin	= -EINVAL,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 
 
diff --git a/arch/arm/mach-at91/board-sam9261ek.c b/arch/arm/mach-at91/board-sam9261ek.c
index a879b3398f38..40895072a1a7 100644
--- a/arch/arm/mach-at91/board-sam9261ek.c
+++ b/arch/arm/mach-at91/board-sam9261ek.c
@@ -151,6 +151,8 @@ static void __init ek_add_device_dm9000(void) {}
  */
 static struct at91_usbh_data __initdata ek_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 
@@ -159,7 +161,7 @@ static struct at91_usbh_data __initdata ek_usbh_data = {
  */
 static struct at91_udc_data __initdata ek_udc_data = {
 	.vbus_pin	= AT91_PIN_PB29,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 
@@ -182,7 +184,7 @@ static struct mtd_partition __initdata ek_nand_partition[] = {
 static struct atmel_nand_data __initdata ek_nand_data = {
 	.ale		= 22,
 	.cle		= 21,
-//	.det_pin	= ... not connected
+	.det_pin	= -EINVAL,
 	.rdy_pin	= AT91_PIN_PC15,
 	.enable_pin	= AT91_PIN_PC14,
 	.parts		= ek_nand_partition,
@@ -345,6 +347,9 @@ static struct spi_board_info ek_spi_devices[] = {
  */
 static struct at91_mmc_data __initdata ek_mmc_data = {
 	.wire4		= 1,
+	.det_pin	= -EINVAL,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 
 #endif /* CONFIG_SPI_ATMEL_* */
diff --git a/arch/arm/mach-at91/board-sam9263ek.c b/arch/arm/mach-at91/board-sam9263ek.c
index 3ce0b17bd9ed..29f66052fe63 100644
--- a/arch/arm/mach-at91/board-sam9263ek.c
+++ b/arch/arm/mach-at91/board-sam9263ek.c
@@ -74,6 +74,7 @@ static void __init ek_init_early(void)
 static struct at91_usbh_data __initdata ek_usbh_data = {
 	.ports		= 2,
 	.vbus_pin	= { AT91_PIN_PA24, AT91_PIN_PA21 },
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /*
@@ -81,7 +82,7 @@ static struct at91_usbh_data __initdata ek_usbh_data = {
  */
 static struct at91_udc_data __initdata ek_udc_data = {
 	.vbus_pin	= AT91_PIN_PA25,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 
@@ -151,7 +152,7 @@ static struct at91_mmc_data __initdata ek_mmc_data = {
 	.wire4		= 1,
 	.det_pin	= AT91_PIN_PE18,
 	.wp_pin		= AT91_PIN_PE19,
-//	.vcc_pin	= ... not connected
+	.vcc_pin	= -EINVAL,
 };
 
 
@@ -183,7 +184,7 @@ static struct mtd_partition __initdata ek_nand_partition[] = {
 static struct atmel_nand_data __initdata ek_nand_data = {
 	.ale		= 21,
 	.cle		= 22,
-//	.det_pin	= ... not connected
+	.det_pin	= -EINVAL,
 	.rdy_pin	= AT91_PIN_PA22,
 	.enable_pin	= AT91_PIN_PD15,
 	.parts		= ek_nand_partition,
@@ -353,6 +354,7 @@ static void __init ek_add_device_buttons(void) {}
  * reset_pin is not connected: NRST
  */
 static struct ac97c_platform_data ek_ac97_data = {
+	.reset_pin	= -EINVAL,
 };
 
 
diff --git a/arch/arm/mach-at91/board-sam9g20ek.c b/arch/arm/mach-at91/board-sam9g20ek.c
index a67b269a2fce..843d6286c6f4 100644
--- a/arch/arm/mach-at91/board-sam9g20ek.c
+++ b/arch/arm/mach-at91/board-sam9g20ek.c
@@ -86,6 +86,8 @@ static void __init ek_init_early(void)
  */
 static struct at91_usbh_data __initdata ek_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /*
@@ -93,7 +95,7 @@ static struct at91_usbh_data __initdata ek_usbh_data = {
  */
 static struct at91_udc_data __initdata ek_udc_data = {
 	.vbus_pin	= AT91_PIN_PC5,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 
@@ -163,6 +165,7 @@ static struct atmel_nand_data __initdata ek_nand_data = {
 	.cle		= 22,
 	.rdy_pin	= AT91_PIN_PC13,
 	.enable_pin	= AT91_PIN_PC14,
+	.det_pin	= -EINVAL,
 	.parts		= ek_nand_partition,
 	.num_parts	= ARRAY_SIZE(ek_nand_partition),
 };
@@ -210,6 +213,7 @@ static struct mci_platform_data __initdata ek_mmc_data = {
 	.slot[1] = {
 		.bus_width	= 4,
 		.detect_pin	= AT91_PIN_PC9,
+		.wp_pin		= -EINVAL,
 	},
 
 };
@@ -218,6 +222,8 @@ static struct at91_mmc_data __initdata ek_mmc_data = {
 	.slot_b		= 1,	/* Only one slot so use slot B */
 	.wire4		= 1,
 	.det_pin	= AT91_PIN_PC9,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 #endif
 
@@ -227,6 +233,7 @@ static void __init ek_add_device_mmc(void)
 	if (ek_have_2mmc()) {
 		ek_mmc_data.slot[0].bus_width = 4;
 		ek_mmc_data.slot[0].detect_pin = AT91_PIN_PC2;
+		ek_mmc_data.slot[0].wp_pin = -1;
 	}
 	at91_add_device_mci(0, &ek_mmc_data);
 #else
diff --git a/arch/arm/mach-at91/board-sam9m10g45ek.c b/arch/arm/mach-at91/board-sam9m10g45ek.c
index 061465daa1f8..ea0d1b9c2b7b 100644
--- a/arch/arm/mach-at91/board-sam9m10g45ek.c
+++ b/arch/arm/mach-at91/board-sam9m10g45ek.c
@@ -69,6 +69,7 @@ static void __init ek_init_early(void)
 static struct at91_usbh_data __initdata ek_usbh_hs_data = {
 	.ports		= 2,
 	.vbus_pin	= {AT91_PIN_PD1, AT91_PIN_PD3},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 
@@ -100,6 +101,7 @@ static struct mci_platform_data __initdata mci0_data = {
 	.slot[0] = {
 		.bus_width	= 4,
 		.detect_pin	= AT91_PIN_PD10,
+		.wp_pin		= -EINVAL,
 	},
 };
 
@@ -143,6 +145,7 @@ static struct atmel_nand_data __initdata ek_nand_data = {
 	.cle		= 22,
 	.rdy_pin	= AT91_PIN_PC8,
 	.enable_pin	= AT91_PIN_PC14,
+	.det_pin	= -EINVAL,
 	.parts		= ek_nand_partition,
 	.num_parts	= ARRAY_SIZE(ek_nand_partition),
 };
@@ -330,6 +333,7 @@ static void __init ek_add_device_buttons(void) {}
  * reset_pin is not connected: NRST
  */
 static struct ac97c_platform_data ek_ac97_data = {
+	.reset_pin	= -EINVAL,
 };
 
 
diff --git a/arch/arm/mach-at91/board-sam9rlek.c b/arch/arm/mach-at91/board-sam9rlek.c
index c561edac25b9..c1366d0032bf 100644
--- a/arch/arm/mach-at91/board-sam9rlek.c
+++ b/arch/arm/mach-at91/board-sam9rlek.c
@@ -67,8 +67,8 @@ static struct usba_platform_data __initdata ek_usba_udc_data = {
 static struct at91_mmc_data __initdata ek_mmc_data = {
 	.wire4		= 1,
 	.det_pin	= AT91_PIN_PA15,
-//	.wp_pin		= ... not connected
-//	.vcc_pin	= ... not connected
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 
 
@@ -91,7 +91,7 @@ static struct mtd_partition __initdata ek_nand_partition[] = {
 static struct atmel_nand_data __initdata ek_nand_data = {
 	.ale		= 21,
 	.cle		= 22,
-//	.det_pin	= ... not connected
+	.det_pin	= -EINVAL,
 	.rdy_pin	= AT91_PIN_PD17,
 	.enable_pin	= AT91_PIN_PB6,
 	.parts		= ek_nand_partition,
@@ -204,6 +204,7 @@ static struct atmel_lcdfb_info __initdata ek_lcdc_data;
  * reset_pin is not connected: NRST
  */
 static struct ac97c_platform_data ek_ac97_data = {
+	.reset_pin	= -EINVAL,
 };
 
 
diff --git a/arch/arm/mach-at91/board-snapper9260.c b/arch/arm/mach-at91/board-snapper9260.c
index 73ed8c746085..4770db08e5a6 100644
--- a/arch/arm/mach-at91/board-snapper9260.c
+++ b/arch/arm/mach-at91/board-snapper9260.c
@@ -57,15 +57,19 @@ static void __init snapper9260_init_early(void)
 
 static struct at91_usbh_data __initdata snapper9260_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 static struct at91_udc_data __initdata snapper9260_udc_data = {
 	.vbus_pin		= SNAPPER9260_IO_EXP_GPIO(5),
 	.vbus_active_low	= 1,
 	.vbus_polled		= 1,
+	.pullup_pin		= -EINVAL,
 };
 
 static struct macb_platform_data snapper9260_macb_data = {
+	.phy_irq_pin	= -EINVAL,
 	.is_rmii	= 1,
 };
 
@@ -104,6 +108,8 @@ static struct atmel_nand_data __initdata snapper9260_nand_data = {
 	.parts		= snapper9260_nand_partitions,
 	.num_parts	= ARRAY_SIZE(snapper9260_nand_partitions),
 	.bus_width_16	= 0,
+	.enable_pin	= -EINVAL,
+	.det_pin	= -EINVAL,
 };
 
 static struct sam9_smc_config __initdata snapper9260_nand_smc_config = {
diff --git a/arch/arm/mach-at91/board-stamp9g20.c b/arch/arm/mach-at91/board-stamp9g20.c
index f4b9b6f2b594..e8d3d5b88244 100644
--- a/arch/arm/mach-at91/board-stamp9g20.c
+++ b/arch/arm/mach-at91/board-stamp9g20.c
@@ -85,6 +85,7 @@ static struct atmel_nand_data __initdata nand_data = {
 	.rdy_pin	= AT91_PIN_PC13,
 	.enable_pin	= AT91_PIN_PC14,
 	.bus_width_16	= 0,
+	.det_pin	= -EINVAL,
 };
 
 static struct sam9_smc_config __initdata nand_smc_config = {
@@ -122,12 +123,17 @@ static void __init add_device_nand(void)
 static struct mci_platform_data __initdata mmc_data = {
 	.slot[0] = {
 		.bus_width	= 4,
+		.detect_pin	= -1;
+		.wp_pin		= -1;
 	},
 };
 #else
 static struct at91_mmc_data __initdata mmc_data = {
 	.slot_b		= 0,
 	.wire4		= 1,
+	.det_pin	= -EINVAL,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 #endif
 
@@ -137,6 +143,8 @@ static struct at91_mmc_data __initdata mmc_data = {
  */
 static struct at91_usbh_data __initdata usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 
@@ -145,12 +153,12 @@ static struct at91_usbh_data __initdata usbh_data = {
  */
 static struct at91_udc_data __initdata portuxg20_udc_data = {
 	.vbus_pin	= AT91_PIN_PC7,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 static struct at91_udc_data __initdata stamp9g20evb_udc_data = {
 	.vbus_pin	= AT91_PIN_PA22,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 
diff --git a/arch/arm/mach-at91/board-usb-a926x.c b/arch/arm/mach-at91/board-usb-a926x.c
index 2ffb2c84c7ce..26c36fc2d1e5 100644
--- a/arch/arm/mach-at91/board-usb-a926x.c
+++ b/arch/arm/mach-at91/board-usb-a926x.c
@@ -66,6 +66,8 @@ static void __init ek_init_early(void)
  */
 static struct at91_usbh_data __initdata ek_usbh_data = {
 	.ports		= 2,
+	.vbus_pin	= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /*
@@ -73,7 +75,7 @@ static struct at91_usbh_data __initdata ek_usbh_data = {
  */
 static struct at91_udc_data __initdata ek_udc_data = {
 	.vbus_pin	= AT91_PIN_PB11,
-	.pullup_pin	= 0,		/* pull-up driven by UDC */
+	.pullup_pin	= -EINVAL,		/* pull-up driven by UDC */
 };
 
 static void __init ek_add_device_udc(void)
@@ -193,7 +195,7 @@ static struct mtd_partition __initdata ek_nand_partition[] = {
 static struct atmel_nand_data __initdata ek_nand_data = {
 	.ale		= 21,
 	.cle		= 22,
-//	.det_pin	= ... not connected
+	.det_pin	= -EINVAL,
 	.rdy_pin	= AT91_PIN_PA22,
 	.enable_pin	= AT91_PIN_PD15,
 	.parts		= ek_nand_partition,
diff --git a/arch/arm/mach-at91/board-yl-9200.c b/arch/arm/mach-at91/board-yl-9200.c
index 2c40a21b2794..bbd553e1cd93 100644
--- a/arch/arm/mach-at91/board-yl-9200.c
+++ b/arch/arm/mach-at91/board-yl-9200.c
@@ -120,6 +120,8 @@ static struct macb_platform_data __initdata yl9200_eth_data = {
  */
 static struct at91_usbh_data __initdata yl9200_usbh_data = {
 	.ports			= 1,	/* PQFP version of AT91RM9200 */
+	.vbus_pin		= {-EINVAL, -EINVAL},
+	.overcurrent_pin= {-EINVAL, -EINVAL},
 };
 
 /*
@@ -137,8 +139,9 @@ static struct at91_udc_data __initdata yl9200_udc_data = {
  */
 static struct at91_mmc_data __initdata yl9200_mmc_data = {
 	.det_pin	= AT91_PIN_PB9,
-	// .wp_pin	= ... not connected
 	.wire4		= 1,
+	.wp_pin		= -EINVAL,
+	.vcc_pin	= -EINVAL,
 };
 
 /*
@@ -175,7 +178,7 @@ static struct mtd_partition __initdata yl9200_nand_partition[] = {
 static struct atmel_nand_data __initdata yl9200_nand_data = {
 	.ale		= 6,
 	.cle		= 7,
-	// .det_pin	= ... not connected
+	.det_pin	= -EINVAL,
 	.rdy_pin	= AT91_PIN_PC14,	/* R/!B (Sheet10) */
 	.enable_pin	= AT91_PIN_PC15,	/* !CE  (Sheet10) */
 	.parts		= yl9200_nand_partition,
diff --git a/arch/arm/mach-at91/include/mach/board.h b/arch/arm/mach-at91/include/mach/board.h
index e209a2992245..d0b377b21bd7 100644
--- a/arch/arm/mach-at91/include/mach/board.h
+++ b/arch/arm/mach-at91/include/mach/board.h
@@ -44,10 +44,10 @@
 
  /* USB Device */
 struct at91_udc_data {
-	u8	vbus_pin;		/* high == host powering us */
+	int	vbus_pin;		/* high == host powering us */
 	u8	vbus_active_low;	/* vbus polarity */
 	u8	vbus_polled;		/* Use polling, not interrupt */
-	u8	pullup_pin;		/* active == D+ pulled up */
+	int	pullup_pin;		/* active == D+ pulled up */
 	u8	pullup_active_low;	/* true == pullup_pin is active low */
 };
 extern void __init at91_add_device_udc(struct at91_udc_data *data);
@@ -57,10 +57,10 @@ extern void __init at91_add_device_usba(struct usba_platform_data *data);
 
  /* Compact Flash */
 struct at91_cf_data {
-	u8	irq_pin;		/* I/O IRQ */
-	u8	det_pin;		/* Card detect */
-	u8	vcc_pin;		/* power switching */
-	u8	rst_pin;		/* card reset */
+	int	irq_pin;		/* I/O IRQ */
+	int	det_pin;		/* Card detect */
+	int	vcc_pin;		/* power switching */
+	int	rst_pin;		/* card reset */
 	u8	chipselect;		/* EBI Chip Select number */
 	u8	flags;
 #define AT91_CF_TRUE_IDE	0x01
@@ -71,11 +71,11 @@ extern void __init at91_add_device_cf(struct at91_cf_data *data);
  /* MMC / SD */
   /* at91_mci platform config */
 struct at91_mmc_data {
-	u8		det_pin;	/* card detect IRQ */
+	int		det_pin;	/* card detect IRQ */
 	unsigned	slot_b:1;	/* uses Slot B */
 	unsigned	wire4:1;	/* (SD) supports DAT0..DAT3 */
-	u8		wp_pin;		/* (SD) writeprotect detect */
-	u8		vcc_pin;	/* power switching (high == on) */
+	int		wp_pin;		/* (SD) writeprotect detect */
+	int		vcc_pin;	/* power switching (high == on) */
 };
 extern void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data);
 
@@ -87,10 +87,10 @@ extern void __init at91_add_device_eth(struct macb_platform_data *data);
  /* USB Host */
 struct at91_usbh_data {
 	u8		ports;		/* number of ports on root hub */
-	u8		vbus_pin[2];	/* port power-control pin */
+	int		vbus_pin[2];	/* port power-control pin */
 	u8              vbus_pin_inverted;
 	u8              overcurrent_supported;
-	u8              overcurrent_pin[2];
+	int             overcurrent_pin[2];
 	u8              overcurrent_status[2];
 	u8              overcurrent_changed[2];
 };
@@ -100,9 +100,9 @@ extern void __init at91_add_device_usbh_ehci(struct at91_usbh_data *data);
 
  /* NAND / SmartMedia */
 struct atmel_nand_data {
-	u8		enable_pin;	/* chip enable */
-	u8		det_pin;	/* card detect */
-	u8		rdy_pin;	/* ready/busy */
+	int		enable_pin;	/* chip enable */
+	int		det_pin;	/* card detect */
+	int		rdy_pin;	/* ready/busy */
 	u8              rdy_pin_active_low;     /* rdy_pin value is inverted */
 	u8		ale;		/* address line number connected to ALE */
 	u8		cle;		/* address line number connected to CLE */
diff --git a/include/linux/platform_data/macb.h b/include/linux/platform_data/macb.h
index e7c748fb6053..b081c7245ec8 100644
--- a/include/linux/platform_data/macb.h
+++ b/include/linux/platform_data/macb.h
@@ -10,7 +10,7 @@
 
 struct macb_platform_data {
 	u32		phy_mask;
-	u8		phy_irq_pin;	/* PHY IRQ */
+	int		phy_irq_pin;	/* PHY IRQ */
 	u8		is_rmii;	/* using RMII interface? */
 };
 
-- 
cgit v1.2.3


From 75957ba36c05b979701e9ec64b37819adc12f830 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:32:35 +0000
Subject: dql: Dynamic queue limits

Implementation of dynamic queue limits (dql).  This is a libary which
allows a queue limit to be dynamically managed.  The goal of dql is
to set the queue limit, number of objects to the queue, to be minimized
without allowing the queue to be starved.

dql would be used with a queue which has these properties:

1) Objects are queued up to some limit which can be expressed as a
   count of objects.
2) Periodically a completion process executes which retires consumed
   objects.
3) Starvation occurs when limit has been reached, all queued data has
   actually been consumed but completion processing has not yet run,
   so queuing new data is blocked.
4) Minimizing the amount of queued data is desirable.

A canonical example of such a queue would be a NIC HW transmit queue.

The queue limit is dynamic, it will increase or decrease over time
depending on the workload.  The queue limit is recalculated each time
completion processing is done.  Increases occur when the queue is
starved and can exponentially increase over successive intervals.
Decreases occur when more data is being maintained in the queue than
needed to prevent starvation.  The number of extra objects, or "slack",
is measured over successive intervals, and to avoid hysteresis the
limit is only reduced by the miminum slack seen over a configurable
time period.

dql API provides routines to manage the queue:
- dql_init is called to intialize the dql structure
- dql_reset is called to reset dynamic values
- dql_queued called when objects are being enqueued
- dql_avail returns availability in the queue
- dql_completed is called when objects have be consumed in the queue

Configuration consists of:
- max_limit, maximum limit
- min_limit, minimum limit
- slack_hold_time, time to measure instances of slack before reducing
  queue limit

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dynamic_queue_limits.h |  97 +++++++++++++++++++++++++
 lib/Kconfig                          |   3 +
 lib/Makefile                         |   2 +
 lib/dynamic_queue_limits.c           | 133 +++++++++++++++++++++++++++++++++++
 4 files changed, 235 insertions(+)
 create mode 100644 include/linux/dynamic_queue_limits.h
 create mode 100644 lib/dynamic_queue_limits.c

(limited to 'include/linux')

diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
new file mode 100644
index 000000000000..5621547d631b
--- /dev/null
+++ b/include/linux/dynamic_queue_limits.h
@@ -0,0 +1,97 @@
+/*
+ * Dynamic queue limits (dql) - Definitions
+ *
+ * Copyright (c) 2011, Tom Herbert <therbert@google.com>
+ *
+ * This header file contains the definitions for dynamic queue limits (dql).
+ * dql would be used in conjunction with a producer/consumer type queue
+ * (possibly a HW queue).  Such a queue would have these general properties:
+ *
+ *   1) Objects are queued up to some limit specified as number of objects.
+ *   2) Periodically a completion process executes which retires consumed
+ *      objects.
+ *   3) Starvation occurs when limit has been reached, all queued data has
+ *      actually been consumed, but completion processing has not yet run
+ *      so queuing new data is blocked.
+ *   4) Minimizing the amount of queued data is desirable.
+ *
+ * The goal of dql is to calculate the limit as the minimum number of objects
+ * needed to prevent starvation.
+ *
+ * The primary functions of dql are:
+ *    dql_queued - called when objects are enqueued to record number of objects
+ *    dql_avail - returns how many objects are available to be queued based
+ *      on the object limit and how many objects are already enqueued
+ *    dql_completed - called at completion time to indicate how many objects
+ *      were retired from the queue
+ *
+ * The dql implementation does not implement any locking for the dql data
+ * structures, the higher layer should provide this.  dql_queued should
+ * be serialized to prevent concurrent execution of the function; this
+ * is also true for  dql_completed.  However, dql_queued and dlq_completed  can
+ * be executed concurrently (i.e. they can be protected by different locks).
+ */
+
+#ifndef _LINUX_DQL_H
+#define _LINUX_DQL_H
+
+#ifdef __KERNEL__
+
+struct dql {
+	/* Fields accessed in enqueue path (dql_queued) */
+	unsigned int	num_queued;		/* Total ever queued */
+	unsigned int	adj_limit;		/* limit + num_completed */
+	unsigned int	last_obj_cnt;		/* Count at last queuing */
+
+	/* Fields accessed only by completion path (dql_completed) */
+
+	unsigned int	limit ____cacheline_aligned_in_smp; /* Current limit */
+	unsigned int	num_completed;		/* Total ever completed */
+
+	unsigned int	prev_ovlimit;		/* Previous over limit */
+	unsigned int	prev_num_queued;	/* Previous queue total */
+	unsigned int	prev_last_obj_cnt;	/* Previous queuing cnt */
+
+	unsigned int	lowest_slack;		/* Lowest slack found */
+	unsigned long	slack_start_time;	/* Time slacks seen */
+
+	/* Configuration */
+	unsigned int	max_limit;		/* Max limit */
+	unsigned int	min_limit;		/* Minimum limit */
+	unsigned int	slack_hold_time;	/* Time to measure slack */
+};
+
+/* Set some static maximums */
+#define DQL_MAX_OBJECT (UINT_MAX / 16)
+#define DQL_MAX_LIMIT ((UINT_MAX / 2) - DQL_MAX_OBJECT)
+
+/*
+ * Record number of objects queued. Assumes that caller has already checked
+ * availability in the queue with dql_avail.
+ */
+static inline void dql_queued(struct dql *dql, unsigned int count)
+{
+	BUG_ON(count > DQL_MAX_OBJECT);
+
+	dql->num_queued += count;
+	dql->last_obj_cnt = count;
+}
+
+/* Returns how many objects can be queued, < 0 indicates over limit. */
+static inline int dql_avail(const struct dql *dql)
+{
+	return dql->adj_limit - dql->num_queued;
+}
+
+/* Record number of completed objects and recalculate the limit. */
+void dql_completed(struct dql *dql, unsigned int count);
+
+/* Reset dql state */
+void dql_reset(struct dql *dql);
+
+/* Initialize dql state */
+int dql_init(struct dql *dql, unsigned hold_time);
+
+#endif /* _KERNEL_ */
+
+#endif /* _LINUX_DQL_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 32f3e5ae2be5..63b5782732ed 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -244,6 +244,9 @@ config CPU_RMAP
 	bool
 	depends on SMP
 
+config DQL
+	bool
+
 #
 # Netlink attribute parsing support is select'ed if needed
 #
diff --git a/lib/Makefile b/lib/Makefile
index a4da283f5dc0..ff00d4dcb7ed 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -115,6 +115,8 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
 
 obj-$(CONFIG_CORDIC) += cordic.o
 
+obj-$(CONFIG_DQL) += dynamic_queue_limits.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
new file mode 100644
index 000000000000..3d1bdcdd7db4
--- /dev/null
+++ b/lib/dynamic_queue_limits.c
@@ -0,0 +1,133 @@
+/*
+ * Dynamic byte queue limits.  See include/linux/dynamic_queue_limits.h
+ *
+ * Copyright (c) 2011, Tom Herbert <therbert@google.com>
+ */
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/dynamic_queue_limits.h>
+
+#define POSDIFF(A, B) ((A) > (B) ? (A) - (B) : 0)
+
+/* Records completed count and recalculates the queue limit */
+void dql_completed(struct dql *dql, unsigned int count)
+{
+	unsigned int inprogress, prev_inprogress, limit;
+	unsigned int ovlimit, all_prev_completed, completed;
+
+	/* Can't complete more than what's in queue */
+	BUG_ON(count > dql->num_queued - dql->num_completed);
+
+	completed = dql->num_completed + count;
+	limit = dql->limit;
+	ovlimit = POSDIFF(dql->num_queued - dql->num_completed, limit);
+	inprogress = dql->num_queued - completed;
+	prev_inprogress = dql->prev_num_queued - dql->num_completed;
+	all_prev_completed = POSDIFF(completed, dql->prev_num_queued);
+
+	if ((ovlimit && !inprogress) ||
+	    (dql->prev_ovlimit && all_prev_completed)) {
+		/*
+		 * Queue considered starved if:
+		 *   - The queue was over-limit in the last interval,
+		 *     and there is no more data in the queue.
+		 *  OR
+		 *   - The queue was over-limit in the previous interval and
+		 *     when enqueuing it was possible that all queued data
+		 *     had been consumed.  This covers the case when queue
+		 *     may have becomes starved between completion processing
+		 *     running and next time enqueue was scheduled.
+		 *
+		 *     When queue is starved increase the limit by the amount
+		 *     of bytes both sent and completed in the last interval,
+		 *     plus any previous over-limit.
+		 */
+		limit += POSDIFF(completed, dql->prev_num_queued) +
+		     dql->prev_ovlimit;
+		dql->slack_start_time = jiffies;
+		dql->lowest_slack = UINT_MAX;
+	} else if (inprogress && prev_inprogress && !all_prev_completed) {
+		/*
+		 * Queue was not starved, check if the limit can be decreased.
+		 * A decrease is only considered if the queue has been busy in
+		 * the whole interval (the check above).
+		 *
+		 * If there is slack, the amount of execess data queued above
+		 * the the amount needed to prevent starvation, the queue limit
+		 * can be decreased.  To avoid hysteresis we consider the
+		 * minimum amount of slack found over several iterations of the
+		 * completion routine.
+		 */
+		unsigned int slack, slack_last_objs;
+
+		/*
+		 * Slack is the maximum of
+		 *   - The queue limit plus previous over-limit minus twice
+		 *     the number of objects completed.  Note that two times
+		 *     number of completed bytes is a basis for an upper bound
+		 *     of the limit.
+		 *   - Portion of objects in the last queuing operation that
+		 *     was not part of non-zero previous over-limit.  That is
+		 *     "round down" by non-overlimit portion of the last
+		 *     queueing operation.
+		 */
+		slack = POSDIFF(limit + dql->prev_ovlimit,
+		    2 * (completed - dql->num_completed));
+		slack_last_objs = dql->prev_ovlimit ?
+		    POSDIFF(dql->prev_last_obj_cnt, dql->prev_ovlimit) : 0;
+
+		slack = max(slack, slack_last_objs);
+
+		if (slack < dql->lowest_slack)
+			dql->lowest_slack = slack;
+
+		if (time_after(jiffies,
+			       dql->slack_start_time + dql->slack_hold_time)) {
+			limit = POSDIFF(limit, dql->lowest_slack);
+			dql->slack_start_time = jiffies;
+			dql->lowest_slack = UINT_MAX;
+		}
+	}
+
+	/* Enforce bounds on limit */
+	limit = clamp(limit, dql->min_limit, dql->max_limit);
+
+	if (limit != dql->limit) {
+		dql->limit = limit;
+		ovlimit = 0;
+	}
+
+	dql->adj_limit = limit + completed;
+	dql->prev_ovlimit = ovlimit;
+	dql->prev_last_obj_cnt = dql->last_obj_cnt;
+	dql->num_completed = completed;
+	dql->prev_num_queued = dql->num_queued;
+}
+EXPORT_SYMBOL(dql_completed);
+
+void dql_reset(struct dql *dql)
+{
+	/* Reset all dynamic values */
+	dql->limit = 0;
+	dql->num_queued = 0;
+	dql->num_completed = 0;
+	dql->last_obj_cnt = 0;
+	dql->prev_num_queued = 0;
+	dql->prev_last_obj_cnt = 0;
+	dql->prev_ovlimit = 0;
+	dql->lowest_slack = UINT_MAX;
+	dql->slack_start_time = jiffies;
+}
+EXPORT_SYMBOL(dql_reset);
+
+int dql_init(struct dql *dql, unsigned hold_time)
+{
+	dql->max_limit = DQL_MAX_LIMIT;
+	dql->min_limit = 0;
+	dql->slack_hold_time = hold_time;
+	dql_reset(dql);
+	return 0;
+}
+EXPORT_SYMBOL(dql_init);
-- 
cgit v1.2.3


From 7346649826382b769cfadf4a2fe8a84d060c55e9 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:32:44 +0000
Subject: net: Add queue state xoff flag for stack

Create separate queue state flags so that either the stack or drivers
can turn on XOFF.  Added a set of functions used in the stack to determine
if a queue is really stopped (either by stack or driver)

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 41 ++++++++++++++++++++++++++++++-----------
 net/core/dev.c            |  4 ++--
 net/core/netpoll.c        |  4 ++--
 net/core/pktgen.c         |  2 +-
 net/sched/sch_generic.c   |  8 ++++----
 net/sched/sch_multiq.c    |  6 ++++--
 net/sched/sch_teql.c      |  6 +++---
 7 files changed, 46 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ac9a4b9344ca..d19f93265cac 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -517,11 +517,23 @@ static inline void napi_synchronize(const struct napi_struct *n)
 #endif
 
 enum netdev_queue_state_t {
-	__QUEUE_STATE_XOFF,
+	__QUEUE_STATE_DRV_XOFF,
+	__QUEUE_STATE_STACK_XOFF,
 	__QUEUE_STATE_FROZEN,
-#define QUEUE_STATE_XOFF_OR_FROZEN ((1 << __QUEUE_STATE_XOFF)		| \
-				    (1 << __QUEUE_STATE_FROZEN))
+#define QUEUE_STATE_ANY_XOFF ((1 << __QUEUE_STATE_DRV_XOFF)		| \
+			      (1 << __QUEUE_STATE_STACK_XOFF))
+#define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF		| \
+					(1 << __QUEUE_STATE_FROZEN))
 };
+/*
+ * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue.  The
+ * netif_tx_* functions below are used to manipulate this flag.  The
+ * __QUEUE_STATE_STACK_XOFF flag is used by the stack to stop the transmit
+ * queue independently.  The netif_xmit_*stopped functions below are called
+ * to check if the queue has been stopped by the driver or stack (either
+ * of the XOFF bits are set in the state).  Drivers should not need to call
+ * netif_xmit*stopped functions, they should only be using netif_tx_*.
+ */
 
 struct netdev_queue {
 /*
@@ -1718,7 +1730,7 @@ extern void __netif_schedule(struct Qdisc *q);
 
 static inline void netif_schedule_queue(struct netdev_queue *txq)
 {
-	if (!test_bit(__QUEUE_STATE_XOFF, &txq->state))
+	if (!(txq->state & QUEUE_STATE_ANY_XOFF))
 		__netif_schedule(txq->qdisc);
 }
 
@@ -1732,7 +1744,7 @@ static inline void netif_tx_schedule_all(struct net_device *dev)
 
 static inline void netif_tx_start_queue(struct netdev_queue *dev_queue)
 {
-	clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+	clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
 }
 
 /**
@@ -1764,7 +1776,7 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
 		return;
 	}
 #endif
-	if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state))
+	if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state))
 		__netif_schedule(dev_queue->qdisc);
 }
 
@@ -1796,7 +1808,7 @@ static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
 		pr_info("netif_stop_queue() cannot be called before register_netdev()\n");
 		return;
 	}
-	set_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+	set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
 }
 
 /**
@@ -1823,7 +1835,7 @@ static inline void netif_tx_stop_all_queues(struct net_device *dev)
 
 static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
 {
-	return test_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+	return test_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
 }
 
 /**
@@ -1837,9 +1849,16 @@ static inline int netif_queue_stopped(const struct net_device *dev)
 	return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0));
 }
 
-static inline int netif_tx_queue_frozen_or_stopped(const struct netdev_queue *dev_queue)
+static inline int netif_xmit_stopped(const struct netdev_queue *dev_queue)
 {
-	return dev_queue->state & QUEUE_STATE_XOFF_OR_FROZEN;
+	return dev_queue->state & QUEUE_STATE_ANY_XOFF;
+}
+
+static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue)
+{
+	return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN;
+}
+
 }
 
 /**
@@ -1926,7 +1945,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 	if (netpoll_trap())
 		return;
 #endif
-	if (test_and_clear_bit(__QUEUE_STATE_XOFF, &txq->state))
+	if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state))
 		__netif_schedule(txq->qdisc);
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index c7ef6c5d3782..cb8f753b4238 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2270,7 +2270,7 @@ gso:
 			return rc;
 		}
 		txq_trans_update(txq);
-		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
+		if (unlikely(netif_xmit_stopped(txq) && skb->next))
 			return NETDEV_TX_BUSY;
 	} while (skb->next);
 
@@ -2558,7 +2558,7 @@ int dev_queue_xmit(struct sk_buff *skb)
 
 			HARD_TX_LOCK(dev, txq, cpu);
 
-			if (!netif_tx_queue_stopped(txq)) {
+			if (!netif_xmit_stopped(txq)) {
 				__this_cpu_inc(xmit_recursion);
 				rc = dev_hard_start_xmit(skb, dev, txq);
 				__this_cpu_dec(xmit_recursion);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 1a7d8e2c9768..0d38808a2305 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -76,7 +76,7 @@ static void queue_process(struct work_struct *work)
 
 		local_irq_save(flags);
 		__netif_tx_lock(txq, smp_processor_id());
-		if (netif_tx_queue_frozen_or_stopped(txq) ||
+		if (netif_xmit_frozen_or_stopped(txq) ||
 		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
 			skb_queue_head(&npinfo->txq, skb);
 			__netif_tx_unlock(txq);
@@ -317,7 +317,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
-				if (!netif_tx_queue_stopped(txq)) {
+				if (!netif_xmit_stopped(txq)) {
 					status = ops->ndo_start_xmit(skb, dev);
 					if (status == NETDEV_TX_OK)
 						txq_trans_update(txq);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index aa53a35a631b..449fe0f068f8 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3342,7 +3342,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 	__netif_tx_lock_bh(txq);
 
-	if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) {
+	if (unlikely(netif_xmit_frozen_or_stopped(txq))) {
 		ret = NETDEV_TX_BUSY;
 		pkt_dev->last_ok = 0;
 		goto unlock;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 79ac1458c2ba..67fc573e013a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -60,7 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 
 		/* check the reason of requeuing without tx lock first */
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-		if (!netif_tx_queue_frozen_or_stopped(txq)) {
+		if (!netif_xmit_frozen_or_stopped(txq)) {
 			q->gso_skb = NULL;
 			q->q.qlen--;
 		} else
@@ -121,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 	spin_unlock(root_lock);
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_tx_queue_frozen_or_stopped(txq))
+	if (!netif_xmit_frozen_or_stopped(txq))
 		ret = dev_hard_start_xmit(skb, dev, txq);
 
 	HARD_TX_UNLOCK(dev, txq);
@@ -143,7 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		ret = dev_requeue_skb(skb, q);
 	}
 
-	if (ret && netif_tx_queue_frozen_or_stopped(txq))
+	if (ret && netif_xmit_frozen_or_stopped(txq))
 		ret = 0;
 
 	return ret;
@@ -242,7 +242,7 @@ static void dev_watchdog(unsigned long arg)
 				 * old device drivers set dev->trans_start
 				 */
 				trans_start = txq->trans_start ? : dev->trans_start;
-				if (netif_tx_queue_stopped(txq) &&
+				if (netif_xmit_stopped(txq) &&
 				    time_after(jiffies, (trans_start +
 							 dev->watchdog_timeo))) {
 					some_queue_timedout = 1;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index edc1950e0e77..49131d7a7446 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -107,7 +107,8 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
 		/* Check that target subqueue is available before
 		 * pulling an skb to avoid head-of-line blocking.
 		 */
-		if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
+		if (!netif_xmit_stopped(
+		    netdev_get_tx_queue(qdisc_dev(sch), q->curband))) {
 			qdisc = q->queues[q->curband];
 			skb = qdisc->dequeue(qdisc);
 			if (skb) {
@@ -138,7 +139,8 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch)
 		/* Check that target subqueue is available before
 		 * pulling an skb to avoid head-of-line blocking.
 		 */
-		if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) {
+		if (!netif_xmit_stopped(
+		    netdev_get_tx_queue(qdisc_dev(sch), curband))) {
 			qdisc = q->queues[curband];
 			skb = qdisc->ops->peek(qdisc);
 			if (skb)
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index a3b7120fcc74..283bfe3de59d 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -301,7 +301,7 @@ restart:
 
 		if (slave_txq->qdisc_sleeping != q)
 			continue;
-		if (__netif_subqueue_stopped(slave, subq) ||
+		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
 		    !netif_running(slave)) {
 			busy = 1;
 			continue;
@@ -312,7 +312,7 @@ restart:
 			if (__netif_tx_trylock(slave_txq)) {
 				unsigned int length = qdisc_pkt_len(skb);
 
-				if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
+				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
 				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
 					txq_trans_update(slave_txq);
 					__netif_tx_unlock(slave_txq);
@@ -324,7 +324,7 @@ restart:
 				}
 				__netif_tx_unlock(slave_txq);
 			}
-			if (netif_queue_stopped(dev))
+			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
 				busy = 1;
 			break;
 		case 1:
-- 
cgit v1.2.3


From c5d67bd78c5dc540e3461c36fb3d389fbe0de4c3 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:32:52 +0000
Subject: net: Add netdev interfaces for recording sends/comp

Add interfaces for drivers to call for recording number of packets and
bytes at send time and transmit completion.  Also, added a function to
"reset" a queue.  These will be used by Byte Queue Limits.

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d19f93265cac..9b24cc7a54d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1859,6 +1859,34 @@ static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_qu
 	return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN;
 }
 
+static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
+					unsigned int bytes)
+{
+}
+
+static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes)
+{
+	netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes);
+}
+
+static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
+					     unsigned pkts, unsigned bytes)
+{
+}
+
+static inline void netdev_completed_queue(struct net_device *dev,
+					  unsigned pkts, unsigned bytes)
+{
+	netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes);
+}
+
+static inline void netdev_tx_reset_queue(struct netdev_queue *q)
+{
+}
+
+static inline void netdev_reset_queue(struct net_device *dev_queue)
+{
+	netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0));
 }
 
 /**
-- 
cgit v1.2.3


From 114cf5802165ee93e3ab461c9c505cd94a08b800 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:33:09 +0000
Subject: bql: Byte queue limits

Networking stack support for byte queue limits, uses dynamic queue
limits library.  Byte queue limits are maintained per transmit queue,
and a dql structure has been added to netdev_queue structure for this
purpose.

Configuration of bql is in the tx-<n> sysfs directory for the queue
under the byte_queue_limits directory.  Configuration includes:
limit_min, bql minimum limit
limit_max, bql maximum limit
hold_time, bql slack hold time

Also under the directory are:
limit, current byte limit
inflight, current number of bytes on the queue

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  32 ++++++++++-
 net/Kconfig               |   6 ++
 net/core/dev.c            |   3 +
 net/core/net-sysfs.c      | 140 +++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 172 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9b24cc7a54d1..97edb3215a5a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -43,6 +43,7 @@
 #include <linux/rculist.h>
 #include <linux/dmaengine.h>
 #include <linux/workqueue.h>
+#include <linux/dynamic_queue_limits.h>
 
 #include <linux/ethtool.h>
 #include <net/net_namespace.h>
@@ -541,7 +542,6 @@ struct netdev_queue {
  */
 	struct net_device	*dev;
 	struct Qdisc		*qdisc;
-	unsigned long		state;
 	struct Qdisc		*qdisc_sleeping;
 #ifdef CONFIG_SYSFS
 	struct kobject		kobj;
@@ -564,6 +564,12 @@ struct netdev_queue {
 	 * (/sys/class/net/DEV/Q/trans_timeout)
 	 */
 	unsigned long		trans_timeout;
+
+	unsigned long		state;
+
+#ifdef CONFIG_BQL
+	struct dql		dql;
+#endif
 } ____cacheline_aligned_in_smp;
 
 static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
@@ -1862,6 +1868,15 @@ static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_qu
 static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 					unsigned int bytes)
 {
+#ifdef CONFIG_BQL
+	dql_queued(&dev_queue->dql, bytes);
+	if (unlikely(dql_avail(&dev_queue->dql) < 0)) {
+		set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state);
+		if (unlikely(dql_avail(&dev_queue->dql) >= 0))
+			clear_bit(__QUEUE_STATE_STACK_XOFF,
+			    &dev_queue->state);
+	}
+#endif
 }
 
 static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes)
@@ -1872,6 +1887,18 @@ static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes)
 static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
 					     unsigned pkts, unsigned bytes)
 {
+#ifdef CONFIG_BQL
+	if (likely(bytes)) {
+		dql_completed(&dev_queue->dql, bytes);
+		if (unlikely(test_bit(__QUEUE_STATE_STACK_XOFF,
+		    &dev_queue->state) &&
+		    dql_avail(&dev_queue->dql) >= 0)) {
+			if (test_and_clear_bit(__QUEUE_STATE_STACK_XOFF,
+			     &dev_queue->state))
+				netif_schedule_queue(dev_queue);
+		}
+	}
+#endif
 }
 
 static inline void netdev_completed_queue(struct net_device *dev,
@@ -1882,6 +1909,9 @@ static inline void netdev_completed_queue(struct net_device *dev,
 
 static inline void netdev_tx_reset_queue(struct netdev_queue *q)
 {
+#ifdef CONFIG_BQL
+	dql_reset(&q->dql);
+#endif
 }
 
 static inline void netdev_reset_queue(struct net_device *dev_queue)
diff --git a/net/Kconfig b/net/Kconfig
index 63d2c5dc36ff..2d998735c4d8 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -239,6 +239,12 @@ config NETPRIO_CGROUP
 	  Cgroup subsystem for use in assigning processes to network priorities on
 	  a per-interface basis
 
+config BQL
+	boolean
+	depends on SYSFS
+	select DQL
+	default y
+
 config HAVE_BPF_JIT
 	bool
 
diff --git a/net/core/dev.c b/net/core/dev.c
index cb8f753b4238..91a599122074 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5470,6 +5470,9 @@ static void netdev_init_one_queue(struct net_device *dev,
 	queue->xmit_lock_owner = -1;
 	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
 	queue->dev = dev;
+#ifdef CONFIG_BQL
+	dql_init(&queue->dql, HZ);
+#endif
 }
 
 static int netif_alloc_netdev_queues(struct net_device *dev)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b17c14a0fce9..3bf72b638d34 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -21,6 +21,7 @@
 #include <linux/wireless.h>
 #include <linux/vmalloc.h>
 #include <linux/export.h>
+#include <linux/jiffies.h>
 #include <net/wext.h>
 
 #include "net-sysfs.h"
@@ -845,6 +846,116 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
 static struct netdev_queue_attribute queue_trans_timeout =
 	__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
 
+#ifdef CONFIG_BQL
+/*
+ * Byte queue limits sysfs structures and functions.
+ */
+static ssize_t bql_show(char *buf, unsigned int value)
+{
+	return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t bql_set(const char *buf, const size_t count,
+		       unsigned int *pvalue)
+{
+	unsigned int value;
+	int err;
+
+	if (!strcmp(buf, "max") || !strcmp(buf, "max\n"))
+		value = DQL_MAX_LIMIT;
+	else {
+		err = kstrtouint(buf, 10, &value);
+		if (err < 0)
+			return err;
+		if (value > DQL_MAX_LIMIT)
+			return -EINVAL;
+	}
+
+	*pvalue = value;
+
+	return count;
+}
+
+static ssize_t bql_show_hold_time(struct netdev_queue *queue,
+				  struct netdev_queue_attribute *attr,
+				  char *buf)
+{
+	struct dql *dql = &queue->dql;
+
+	return sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
+}
+
+static ssize_t bql_set_hold_time(struct netdev_queue *queue,
+				 struct netdev_queue_attribute *attribute,
+				 const char *buf, size_t len)
+{
+	struct dql *dql = &queue->dql;
+	unsigned value;
+	int err;
+
+	err = kstrtouint(buf, 10, &value);
+	if (err < 0)
+		return err;
+
+	dql->slack_hold_time = msecs_to_jiffies(value);
+
+	return len;
+}
+
+static struct netdev_queue_attribute bql_hold_time_attribute =
+	__ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time,
+	    bql_set_hold_time);
+
+static ssize_t bql_show_inflight(struct netdev_queue *queue,
+				 struct netdev_queue_attribute *attr,
+				 char *buf)
+{
+	struct dql *dql = &queue->dql;
+
+	return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed);
+}
+
+static struct netdev_queue_attribute bql_inflight_attribute =
+	__ATTR(inflight, S_IRUGO | S_IWUSR, bql_show_inflight, NULL);
+
+#define BQL_ATTR(NAME, FIELD)						\
+static ssize_t bql_show_ ## NAME(struct netdev_queue *queue,		\
+				 struct netdev_queue_attribute *attr,	\
+				 char *buf)				\
+{									\
+	return bql_show(buf, queue->dql.FIELD);				\
+}									\
+									\
+static ssize_t bql_set_ ## NAME(struct netdev_queue *queue,		\
+				struct netdev_queue_attribute *attr,	\
+				const char *buf, size_t len)		\
+{									\
+	return bql_set(buf, len, &queue->dql.FIELD);			\
+}									\
+									\
+static struct netdev_queue_attribute bql_ ## NAME ## _attribute =	\
+	__ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME,		\
+	    bql_set_ ## NAME);
+
+BQL_ATTR(limit, limit)
+BQL_ATTR(limit_max, max_limit)
+BQL_ATTR(limit_min, min_limit)
+
+static struct attribute *dql_attrs[] = {
+	&bql_limit_attribute.attr,
+	&bql_limit_max_attribute.attr,
+	&bql_limit_min_attribute.attr,
+	&bql_hold_time_attribute.attr,
+	&bql_inflight_attribute.attr,
+	NULL
+};
+
+static struct attribute_group dql_group = {
+	.name  = "byte_queue_limits",
+	.attrs  = dql_attrs,
+};
+#endif /* CONFIG_BQL */
+
 #ifdef CONFIG_XPS
 static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
 {
@@ -1096,17 +1207,17 @@ static struct attribute *netdev_queue_default_attrs[] = {
 	NULL
 };
 
-#ifdef CONFIG_XPS
 static void netdev_queue_release(struct kobject *kobj)
 {
 	struct netdev_queue *queue = to_netdev_queue(kobj);
 
+#ifdef CONFIG_XPS
 	xps_queue_release(queue);
+#endif
 
 	memset(kobj, 0, sizeof(*kobj));
 	dev_put(queue->dev);
 }
-#endif /* CONFIG_XPS */
 
 static struct kobj_type netdev_queue_ktype = {
 	.sysfs_ops = &netdev_queue_sysfs_ops,
@@ -1125,14 +1236,21 @@ static int netdev_queue_add_kobject(struct net_device *net, int index)
 	kobj->kset = net->queues_kset;
 	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
 	    "tx-%u", index);
-	if (error) {
-		kobject_put(kobj);
-		return error;
-	}
+	if (error)
+		goto exit;
+
+#ifdef CONFIG_BQL
+	error = sysfs_create_group(kobj, &dql_group);
+	if (error)
+		goto exit;
+#endif
 
 	kobject_uevent(kobj, KOBJ_ADD);
 	dev_hold(queue->dev);
 
+	return 0;
+exit:
+	kobject_put(kobj);
 	return error;
 }
 #endif /* CONFIG_SYSFS */
@@ -1152,8 +1270,14 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 		}
 	}
 
-	while (--i >= new_num)
-		kobject_put(&net->_tx[i].kobj);
+	while (--i >= new_num) {
+		struct netdev_queue *queue = net->_tx + i;
+
+#ifdef CONFIG_BQL
+		sysfs_remove_group(&queue->kobj, &dql_group);
+#endif
+		kobject_put(&queue->kobj);
+	}
 
 	return error;
 #else
-- 
cgit v1.2.3


From 018690d33ecf4aa1eb1415e38c40e2b0b6c7808e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 29 Nov 2011 20:10:36 +0000
Subject: regmap: Allow regmap_update_bits() users to detect changes

Some users of regmap_update_bits() would like to be able to tell their
users if they actually did an update so provide a variant which also
returns a flag indicating if an update took place. We could return a
tristate in the return value of regmap_update_bits() but this makes the
API more cumbersome to use and doesn't fit with the general zero for
success idiom we have.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/regmap.c | 58 ++++++++++++++++++++++++++++++++++----------
 include/linux/regmap.h       |  3 +++
 2 files changed, 48 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index a8620900abc4..add5da6d9c0a 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -569,18 +569,9 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
 }
 EXPORT_SYMBOL_GPL(regmap_bulk_read);
 
-/**
- * regmap_update_bits: Perform a read/modify/write cycle on the register map
- *
- * @map: Register map to update
- * @reg: Register to update
- * @mask: Bitmask to change
- * @val: New value for bitmask
- *
- * Returns zero for success, a negative number on error.
- */
-int regmap_update_bits(struct regmap *map, unsigned int reg,
-		       unsigned int mask, unsigned int val)
+static int _regmap_update_bits(struct regmap *map, unsigned int reg,
+			       unsigned int mask, unsigned int val,
+			       bool *change)
 {
 	int ret;
 	unsigned int tmp, orig;
@@ -594,16 +585,57 @@ int regmap_update_bits(struct regmap *map, unsigned int reg,
 	tmp = orig & ~mask;
 	tmp |= val & mask;
 
-	if (tmp != orig)
+	if (tmp != orig) {
 		ret = _regmap_write(map, reg, tmp);
+		*change = true;
+	} else {
+		*change = false;
+	}
 
 out:
 	mutex_unlock(&map->lock);
 
 	return ret;
 }
+
+/**
+ * regmap_update_bits: Perform a read/modify/write cycle on the register map
+ *
+ * @map: Register map to update
+ * @reg: Register to update
+ * @mask: Bitmask to change
+ * @val: New value for bitmask
+ *
+ * Returns zero for success, a negative number on error.
+ */
+int regmap_update_bits(struct regmap *map, unsigned int reg,
+		       unsigned int mask, unsigned int val)
+{
+	bool change;
+	return _regmap_update_bits(map, reg, mask, val, &change);
+}
 EXPORT_SYMBOL_GPL(regmap_update_bits);
 
+/**
+ * regmap_update_bits_check: Perform a read/modify/write cycle on the
+ *                           register map and report if updated
+ *
+ * @map: Register map to update
+ * @reg: Register to update
+ * @mask: Bitmask to change
+ * @val: New value for bitmask
+ * @change: Boolean indicating if a write was done
+ *
+ * Returns zero for success, a negative number on error.
+ */
+int regmap_update_bits_check(struct regmap *map, unsigned int reg,
+			     unsigned int mask, unsigned int val,
+			     bool *change)
+{
+	return _regmap_update_bits(map, reg, mask, val, change);
+}
+EXPORT_SYMBOL_GPL(regmap_update_bits_check);
+
 static int __init regmap_initcall(void)
 {
 	regmap_debugfs_initcall();
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 81dfe0acb20c..a83e4a097abd 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -138,6 +138,9 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
 		     size_t val_count);
 int regmap_update_bits(struct regmap *map, unsigned int reg,
 		       unsigned int mask, unsigned int val);
+int regmap_update_bits_check(struct regmap *map, unsigned int reg,
+			     unsigned int mask, unsigned int val,
+			     bool *change);
 
 int regcache_sync(struct regmap *map);
 void regcache_cache_only(struct regmap *map, bool enable);
-- 
cgit v1.2.3


From 596b9b68ef118f7409afbc78487263e08ef96261 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Mon, 25 Jul 2011 00:01:25 +0000
Subject: neigh: Add infrastructure for allocating device neigh privates.

netdev->neigh_priv_len records the private area length.

This will trigger for neigh_table objects which set tbl->entry_size
to zero, and the first instances of this will be forthcoming.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c |  2 ++
 include/linux/netdevice.h                 |  1 +
 net/atm/clip.c                            |  1 +
 net/core/neighbour.c                      | 14 +++++++++++---
 4 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index efd7a9636aff..57ae9b9265e3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1218,6 +1218,8 @@ static struct net_device *ipoib_add_port(const char *format,
 	priv->dev->mtu  = IPOIB_UD_MTU(priv->max_ib_mtu);
 	priv->mcast_mtu  = priv->admin_mtu = priv->dev->mtu;
 
+	priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh);
+
 	result = ib_query_pkey(hca, port, 0, &priv->pkey);
 	if (result) {
 		printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 97edb3215a5a..5462c2cd5eab 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1080,6 +1080,7 @@ struct net_device {
 	unsigned char		perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
 	unsigned char		addr_assign_type; /* hw address assignment type */
 	unsigned char		addr_len;	/* hardware address length	*/
+	unsigned char		neigh_priv_len;
 	unsigned short          dev_id;		/* for shared network cards */
 
 	spinlock_t		addr_list_lock;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 11439a7f6782..aea7cad2ece1 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -535,6 +535,7 @@ static void clip_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &clip_netdev_ops;
 	dev->type = ARPHRD_ATM;
+	dev->neigh_priv_len = sizeof(struct atmarp_entry);
 	dev->hard_header_len = RFC1483LLC_LEN;
 	dev->mtu = RFC1626_MTU;
 	dev->tx_queue_len = 100;	/* "normal" queue (packets) */
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 661ad12e0cc9..ef750ff7497e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -273,7 +273,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 }
 EXPORT_SYMBOL(neigh_ifdown);
 
-static struct neighbour *neigh_alloc(struct neigh_table *tbl)
+static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
 {
 	struct neighbour *n = NULL;
 	unsigned long now = jiffies;
@@ -288,7 +288,15 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
 			goto out_entries;
 	}
 
-	n = kzalloc(tbl->entry_size, GFP_ATOMIC);
+	if (tbl->entry_size)
+		n = kzalloc(tbl->entry_size, GFP_ATOMIC);
+	else {
+		int sz = sizeof(*n) + tbl->key_len;
+
+		sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
+		sz += dev->neigh_priv_len;
+		n = kzalloc(sz, GFP_ATOMIC);
+	}
 	if (!n)
 		goto out_entries;
 
@@ -463,7 +471,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 	u32 hash_val;
 	int key_len = tbl->key_len;
 	int error;
-	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
+	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
 	struct neigh_hash_table *nht;
 
 	if (!n) {
-- 
cgit v1.2.3


From da6a8fa0275e2178c44a875374cae80d057538d1 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Mon, 25 Jul 2011 00:01:38 +0000
Subject: neigh: Add device constructor/destructor capability.

If the neigh entry has device private state, it will need
constructor/destructor ops.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/neighbour.c      | 15 ++++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5462c2cd5eab..1c4ddb37f2b5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -974,6 +974,8 @@ struct net_device_ops {
 						    netdev_features_t features);
 	int			(*ndo_set_features)(struct net_device *dev,
 						    netdev_features_t features);
+	int			(*ndo_neigh_construct)(struct neighbour *n);
+	int			(*ndo_neigh_destroy)(struct neighbour *n);
 };
 
 /*
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ef750ff7497e..cdf8dc34f0ba 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -489,6 +489,14 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 		goto out_neigh_release;
 	}
 
+	if (dev->netdev_ops->ndo_neigh_construct) {
+		error = dev->netdev_ops->ndo_neigh_construct(n);
+		if (error < 0) {
+			rc = ERR_PTR(error);
+			goto out_neigh_release;
+		}
+	}
+
 	/* Device specific setup. */
 	if (n->parms->neigh_setup &&
 	    (error = n->parms->neigh_setup(n)) < 0) {
@@ -692,6 +700,8 @@ static inline void neigh_parms_put(struct neigh_parms *parms)
  */
 void neigh_destroy(struct neighbour *neigh)
 {
+	struct net_device *dev = neigh->dev;
+
 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
 
 	if (!neigh->dead) {
@@ -707,7 +717,10 @@ void neigh_destroy(struct neighbour *neigh)
 	skb_queue_purge(&neigh->arp_queue);
 	neigh->arp_queue_len_bytes = 0;
 
-	dev_put(neigh->dev);
+	if (dev->netdev_ops->ndo_neigh_destroy)
+		dev->netdev_ops->ndo_neigh_destroy(neigh);
+
+	dev_put(dev);
 	neigh_parms_put(neigh->parms);
 
 	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
-- 
cgit v1.2.3


From 7bc0f28c7a0cd19f40e5a6e4d0a117db9a4e4cd5 Mon Sep 17 00:00:00 2001
From: Hagen Paul Pfeifer <hagen@jauu.net>
Date: Wed, 30 Nov 2011 12:20:26 +0000
Subject: netem: rate extension

Currently netem is not in the ability to emulate channel bandwidth. Only static
delay (and optional random jitter) can be configured.

To emulate the channel rate the token bucket filter (sch_tbf) can be used.  But
TBF has some major emulation flaws. The buffer (token bucket depth/rate) cannot
be 0. Also the idea behind TBF is that the credit (token in buckets) fills if
no packet is transmitted. So that there is always a "positive" credit for new
packets. In real life this behavior contradicts the law of nature where
nothing can travel faster as speed of light. E.g.: on an emulated 1000 byte/s
link a small IPv4/TCP SYN packet with ~50 byte require ~0.05 seconds - not 0
seconds.

Netem is an excellent place to implement a rate limiting feature: static
delay is already implemented, tfifo already has time information and the
user can skip TBF configuration completely.

This patch implement rate feature which can be configured via tc. e.g:

	tc qdisc add dev eth0 root netem rate 10kbit

To emulate a link of 5000byte/s and add an additional static delay of 10ms:

	tc qdisc add dev eth0 root netem delay 10ms rate 5KBps

Note: similar to TBF the rate extension is bounded to the kernel timing
system. Depending on the architecture timer granularity, higher rates (e.g.
10mbit/s and higher) tend to transmission bursts. Also note: further queues
living in network adaptors; see ethtool(8).

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@drr.davemloft.net>
---
 include/linux/pkt_sched.h |  5 +++++
 net/sched/sch_netem.c     | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 7281d5acf2f9..fb556dc594d3 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -465,6 +465,7 @@ enum {
 	TCA_NETEM_REORDER,
 	TCA_NETEM_CORRUPT,
 	TCA_NETEM_LOSS,
+	TCA_NETEM_RATE,
 	__TCA_NETEM_MAX,
 };
 
@@ -495,6 +496,10 @@ struct tc_netem_corrupt {
 	__u32	correlation;
 };
 
+struct tc_netem_rate {
+	__u32	rate;	/* byte/s */
+};
+
 enum {
 	NETEM_LOSS_UNSPEC,
 	NETEM_LOSS_GI,		/* General Intuitive - 4 state model */
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index eb3b9a86c6ed..9b7af9f1272f 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -79,6 +79,7 @@ struct netem_sched_data {
 	u32 duplicate;
 	u32 reorder;
 	u32 corrupt;
+	u32 rate;
 
 	struct crndstate {
 		u32 last;
@@ -298,6 +299,11 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
 }
 
+static psched_time_t packet_len_2_sched_time(unsigned int len, u32 rate)
+{
+	return PSCHED_NS2TICKS((u64)len * NSEC_PER_SEC / rate);
+}
+
 /*
  * Insert one skb into qdisc.
  * Note: parent depends on return value to account for queue length.
@@ -371,6 +377,24 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 				  &q->delay_cor, q->delay_dist);
 
 		now = psched_get_time();
+
+		if (q->rate) {
+			struct sk_buff_head *list = &q->qdisc->q;
+
+			delay += packet_len_2_sched_time(skb->len, q->rate);
+
+			if (!skb_queue_empty(list)) {
+				/*
+				 * Last packet in queue is reference point (now).
+				 * First packet in queue is already in flight,
+				 * calculate this time bonus and substract
+				 * from delay.
+				 */
+				delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
+				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
+			}
+		}
+
 		cb->time_to_send = now + delay;
 		++q->counter;
 		ret = qdisc_enqueue(skb, q->qdisc);
@@ -535,6 +559,14 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
 	init_crandom(&q->corrupt_cor, r->correlation);
 }
 
+static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	const struct tc_netem_rate *r = nla_data(attr);
+
+	q->rate = r->rate;
+}
+
 static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -594,6 +626,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
 	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
 	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
+	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
 };
 
@@ -666,6 +699,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (tb[TCA_NETEM_CORRUPT])
 		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
 
+	if (tb[TCA_NETEM_RATE])
+		get_rate(sch, tb[TCA_NETEM_RATE]);
+
 	q->loss_model = CLG_RANDOM;
 	if (tb[TCA_NETEM_LOSS])
 		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
@@ -846,6 +882,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct tc_netem_corr cor;
 	struct tc_netem_reorder reorder;
 	struct tc_netem_corrupt corrupt;
+	struct tc_netem_rate rate;
 
 	qopt.latency = q->latency;
 	qopt.jitter = q->jitter;
@@ -868,6 +905,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	corrupt.correlation = q->corrupt_cor.rho;
 	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
 
+	rate.rate = q->rate;
+	NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);
+
 	if (dump_loss_model(q, skb) != 0)
 		goto nla_put_failure;
 
-- 
cgit v1.2.3


From 4585790d1cde32a5719c24412e9845e031358e08 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 30 Nov 2011 10:55:14 +0000
Subject: ASoC: Allow more WM8958/WM1811 button levels with default handler

The WM8958 and WM1811 support detecting a range of buttons. Allow the
user to provide platform data enabling more of these levels without
having to write a custom detection handler.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/mfd/wm8994/pdata.h |  3 +++
 sound/soc/codecs/wm8994.c        | 42 ++++++++++++++++++++++++++++++++--------
 sound/soc/codecs/wm8994.h        |  1 +
 3 files changed, 38 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index ea32f306dca6..195ade95af38 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -168,6 +168,9 @@ struct wm8994_pdata {
 	/* WM8958 microphone bias configuration */
 	int micbias[2];
 
+	/* WM8958 microphone detection ranges */
+	u16 micd_lvl_sel;
+
 	/* Disable the internal pull downs on the LDOs if they are
 	 * always driven (eg, connected to an always on supply or
 	 * GPIO that always drives an output.  If they float power
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 45bfa09f2e45..3e52d40866d2 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -3043,6 +3043,7 @@ static void wm8958_default_micdet(u16 status, void *data)
 {
 	struct snd_soc_codec *codec = data;
 	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
+	int report;
 
 	dev_dbg(codec->dev, "MICDET %x\n", status);
 
@@ -3055,7 +3056,7 @@ static void wm8958_default_micdet(u16 status, void *data)
 		wm8958_micd_set_rate(codec);
 
 		snd_soc_jack_report(wm8994->micdet[0].jack, 0,
-				    SND_JACK_BTN_0 | SND_JACK_HEADSET);
+				    wm8994->btn_mask | SND_JACK_HEADSET);
 
 		return;
 	}
@@ -3088,12 +3089,27 @@ static void wm8958_default_micdet(u16 status, void *data)
 
 	/* Report short circuit as a button */
 	if (wm8994->jack_mic) {
+		report = 0;
 		if (status & 0x4)
-			snd_soc_jack_report(wm8994->micdet[0].jack,
-					    SND_JACK_BTN_0, SND_JACK_BTN_0);
-		else
-			snd_soc_jack_report(wm8994->micdet[0].jack,
-					    0, SND_JACK_BTN_0);
+			report |= SND_JACK_BTN_0;
+
+		if (status & 0x8)
+			report |= SND_JACK_BTN_1;
+
+		if (status & 0x10)
+			report |= SND_JACK_BTN_2;
+
+		if (status & 0x20)
+			report |= SND_JACK_BTN_3;
+
+		if (status & 0x40)
+			report |= SND_JACK_BTN_4;
+
+		if (status & 0x80)
+			report |= SND_JACK_BTN_5;
+
+		snd_soc_jack_report(wm8994->micdet[0].jack, report,
+				    wm8994->btn_mask);
 	}
 }
 
@@ -3118,6 +3134,7 @@ int wm8958_mic_detect(struct snd_soc_codec *codec, struct snd_soc_jack *jack,
 {
 	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
 	struct wm8994 *control = wm8994->wm8994;
+	u16 micd_lvl_sel;
 
 	switch (control->type) {
 	case WM1811:
@@ -3145,9 +3162,18 @@ int wm8958_mic_detect(struct snd_soc_codec *codec, struct snd_soc_jack *jack,
 
 		wm8958_micd_set_rate(codec);
 
-		/* Detect microphones and short circuits */
+		/* Detect microphones and short circuits by default */
+		if (wm8994->pdata->micd_lvl_sel)
+			micd_lvl_sel = wm8994->pdata->micd_lvl_sel;
+		else
+			micd_lvl_sel = 0x41;
+
+		wm8994->btn_mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+			SND_JACK_BTN_2 | SND_JACK_BTN_3 |
+			SND_JACK_BTN_4 | SND_JACK_BTN_5;
+
 		snd_soc_update_bits(codec, WM8958_MIC_DETECT_2,
-				    WM8958_MICD_LVL_SEL_MASK, 0x41);
+				    WM8958_MICD_LVL_SEL_MASK, micd_lvl_sel);
 
 		snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
 				    WM8958_MICD_ENA, WM8958_MICD_ENA);
diff --git a/sound/soc/codecs/wm8994.h b/sound/soc/codecs/wm8994.h
index c3e71d72eb6a..77e3d8c9eeb8 100644
--- a/sound/soc/codecs/wm8994.h
+++ b/sound/soc/codecs/wm8994.h
@@ -129,6 +129,7 @@ struct wm8994_priv {
 	struct wm8994_micdet micdet[2];
 	bool detecting;
 	bool jack_mic;
+	int btn_mask;
 
 	wm8958_micdet_cb jack_cb;
 	void *jack_cb_data;
-- 
cgit v1.2.3


From af6b6fe41c4bc9e7933d66bbbf5106e0e7e6e484 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 30 Nov 2011 20:32:05 +0000
Subject: ASoC: Implement support for WM1811A jack detection

The WM1811A features an advanced low power accessory detection subsystem
which allows the device to be maintained in a very low power state while
the system is idle without sacrificing any accessory detection features.

Implement software support for this, automatically managing the power
configuration of the device depending on the detected accessory.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/mfd/wm8994/registers.h |  16 +++
 sound/soc/codecs/wm8994.c            | 264 ++++++++++++++++++++++++++++++++---
 sound/soc/codecs/wm8994.h            |   3 +
 3 files changed, 264 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/registers.h b/include/linux/mfd/wm8994/registers.h
index 83a9caec0e43..ebfc92fdcd77 100644
--- a/include/linux/mfd/wm8994/registers.h
+++ b/include/linux/mfd/wm8994/registers.h
@@ -242,6 +242,7 @@
 #define WM8994_GPIO_4                           0x703
 #define WM8994_GPIO_5                           0x704
 #define WM8994_GPIO_6                           0x705
+#define WM1811_JACKDET_CTRL			0x705
 #define WM8994_GPIO_7                           0x706
 #define WM8994_GPIO_8                           0x707
 #define WM8994_GPIO_9                           0x708
@@ -1852,6 +1853,9 @@
 /*
  * R57 (0x39) - AntiPOP (2)
  */
+#define WM1811_JACKDET_MODE_MASK                0x0180  /* JACKDET_MODE - [8:7] */
+#define WM1811_JACKDET_MODE_SHIFT                    7  /* JACKDET_MODE - [8:7] */
+#define WM1811_JACKDET_MODE_WIDTH                    2  /* JACKDET_MODE - [8:7] */
 #define WM8994_MICB2_DISCH                      0x0100  /* MICB2_DISCH */
 #define WM8994_MICB2_DISCH_MASK                 0x0100  /* MICB2_DISCH */
 #define WM8994_MICB2_DISCH_SHIFT                     8  /* MICB2_DISCH */
@@ -4186,6 +4190,18 @@
 #define WM8994_STL_SEL_SHIFT                         0  /* STL_SEL */
 #define WM8994_STL_SEL_WIDTH                         1  /* STL_SEL */
 
+/*
+ * R1797 (0x705) - JACKDET Ctrl
+ */
+#define WM1811_JACKDET_DB                       0x0100  /* JACKDET_DB */
+#define WM1811_JACKDET_DB_MASK                  0x0100  /* JACKDET_DB */
+#define WM1811_JACKDET_DB_SHIFT                      8  /* JACKDET_DB */
+#define WM1811_JACKDET_DB_WIDTH                      1  /* JACKDET_DB */
+#define WM1811_JACKDET_LVL                      0x0040  /* JACKDET_LVL */
+#define WM1811_JACKDET_LVL_MASK                 0x0040  /* JACKDET_LVL */
+#define WM1811_JACKDET_LVL_SHIFT                     6  /* JACKDET_LVL */
+#define WM1811_JACKDET_LVL_WIDTH                     1  /* JACKDET_LVL */
+
 /*
  * R1824 (0x720) - Pull Control (1)
  */
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index e65745bc1003..2e28f472b963 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -38,6 +38,11 @@
 #include "wm8994.h"
 #include "wm_hubs.h"
 
+#define WM1811_JACKDET_MODE_NONE  0x0000
+#define WM1811_JACKDET_MODE_JACK  0x0100
+#define WM1811_JACKDET_MODE_MIC   0x0080
+#define WM1811_JACKDET_MODE_AUDIO 0x0180
+
 #define WM8994_NUM_DRC 3
 #define WM8994_NUM_EQ  3
 
@@ -55,23 +60,34 @@ static int wm8994_retune_mobile_base[] = {
 
 static void wm8958_default_micdet(u16 status, void *data);
 
-static const struct {
+struct wm8958_micd_rate {
 	int sysclk;
 	bool idle;
 	int start;
 	int rate;
-} wm8958_micd_rates[] = {
+};
+
+static const struct wm8958_micd_rate micdet_rates[] = {
 	{ 32768,       true,  1, 4 },
 	{ 32768,       false, 1, 1 },
 	{ 44100 * 256, true,  7, 10 },
 	{ 44100 * 256, false, 7, 10 },
 };
 
+static const struct wm8958_micd_rate jackdet_rates[] = {
+	{ 32768,       true,  0, 1 },
+	{ 32768,       false, 0, 1 },
+	{ 44100 * 256, true,  7, 10 },
+	{ 44100 * 256, false, 7, 10 },
+};
+
 static void wm8958_micd_set_rate(struct snd_soc_codec *codec)
 {
 	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
 	int best, i, sysclk, val;
 	bool idle;
+	const struct wm8958_micd_rate *rates;
+	int num_rates;
 
 	if (wm8994->jack_cb != wm8958_default_micdet)
 		return;
@@ -84,19 +100,27 @@ static void wm8958_micd_set_rate(struct snd_soc_codec *codec)
 	else
 		sysclk = wm8994->aifclk[0];
 
+	if (wm8994->jackdet) {
+		rates = jackdet_rates;
+		num_rates = ARRAY_SIZE(jackdet_rates);
+	} else {
+		rates = micdet_rates;
+		num_rates = ARRAY_SIZE(micdet_rates);
+	}
+
 	best = 0;
-	for (i = 0; i < ARRAY_SIZE(wm8958_micd_rates); i++) {
-		if (wm8958_micd_rates[i].idle != idle)
+	for (i = 0; i < num_rates; i++) {
+		if (rates[i].idle != idle)
 			continue;
-		if (abs(wm8958_micd_rates[i].sysclk - sysclk) <
-		    abs(wm8958_micd_rates[best].sysclk - sysclk))
+		if (abs(rates[i].sysclk - sysclk) <
+		    abs(rates[best].sysclk - sysclk))
 			best = i;
-		else if (wm8958_micd_rates[best].idle != idle)
+		else if (rates[best].idle != idle)
 			best = i;
 	}
 
-	val = wm8958_micd_rates[best].start << WM8958_MICD_BIAS_STARTTIME_SHIFT
-		| wm8958_micd_rates[best].rate << WM8958_MICD_RATE_SHIFT;
+	val = rates[best].start << WM8958_MICD_BIAS_STARTTIME_SHIFT
+		| rates[best].rate << WM8958_MICD_RATE_SHIFT;
 
 	snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
 			    WM8958_MICD_BIAS_STARTTIME_MASK |
@@ -762,6 +786,74 @@ SOC_SINGLE_TLV("MIXINL IN1RP Boost Volume", WM8994_INPUT_MIXER_1, 8, 1, 0,
 	       mixin_boost_tlv),
 };
 
+/* We run all mode setting through a function to enforce audio mode */
+static void wm1811_jackdet_set_mode(struct snd_soc_codec *codec, u16 mode)
+{
+	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
+
+	if (wm8994->active_refcount)
+		mode = WM1811_JACKDET_MODE_AUDIO;
+
+	snd_soc_update_bits(codec, WM8994_ANTIPOP_2,
+			    WM1811_JACKDET_MODE_MASK, mode);
+
+	if (mode == WM1811_JACKDET_MODE_MIC)
+		msleep(2);
+}
+
+static void active_reference(struct snd_soc_codec *codec)
+{
+	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
+
+	mutex_lock(&wm8994->accdet_lock);
+
+	wm8994->active_refcount++;
+
+	dev_dbg(codec->dev, "Active refcount incremented, now %d\n",
+		wm8994->active_refcount);
+
+	if (wm8994->active_refcount == 1) {
+		/* If we're using jack detection go into audio mode */
+		if (wm8994->jackdet && wm8994->jack_cb) {
+			snd_soc_update_bits(codec, WM8994_ANTIPOP_2,
+					    WM1811_JACKDET_MODE_MASK,
+					    WM1811_JACKDET_MODE_AUDIO);
+			msleep(2);
+		}
+	}
+
+	mutex_unlock(&wm8994->accdet_lock);
+}
+
+static void active_dereference(struct snd_soc_codec *codec)
+{
+	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
+	u16 mode;
+
+	mutex_lock(&wm8994->accdet_lock);
+
+	wm8994->active_refcount--;
+
+	dev_dbg(codec->dev, "Active refcount decremented, now %d\n",
+		wm8994->active_refcount);
+
+	if (wm8994->active_refcount == 0) {
+		/* Go into appropriate detection only mode */
+		if (wm8994->jackdet && wm8994->jack_cb) {
+			if (wm8994->jack_mic || wm8994->mic_detecting)
+				mode = WM1811_JACKDET_MODE_MIC;
+			else
+				mode = WM1811_JACKDET_MODE_JACK;
+
+			snd_soc_update_bits(codec, WM8994_ANTIPOP_2,
+					    WM1811_JACKDET_MODE_MASK,
+					    mode);
+		}
+	}
+
+	mutex_unlock(&wm8994->accdet_lock);
+}
+
 static int clk_sys_event(struct snd_soc_dapm_widget *w,
 			 struct snd_kcontrol *kcontrol, int event)
 {
@@ -1919,6 +2011,8 @@ static int _wm8994_set_fll(struct snd_soc_codec *codec, int id, int src,
 	if (freq_out) {
 		/* Enable VMID if we need it */
 		if (!was_enabled) {
+			active_reference(codec);
+
 			switch (control->type) {
 			case WM8994:
 				vmid_reference(codec);
@@ -1962,6 +2056,8 @@ static int _wm8994_set_fll(struct snd_soc_codec *codec, int id, int src,
 			default:
 				break;
 			}
+
+			active_dereference(codec);
 		}
 	}
 
@@ -2091,6 +2187,9 @@ static int wm8994_set_bias_level(struct snd_soc_codec *codec,
 		default:
 			break;
 		}
+
+		if (codec->dapm.bias_level == SND_SOC_BIAS_STANDBY)
+			active_reference(codec);
 		break;
 
 	case SND_SOC_BIAS_STANDBY:
@@ -2143,6 +2242,9 @@ static int wm8994_set_bias_level(struct snd_soc_codec *codec,
 					    WM8994_LINEOUT2_DISCH);
 		}
 
+		if (codec->dapm.bias_level == SND_SOC_BIAS_PREPARE)
+			active_dereference(codec);
+
 		/* MICBIAS into bypass mode on newer devices */
 		switch (control->type) {
 		case WM8958:
@@ -2168,6 +2270,7 @@ static int wm8994_set_bias_level(struct snd_soc_codec *codec,
 		break;
 	}
 	codec->dapm.bias_level = level;
+
 	return 0;
 }
 
@@ -2715,6 +2818,9 @@ static int wm8994_suspend(struct snd_soc_codec *codec, pm_message_t state)
 		snd_soc_update_bits(codec, WM8994_MICBIAS, WM8994_MICD_ENA, 0);
 		break;
 	case WM1811:
+		snd_soc_update_bits(codec, WM8994_ANTIPOP_2,
+				    WM1811_JACKDET_MODE_MASK, 0);
+		/* Fall through */
 	case WM8958:
 		snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
 				    WM8958_MICD_ENA, 0);
@@ -2784,6 +2890,13 @@ static int wm8994_resume(struct snd_soc_codec *codec)
 					    WM8994_MICD_ENA, WM8994_MICD_ENA);
 		break;
 	case WM1811:
+		if (wm8994->jackdet && wm8994->jack_cb) {
+			/* Restart from idle */
+			snd_soc_update_bits(codec, WM8994_ANTIPOP_2,
+					    WM1811_JACKDET_MODE_MASK,
+					    WM1811_JACKDET_MODE_JACK);
+			break;
+		}
 	case WM8958:
 		if (wm8994->jack_cb)
 			snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
@@ -3047,17 +3160,20 @@ static void wm8958_default_micdet(u16 status, void *data)
 
 	dev_dbg(codec->dev, "MICDET %x\n", status);
 
-	/* If nothing present then clear our statuses */
+	/* Either nothing present or just starting detection */
 	if (!(status & WM8958_MICD_STS)) {
-		dev_dbg(codec->dev, "Detected open circuit\n");
-		wm8994->jack_mic = false;
-		wm8994->mic_detecting = true;
+		if (!wm8994->jackdet) {
+			/* If nothing present then clear our statuses */
+			dev_dbg(codec->dev, "Detected open circuit\n");
+			wm8994->jack_mic = false;
+			wm8994->mic_detecting = true;
 
-		wm8958_micd_set_rate(codec);
-
-		snd_soc_jack_report(wm8994->micdet[0].jack, 0,
-				    wm8994->btn_mask | SND_JACK_HEADSET);
+			wm8958_micd_set_rate(codec);
 
+			snd_soc_jack_report(wm8994->micdet[0].jack, 0,
+					    wm8994->btn_mask |
+					     SND_JACK_HEADSET);
+		}
 		return;
 	}
 
@@ -3085,6 +3201,15 @@ static void wm8958_default_micdet(u16 status, void *data)
 
 		snd_soc_jack_report(wm8994->micdet[0].jack, SND_JACK_HEADPHONE,
 				    SND_JACK_HEADSET);
+
+		/* If we have jackdet that will detect removal */
+		if (wm8994->jackdet) {
+			snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
+					    WM8958_MICD_ENA, 0);
+
+			wm1811_jackdet_set_mode(codec,
+						WM1811_JACKDET_MODE_JACK);
+		}
 	}
 
 	/* Report short circuit as a button */
@@ -3113,6 +3238,56 @@ static void wm8958_default_micdet(u16 status, void *data)
 	}
 }
 
+static irqreturn_t wm1811_jackdet_irq(int irq, void *data)
+{
+	struct wm8994_priv *wm8994 = data;
+	struct snd_soc_codec *codec = wm8994->codec;
+	int reg;
+
+	mutex_lock(&wm8994->accdet_lock);
+
+	reg = snd_soc_read(codec, WM1811_JACKDET_CTRL);
+	if (reg < 0) {
+		dev_err(codec->dev, "Failed to read jack status: %d\n", reg);
+		mutex_unlock(&wm8994->accdet_lock);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(codec->dev, "JACKDET %x\n", reg);
+
+	if (reg & WM1811_JACKDET_LVL) {
+		dev_dbg(codec->dev, "Jack detected\n");
+
+		snd_soc_jack_report(wm8994->micdet[0].jack,
+				    SND_JACK_MECHANICAL, SND_JACK_MECHANICAL);
+
+		/*
+		 * Start off measument of microphone impedence to find
+		 * out what's actually there.
+		 */
+		wm8994->mic_detecting = true;
+		wm1811_jackdet_set_mode(codec, WM1811_JACKDET_MODE_MIC);
+		snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
+				    WM8958_MICD_ENA, WM8958_MICD_ENA);
+	} else {
+		dev_dbg(codec->dev, "Jack not detected\n");
+
+		snd_soc_jack_report(wm8994->micdet[0].jack, 0,
+				    SND_JACK_MECHANICAL | SND_JACK_HEADSET |
+				    wm8994->btn_mask);
+
+		wm8994->mic_detecting = false;
+		wm8994->jack_mic = false;
+		snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
+				    WM8958_MICD_ENA, 0);
+		wm1811_jackdet_set_mode(codec, WM1811_JACKDET_MODE_JACK);
+	}
+
+	mutex_unlock(&wm8994->accdet_lock);
+
+	return IRQ_HANDLED;
+}
+
 /**
  * wm8958_mic_detect - Enable microphone detection via the WM8958 IRQ
  *
@@ -3175,8 +3350,22 @@ int wm8958_mic_detect(struct snd_soc_codec *codec, struct snd_soc_jack *jack,
 		snd_soc_update_bits(codec, WM8958_MIC_DETECT_2,
 				    WM8958_MICD_LVL_SEL_MASK, micd_lvl_sel);
 
-		snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
-				    WM8958_MICD_ENA, WM8958_MICD_ENA);
+		WARN_ON(codec->dapm.bias_level > SND_SOC_BIAS_STANDBY);
+
+		/*
+		 * If we can use jack detection start off with that,
+		 * otherwise jump straight to microphone detection.
+		 */
+		if (wm8994->jackdet) {
+			snd_soc_update_bits(codec, WM8994_LDO_1,
+					    WM8994_LDO1_DISCH, 0);
+			wm1811_jackdet_set_mode(codec,
+						WM1811_JACKDET_MODE_JACK);
+		} else {
+			snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
+					    WM8958_MICD_ENA, WM8958_MICD_ENA);
+		}
+
 	} else {
 		snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
 				    WM8958_MICD_ENA, 0);
@@ -3193,6 +3382,18 @@ static irqreturn_t wm8958_mic_irq(int irq, void *data)
 	struct snd_soc_codec *codec = wm8994->codec;
 	int reg, count;
 
+	mutex_lock(&wm8994->accdet_lock);
+
+	/*
+	 * Jack detection may have detected a removal simulataneously
+	 * with an update of the MICDET status; if so it will have
+	 * stopped detection and we can ignore this interrupt.
+	 */
+	if (!(snd_soc_read(codec, WM8958_MIC_DETECT_1) & WM8958_MICD_ENA)) {
+		mutex_unlock(&wm8994->accdet_lock);
+		return IRQ_HANDLED;
+	}
+
 	/* We may occasionally read a detection without an impedence
 	 * range being provided - if that happens loop again.
 	 */
@@ -3200,6 +3401,7 @@ static irqreturn_t wm8958_mic_irq(int irq, void *data)
 	do {
 		reg = snd_soc_read(codec, WM8958_MIC_DETECT_3);
 		if (reg < 0) {
+			mutex_unlock(&wm8994->accdet_lock);
 			dev_err(codec->dev,
 				"Failed to read mic detect status: %d\n",
 				reg);
@@ -3230,6 +3432,8 @@ static irqreturn_t wm8958_mic_irq(int irq, void *data)
 		dev_warn(codec->dev, "Accessory detection with no callback\n");
 
 out:
+	mutex_unlock(&wm8994->accdet_lock);
+
 	return IRQ_HANDLED;
 }
 
@@ -3280,6 +3484,8 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
 	wm8994->pdata = dev_get_platdata(codec->dev->parent);
 	wm8994->codec = codec;
 
+	mutex_init(&wm8994->accdet_lock);
+
 	for (i = 0; i < ARRAY_SIZE(wm8994->fll_locked); i++)
 		init_completion(&wm8994->fll_locked[i]);
 
@@ -3428,6 +3634,21 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
 		}
 	}
 
+	switch (control->type) {
+	case WM1811:
+		if (wm8994->revision > 1) {
+			ret = wm8994_request_irq(wm8994->wm8994,
+						 WM8994_IRQ_GPIO(6),
+						 wm1811_jackdet_irq, "JACKDET",
+						 wm8994);
+			if (ret == 0)
+				wm8994->jackdet = true;
+		}
+		break;
+	default:
+		break;
+	}
+
 	wm8994->fll_locked_irq = true;
 	for (i = 0; i < ARRAY_SIZE(wm8994->fll_locked); i++) {
 		ret = wm8994_request_irq(wm8994->wm8994,
@@ -3650,6 +3871,8 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
 	return 0;
 
 err_irq:
+	if (wm8994->jackdet)
+		wm8994_free_irq(wm8994->wm8994, WM8994_IRQ_GPIO(6), wm8994);
 	wm8994_free_irq(wm8994->wm8994, WM8994_IRQ_MIC2_SHRT, wm8994);
 	wm8994_free_irq(wm8994->wm8994, WM8994_IRQ_MIC2_DET, wm8994);
 	wm8994_free_irq(wm8994->wm8994, WM8994_IRQ_MIC1_SHRT, wm8994);
@@ -3688,6 +3911,9 @@ static int  wm8994_codec_remove(struct snd_soc_codec *codec)
 	wm8994_free_irq(wm8994->wm8994, WM8994_IRQ_TEMP_SHUT, codec);
 	wm8994_free_irq(wm8994->wm8994, WM8994_IRQ_TEMP_WARN, codec);
 
+	if (wm8994->jackdet)
+		wm8994_free_irq(wm8994->wm8994, WM8994_IRQ_GPIO(6), wm8994);
+
 	switch (control->type) {
 	case WM8994:
 		if (wm8994->micdet_irq)
diff --git a/sound/soc/codecs/wm8994.h b/sound/soc/codecs/wm8994.h
index 8622bc4db2fe..6ef3f11878c6 100644
--- a/sound/soc/codecs/wm8994.h
+++ b/sound/soc/codecs/wm8994.h
@@ -85,6 +85,7 @@ struct wm8994_priv {
 	bool fll_locked_irq;
 
 	int vmid_refcount;
+	int active_refcount;
 
 	int dac_rates[2];
 	int lrclk_shared[2];
@@ -126,10 +127,12 @@ struct wm8994_priv {
 	const char **enh_eq_texts;
 	struct soc_enum enh_eq_enum;
 
+	struct mutex accdet_lock;
 	struct wm8994_micdet micdet[2];
 	bool mic_detecting;
 	bool jack_mic;
 	int btn_mask;
+	bool jackdet;
 
 	wm8958_micdet_cb jack_cb;
 	void *jack_cb_data;
-- 
cgit v1.2.3


From cd1707a99a2cb43cd8ab0c1952b455b218f15884 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 1 Dec 2011 13:44:25 +0000
Subject: ASoC: Add platform data for WM8958/WM1811 microphone detection rates

Allow systems to override the default microphone detection rates using
platform data in case the settings are not suitable (eg, due to an
unusually noisy jack).

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/mfd/wm8994/pdata.h | 20 ++++++++++++++++++++
 sound/soc/codecs/wm8994.c        | 12 ++++--------
 2 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index 195ade95af38..5256f1f41d7f 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -113,6 +113,23 @@ struct wm8958_enh_eq_cfg {
 	u16 regs[WM8958_ENH_EQ_REGS];
 };
 
+/**
+ * Microphone detection rates, used to tune response rates and power
+ * consumption for WM8958/WM1811 microphone detection.
+ *
+ * @sysclk: System clock rate to use this configuration for.
+ * @idle: True if this configuration should use when no accessory is detected,
+ *        false otherwise.
+ * @start: Value for MICD_BIAS_START_TIME register field (not shifted).
+ * @rate: Value for MICD_RATE register field (not shifted).
+ */
+struct wm8958_micd_rate {
+	int sysclk;
+	bool idle;
+	int start;
+	int rate;
+};
+
 struct wm8994_pdata {
 	int gpio_base;
 
@@ -144,6 +161,9 @@ struct wm8994_pdata {
 	int num_enh_eq_cfgs;
 	struct wm8958_enh_eq_cfg *enh_eq_cfgs;
 
+	int num_micd_rates;
+	struct wm8958_micd_rate *micd_rates;
+
         /* LINEOUT can be differential or single ended */
         unsigned int lineout1_diff:1;
         unsigned int lineout2_diff:1;
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 91f3638ab33f..6bdf8137c7e8 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -60,13 +60,6 @@ static int wm8994_retune_mobile_base[] = {
 
 static void wm8958_default_micdet(u16 status, void *data);
 
-struct wm8958_micd_rate {
-	int sysclk;
-	bool idle;
-	int start;
-	int rate;
-};
-
 static const struct wm8958_micd_rate micdet_rates[] = {
 	{ 32768,       true,  1, 4 },
 	{ 32768,       false, 1, 1 },
@@ -100,7 +93,10 @@ static void wm8958_micd_set_rate(struct snd_soc_codec *codec)
 	else
 		sysclk = wm8994->aifclk[0];
 
-	if (wm8994->jackdet) {
+	if (wm8994->pdata && wm8994->pdata->micd_rates) {
+		rates = wm8994->pdata->micd_rates;
+		num_rates = wm8994->pdata->num_micd_rates;
+	} else if (wm8994->jackdet) {
 		rates = jackdet_rates;
 		num_rates = ARRAY_SIZE(jackdet_rates);
 	} else {
-- 
cgit v1.2.3


From 53e4acea0e819a6a8513e10a0773f2259ede0481 Mon Sep 17 00:00:00 2001
From: Chris Blair <chris.blair@stericsson.com>
Date: Tue, 8 Nov 2011 08:54:46 +0000
Subject: spi/pl022: add support for pm_runtime autosuspend

Adds support for configuring the spi bus to use autosuspend for
runtime power management. This can reduce the latency in starting an
spi transfer by not suspending the device immediately following
completion of a transfer. If another transfer then takes place before
the autosuspend timeout, the call to resume the device can return
immediately rather than needing to risk sleeping in order to resume
the device.

Reviewed-by: Viresh Kumar <viresh.kumar@st.com>
Signed-off-by: Chris Blair <chris.blair@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/spi/spi-pl022.c    | 20 ++++++++++++++++++--
 include/linux/amba/pl022.h |  4 ++++
 2 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index 82a929f916fd..13988a3024bb 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -1516,7 +1516,13 @@ static void pump_messages(struct work_struct *work)
 			/* nothing more to do - disable spi/ssp and power off */
 			writew((readw(SSP_CR1(pl022->virtbase)) &
 				(~SSP_CR1_MASK_SSE)), SSP_CR1(pl022->virtbase));
-			pm_runtime_put(&pl022->adev->dev);
+
+			if (pl022->master_info->autosuspend_delay > 0) {
+				pm_runtime_mark_last_busy(&pl022->adev->dev);
+				pm_runtime_put_autosuspend(&pl022->adev->dev);
+			} else {
+				pm_runtime_put(&pl022->adev->dev);
+			}
 		}
 		pl022->busy = false;
 		spin_unlock_irqrestore(&pl022->queue_lock, flags);
@@ -2247,7 +2253,17 @@ pl022_probe(struct amba_device *adev, const struct amba_id *id)
 	dev_dbg(dev, "probe succeeded\n");
 
 	/* let runtime pm put suspend */
-	pm_runtime_put(dev);
+	if (platform_info->autosuspend_delay > 0) {
+		dev_info(&adev->dev,
+			"will use autosuspend for runtime pm, delay %dms\n",
+			platform_info->autosuspend_delay);
+		pm_runtime_set_autosuspend_delay(dev,
+			platform_info->autosuspend_delay);
+		pm_runtime_use_autosuspend(dev);
+		pm_runtime_put_autosuspend(dev);
+	} else {
+		pm_runtime_put(dev);
+	}
 	return 0;
 
  err_spi_register:
diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h
index 4ce98f54186b..572f637299c9 100644
--- a/include/linux/amba/pl022.h
+++ b/include/linux/amba/pl022.h
@@ -238,6 +238,9 @@ struct dma_chan;
  * @enable_dma: if true enables DMA driven transfers.
  * @dma_rx_param: parameter to locate an RX DMA channel.
  * @dma_tx_param: parameter to locate a TX DMA channel.
+ * @autosuspend_delay: delay in ms following transfer completion before the
+ *     runtime power management system suspends the device. A setting of 0
+ *     indicates no delay and the device will be suspended immediately.
  */
 struct pl022_ssp_controller {
 	u16 bus_id;
@@ -246,6 +249,7 @@ struct pl022_ssp_controller {
 	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
 	void *dma_rx_param;
 	void *dma_tx_param;
+	int autosuspend_delay;
 };
 
 /**
-- 
cgit v1.2.3


From 2a367c3a82557cd11a04949ef2160658987fa772 Mon Sep 17 00:00:00 2001
From: Wolfgang Grandegger <wg@grandegger.com>
Date: Wed, 30 Nov 2011 23:41:18 +0000
Subject: can: cc770: add driver core for the Bosch CC770 and Intel AN82527

Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/Kconfig            |   2 +
 drivers/net/can/Makefile           |   1 +
 drivers/net/can/cc770/Kconfig      |   3 +
 drivers/net/can/cc770/Makefile     |   7 +
 drivers/net/can/cc770/cc770.c      | 881 +++++++++++++++++++++++++++++++++++++
 drivers/net/can/cc770/cc770.h      | 203 +++++++++
 include/linux/can/platform/cc770.h |  33 ++
 7 files changed, 1130 insertions(+)
 create mode 100644 drivers/net/can/cc770/Kconfig
 create mode 100644 drivers/net/can/cc770/Makefile
 create mode 100644 drivers/net/can/cc770/cc770.c
 create mode 100644 drivers/net/can/cc770/cc770.h
 create mode 100644 include/linux/can/platform/cc770.h

(limited to 'include/linux')

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index f6c98fb4a517..ab45758c49a4 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -116,6 +116,8 @@ source "drivers/net/can/sja1000/Kconfig"
 
 source "drivers/net/can/c_can/Kconfig"
 
+source "drivers/net/can/cc770/Kconfig"
+
 source "drivers/net/can/usb/Kconfig"
 
 source "drivers/net/can/softing/Kconfig"
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index 24ebfe8d758a..938be37b670c 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -14,6 +14,7 @@ obj-y				+= softing/
 obj-$(CONFIG_CAN_SJA1000)	+= sja1000/
 obj-$(CONFIG_CAN_MSCAN)		+= mscan/
 obj-$(CONFIG_CAN_C_CAN)		+= c_can/
+obj-$(CONFIG_CAN_CC770)		+= cc770/
 obj-$(CONFIG_CAN_AT91)		+= at91_can.o
 obj-$(CONFIG_CAN_TI_HECC)	+= ti_hecc.o
 obj-$(CONFIG_CAN_MCP251X)	+= mcp251x.o
diff --git a/drivers/net/can/cc770/Kconfig b/drivers/net/can/cc770/Kconfig
new file mode 100644
index 000000000000..225131b7ac93
--- /dev/null
+++ b/drivers/net/can/cc770/Kconfig
@@ -0,0 +1,3 @@
+menuconfig CAN_CC770
+	tristate "Bosch CC770 and Intel AN82527 devices"
+	depends on CAN_DEV && HAS_IOMEM
diff --git a/drivers/net/can/cc770/Makefile b/drivers/net/can/cc770/Makefile
new file mode 100644
index 000000000000..34e818026157
--- /dev/null
+++ b/drivers/net/can/cc770/Makefile
@@ -0,0 +1,7 @@
+#
+#  Makefile for the Bosch CC770 CAN controller drivers.
+#
+
+obj-$(CONFIG_CAN_CC770) += cc770.o
+
+ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c
new file mode 100644
index 000000000000..766896747643
--- /dev/null
+++ b/drivers/net/can/cc770/cc770.c
@@ -0,0 +1,881 @@
+/*
+ * Core driver for the CC770 and AN82527 CAN controllers
+ *
+ * Copyright (C) 2009, 2011 Wolfgang Grandegger <wg@grandegger.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/interrupt.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/skbuff.h>
+#include <linux/delay.h>
+
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+#include <linux/can/dev.h>
+#include <linux/can/platform/cc770.h>
+
+#include "cc770.h"
+
+MODULE_AUTHOR("Wolfgang Grandegger <wg@grandegger.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION(KBUILD_MODNAME "CAN netdevice driver");
+
+/*
+ * The CC770 is a CAN controller from Bosch, which is 100% compatible
+ * with the AN82527 from Intel, but with "bugs" being fixed and some
+ * additional functionality, mainly:
+ *
+ * 1. RX and TX error counters are readable.
+ * 2. Support of silent (listen-only) mode.
+ * 3. Message object 15 can receive all types of frames, also RTR and EFF.
+ *
+ * Details are available from Bosch's "CC770_Product_Info_2007-01.pdf",
+ * which explains in detail the compatibility between the CC770 and the
+ * 82527. This driver use the additional functionality 3. on real CC770
+ * devices. Unfortunately, the CC770 does still not store the message
+ * identifier of received remote transmission request frames and
+ * therefore it's set to 0.
+ *
+ * The message objects 1..14 can be used for TX and RX while the message
+ * objects 15 is optimized for RX. It has a shadow register for reliable
+ * data receiption under heavy bus load. Therefore it makes sense to use
+ * this message object for the needed use case. The frame type (EFF/SFF)
+ * for the message object 15 can be defined via kernel module parameter
+ * "msgobj15_eff". If not equal 0, it will receive 29-bit EFF frames,
+ * otherwise 11 bit SFF messages.
+ */
+static int msgobj15_eff;
+module_param(msgobj15_eff, int, S_IRUGO);
+MODULE_PARM_DESC(msgobj15_eff, "Extended 29-bit frames for message object 15 "
+		 "(default: 11-bit standard frames)");
+
+static int i82527_compat;
+module_param(i82527_compat, int, S_IRUGO);
+MODULE_PARM_DESC(i82527_compat, "Strict Intel 82527 comptibility mode "
+		 "without using additional functions");
+
+/*
+ * This driver uses the last 5 message objects 11..15. The definitions
+ * and structure below allows to configure and assign them to the real
+ * message object.
+ */
+static unsigned char cc770_obj_flags[CC770_OBJ_MAX] = {
+	[CC770_OBJ_RX0] = CC770_OBJ_FLAG_RX,
+	[CC770_OBJ_RX1] = CC770_OBJ_FLAG_RX | CC770_OBJ_FLAG_EFF,
+	[CC770_OBJ_RX_RTR0] = CC770_OBJ_FLAG_RX | CC770_OBJ_FLAG_RTR,
+	[CC770_OBJ_RX_RTR1] = CC770_OBJ_FLAG_RX | CC770_OBJ_FLAG_RTR |
+			      CC770_OBJ_FLAG_EFF,
+	[CC770_OBJ_TX] = 0,
+};
+
+static struct can_bittiming_const cc770_bittiming_const = {
+	.name = KBUILD_MODNAME,
+	.tseg1_min = 1,
+	.tseg1_max = 16,
+	.tseg2_min = 1,
+	.tseg2_max = 8,
+	.sjw_max = 4,
+	.brp_min = 1,
+	.brp_max = 64,
+	.brp_inc = 1,
+};
+
+static inline int intid2obj(unsigned int intid)
+{
+	if (intid == 2)
+		return 0;
+	else
+		return MSGOBJ_LAST + 2 - intid;
+}
+
+static void enable_all_objs(const struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	u8 msgcfg;
+	unsigned char obj_flags;
+	unsigned int o, mo;
+
+	for (o = 0; o < ARRAY_SIZE(priv->obj_flags); o++) {
+		obj_flags = priv->obj_flags[o];
+		mo = obj2msgobj(o);
+
+		if (obj_flags & CC770_OBJ_FLAG_RX) {
+			/*
+			 * We don't need extra objects for RTR and EFF if
+			 * the additional CC770 functions are enabled.
+			 */
+			if (priv->control_normal_mode & CTRL_EAF) {
+				if (o > 0)
+					continue;
+				netdev_dbg(dev, "Message object %d for "
+					   "RX data, RTR, SFF and EFF\n", mo);
+			} else {
+				netdev_dbg(dev,
+					   "Message object %d for RX %s %s\n",
+					   mo, obj_flags & CC770_OBJ_FLAG_RTR ?
+					   "RTR" : "data",
+					   obj_flags & CC770_OBJ_FLAG_EFF ?
+					   "EFF" : "SFF");
+			}
+
+			if (obj_flags & CC770_OBJ_FLAG_EFF)
+				msgcfg = MSGCFG_XTD;
+			else
+				msgcfg = 0;
+			if (obj_flags & CC770_OBJ_FLAG_RTR)
+				msgcfg |= MSGCFG_DIR;
+
+			cc770_write_reg(priv, msgobj[mo].config, msgcfg);
+			cc770_write_reg(priv, msgobj[mo].ctrl0,
+					MSGVAL_SET | TXIE_RES |
+					RXIE_SET | INTPND_RES);
+
+			if (obj_flags & CC770_OBJ_FLAG_RTR)
+				cc770_write_reg(priv, msgobj[mo].ctrl1,
+						NEWDAT_RES | CPUUPD_SET |
+						TXRQST_RES | RMTPND_RES);
+			else
+				cc770_write_reg(priv, msgobj[mo].ctrl1,
+						NEWDAT_RES | MSGLST_RES |
+						TXRQST_RES | RMTPND_RES);
+		} else {
+			netdev_dbg(dev, "Message object %d for "
+				   "TX data, RTR, SFF and EFF\n", mo);
+
+			cc770_write_reg(priv, msgobj[mo].ctrl1,
+					RMTPND_RES | TXRQST_RES |
+					CPUUPD_RES | NEWDAT_RES);
+			cc770_write_reg(priv, msgobj[mo].ctrl0,
+					MSGVAL_RES | TXIE_RES |
+					RXIE_RES | INTPND_RES);
+		}
+	}
+}
+
+static void disable_all_objs(const struct cc770_priv *priv)
+{
+	int o, mo;
+
+	for (o = 0; o <  ARRAY_SIZE(priv->obj_flags); o++) {
+		mo = obj2msgobj(o);
+
+		if (priv->obj_flags[o] & CC770_OBJ_FLAG_RX) {
+			if (o > 0 && priv->control_normal_mode & CTRL_EAF)
+				continue;
+
+			cc770_write_reg(priv, msgobj[mo].ctrl1,
+					NEWDAT_RES | MSGLST_RES |
+					TXRQST_RES | RMTPND_RES);
+			cc770_write_reg(priv, msgobj[mo].ctrl0,
+					MSGVAL_RES | TXIE_RES |
+					RXIE_RES | INTPND_RES);
+		} else {
+			/* Clear message object for send */
+			cc770_write_reg(priv, msgobj[mo].ctrl1,
+					RMTPND_RES | TXRQST_RES |
+					CPUUPD_RES | NEWDAT_RES);
+			cc770_write_reg(priv, msgobj[mo].ctrl0,
+					MSGVAL_RES | TXIE_RES |
+					RXIE_RES | INTPND_RES);
+		}
+	}
+}
+
+static void set_reset_mode(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+
+	/* Enable configuration and puts chip in bus-off, disable interrupts */
+	cc770_write_reg(priv, control, CTRL_CCE | CTRL_INI);
+
+	priv->can.state = CAN_STATE_STOPPED;
+
+	/* Clear interrupts */
+	cc770_read_reg(priv, interrupt);
+
+	/* Clear status register */
+	cc770_write_reg(priv, status, 0);
+
+	/* Disable all used message objects */
+	disable_all_objs(priv);
+}
+
+static void set_normal_mode(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+
+	/* Clear interrupts */
+	cc770_read_reg(priv, interrupt);
+
+	/* Clear status register and pre-set last error code */
+	cc770_write_reg(priv, status, STAT_LEC_MASK);
+
+	/* Enable all used message objects*/
+	enable_all_objs(dev);
+
+	/*
+	 * Clear bus-off, interrupts only for errors,
+	 * not for status change
+	 */
+	cc770_write_reg(priv, control, priv->control_normal_mode);
+
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+}
+
+static void chipset_init(struct cc770_priv *priv)
+{
+	int mo, id, data;
+
+	/* Enable configuration and put chip in bus-off, disable interrupts */
+	cc770_write_reg(priv, control, (CTRL_CCE | CTRL_INI));
+
+	/* Set CLKOUT divider and slew rates */
+	cc770_write_reg(priv, clkout, priv->clkout);
+
+	/* Configure CPU interface / CLKOUT enable */
+	cc770_write_reg(priv, cpu_interface, priv->cpu_interface);
+
+	/* Set bus configuration  */
+	cc770_write_reg(priv, bus_config, priv->bus_config);
+
+	/* Clear interrupts */
+	cc770_read_reg(priv, interrupt);
+
+	/* Clear status register */
+	cc770_write_reg(priv, status, 0);
+
+	/* Clear and invalidate message objects */
+	for (mo = MSGOBJ_FIRST; mo <= MSGOBJ_LAST; mo++) {
+		cc770_write_reg(priv, msgobj[mo].ctrl0,
+				INTPND_UNC | RXIE_RES |
+				TXIE_RES | MSGVAL_RES);
+		cc770_write_reg(priv, msgobj[mo].ctrl0,
+				INTPND_RES | RXIE_RES |
+				TXIE_RES | MSGVAL_RES);
+		cc770_write_reg(priv, msgobj[mo].ctrl1,
+				NEWDAT_RES | MSGLST_RES |
+				TXRQST_RES | RMTPND_RES);
+		for (data = 0; data < 8; data++)
+			cc770_write_reg(priv, msgobj[mo].data[data], 0);
+		for (id = 0; id < 4; id++)
+			cc770_write_reg(priv, msgobj[mo].id[id], 0);
+		cc770_write_reg(priv, msgobj[mo].config, 0);
+	}
+
+	/* Set all global ID masks to "don't care" */
+	cc770_write_reg(priv, global_mask_std[0], 0);
+	cc770_write_reg(priv, global_mask_std[1], 0);
+	cc770_write_reg(priv, global_mask_ext[0], 0);
+	cc770_write_reg(priv, global_mask_ext[1], 0);
+	cc770_write_reg(priv, global_mask_ext[2], 0);
+	cc770_write_reg(priv, global_mask_ext[3], 0);
+
+}
+
+static int cc770_probe_chip(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+
+	/* Enable configuration, put chip in bus-off, disable ints */
+	cc770_write_reg(priv, control, CTRL_CCE | CTRL_EAF | CTRL_INI);
+	/* Configure cpu interface / CLKOUT disable */
+	cc770_write_reg(priv, cpu_interface, priv->cpu_interface);
+
+	/*
+	 * Check if hardware reset is still inactive or maybe there
+	 * is no chip in this address space
+	 */
+	if (cc770_read_reg(priv, cpu_interface) & CPUIF_RST) {
+		netdev_info(dev, "probing @0x%p failed (reset)\n",
+			    priv->reg_base);
+		return -ENODEV;
+	}
+
+	/* Write and read back test pattern (some arbitrary values) */
+	cc770_write_reg(priv, msgobj[1].data[1], 0x25);
+	cc770_write_reg(priv, msgobj[2].data[3], 0x52);
+	cc770_write_reg(priv, msgobj[10].data[6], 0xc3);
+	if ((cc770_read_reg(priv, msgobj[1].data[1]) != 0x25) ||
+	    (cc770_read_reg(priv, msgobj[2].data[3]) != 0x52) ||
+	    (cc770_read_reg(priv, msgobj[10].data[6]) != 0xc3)) {
+		netdev_info(dev, "probing @0x%p failed (pattern)\n",
+			    priv->reg_base);
+		return -ENODEV;
+	}
+
+	/* Check if this chip is a CC770 supporting additional functions */
+	if (cc770_read_reg(priv, control) & CTRL_EAF)
+		priv->control_normal_mode |= CTRL_EAF;
+
+	return 0;
+}
+
+static void cc770_start(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+
+	/* leave reset mode */
+	if (priv->can.state != CAN_STATE_STOPPED)
+		set_reset_mode(dev);
+
+	/* leave reset mode */
+	set_normal_mode(dev);
+}
+
+static int cc770_set_mode(struct net_device *dev, enum can_mode mode)
+{
+	switch (mode) {
+	case CAN_MODE_START:
+		cc770_start(dev);
+		netif_wake_queue(dev);
+		break;
+
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int cc770_set_bittiming(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	struct can_bittiming *bt = &priv->can.bittiming;
+	u8 btr0, btr1;
+
+	btr0 = ((bt->brp - 1) & 0x3f) | (((bt->sjw - 1) & 0x3) << 6);
+	btr1 = ((bt->prop_seg + bt->phase_seg1 - 1) & 0xf) |
+		(((bt->phase_seg2 - 1) & 0x7) << 4);
+	if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES)
+		btr1 |= 0x80;
+
+	netdev_info(dev, "setting BTR0=0x%02x BTR1=0x%02x\n", btr0, btr1);
+
+	cc770_write_reg(priv, bit_timing_0, btr0);
+	cc770_write_reg(priv, bit_timing_1, btr1);
+
+	return 0;
+}
+
+static int cc770_get_berr_counter(const struct net_device *dev,
+				  struct can_berr_counter *bec)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+
+	bec->txerr = cc770_read_reg(priv, tx_error_counter);
+	bec->rxerr = cc770_read_reg(priv, rx_error_counter);
+
+	return 0;
+}
+
+static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	struct can_frame *cf = (struct can_frame *)skb->data;
+	unsigned int mo = obj2msgobj(CC770_OBJ_TX);
+	u8 dlc, rtr;
+	u32 id;
+	int i;
+
+	if (can_dropped_invalid_skb(dev, skb))
+		return NETDEV_TX_OK;
+
+	if ((cc770_read_reg(priv,
+			    msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) {
+		netdev_err(dev, "TX register is still occupied!\n");
+		return NETDEV_TX_BUSY;
+	}
+
+	netif_stop_queue(dev);
+
+	dlc = cf->can_dlc;
+	id = cf->can_id;
+	if (cf->can_id & CAN_RTR_FLAG)
+		rtr = 0;
+	else
+		rtr = MSGCFG_DIR;
+	cc770_write_reg(priv, msgobj[mo].ctrl1,
+			RMTPND_RES | TXRQST_RES | CPUUPD_SET | NEWDAT_RES);
+	cc770_write_reg(priv, msgobj[mo].ctrl0,
+			MSGVAL_SET | TXIE_SET | RXIE_RES | INTPND_RES);
+	if (id & CAN_EFF_FLAG) {
+		id &= CAN_EFF_MASK;
+		cc770_write_reg(priv, msgobj[mo].config,
+				(dlc << 4) | rtr | MSGCFG_XTD);
+		cc770_write_reg(priv, msgobj[mo].id[3], id << 3);
+		cc770_write_reg(priv, msgobj[mo].id[2], id >> 5);
+		cc770_write_reg(priv, msgobj[mo].id[1], id >> 13);
+		cc770_write_reg(priv, msgobj[mo].id[0], id >> 21);
+	} else {
+		id &= CAN_SFF_MASK;
+		cc770_write_reg(priv, msgobj[mo].config, (dlc << 4) | rtr);
+		cc770_write_reg(priv, msgobj[mo].id[0], id >> 3);
+		cc770_write_reg(priv, msgobj[mo].id[1], id << 5);
+	}
+
+	for (i = 0; i < dlc; i++)
+		cc770_write_reg(priv, msgobj[mo].data[i], cf->data[i]);
+
+	cc770_write_reg(priv, msgobj[mo].ctrl1,
+			RMTPND_RES | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC);
+
+	stats->tx_bytes += dlc;
+
+	can_put_echo_skb(skb, dev, 0);
+
+	/*
+	 * HM: We had some cases of repeated IRQs so make sure the
+	 * INT is acknowledged I know it's already further up, but
+	 * doing again fixed the issue
+	 */
+	cc770_write_reg(priv, msgobj[mo].ctrl0,
+			MSGVAL_UNC | TXIE_UNC | RXIE_UNC | INTPND_RES);
+
+	return NETDEV_TX_OK;
+}
+
+static void cc770_rx(struct net_device *dev, unsigned int mo, u8 ctrl1)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	u8 config;
+	u32 id;
+	int i;
+
+	skb = alloc_can_skb(dev, &cf);
+	if (!skb)
+		return;
+
+	config = cc770_read_reg(priv, msgobj[mo].config);
+
+	if (ctrl1 & RMTPND_SET) {
+		/*
+		 * Unfortunately, the chip does not store the real message
+		 * identifier of the received remote transmission request
+		 * frame. Therefore we set it to 0.
+		 */
+		cf->can_id = CAN_RTR_FLAG;
+		if (config & MSGCFG_XTD)
+			cf->can_id |= CAN_EFF_FLAG;
+		cf->can_dlc = 0;
+	} else {
+		if (config & MSGCFG_XTD) {
+			id = cc770_read_reg(priv, msgobj[mo].id[3]);
+			id |= cc770_read_reg(priv, msgobj[mo].id[2]) << 8;
+			id |= cc770_read_reg(priv, msgobj[mo].id[1]) << 16;
+			id |= cc770_read_reg(priv, msgobj[mo].id[0]) << 24;
+			id >>= 3;
+			id |= CAN_EFF_FLAG;
+		} else {
+			id = cc770_read_reg(priv, msgobj[mo].id[1]);
+			id |= cc770_read_reg(priv, msgobj[mo].id[0]) << 8;
+			id >>= 5;
+		}
+
+		cf->can_id = id;
+		cf->can_dlc = get_can_dlc((config & 0xf0) >> 4);
+		for (i = 0; i < cf->can_dlc; i++)
+			cf->data[i] = cc770_read_reg(priv, msgobj[mo].data[i]);
+	}
+	netif_rx(skb);
+
+	stats->rx_packets++;
+	stats->rx_bytes += cf->can_dlc;
+}
+
+static int cc770_err(struct net_device *dev, u8 status)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	u8 lec;
+
+	netdev_dbg(dev, "status interrupt (%#x)\n", status);
+
+	skb = alloc_can_err_skb(dev, &cf);
+	if (!skb)
+		return -ENOMEM;
+
+	/* Use extended functions of the CC770 */
+	if (priv->control_normal_mode & CTRL_EAF) {
+		cf->data[6] = cc770_read_reg(priv, tx_error_counter);
+		cf->data[7] = cc770_read_reg(priv, rx_error_counter);
+	}
+
+	if (status & STAT_BOFF) {
+		/* Disable interrupts */
+		cc770_write_reg(priv, control, CTRL_INI);
+		cf->can_id |= CAN_ERR_BUSOFF;
+		priv->can.state = CAN_STATE_BUS_OFF;
+		can_bus_off(dev);
+	} else if (status & STAT_WARN) {
+		cf->can_id |= CAN_ERR_CRTL;
+		/* Only the CC770 does show error passive */
+		if (cf->data[7] > 127) {
+			cf->data[1] = CAN_ERR_CRTL_RX_PASSIVE |
+				CAN_ERR_CRTL_TX_PASSIVE;
+			priv->can.state = CAN_STATE_ERROR_PASSIVE;
+			priv->can.can_stats.error_passive++;
+		} else {
+			cf->data[1] = CAN_ERR_CRTL_RX_WARNING |
+				CAN_ERR_CRTL_TX_WARNING;
+			priv->can.state = CAN_STATE_ERROR_WARNING;
+			priv->can.can_stats.error_warning++;
+		}
+	} else {
+		/* Back to error avtive */
+		cf->can_id |= CAN_ERR_PROT;
+		cf->data[2] = CAN_ERR_PROT_ACTIVE;
+		priv->can.state = CAN_STATE_ERROR_ACTIVE;
+	}
+
+	lec = status & STAT_LEC_MASK;
+	if (lec < 7 && lec > 0) {
+		if (lec == STAT_LEC_ACK) {
+			cf->can_id |= CAN_ERR_ACK;
+		} else {
+			cf->can_id |= CAN_ERR_PROT;
+			switch (lec) {
+			case STAT_LEC_STUFF:
+				cf->data[2] |= CAN_ERR_PROT_STUFF;
+				break;
+			case STAT_LEC_FORM:
+				cf->data[2] |= CAN_ERR_PROT_FORM;
+				break;
+			case STAT_LEC_BIT1:
+				cf->data[2] |= CAN_ERR_PROT_BIT1;
+				break;
+			case STAT_LEC_BIT0:
+				cf->data[2] |= CAN_ERR_PROT_BIT0;
+				break;
+			case STAT_LEC_CRC:
+				cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ;
+				break;
+			}
+		}
+	}
+
+	netif_rx(skb);
+
+	stats->rx_packets++;
+	stats->rx_bytes += cf->can_dlc;
+
+	return 0;
+}
+
+static int cc770_status_interrupt(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	u8 status;
+
+	status = cc770_read_reg(priv, status);
+	/* Reset the status register including RXOK and TXOK */
+	cc770_write_reg(priv, status, STAT_LEC_MASK);
+
+	if (status & (STAT_WARN | STAT_BOFF) ||
+	    (status & STAT_LEC_MASK) != STAT_LEC_MASK) {
+		cc770_err(dev, status);
+		return status & STAT_BOFF;
+	}
+
+	return 0;
+}
+
+static void cc770_rx_interrupt(struct net_device *dev, unsigned int o)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	unsigned int mo = obj2msgobj(o);
+	u8 ctrl1;
+	int n = CC770_MAX_MSG;
+
+	while (n--) {
+		ctrl1 = cc770_read_reg(priv, msgobj[mo].ctrl1);
+
+		if (!(ctrl1 & NEWDAT_SET))  {
+			/* Check for RTR if additional functions are enabled */
+			if (priv->control_normal_mode & CTRL_EAF) {
+				if (!(cc770_read_reg(priv, msgobj[mo].ctrl0) &
+				      INTPND_SET))
+					break;
+			} else {
+				break;
+			}
+		}
+
+		if (ctrl1 & MSGLST_SET) {
+			stats->rx_over_errors++;
+			stats->rx_errors++;
+		}
+		if (mo < MSGOBJ_LAST)
+			cc770_write_reg(priv, msgobj[mo].ctrl1,
+					NEWDAT_RES | MSGLST_RES |
+					TXRQST_UNC | RMTPND_UNC);
+		cc770_rx(dev, mo, ctrl1);
+
+		cc770_write_reg(priv, msgobj[mo].ctrl0,
+				MSGVAL_SET | TXIE_RES |
+				RXIE_SET | INTPND_RES);
+		cc770_write_reg(priv, msgobj[mo].ctrl1,
+				NEWDAT_RES | MSGLST_RES |
+				TXRQST_RES | RMTPND_RES);
+	}
+}
+
+static void cc770_rtr_interrupt(struct net_device *dev, unsigned int o)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	unsigned int mo = obj2msgobj(o);
+	u8 ctrl0, ctrl1;
+	int n = CC770_MAX_MSG;
+
+	while (n--) {
+		ctrl0 = cc770_read_reg(priv, msgobj[mo].ctrl0);
+		if (!(ctrl0 & INTPND_SET))
+			break;
+
+		ctrl1 = cc770_read_reg(priv, msgobj[mo].ctrl1);
+		cc770_rx(dev, mo, ctrl1);
+
+		cc770_write_reg(priv, msgobj[mo].ctrl0,
+				MSGVAL_SET | TXIE_RES |
+				RXIE_SET | INTPND_RES);
+		cc770_write_reg(priv, msgobj[mo].ctrl1,
+				NEWDAT_RES | CPUUPD_SET |
+				TXRQST_RES | RMTPND_RES);
+	}
+}
+
+static void cc770_tx_interrupt(struct net_device *dev, unsigned int o)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	unsigned int mo = obj2msgobj(o);
+
+	/* Nothing more to send, switch off interrupts */
+	cc770_write_reg(priv, msgobj[mo].ctrl0,
+			MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES);
+	/*
+	 * We had some cases of repeated IRQ so make sure the
+	 * INT is acknowledged
+	 */
+	cc770_write_reg(priv, msgobj[mo].ctrl0,
+			MSGVAL_UNC | TXIE_UNC | RXIE_UNC | INTPND_RES);
+
+	stats->tx_packets++;
+	can_get_echo_skb(dev, 0);
+	netif_wake_queue(dev);
+}
+
+irqreturn_t cc770_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = (struct net_device *)dev_id;
+	struct cc770_priv *priv = netdev_priv(dev);
+	u8 intid;
+	int o, n = 0;
+
+	/* Shared interrupts and IRQ off? */
+	if (priv->can.state == CAN_STATE_STOPPED)
+		return IRQ_NONE;
+
+	if (priv->pre_irq)
+		priv->pre_irq(priv);
+
+	while (n < CC770_MAX_IRQ) {
+		/* Read the highest pending interrupt request */
+		intid = cc770_read_reg(priv, interrupt);
+		if (!intid)
+			break;
+		n++;
+
+		if (intid == 1) {
+			/* Exit in case of bus-off */
+			if (cc770_status_interrupt(dev))
+				break;
+		} else {
+			o = intid2obj(intid);
+
+			if (o >= CC770_OBJ_MAX) {
+				netdev_err(dev, "Unexpected interrupt id %d\n",
+					   intid);
+				continue;
+			}
+
+			if (priv->obj_flags[o] & CC770_OBJ_FLAG_RTR)
+				cc770_rtr_interrupt(dev, o);
+			else if (priv->obj_flags[o] & CC770_OBJ_FLAG_RX)
+				cc770_rx_interrupt(dev, o);
+			else
+				cc770_tx_interrupt(dev, o);
+		}
+	}
+
+	if (priv->post_irq)
+		priv->post_irq(priv);
+
+	if (n >= CC770_MAX_IRQ)
+		netdev_dbg(dev, "%d messages handled in ISR", n);
+
+	return (n) ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static int cc770_open(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	int err;
+
+	/* set chip into reset mode */
+	set_reset_mode(dev);
+
+	/* common open */
+	err = open_candev(dev);
+	if (err)
+		return err;
+
+	err = request_irq(dev->irq, &cc770_interrupt, priv->irq_flags,
+			  dev->name, dev);
+	if (err) {
+		close_candev(dev);
+		return -EAGAIN;
+	}
+
+	/* init and start chip */
+	cc770_start(dev);
+
+	netif_start_queue(dev);
+
+	return 0;
+}
+
+static int cc770_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	set_reset_mode(dev);
+
+	free_irq(dev->irq, dev);
+	close_candev(dev);
+
+	return 0;
+}
+
+struct net_device *alloc_cc770dev(int sizeof_priv)
+{
+	struct net_device *dev;
+	struct cc770_priv *priv;
+
+	dev = alloc_candev(sizeof(struct cc770_priv) + sizeof_priv,
+			   CC770_ECHO_SKB_MAX);
+	if (!dev)
+		return NULL;
+
+	priv = netdev_priv(dev);
+
+	priv->dev = dev;
+	priv->can.bittiming_const = &cc770_bittiming_const;
+	priv->can.do_set_bittiming = cc770_set_bittiming;
+	priv->can.do_set_mode = cc770_set_mode;
+	priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
+
+	memcpy(priv->obj_flags, cc770_obj_flags, sizeof(cc770_obj_flags));
+
+	if (sizeof_priv)
+		priv->priv = (void *)priv + sizeof(struct cc770_priv);
+
+	return dev;
+}
+EXPORT_SYMBOL_GPL(alloc_cc770dev);
+
+void free_cc770dev(struct net_device *dev)
+{
+	free_candev(dev);
+}
+EXPORT_SYMBOL_GPL(free_cc770dev);
+
+static const struct net_device_ops cc770_netdev_ops = {
+	.ndo_open = cc770_open,
+	.ndo_stop = cc770_close,
+	.ndo_start_xmit = cc770_start_xmit,
+};
+
+int register_cc770dev(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	int err;
+
+	err = cc770_probe_chip(dev);
+	if (err)
+		return err;
+
+	dev->netdev_ops = &cc770_netdev_ops;
+
+	dev->flags |= IFF_ECHO;	/* we support local echo */
+
+	/* Should we use additional functions? */
+	if (!i82527_compat && priv->control_normal_mode & CTRL_EAF) {
+		priv->can.do_get_berr_counter = cc770_get_berr_counter;
+		priv->control_normal_mode = CTRL_IE | CTRL_EAF | CTRL_EIE;
+		netdev_dbg(dev, "i82527 mode with additional functions\n");
+	} else {
+		priv->control_normal_mode = CTRL_IE | CTRL_EIE;
+		netdev_dbg(dev, "strict i82527 compatibility mode\n");
+	}
+
+	chipset_init(priv);
+	set_reset_mode(dev);
+
+	return register_candev(dev);
+}
+EXPORT_SYMBOL_GPL(register_cc770dev);
+
+void unregister_cc770dev(struct net_device *dev)
+{
+	set_reset_mode(dev);
+	unregister_candev(dev);
+}
+EXPORT_SYMBOL_GPL(unregister_cc770dev);
+
+static __init int cc770_init(void)
+{
+	if (msgobj15_eff) {
+		cc770_obj_flags[CC770_OBJ_RX0] |= CC770_OBJ_FLAG_EFF;
+		cc770_obj_flags[CC770_OBJ_RX1] &= ~CC770_OBJ_FLAG_EFF;
+	}
+
+	pr_info("CAN netdevice driver\n");
+
+	return 0;
+}
+module_init(cc770_init);
+
+static __exit void cc770_exit(void)
+{
+	pr_info("driver removed\n");
+}
+module_exit(cc770_exit);
diff --git a/drivers/net/can/cc770/cc770.h b/drivers/net/can/cc770/cc770.h
new file mode 100644
index 000000000000..a1739db98d91
--- /dev/null
+++ b/drivers/net/can/cc770/cc770.h
@@ -0,0 +1,203 @@
+/*
+ * Core driver for the CC770 and AN82527 CAN controllers
+ *
+ * Copyright (C) 2009, 2011 Wolfgang Grandegger <wg@grandegger.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef CC770_DEV_H
+#define CC770_DEV_H
+
+#include <linux/can/dev.h>
+
+struct cc770_msgobj {
+	u8 ctrl0;
+	u8 ctrl1;
+	u8 id[4];
+	u8 config;
+	u8 data[8];
+	u8 dontuse;		/* padding */
+} __packed;
+
+struct cc770_regs {
+	union {
+		struct cc770_msgobj msgobj[16]; /* Message object 1..15 */
+		struct {
+			u8 control;		/* Control Register */
+			u8 status;		/* Status Register */
+			u8 cpu_interface;	/* CPU Interface Register */
+			u8 dontuse1;
+			u8 high_speed_read[2];	/* High Speed Read */
+			u8 global_mask_std[2];	/* Standard Global Mask */
+			u8 global_mask_ext[4];	/* Extended Global Mask */
+			u8 msg15_mask[4];	/* Message 15 Mask */
+			u8 dontuse2[15];
+			u8 clkout;		/* Clock Out Register */
+			u8 dontuse3[15];
+			u8 bus_config;		/* Bus Configuration Register */
+			u8 dontuse4[15];
+			u8 bit_timing_0;	/* Bit Timing Register byte 0 */
+			u8 dontuse5[15];
+			u8 bit_timing_1;	/* Bit Timing Register byte 1 */
+			u8 dontuse6[15];
+			u8 interrupt;		/* Interrupt Register */
+			u8 dontuse7[15];
+			u8 rx_error_counter;	/* Receive Error Counter */
+			u8 dontuse8[15];
+			u8 tx_error_counter;	/* Transmit Error Counter */
+			u8 dontuse9[31];
+			u8 p1_conf;
+			u8 dontuse10[15];
+			u8 p2_conf;
+			u8 dontuse11[15];
+			u8 p1_in;
+			u8 dontuse12[15];
+			u8 p2_in;
+			u8 dontuse13[15];
+			u8 p1_out;
+			u8 dontuse14[15];
+			u8 p2_out;
+			u8 dontuse15[15];
+			u8 serial_reset_addr;
+		};
+	};
+} __packed;
+
+/* Control Register (0x00) */
+#define CTRL_INI	0x01	/* Initialization */
+#define CTRL_IE		0x02	/* Interrupt Enable */
+#define CTRL_SIE	0x04	/* Status Interrupt Enable */
+#define CTRL_EIE	0x08	/* Error Interrupt Enable */
+#define CTRL_EAF	0x20	/* Enable additional functions */
+#define CTRL_CCE	0x40	/* Change Configuration Enable */
+
+/* Status Register (0x01) */
+#define STAT_LEC_STUFF	0x01	/* Stuff error */
+#define STAT_LEC_FORM	0x02	/* Form error */
+#define STAT_LEC_ACK	0x03	/* Acknowledgement error */
+#define STAT_LEC_BIT1	0x04	/* Bit1 error */
+#define STAT_LEC_BIT0	0x05	/* Bit0 error */
+#define STAT_LEC_CRC	0x06	/* CRC error */
+#define STAT_LEC_MASK	0x07	/* Last Error Code mask */
+#define STAT_TXOK	0x08	/* Transmit Message Successfully */
+#define STAT_RXOK	0x10	/* Receive Message Successfully */
+#define STAT_WAKE	0x20	/* Wake Up Status */
+#define STAT_WARN	0x40	/* Warning Status */
+#define STAT_BOFF	0x80	/* Bus Off Status */
+
+/*
+ * CPU Interface Register (0x02)
+ * Clock Out Register (0x1f)
+ * Bus Configuration Register (0x2f)
+ *
+ * see include/linux/can/platform/cc770.h
+ */
+
+/* Message Control Register 0 (Base Address + 0x0) */
+#define INTPND_RES	0x01	/* No Interrupt pending */
+#define INTPND_SET	0x02	/* Interrupt pending */
+#define INTPND_UNC	0x03
+#define RXIE_RES	0x04	/* Receive Interrupt Disable */
+#define RXIE_SET	0x08	/* Receive Interrupt Enable */
+#define RXIE_UNC	0x0c
+#define TXIE_RES	0x10	/* Transmit Interrupt Disable */
+#define TXIE_SET	0x20	/* Transmit Interrupt Enable */
+#define TXIE_UNC	0x30
+#define MSGVAL_RES	0x40	/* Message Invalid */
+#define MSGVAL_SET	0x80	/* Message Valid */
+#define MSGVAL_UNC	0xc0
+
+/* Message Control Register 1 (Base Address + 0x01) */
+#define NEWDAT_RES	0x01	/* No New Data */
+#define NEWDAT_SET	0x02	/* New Data */
+#define NEWDAT_UNC	0x03
+#define MSGLST_RES	0x04	/* No Message Lost */
+#define MSGLST_SET	0x08	/* Message Lost */
+#define MSGLST_UNC	0x0c
+#define CPUUPD_RES	0x04	/* No CPU Updating */
+#define CPUUPD_SET	0x08	/* CPU Updating */
+#define CPUUPD_UNC	0x0c
+#define TXRQST_RES	0x10	/* No Transmission Request */
+#define TXRQST_SET	0x20	/* Transmission Request */
+#define TXRQST_UNC	0x30
+#define RMTPND_RES	0x40	/* No Remote Request Pending */
+#define RMTPND_SET	0x80	/* Remote Request Pending */
+#define RMTPND_UNC	0xc0
+
+/* Message Configuration Register (Base Address + 0x06) */
+#define MSGCFG_XTD	0x04	/* Extended Identifier */
+#define MSGCFG_DIR	0x08	/* Direction is Transmit */
+
+#define MSGOBJ_FIRST	1
+#define MSGOBJ_LAST	15
+
+#define CC770_IO_SIZE	0x100
+#define CC770_MAX_IRQ	20	/* max. number of interrupts handled in ISR */
+#define CC770_MAX_MSG	4	/* max. number of messages handled in ISR */
+
+#define CC770_ECHO_SKB_MAX	1
+
+#define cc770_read_reg(priv, member)					\
+	priv->read_reg(priv, offsetof(struct cc770_regs, member))
+
+#define cc770_write_reg(priv, member, value)				\
+	priv->write_reg(priv, offsetof(struct cc770_regs, member), value)
+
+/*
+ * Message objects and flags used by this driver
+ */
+#define CC770_OBJ_FLAG_RX	0x01
+#define CC770_OBJ_FLAG_RTR	0x02
+#define CC770_OBJ_FLAG_EFF	0x04
+
+enum {
+	CC770_OBJ_RX0 = 0,	/* for receiving normal messages */
+	CC770_OBJ_RX1,		/* for receiving normal messages */
+	CC770_OBJ_RX_RTR0,	/* for receiving remote transmission requests */
+	CC770_OBJ_RX_RTR1,	/* for receiving remote transmission requests */
+	CC770_OBJ_TX,		/* for sending messages */
+	CC770_OBJ_MAX
+};
+
+#define obj2msgobj(o)	(MSGOBJ_LAST - (o)) /* message object 11..15 */
+
+/*
+ * CC770 private data structure
+ */
+struct cc770_priv {
+	struct can_priv can;	/* must be the first member */
+	struct sk_buff *echo_skb;
+
+	/* the lower-layer is responsible for appropriate locking */
+	u8 (*read_reg)(const struct cc770_priv *priv, int reg);
+	void (*write_reg)(const struct cc770_priv *priv, int reg, u8 val);
+	void (*pre_irq)(const struct cc770_priv *priv);
+	void (*post_irq)(const struct cc770_priv *priv);
+
+	void *priv;		/* for board-specific data */
+	struct net_device *dev;
+
+	void __iomem *reg_base;	 /* ioremap'ed address to registers */
+	unsigned long irq_flags; /* for request_irq() */
+
+	unsigned char obj_flags[CC770_OBJ_MAX];
+	u8 control_normal_mode;	/* Control register for normal mode */
+	u8 cpu_interface;	/* CPU interface register */
+	u8 clkout;		/* Clock out register */
+	u8 bus_config;		/* Bus conffiguration register */
+};
+
+struct net_device *alloc_cc770dev(int sizeof_priv);
+void free_cc770dev(struct net_device *dev);
+int register_cc770dev(struct net_device *dev);
+void unregister_cc770dev(struct net_device *dev);
+
+#endif /* CC770_DEV_H */
diff --git a/include/linux/can/platform/cc770.h b/include/linux/can/platform/cc770.h
new file mode 100644
index 000000000000..7702641f87ee
--- /dev/null
+++ b/include/linux/can/platform/cc770.h
@@ -0,0 +1,33 @@
+#ifndef _CAN_PLATFORM_CC770_H_
+#define _CAN_PLATFORM_CC770_H_
+
+/* CPU Interface Register (0x02) */
+#define CPUIF_CEN	0x01	/* Clock Out Enable */
+#define CPUIF_MUX	0x04	/* Multiplex */
+#define CPUIF_SLP	0x08	/* Sleep */
+#define CPUIF_PWD	0x10	/* Power Down Mode */
+#define CPUIF_DMC	0x20	/* Divide Memory Clock */
+#define CPUIF_DSC	0x40	/* Divide System Clock */
+#define CPUIF_RST	0x80	/* Hardware Reset Status */
+
+/* Clock Out Register (0x1f) */
+#define CLKOUT_CD_MASK  0x0f	/* Clock Divider mask */
+#define CLKOUT_SL_MASK	0x30	/* Slew Rate mask */
+#define CLKOUT_SL_SHIFT	4
+
+/* Bus Configuration Register (0x2f) */
+#define BUSCFG_DR0	0x01	/* Disconnect RX0 Input / Select RX input */
+#define BUSCFG_DR1	0x02	/* Disconnect RX1 Input / Silent mode */
+#define BUSCFG_DT1	0x08	/* Disconnect TX1 Output */
+#define BUSCFG_POL	0x20	/* Polarity dominant or recessive */
+#define BUSCFG_CBY	0x40	/* Input Comparator Bypass */
+
+struct cc770_platform_data {
+	u32 osc_freq;	/* CAN bus oscillator frequency in Hz */
+
+	u8 cir;		/* CPU Interface Register */
+	u8 cor;		/* Clock Out Register */
+	u8 bcr;		/* Bus Configuration Register */
+};
+
+#endif	/* !_CAN_PLATFORM_CC770_H_ */
-- 
cgit v1.2.3


From 65698610f58153eb7621be3fb4f57ca318b19c60 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 1 Dec 2011 14:16:04 -0500
Subject: net: Make ndo_neigh_destroy return void.

The return value isn't used.

Suggested by Ben Hucthings.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1c4ddb37f2b5..21440e31fdab 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -975,7 +975,7 @@ struct net_device_ops {
 	int			(*ndo_set_features)(struct net_device *dev,
 						    netdev_features_t features);
 	int			(*ndo_neigh_construct)(struct neighbour *n);
-	int			(*ndo_neigh_destroy)(struct neighbour *n);
+	void			(*ndo_neigh_destroy)(struct neighbour *n);
 };
 
 /*
-- 
cgit v1.2.3


From 00dc9ad18d707f36b2fb4af98fd2cf0548d2b258 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 1 Dec 2011 00:01:31 +0100
Subject: PM / Runtime: Use device PM QoS constraints (v2)

Make the runtime PM core use device PM QoS constraints to check if
it is allowed to suspend a given device, so that an error code is
returned if the device's own PM QoS constraint is negative or one of
its children has already been suspended for too long.  If this is
not the case, the maximum estimated time the device is allowed to be
suspended, computed as the minimum of the device's PM QoS constraint
and the PM QoS constraints of its children (reduced by the difference
between the current time and their suspend times) is stored in a new
device's PM field power.max_time_suspended_ns that can be used by
the device's subsystem or PM domain to decide whether or not to put
the device into lower-power (and presumably higher-latency) states
later (if the constraint is 0, which means "no constraint", the
power.max_time_suspended_ns is set to -1).

Additionally, the time of execution of the subsystem-level
.runtime_suspend() callback for the device is recorded in the new
power.suspend_time field for later use by the device's subsystem or
PM domain along with power.max_time_suspended_ns (it also is used
by the core code when the device's parent is suspended).

Introduce a new helper function,
pm_runtime_update_max_time_suspended(), allowing subsystems and PM
domains (or device drivers) to update the power.max_time_suspended_ns
field, for example after changing the power state of a suspended
device.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/qos.c     |  24 ++++---
 drivers/base/power/runtime.c | 148 +++++++++++++++++++++++++++++++++++++------
 include/linux/pm.h           |   2 +
 include/linux/pm_qos.h       |   3 +
 include/linux/pm_runtime.h   |   5 ++
 5 files changed, 154 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c
index 86de6c50fc41..03f4bd069ca8 100644
--- a/drivers/base/power/qos.c
+++ b/drivers/base/power/qos.c
@@ -47,21 +47,29 @@ static DEFINE_MUTEX(dev_pm_qos_mtx);
 static BLOCKING_NOTIFIER_HEAD(dev_pm_notifiers);
 
 /**
- * dev_pm_qos_read_value - Get PM QoS constraint for a given device.
+ * __dev_pm_qos_read_value - Get PM QoS constraint for a given device.
+ * @dev: Device to get the PM QoS constraint value for.
+ *
+ * This routine must be called with dev->power.lock held.
+ */
+s32 __dev_pm_qos_read_value(struct device *dev)
+{
+	struct pm_qos_constraints *c = dev->power.constraints;
+
+	return c ? pm_qos_read_value(c) : 0;
+}
+
+/**
+ * dev_pm_qos_read_value - Get PM QoS constraint for a given device (locked).
  * @dev: Device to get the PM QoS constraint value for.
  */
 s32 dev_pm_qos_read_value(struct device *dev)
 {
-	struct pm_qos_constraints *c;
 	unsigned long flags;
-	s32 ret = 0;
+	s32 ret;
 
 	spin_lock_irqsave(&dev->power.lock, flags);
-
-	c = dev->power.constraints;
-	if (c)
-		ret = pm_qos_read_value(c);
-
+	ret = __dev_pm_qos_read_value(dev);
 	spin_unlock_irqrestore(&dev->power.lock, flags);
 
 	return ret;
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 8c78443bca8f..068f7ed1f009 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -279,6 +279,47 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev)
 	return retval != -EACCES ? retval : -EIO;
 }
 
+struct rpm_qos_data {
+	ktime_t time_now;
+	s64 constraint_ns;
+};
+
+/**
+ * rpm_update_qos_constraint - Update a given PM QoS constraint data.
+ * @dev: Device whose timing data to use.
+ * @data: PM QoS constraint data to update.
+ *
+ * Use the suspend timing data of @dev to update PM QoS constraint data pointed
+ * to by @data.
+ */
+static int rpm_update_qos_constraint(struct device *dev, void *data)
+{
+	struct rpm_qos_data *qos = data;
+	unsigned long flags;
+	s64 delta_ns;
+	int ret = 0;
+
+	spin_lock_irqsave(&dev->power.lock, flags);
+
+	if (dev->power.max_time_suspended_ns < 0)
+		goto out;
+
+	delta_ns = dev->power.max_time_suspended_ns -
+		ktime_to_ns(ktime_sub(qos->time_now, dev->power.suspend_time));
+	if (delta_ns <= 0) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (qos->constraint_ns > delta_ns || qos->constraint_ns == 0)
+		qos->constraint_ns = delta_ns;
+
+ out:
+	spin_unlock_irqrestore(&dev->power.lock, flags);
+
+	return ret;
+}
+
 /**
  * rpm_suspend - Carry out runtime suspend of given device.
  * @dev: Device to suspend.
@@ -305,6 +346,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 {
 	int (*callback)(struct device *);
 	struct device *parent = NULL;
+	struct rpm_qos_data qos;
 	int retval;
 
 	trace_rpm_suspend(dev, rpmflags);
@@ -400,8 +442,38 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 		goto out;
 	}
 
+	qos.constraint_ns = __dev_pm_qos_read_value(dev);
+	if (qos.constraint_ns < 0) {
+		/* Negative constraint means "never suspend". */
+		retval = -EPERM;
+		goto out;
+	}
+	qos.constraint_ns *= NSEC_PER_USEC;
+	qos.time_now = ktime_get();
+
 	__update_runtime_status(dev, RPM_SUSPENDING);
 
+	if (!dev->power.ignore_children) {
+		if (dev->power.irq_safe)
+			spin_unlock(&dev->power.lock);
+		else
+			spin_unlock_irq(&dev->power.lock);
+
+		retval = device_for_each_child(dev, &qos,
+					       rpm_update_qos_constraint);
+
+		if (dev->power.irq_safe)
+			spin_lock(&dev->power.lock);
+		else
+			spin_lock_irq(&dev->power.lock);
+
+		if (retval)
+			goto fail;
+	}
+
+	dev->power.suspend_time = qos.time_now;
+	dev->power.max_time_suspended_ns = qos.constraint_ns ? : -1;
+
 	if (dev->pm_domain)
 		callback = dev->pm_domain->ops.runtime_suspend;
 	else if (dev->type && dev->type->pm)
@@ -414,27 +486,9 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 		callback = NULL;
 
 	retval = rpm_callback(callback, dev);
-	if (retval) {
-		__update_runtime_status(dev, RPM_ACTIVE);
-		dev->power.deferred_resume = false;
-		if (retval == -EAGAIN || retval == -EBUSY) {
-			dev->power.runtime_error = 0;
+	if (retval)
+		goto fail;
 
-			/*
-			 * If the callback routine failed an autosuspend, and
-			 * if the last_busy time has been updated so that there
-			 * is a new autosuspend expiration time, automatically
-			 * reschedule another autosuspend.
-			 */
-			if ((rpmflags & RPM_AUTO) &&
-			    pm_runtime_autosuspend_expiration(dev) != 0)
-				goto repeat;
-		} else {
-			pm_runtime_cancel_pending(dev);
-		}
-		wake_up_all(&dev->power.wait_queue);
-		goto out;
-	}
  no_callback:
 	__update_runtime_status(dev, RPM_SUSPENDED);
 	pm_runtime_deactivate_timer(dev);
@@ -466,6 +520,29 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 	trace_rpm_return_int(dev, _THIS_IP_, retval);
 
 	return retval;
+
+ fail:
+	__update_runtime_status(dev, RPM_ACTIVE);
+	dev->power.suspend_time = ktime_set(0, 0);
+	dev->power.max_time_suspended_ns = -1;
+	dev->power.deferred_resume = false;
+	if (retval == -EAGAIN || retval == -EBUSY) {
+		dev->power.runtime_error = 0;
+
+		/*
+		 * If the callback routine failed an autosuspend, and
+		 * if the last_busy time has been updated so that there
+		 * is a new autosuspend expiration time, automatically
+		 * reschedule another autosuspend.
+		 */
+		if ((rpmflags & RPM_AUTO) &&
+		    pm_runtime_autosuspend_expiration(dev) != 0)
+			goto repeat;
+	} else {
+		pm_runtime_cancel_pending(dev);
+	}
+	wake_up_all(&dev->power.wait_queue);
+	goto out;
 }
 
 /**
@@ -620,6 +697,9 @@ static int rpm_resume(struct device *dev, int rpmflags)
 	if (dev->power.no_callbacks)
 		goto no_callback;	/* Assume success. */
 
+	dev->power.suspend_time = ktime_set(0, 0);
+	dev->power.max_time_suspended_ns = -1;
+
 	__update_runtime_status(dev, RPM_RESUMING);
 
 	if (dev->pm_domain)
@@ -1279,6 +1359,9 @@ void pm_runtime_init(struct device *dev)
 	setup_timer(&dev->power.suspend_timer, pm_suspend_timer_fn,
 			(unsigned long)dev);
 
+	dev->power.suspend_time = ktime_set(0, 0);
+	dev->power.max_time_suspended_ns = -1;
+
 	init_waitqueue_head(&dev->power.wait_queue);
 }
 
@@ -1296,3 +1379,28 @@ void pm_runtime_remove(struct device *dev)
 	if (dev->power.irq_safe && dev->parent)
 		pm_runtime_put_sync(dev->parent);
 }
+
+/**
+ * pm_runtime_update_max_time_suspended - Update device's suspend time data.
+ * @dev: Device to handle.
+ * @delta_ns: Value to subtract from the device's max_time_suspended_ns field.
+ *
+ * Update the device's power.max_time_suspended_ns field by subtracting
+ * @delta_ns from it.  The resulting value of power.max_time_suspended_ns is
+ * never negative.
+ */
+void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->power.lock, flags);
+
+	if (delta_ns > 0 && dev->power.max_time_suspended_ns > 0) {
+		if (dev->power.max_time_suspended_ns > delta_ns)
+			dev->power.max_time_suspended_ns -= delta_ns;
+		else
+			dev->power.max_time_suspended_ns = 0;
+	}
+
+	spin_unlock_irqrestore(&dev->power.lock, flags);
+}
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 3f3ed83a9aa5..a7676efa6831 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -521,6 +521,8 @@ struct dev_pm_info {
 	unsigned long		active_jiffies;
 	unsigned long		suspended_jiffies;
 	unsigned long		accounting_timestamp;
+	ktime_t			suspend_time;
+	s64			max_time_suspended_ns;
 #endif
 	struct pm_subsys_data	*subsys_data;  /* Owned by the subsystem. */
 	struct pm_qos_constraints *constraints;
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 83b0ea302a80..775a3236343d 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -78,6 +78,7 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier);
 int pm_qos_request_active(struct pm_qos_request *req);
 s32 pm_qos_read_value(struct pm_qos_constraints *c);
 
+s32 __dev_pm_qos_read_value(struct device *dev);
 s32 dev_pm_qos_read_value(struct device *dev);
 int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req,
 			   s32 value);
@@ -119,6 +120,8 @@ static inline int pm_qos_request_active(struct pm_qos_request *req)
 static inline s32 pm_qos_read_value(struct pm_qos_constraints *c)
 			{ return 0; }
 
+static inline s32 __dev_pm_qos_read_value(struct device *dev)
+			{ return 0; }
 static inline s32 dev_pm_qos_read_value(struct device *dev)
 			{ return 0; }
 static inline int dev_pm_qos_add_request(struct device *dev,
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index d3085e72a0ee..609daae7a014 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -45,6 +45,8 @@ extern void pm_runtime_irq_safe(struct device *dev);
 extern void __pm_runtime_use_autosuspend(struct device *dev, bool use);
 extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
 extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
+extern void pm_runtime_update_max_time_suspended(struct device *dev,
+						 s64 delta_ns);
 
 static inline bool pm_children_suspended(struct device *dev)
 {
@@ -148,6 +150,9 @@ static inline void pm_runtime_set_autosuspend_delay(struct device *dev,
 static inline unsigned long pm_runtime_autosuspend_expiration(
 				struct device *dev) { return 0; }
 
+static inline void pm_runtime_update_max_time_suspended(struct device *dev,
+							s64 delta_ns) {}
+
 #endif /* !CONFIG_PM_RUNTIME */
 
 static inline int pm_runtime_idle(struct device *dev)
-- 
cgit v1.2.3


From d5e4cbfe2049fca375cb19c4bc0cf676e8b4a88a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 27 Nov 2011 13:11:36 +0100
Subject: PM / Domains: Make it possible to use per-device domain callbacks

The current generic PM domains code requires that the same .stop(),
.start() and .active_wakeup() device callback routines be used for
all devices in the given domain, which is inflexible and may not
cover some specific use cases.  For this reason, make it possible to
use device specific .start()/.stop() and .active_wakeup() callback
routines by adding corresponding callback pointers to struct
generic_pm_domain_data.  Add a new helper routine,
pm_genpd_register_callbacks(), that can be used to populate
the new per-device callback pointers.

Modify the shmobile's power domains code to allow drivers to add
their own code to be run during the device stop and start operations
with the help of the new callback pointers.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Magnus Damm <damm@opensource.se>
---
 arch/arm/mach-shmobile/pm-sh7372.c |  40 +++++++++-
 drivers/base/power/domain.c        | 152 ++++++++++++++++++++++++++++---------
 include/linux/pm_domain.h          |  27 ++++++-
 3 files changed, 175 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c
index 34bbcbfb1706..6777bb1be059 100644
--- a/arch/arm/mach-shmobile/pm-sh7372.c
+++ b/arch/arm/mach-shmobile/pm-sh7372.c
@@ -156,7 +156,10 @@ static void sh7372_a4r_suspend(void)
 
 static bool pd_active_wakeup(struct device *dev)
 {
-	return true;
+	bool (*active_wakeup)(struct device *dev);
+
+	active_wakeup = dev_gpd_data(dev)->ops.active_wakeup;
+	return active_wakeup ? active_wakeup(dev) : true;
 }
 
 static bool sh7372_power_down_forbidden(struct dev_pm_domain *domain)
@@ -168,15 +171,44 @@ struct dev_power_governor sh7372_always_on_gov = {
 	.power_down_ok = sh7372_power_down_forbidden,
 };
 
+static int sh7372_stop_dev(struct device *dev)
+{
+	int (*stop)(struct device *dev);
+
+	stop = dev_gpd_data(dev)->ops.stop;
+	if (stop) {
+		int ret = stop(dev);
+		if (ret)
+			return ret;
+	}
+	return pm_clk_suspend(dev);
+}
+
+static int sh7372_start_dev(struct device *dev)
+{
+	int (*start)(struct device *dev);
+	int ret;
+
+	ret = pm_clk_resume(dev);
+	if (ret)
+		return ret;
+
+	start = dev_gpd_data(dev)->ops.start;
+	if (start)
+		ret = start(dev);
+
+	return ret;
+}
+
 void sh7372_init_pm_domain(struct sh7372_pm_domain *sh7372_pd)
 {
 	struct generic_pm_domain *genpd = &sh7372_pd->genpd;
 
 	pm_genpd_init(genpd, sh7372_pd->gov, false);
-	genpd->stop_device = pm_clk_suspend;
-	genpd->start_device = pm_clk_resume;
+	genpd->dev_ops.stop = sh7372_stop_dev;
+	genpd->dev_ops.start = sh7372_start_dev;
+	genpd->dev_ops.active_wakeup = pd_active_wakeup;
 	genpd->dev_irq_safe = true;
-	genpd->active_wakeup = pd_active_wakeup;
 	genpd->power_off = pd_power_down;
 	genpd->power_on = pd_power_up;
 	__pd_power_up(sh7372_pd, false);
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 6790cf7eba5a..94afaa2686a6 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -15,6 +15,23 @@
 #include <linux/err.h>
 #include <linux/sched.h>
 #include <linux/suspend.h>
+#include <linux/export.h>
+
+#define GENPD_DEV_CALLBACK(genpd, type, callback, dev)		\
+({								\
+	type (*__routine)(struct device *__d); 			\
+	type __ret = (type)0;					\
+								\
+	__routine = genpd->dev_ops.callback; 			\
+	if (__routine) {					\
+		__ret = __routine(dev); 			\
+	} else {						\
+		__routine = dev_gpd_data(dev)->ops.callback;	\
+		if (__routine) 					\
+			__ret = __routine(dev);			\
+	}							\
+	__ret;							\
+})
 
 static LIST_HEAD(gpd_list);
 static DEFINE_MUTEX(gpd_list_lock);
@@ -29,6 +46,16 @@ static struct generic_pm_domain *dev_to_genpd(struct device *dev)
 	return pd_to_genpd(dev->pm_domain);
 }
 
+static int genpd_stop_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, stop, dev);
+}
+
+static int genpd_start_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, start, dev);
+}
+
 static bool genpd_sd_counter_dec(struct generic_pm_domain *genpd)
 {
 	bool ret = false;
@@ -199,13 +226,9 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd,
 	mutex_unlock(&genpd->lock);
 
 	if (drv && drv->pm && drv->pm->runtime_suspend) {
-		if (genpd->start_device)
-			genpd->start_device(dev);
-
+		genpd_start_dev(genpd, dev);
 		ret = drv->pm->runtime_suspend(dev);
-
-		if (genpd->stop_device)
-			genpd->stop_device(dev);
+		genpd_stop_dev(genpd, dev);
 	}
 
 	mutex_lock(&genpd->lock);
@@ -235,13 +258,9 @@ static void __pm_genpd_restore_device(struct pm_domain_data *pdd,
 	mutex_unlock(&genpd->lock);
 
 	if (drv && drv->pm && drv->pm->runtime_resume) {
-		if (genpd->start_device)
-			genpd->start_device(dev);
-
+		genpd_start_dev(genpd, dev);
 		drv->pm->runtime_resume(dev);
-
-		if (genpd->stop_device)
-			genpd->stop_device(dev);
+		genpd_stop_dev(genpd, dev);
 	}
 
 	mutex_lock(&genpd->lock);
@@ -413,6 +432,7 @@ static void genpd_power_off_work_fn(struct work_struct *work)
 static int pm_genpd_runtime_suspend(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
+	int ret;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -422,11 +442,9 @@ static int pm_genpd_runtime_suspend(struct device *dev)
 
 	might_sleep_if(!genpd->dev_irq_safe);
 
-	if (genpd->stop_device) {
-		int ret = genpd->stop_device(dev);
-		if (ret)
-			return ret;
-	}
+	ret = genpd_stop_dev(genpd, dev);
+	if (ret)
+		return ret;
 
 	/*
 	 * If power.irq_safe is set, this routine will be run with interrupts
@@ -502,8 +520,7 @@ static int pm_genpd_runtime_resume(struct device *dev)
 	mutex_unlock(&genpd->lock);
 
  out:
-	if (genpd->start_device)
-		genpd->start_device(dev);
+	genpd_start_dev(genpd, dev);
 
 	return 0;
 }
@@ -534,6 +551,12 @@ static inline void genpd_power_off_work_fn(struct work_struct *work) {}
 
 #ifdef CONFIG_PM_SLEEP
 
+static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd,
+				    struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, bool, active_wakeup, dev);
+}
+
 /**
  * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters.
  * @genpd: PM domain to power off, if possible.
@@ -590,7 +613,7 @@ static bool resume_needed(struct device *dev, struct generic_pm_domain *genpd)
 	if (!device_can_wakeup(dev))
 		return false;
 
-	active_wakeup = genpd->active_wakeup && genpd->active_wakeup(dev);
+	active_wakeup = genpd_dev_active_wakeup(genpd, dev);
 	return device_may_wakeup(dev) ? active_wakeup : !active_wakeup;
 }
 
@@ -646,7 +669,7 @@ static int pm_genpd_prepare(struct device *dev)
 	/*
 	 * The PM domain must be in the GPD_STATE_ACTIVE state at this point,
 	 * so pm_genpd_poweron() will return immediately, but if the device
-	 * is suspended (e.g. it's been stopped by .stop_device()), we need
+	 * is suspended (e.g. it's been stopped by genpd_stop_dev()), we need
 	 * to make it operational.
 	 */
 	pm_runtime_resume(dev);
@@ -714,12 +737,10 @@ static int pm_genpd_suspend_noirq(struct device *dev)
 	if (ret)
 		return ret;
 
-	if (dev->power.wakeup_path
-	    && genpd->active_wakeup && genpd->active_wakeup(dev))
+	if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev))
 		return 0;
 
-	if (genpd->stop_device)
-		genpd->stop_device(dev);
+	genpd_stop_dev(genpd, dev);
 
 	/*
 	 * Since all of the "noirq" callbacks are executed sequentially, it is
@@ -761,8 +782,7 @@ static int pm_genpd_resume_noirq(struct device *dev)
 	 */
 	pm_genpd_poweron(genpd);
 	genpd->suspended_count--;
-	if (genpd->start_device)
-		genpd->start_device(dev);
+	genpd_start_dev(genpd, dev);
 
 	return pm_generic_resume_noirq(dev);
 }
@@ -836,8 +856,7 @@ static int pm_genpd_freeze_noirq(struct device *dev)
 	if (ret)
 		return ret;
 
-	if (genpd->stop_device)
-		genpd->stop_device(dev);
+	genpd_stop_dev(genpd, dev);
 
 	return 0;
 }
@@ -864,8 +883,7 @@ static int pm_genpd_thaw_noirq(struct device *dev)
 	if (genpd->suspend_power_off)
 		return 0;
 
-	if (genpd->start_device)
-		genpd->start_device(dev);
+	genpd_start_dev(genpd, dev);
 
 	return pm_generic_thaw_noirq(dev);
 }
@@ -938,12 +956,10 @@ static int pm_genpd_dev_poweroff_noirq(struct device *dev)
 	if (ret)
 		return ret;
 
-	if (dev->power.wakeup_path
-	    && genpd->active_wakeup && genpd->active_wakeup(dev))
+	if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev))
 		return 0;
 
-	if (genpd->stop_device)
-		genpd->stop_device(dev);
+	genpd_stop_dev(genpd, dev);
 
 	/*
 	 * Since all of the "noirq" callbacks are executed sequentially, it is
@@ -993,8 +1009,7 @@ static int pm_genpd_restore_noirq(struct device *dev)
 
 	pm_genpd_poweron(genpd);
 	genpd->suspended_count--;
-	if (genpd->start_device)
-		genpd->start_device(dev);
+	genpd_start_dev(genpd, dev);
 
 	return pm_generic_restore_noirq(dev);
 }
@@ -1279,6 +1294,69 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 	return ret;
 }
 
+/**
+ * pm_genpd_add_callbacks - Add PM domain callbacks to a given device.
+ * @dev: Device to add the callbacks to.
+ * @ops: Set of callbacks to add.
+ */
+int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops)
+{
+	struct pm_domain_data *pdd;
+	int ret = 0;
+
+	if (!(dev && dev->power.subsys_data && ops))
+		return -EINVAL;
+
+	pm_runtime_disable(dev);
+	device_pm_lock();
+
+	pdd = dev->power.subsys_data->domain_data;
+	if (pdd) {
+		struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd);
+
+		gpd_data->ops = *ops;
+	} else {
+		ret = -EINVAL;
+	}
+
+	device_pm_unlock();
+	pm_runtime_enable(dev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pm_genpd_add_callbacks);
+
+/**
+ * pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device.
+ * @dev: Device to remove the callbacks from.
+ */
+int pm_genpd_remove_callbacks(struct device *dev)
+{
+	struct pm_domain_data *pdd;
+	int ret = 0;
+
+	if (!(dev && dev->power.subsys_data))
+		return -EINVAL;
+
+	pm_runtime_disable(dev);
+	device_pm_lock();
+
+	pdd = dev->power.subsys_data->domain_data;
+	if (pdd) {
+		struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd);
+
+		gpd_data->ops = (struct gpd_dev_ops){ 0 };
+	} else {
+		ret = -EINVAL;
+	}
+
+	device_pm_unlock();
+	pm_runtime_enable(dev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pm_genpd_remove_callbacks);
+
 /**
  * pm_genpd_init - Initialize a generic I/O PM domain object.
  * @genpd: PM domain object to initialize.
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 65633e5a2bc0..8949d2d202ae 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -23,6 +23,12 @@ struct dev_power_governor {
 	bool (*power_down_ok)(struct dev_pm_domain *domain);
 };
 
+struct gpd_dev_ops {
+	int (*start)(struct device *dev);
+	int (*stop)(struct device *dev);
+	bool (*active_wakeup)(struct device *dev);
+};
+
 struct generic_pm_domain {
 	struct dev_pm_domain domain;	/* PM domain operations */
 	struct list_head gpd_list_node;	/* Node in the global PM domains list */
@@ -45,9 +51,7 @@ struct generic_pm_domain {
 	bool dev_irq_safe;	/* Device callbacks are IRQ-safe */
 	int (*power_off)(struct generic_pm_domain *domain);
 	int (*power_on)(struct generic_pm_domain *domain);
-	int (*start_device)(struct device *dev);
-	int (*stop_device)(struct device *dev);
-	bool (*active_wakeup)(struct device *dev);
+	struct gpd_dev_ops dev_ops;
 };
 
 static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd)
@@ -64,6 +68,7 @@ struct gpd_link {
 
 struct generic_pm_domain_data {
 	struct pm_domain_data base;
+	struct gpd_dev_ops ops;
 	bool need_restore;
 };
 
@@ -73,6 +78,11 @@ static inline struct generic_pm_domain_data *to_gpd_data(struct pm_domain_data *
 }
 
 #ifdef CONFIG_PM_GENERIC_DOMAINS
+static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev)
+{
+	return to_gpd_data(dev->power.subsys_data->domain_data);
+}
+
 extern int pm_genpd_add_device(struct generic_pm_domain *genpd,
 			       struct device *dev);
 extern int pm_genpd_remove_device(struct generic_pm_domain *genpd,
@@ -81,6 +91,8 @@ extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
 				  struct generic_pm_domain *new_subdomain);
 extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 				     struct generic_pm_domain *target);
+extern int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops);
+extern int pm_genpd_remove_callbacks(struct device *dev);
 extern void pm_genpd_init(struct generic_pm_domain *genpd,
 			  struct dev_power_governor *gov, bool is_off);
 extern int pm_genpd_poweron(struct generic_pm_domain *genpd);
@@ -105,6 +117,15 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 {
 	return -ENOSYS;
 }
+static inline int pm_genpd_add_callbacks(struct device *dev,
+					 struct gpd_dev_ops *ops)
+{
+	return -ENOSYS;
+}
+static inline int pm_genpd_remove_callbacks(struct device *dev)
+{
+	return -ENOSYS;
+}
 static inline void pm_genpd_init(struct generic_pm_domain *genpd,
 				 struct dev_power_governor *gov, bool is_off) {}
 static inline int pm_genpd_poweron(struct generic_pm_domain *genpd)
-- 
cgit v1.2.3


From ecf00475f229fcf06362412ad2d15a3267e354a1 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 27 Nov 2011 13:11:44 +0100
Subject: PM / Domains: Introduce "save/restore state" device callbacks

The current PM domains code uses device drivers' .runtime_suspend()
and .runtime_resume() callbacks as the "save device state" and
"restore device state" operations, which may not be appropriate in
general, because it forces drivers to assume that they always will
be used with generic PM domains.  However, in theory, the same
hardware may be used in devices that don't belong to any PM
domain, in which case it would be necessary to add "fake" PM
domains to satisfy the above assumption.  It also may be located in
a PM domain that's not handled with the help of the generic code.

To allow device drivers that may be used along with the generic PM
domains code of more flexibility, introduce new device callbacks,
.save_state() and .restore_state(), that can be supplied by the
drivers in addition to their "standard" runtime PM callbacks.  This
will allow the drivers to be designed to work with generic PM domains
as well as without them.

For backwards compatibility, introduce default .save_state() and
.restore_state() callback routines for PM domains that will execute
a device driver's .runtime_suspend() and .runtime_resume() callbacks,
respectively, for the given device if the driver doesn't provide its
own implementations of .save_state() and .restore_state().

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/domain.c | 68 +++++++++++++++++++++++++++++++++++++--------
 include/linux/pm_domain.h   |  2 ++
 2 files changed, 58 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 94afaa2686a6..3c9451b10427 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -56,6 +56,16 @@ static int genpd_start_dev(struct generic_pm_domain *genpd, struct device *dev)
 	return GENPD_DEV_CALLBACK(genpd, int, start, dev);
 }
 
+static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, save_state, dev);
+}
+
+static int genpd_restore_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, restore_state, dev);
+}
+
 static bool genpd_sd_counter_dec(struct generic_pm_domain *genpd)
 {
 	bool ret = false;
@@ -217,7 +227,6 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd,
 {
 	struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd);
 	struct device *dev = pdd->dev;
-	struct device_driver *drv = dev->driver;
 	int ret = 0;
 
 	if (gpd_data->need_restore)
@@ -225,11 +234,9 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd,
 
 	mutex_unlock(&genpd->lock);
 
-	if (drv && drv->pm && drv->pm->runtime_suspend) {
-		genpd_start_dev(genpd, dev);
-		ret = drv->pm->runtime_suspend(dev);
-		genpd_stop_dev(genpd, dev);
-	}
+	genpd_start_dev(genpd, dev);
+	ret = genpd_save_dev(genpd, dev);
+	genpd_stop_dev(genpd, dev);
 
 	mutex_lock(&genpd->lock);
 
@@ -250,18 +257,15 @@ static void __pm_genpd_restore_device(struct pm_domain_data *pdd,
 {
 	struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd);
 	struct device *dev = pdd->dev;
-	struct device_driver *drv = dev->driver;
 
 	if (!gpd_data->need_restore)
 		return;
 
 	mutex_unlock(&genpd->lock);
 
-	if (drv && drv->pm && drv->pm->runtime_resume) {
-		genpd_start_dev(genpd, dev);
-		drv->pm->runtime_resume(dev);
-		genpd_stop_dev(genpd, dev);
-	}
+	genpd_start_dev(genpd, dev);
+	genpd_restore_dev(genpd, dev);
+	genpd_stop_dev(genpd, dev);
 
 	mutex_lock(&genpd->lock);
 
@@ -1357,6 +1361,44 @@ int pm_genpd_remove_callbacks(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(pm_genpd_remove_callbacks);
 
+/**
+ * pm_genpd_default_save_state - Default "save device state" for PM domians.
+ * @dev: Device to handle.
+ */
+static int pm_genpd_default_save_state(struct device *dev)
+{
+	int (*cb)(struct device *__dev);
+	struct device_driver *drv = dev->driver;
+
+	cb = dev_gpd_data(dev)->ops.save_state;
+	if (cb)
+		return cb(dev);
+
+	if (drv && drv->pm && drv->pm->runtime_suspend)
+		return drv->pm->runtime_suspend(dev);
+
+	return 0;
+}
+
+/**
+ * pm_genpd_default_restore_state - Default PM domians "restore device state".
+ * @dev: Device to handle.
+ */
+static int pm_genpd_default_restore_state(struct device *dev)
+{
+	int (*cb)(struct device *__dev);
+	struct device_driver *drv = dev->driver;
+
+	cb = dev_gpd_data(dev)->ops.restore_state;
+	if (cb)
+		return cb(dev);
+
+	if (drv && drv->pm && drv->pm->runtime_resume)
+		return drv->pm->runtime_resume(dev);
+
+	return 0;
+}
+
 /**
  * pm_genpd_init - Initialize a generic I/O PM domain object.
  * @genpd: PM domain object to initialize.
@@ -1400,6 +1442,8 @@ void pm_genpd_init(struct generic_pm_domain *genpd,
 	genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq;
 	genpd->domain.ops.restore = pm_genpd_restore;
 	genpd->domain.ops.complete = pm_genpd_complete;
+	genpd->dev_ops.save_state = pm_genpd_default_save_state;
+	genpd->dev_ops.restore_state = pm_genpd_default_restore_state;
 	mutex_lock(&gpd_list_lock);
 	list_add(&genpd->gpd_list_node, &gpd_list);
 	mutex_unlock(&gpd_list_lock);
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 8949d2d202ae..731080dad250 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -26,6 +26,8 @@ struct dev_power_governor {
 struct gpd_dev_ops {
 	int (*start)(struct device *dev);
 	int (*stop)(struct device *dev);
+	int (*save_state)(struct device *dev);
+	int (*restore_state)(struct device *dev);
 	bool (*active_wakeup)(struct device *dev);
 };
 
-- 
cgit v1.2.3


From d23b9b00cdde5c93b914a172cecd57d5625fcd04 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 27 Nov 2011 13:11:51 +0100
Subject: PM / Domains: Rework system suspend callback routines (v2)

The current generic PM domains code attempts to use the generic
system suspend operations along with the domains' device stop/start
routines, which requires device drivers to assume that their
system suspend/resume (and hibernation/restore) callbacks will always
be used with generic PM domains.  However, in theory, the same
hardware may be used in devices that don't belong to any PM domain,
in which case it would be necessary to add "fake" PM domains to
satisfy the above assumption.  Also, the domain the hardware belongs
to may not be handled with the help of the generic code.

To allow device drivers that may be used along with the generic PM
domains code of more flexibility, add new device callbacks,
.suspend(), .suspend_late(), .resume_early(), .resume(), .freeze(),
.freeze_late(), .thaw_early(), and .thaw(), that can be supplied by
the drivers in addition to their "standard" system suspend and
hibernation callbacks.  These new callbacks, if defined, will be used
by the generic PM domains code for the handling of system suspend and
hibernation instead of the "standard" ones.  This will allow drivers
to be designed to work with generic PM domains as well as without
them.

For backwards compatibility, introduce default implementations of the
new callbacks for PM domains that will execute pm_generic_suspend(),
pm_generic_suspend_noirq(), pm_generic_resume_noirq(),
pm_generic_resume(), pm_generic_freeze(), pm_generic_freeze_noirq(),
pm_generic_thaw_noirq(), and pm_generic_thaw(), respectively, for the
given device if its driver doesn't define those callbacks.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/domain.c | 249 ++++++++++++++++++++++++++------------------
 include/linux/pm_domain.h   |   8 ++
 2 files changed, 158 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 3c9451b10427..9a77080cb799 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -561,6 +561,46 @@ static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd,
 	return GENPD_DEV_CALLBACK(genpd, bool, active_wakeup, dev);
 }
 
+static int genpd_suspend_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, suspend, dev);
+}
+
+static int genpd_suspend_late(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, suspend_late, dev);
+}
+
+static int genpd_resume_early(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, resume_early, dev);
+}
+
+static int genpd_resume_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, resume, dev);
+}
+
+static int genpd_freeze_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, freeze, dev);
+}
+
+static int genpd_freeze_late(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, freeze_late, dev);
+}
+
+static int genpd_thaw_early(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, thaw_early, dev);
+}
+
+static int genpd_thaw_dev(struct generic_pm_domain *genpd, struct device *dev)
+{
+	return GENPD_DEV_CALLBACK(genpd, int, thaw, dev);
+}
+
 /**
  * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters.
  * @genpd: PM domain to power off, if possible.
@@ -712,7 +752,7 @@ static int pm_genpd_suspend(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	return genpd->suspend_power_off ? 0 : pm_generic_suspend(dev);
+	return genpd->suspend_power_off ? 0 : genpd_suspend_dev(genpd, dev);
 }
 
 /**
@@ -737,7 +777,7 @@ static int pm_genpd_suspend_noirq(struct device *dev)
 	if (genpd->suspend_power_off)
 		return 0;
 
-	ret = pm_generic_suspend_noirq(dev);
+	ret = genpd_suspend_late(genpd, dev);
 	if (ret)
 		return ret;
 
@@ -788,7 +828,7 @@ static int pm_genpd_resume_noirq(struct device *dev)
 	genpd->suspended_count--;
 	genpd_start_dev(genpd, dev);
 
-	return pm_generic_resume_noirq(dev);
+	return genpd_resume_early(genpd, dev);
 }
 
 /**
@@ -809,7 +849,7 @@ static int pm_genpd_resume(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	return genpd->suspend_power_off ? 0 : pm_generic_resume(dev);
+	return genpd->suspend_power_off ? 0 : genpd_resume_dev(genpd, dev);
 }
 
 /**
@@ -830,7 +870,7 @@ static int pm_genpd_freeze(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	return genpd->suspend_power_off ? 0 : pm_generic_freeze(dev);
+	return genpd->suspend_power_off ? 0 : genpd_freeze_dev(genpd, dev);
 }
 
 /**
@@ -856,7 +896,7 @@ static int pm_genpd_freeze_noirq(struct device *dev)
 	if (genpd->suspend_power_off)
 		return 0;
 
-	ret = pm_generic_freeze_noirq(dev);
+	ret = genpd_freeze_late(genpd, dev);
 	if (ret)
 		return ret;
 
@@ -889,7 +929,7 @@ static int pm_genpd_thaw_noirq(struct device *dev)
 
 	genpd_start_dev(genpd, dev);
 
-	return pm_generic_thaw_noirq(dev);
+	return genpd_thaw_early(genpd, dev);
 }
 
 /**
@@ -910,70 +950,7 @@ static int pm_genpd_thaw(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	return genpd->suspend_power_off ? 0 : pm_generic_thaw(dev);
-}
-
-/**
- * pm_genpd_dev_poweroff - Power off a device belonging to an I/O PM domain.
- * @dev: Device to suspend.
- *
- * Power off a device under the assumption that its pm_domain field points to
- * the domain member of an object of type struct generic_pm_domain representing
- * a PM domain consisting of I/O devices.
- */
-static int pm_genpd_dev_poweroff(struct device *dev)
-{
-	struct generic_pm_domain *genpd;
-
-	dev_dbg(dev, "%s()\n", __func__);
-
-	genpd = dev_to_genpd(dev);
-	if (IS_ERR(genpd))
-		return -EINVAL;
-
-	return genpd->suspend_power_off ? 0 : pm_generic_poweroff(dev);
-}
-
-/**
- * pm_genpd_dev_poweroff_noirq - Late power off of a device from a PM domain.
- * @dev: Device to suspend.
- *
- * Carry out a late powering off of a device under the assumption that its
- * pm_domain field points to the domain member of an object of type
- * struct generic_pm_domain representing a PM domain consisting of I/O devices.
- */
-static int pm_genpd_dev_poweroff_noirq(struct device *dev)
-{
-	struct generic_pm_domain *genpd;
-	int ret;
-
-	dev_dbg(dev, "%s()\n", __func__);
-
-	genpd = dev_to_genpd(dev);
-	if (IS_ERR(genpd))
-		return -EINVAL;
-
-	if (genpd->suspend_power_off)
-		return 0;
-
-	ret = pm_generic_poweroff_noirq(dev);
-	if (ret)
-		return ret;
-
-	if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev))
-		return 0;
-
-	genpd_stop_dev(genpd, dev);
-
-	/*
-	 * Since all of the "noirq" callbacks are executed sequentially, it is
-	 * guaranteed that this function will never run twice in parallel for
-	 * the same PM domain, so it is not necessary to use locking here.
-	 */
-	genpd->suspended_count++;
-	pm_genpd_sync_poweroff(genpd);
-
-	return 0;
+	return genpd->suspend_power_off ? 0 : genpd_thaw_dev(genpd, dev);
 }
 
 /**
@@ -1015,28 +992,7 @@ static int pm_genpd_restore_noirq(struct device *dev)
 	genpd->suspended_count--;
 	genpd_start_dev(genpd, dev);
 
-	return pm_generic_restore_noirq(dev);
-}
-
-/**
- * pm_genpd_restore - Restore a device belonging to an I/O power domain.
- * @dev: Device to resume.
- *
- * Restore a device under the assumption that its pm_domain field points to the
- * domain member of an object of type struct generic_pm_domain representing
- * a power domain consisting of I/O devices.
- */
-static int pm_genpd_restore(struct device *dev)
-{
-	struct generic_pm_domain *genpd;
-
-	dev_dbg(dev, "%s()\n", __func__);
-
-	genpd = dev_to_genpd(dev);
-	if (IS_ERR(genpd))
-		return -EINVAL;
-
-	return genpd->suspend_power_off ? 0 : pm_generic_restore(dev);
+	return genpd_resume_early(genpd, dev);
 }
 
 /**
@@ -1086,10 +1042,7 @@ static void pm_genpd_complete(struct device *dev)
 #define pm_genpd_freeze_noirq		NULL
 #define pm_genpd_thaw_noirq		NULL
 #define pm_genpd_thaw			NULL
-#define pm_genpd_dev_poweroff_noirq	NULL
-#define pm_genpd_dev_poweroff		NULL
 #define pm_genpd_restore_noirq		NULL
-#define pm_genpd_restore		NULL
 #define pm_genpd_complete		NULL
 
 #endif /* CONFIG_PM_SLEEP */
@@ -1361,6 +1314,8 @@ int pm_genpd_remove_callbacks(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(pm_genpd_remove_callbacks);
 
+/* Default device callbacks for generic PM domains. */
+
 /**
  * pm_genpd_default_save_state - Default "save device state" for PM domians.
  * @dev: Device to handle.
@@ -1399,6 +1354,94 @@ static int pm_genpd_default_restore_state(struct device *dev)
 	return 0;
 }
 
+/**
+ * pm_genpd_default_suspend - Default "device suspend" for PM domians.
+ * @dev: Device to handle.
+ */
+static int pm_genpd_default_suspend(struct device *dev)
+{
+	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze;
+
+	return cb ? cb(dev) : pm_generic_suspend(dev);
+}
+
+/**
+ * pm_genpd_default_suspend_late - Default "late device suspend" for PM domians.
+ * @dev: Device to handle.
+ */
+static int pm_genpd_default_suspend_late(struct device *dev)
+{
+	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze_late;
+
+	return cb ? cb(dev) : pm_generic_suspend_noirq(dev);
+}
+
+/**
+ * pm_genpd_default_resume_early - Default "early device resume" for PM domians.
+ * @dev: Device to handle.
+ */
+static int pm_genpd_default_resume_early(struct device *dev)
+{
+	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw_early;
+
+	return cb ? cb(dev) : pm_generic_resume_noirq(dev);
+}
+
+/**
+ * pm_genpd_default_resume - Default "device resume" for PM domians.
+ * @dev: Device to handle.
+ */
+static int pm_genpd_default_resume(struct device *dev)
+{
+	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw;
+
+	return cb ? cb(dev) : pm_generic_resume(dev);
+}
+
+/**
+ * pm_genpd_default_freeze - Default "device freeze" for PM domians.
+ * @dev: Device to handle.
+ */
+static int pm_genpd_default_freeze(struct device *dev)
+{
+	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze;
+
+	return cb ? cb(dev) : pm_generic_freeze(dev);
+}
+
+/**
+ * pm_genpd_default_freeze_late - Default "late device freeze" for PM domians.
+ * @dev: Device to handle.
+ */
+static int pm_genpd_default_freeze_late(struct device *dev)
+{
+	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze_late;
+
+	return cb ? cb(dev) : pm_generic_freeze_noirq(dev);
+}
+
+/**
+ * pm_genpd_default_thaw_early - Default "early device thaw" for PM domians.
+ * @dev: Device to handle.
+ */
+static int pm_genpd_default_thaw_early(struct device *dev)
+{
+	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw_early;
+
+	return cb ? cb(dev) : pm_generic_thaw_noirq(dev);
+}
+
+/**
+ * pm_genpd_default_thaw - Default "device thaw" for PM domians.
+ * @dev: Device to handle.
+ */
+static int pm_genpd_default_thaw(struct device *dev)
+{
+	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw;
+
+	return cb ? cb(dev) : pm_generic_thaw(dev);
+}
+
 /**
  * pm_genpd_init - Initialize a generic I/O PM domain object.
  * @genpd: PM domain object to initialize.
@@ -1437,13 +1480,21 @@ void pm_genpd_init(struct generic_pm_domain *genpd,
 	genpd->domain.ops.freeze_noirq = pm_genpd_freeze_noirq;
 	genpd->domain.ops.thaw_noirq = pm_genpd_thaw_noirq;
 	genpd->domain.ops.thaw = pm_genpd_thaw;
-	genpd->domain.ops.poweroff = pm_genpd_dev_poweroff;
-	genpd->domain.ops.poweroff_noirq = pm_genpd_dev_poweroff_noirq;
+	genpd->domain.ops.poweroff = pm_genpd_suspend;
+	genpd->domain.ops.poweroff_noirq = pm_genpd_suspend_noirq;
 	genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq;
-	genpd->domain.ops.restore = pm_genpd_restore;
+	genpd->domain.ops.restore = pm_genpd_resume;
 	genpd->domain.ops.complete = pm_genpd_complete;
 	genpd->dev_ops.save_state = pm_genpd_default_save_state;
 	genpd->dev_ops.restore_state = pm_genpd_default_restore_state;
+	genpd->dev_ops.freeze = pm_genpd_default_suspend;
+	genpd->dev_ops.freeze_late = pm_genpd_default_suspend_late;
+	genpd->dev_ops.thaw_early = pm_genpd_default_resume_early;
+	genpd->dev_ops.thaw = pm_genpd_default_resume;
+	genpd->dev_ops.freeze = pm_genpd_default_freeze;
+	genpd->dev_ops.freeze_late = pm_genpd_default_freeze_late;
+	genpd->dev_ops.thaw_early = pm_genpd_default_thaw_early;
+	genpd->dev_ops.thaw = pm_genpd_default_thaw;
 	mutex_lock(&gpd_list_lock);
 	list_add(&genpd->gpd_list_node, &gpd_list);
 	mutex_unlock(&gpd_list_lock);
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 731080dad250..10a197dce07e 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -28,6 +28,14 @@ struct gpd_dev_ops {
 	int (*stop)(struct device *dev);
 	int (*save_state)(struct device *dev);
 	int (*restore_state)(struct device *dev);
+	int (*suspend)(struct device *dev);
+	int (*suspend_late)(struct device *dev);
+	int (*resume_early)(struct device *dev);
+	int (*resume)(struct device *dev);
+	int (*freeze)(struct device *dev);
+	int (*freeze_late)(struct device *dev);
+	int (*thaw_early)(struct device *dev);
+	int (*thaw)(struct device *dev);
 	bool (*active_wakeup)(struct device *dev);
 };
 
-- 
cgit v1.2.3


From b02c999ac325e977585abeb4caf6e0a2ee21e30b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 1 Dec 2011 00:02:05 +0100
Subject: PM / Domains: Add device stop governor function (v4)

Add a function deciding whether or not devices should be stopped in
pm_genpd_runtime_suspend() depending on their PM QoS constraints
and stop/start timing values.  Make it possible to add information
used by this function to device objects.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Magnus Damm <damm@opensource.se>
---
 arch/arm/mach-shmobile/pm-sh7372.c   |  4 ++-
 drivers/base/power/Makefile          |  2 +-
 drivers/base/power/domain.c          | 33 +++++++++++++++----
 drivers/base/power/domain_governor.c | 33 +++++++++++++++++++
 include/linux/pm_domain.h            | 63 +++++++++++++++++++++++++++++++-----
 5 files changed, 118 insertions(+), 17 deletions(-)
 create mode 100644 drivers/base/power/domain_governor.c

(limited to 'include/linux')

diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c
index 6777bb1be059..adf1765e69c6 100644
--- a/arch/arm/mach-shmobile/pm-sh7372.c
+++ b/arch/arm/mach-shmobile/pm-sh7372.c
@@ -169,6 +169,7 @@ static bool sh7372_power_down_forbidden(struct dev_pm_domain *domain)
 
 struct dev_power_governor sh7372_always_on_gov = {
 	.power_down_ok = sh7372_power_down_forbidden,
+	.stop_ok = default_stop_ok,
 };
 
 static int sh7372_stop_dev(struct device *dev)
@@ -203,8 +204,9 @@ static int sh7372_start_dev(struct device *dev)
 void sh7372_init_pm_domain(struct sh7372_pm_domain *sh7372_pd)
 {
 	struct generic_pm_domain *genpd = &sh7372_pd->genpd;
+	struct dev_power_governor *gov = sh7372_pd->gov;
 
-	pm_genpd_init(genpd, sh7372_pd->gov, false);
+	pm_genpd_init(genpd, gov ? : &simple_qos_governor, false);
 	genpd->dev_ops.stop = sh7372_stop_dev;
 	genpd->dev_ops.start = sh7372_start_dev;
 	genpd->dev_ops.active_wakeup = pd_active_wakeup;
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index 81676dd17900..2e58ebb1f6c0 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -3,7 +3,7 @@ obj-$(CONFIG_PM_SLEEP)	+= main.o wakeup.o
 obj-$(CONFIG_PM_RUNTIME)	+= runtime.o
 obj-$(CONFIG_PM_TRACE_RTC)	+= trace.o
 obj-$(CONFIG_PM_OPP)	+= opp.o
-obj-$(CONFIG_PM_GENERIC_DOMAINS)	+=  domain.o
+obj-$(CONFIG_PM_GENERIC_DOMAINS)	+=  domain.o domain_governor.o
 obj-$(CONFIG_HAVE_CLK)	+= clock_ops.o
 
 ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 9a77080cb799..3af9f5a71ad5 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -38,7 +38,7 @@ static DEFINE_MUTEX(gpd_list_lock);
 
 #ifdef CONFIG_PM
 
-static struct generic_pm_domain *dev_to_genpd(struct device *dev)
+struct generic_pm_domain *dev_to_genpd(struct device *dev)
 {
 	if (IS_ERR_OR_NULL(dev->pm_domain))
 		return ERR_PTR(-EINVAL);
@@ -436,6 +436,7 @@ static void genpd_power_off_work_fn(struct work_struct *work)
 static int pm_genpd_runtime_suspend(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
+	bool (*stop_ok)(struct device *__dev);
 	int ret;
 
 	dev_dbg(dev, "%s()\n", __func__);
@@ -446,10 +447,17 @@ static int pm_genpd_runtime_suspend(struct device *dev)
 
 	might_sleep_if(!genpd->dev_irq_safe);
 
+	stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL;
+	if (stop_ok && !stop_ok(dev))
+		return -EBUSY;
+
 	ret = genpd_stop_dev(genpd, dev);
 	if (ret)
 		return ret;
 
+	pm_runtime_update_max_time_suspended(dev,
+				dev_gpd_data(dev)->td.start_latency_ns);
+
 	/*
 	 * If power.irq_safe is set, this routine will be run with interrupts
 	 * off, so it can't use mutexes.
@@ -1048,11 +1056,13 @@ static void pm_genpd_complete(struct device *dev)
 #endif /* CONFIG_PM_SLEEP */
 
 /**
- * pm_genpd_add_device - Add a device to an I/O PM domain.
+ * __pm_genpd_add_device - Add a device to an I/O PM domain.
  * @genpd: PM domain to add the device to.
  * @dev: Device to be added.
+ * @td: Set of PM QoS timing parameters to attach to the device.
  */
-int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev)
+int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
+			  struct gpd_timing_data *td)
 {
 	struct generic_pm_domain_data *gpd_data;
 	struct pm_domain_data *pdd;
@@ -1095,6 +1105,8 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev)
 	gpd_data->base.dev = dev;
 	gpd_data->need_restore = false;
 	list_add_tail(&gpd_data->base.list_node, &genpd->dev_list);
+	if (td)
+		gpd_data->td = *td;
 
  out:
 	genpd_release_lock(genpd);
@@ -1255,8 +1267,10 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
  * pm_genpd_add_callbacks - Add PM domain callbacks to a given device.
  * @dev: Device to add the callbacks to.
  * @ops: Set of callbacks to add.
+ * @td: Timing data to add to the device along with the callbacks (optional).
  */
-int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops)
+int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops,
+			   struct gpd_timing_data *td)
 {
 	struct pm_domain_data *pdd;
 	int ret = 0;
@@ -1272,6 +1286,8 @@ int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops)
 		struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd);
 
 		gpd_data->ops = *ops;
+		if (td)
+			gpd_data->td = *td;
 	} else {
 		ret = -EINVAL;
 	}
@@ -1284,10 +1300,11 @@ int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops)
 EXPORT_SYMBOL_GPL(pm_genpd_add_callbacks);
 
 /**
- * pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device.
+ * __pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device.
  * @dev: Device to remove the callbacks from.
+ * @clear_td: If set, clear the device's timing data too.
  */
-int pm_genpd_remove_callbacks(struct device *dev)
+int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td)
 {
 	struct pm_domain_data *pdd;
 	int ret = 0;
@@ -1303,6 +1320,8 @@ int pm_genpd_remove_callbacks(struct device *dev)
 		struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd);
 
 		gpd_data->ops = (struct gpd_dev_ops){ 0 };
+		if (clear_td)
+			gpd_data->td = (struct gpd_timing_data){ 0 };
 	} else {
 		ret = -EINVAL;
 	}
@@ -1312,7 +1331,7 @@ int pm_genpd_remove_callbacks(struct device *dev)
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(pm_genpd_remove_callbacks);
+EXPORT_SYMBOL_GPL(__pm_genpd_remove_callbacks);
 
 /* Default device callbacks for generic PM domains. */
 
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
new file mode 100644
index 000000000000..97b21c10c496
--- /dev/null
+++ b/drivers/base/power/domain_governor.c
@@ -0,0 +1,33 @@
+/*
+ * drivers/base/power/domain_governor.c - Governors for device PM domains.
+ *
+ * Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_qos.h>
+
+/**
+ * default_stop_ok - Default PM domain governor routine for stopping devices.
+ * @dev: Device to check.
+ */
+bool default_stop_ok(struct device *dev)
+{
+	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	if (dev->power.max_time_suspended_ns < 0 || td->break_even_ns == 0)
+		return true;
+
+	return td->stop_latency_ns + td->start_latency_ns < td->break_even_ns
+		&& td->break_even_ns < dev->power.max_time_suspended_ns;
+}
+
+struct dev_power_governor simple_qos_governor = {
+	.stop_ok = default_stop_ok,
+};
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 10a197dce07e..f6745c213a57 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -21,6 +21,7 @@ enum gpd_status {
 
 struct dev_power_governor {
 	bool (*power_down_ok)(struct dev_pm_domain *domain);
+	bool (*stop_ok)(struct device *dev);
 };
 
 struct gpd_dev_ops {
@@ -76,9 +77,16 @@ struct gpd_link {
 	struct list_head slave_node;
 };
 
+struct gpd_timing_data {
+	s64 stop_latency_ns;
+	s64 start_latency_ns;
+	s64 break_even_ns;
+};
+
 struct generic_pm_domain_data {
 	struct pm_domain_data base;
 	struct gpd_dev_ops ops;
+	struct gpd_timing_data td;
 	bool need_restore;
 };
 
@@ -93,20 +101,48 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev)
 	return to_gpd_data(dev->power.subsys_data->domain_data);
 }
 
-extern int pm_genpd_add_device(struct generic_pm_domain *genpd,
-			       struct device *dev);
+extern struct dev_power_governor simple_qos_governor;
+
+extern struct generic_pm_domain *dev_to_genpd(struct device *dev);
+extern int __pm_genpd_add_device(struct generic_pm_domain *genpd,
+				 struct device *dev,
+				 struct gpd_timing_data *td);
+
+static inline int pm_genpd_add_device(struct generic_pm_domain *genpd,
+				      struct device *dev)
+{
+	return __pm_genpd_add_device(genpd, dev, NULL);
+}
+
 extern int pm_genpd_remove_device(struct generic_pm_domain *genpd,
 				  struct device *dev);
 extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
 				  struct generic_pm_domain *new_subdomain);
 extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 				     struct generic_pm_domain *target);
-extern int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops);
-extern int pm_genpd_remove_callbacks(struct device *dev);
+extern int pm_genpd_add_callbacks(struct device *dev,
+				  struct gpd_dev_ops *ops,
+				  struct gpd_timing_data *td);
+extern int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td);
 extern void pm_genpd_init(struct generic_pm_domain *genpd,
 			  struct dev_power_governor *gov, bool is_off);
+
 extern int pm_genpd_poweron(struct generic_pm_domain *genpd);
+
+extern bool default_stop_ok(struct device *dev);
+
 #else
+
+static inline struct generic_pm_domain *dev_to_genpd(struct device *dev)
+{
+	return ERR_PTR(-ENOSYS);
+}
+static inline int __pm_genpd_add_device(struct generic_pm_domain *genpd,
+					struct device *dev,
+					struct gpd_timing_data *td)
+{
+	return -ENOSYS;
+}
 static inline int pm_genpd_add_device(struct generic_pm_domain *genpd,
 				      struct device *dev)
 {
@@ -128,22 +164,33 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 	return -ENOSYS;
 }
 static inline int pm_genpd_add_callbacks(struct device *dev,
-					 struct gpd_dev_ops *ops)
+					 struct gpd_dev_ops *ops,
+					 struct gpd_timing_data *td)
 {
 	return -ENOSYS;
 }
-static inline int pm_genpd_remove_callbacks(struct device *dev)
+static inline int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td)
 {
 	return -ENOSYS;
 }
-static inline void pm_genpd_init(struct generic_pm_domain *genpd,
-				 struct dev_power_governor *gov, bool is_off) {}
+static inline void pm_genpd_init(struct generic_pm_domain *genpd, bool is_off)
+{
+}
 static inline int pm_genpd_poweron(struct generic_pm_domain *genpd)
 {
 	return -ENOSYS;
 }
+static inline bool default_stop_ok(struct device *dev)
+{
+	return false;
+}
 #endif
 
+static inline int pm_genpd_remove_callbacks(struct device *dev)
+{
+	return __pm_genpd_remove_callbacks(dev, true);
+}
+
 #ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME
 extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd);
 extern void pm_genpd_poweroff_unused(void);
-- 
cgit v1.2.3


From 221e9b58380abdd6c05e11b4538597e2586ee141 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 1 Dec 2011 00:02:10 +0100
Subject: PM / Domains: Add default power off governor function (v4)

Add a function deciding whether or not a given PM domain should
be powered off on the basis of the PM QoS constraints of devices
belonging to it and their PM QoS timing data.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/domain.c          |  12 ++++
 drivers/base/power/domain_governor.c | 110 +++++++++++++++++++++++++++++++++++
 include/linux/pm_domain.h            |   7 +++
 3 files changed, 129 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 3af9f5a71ad5..91896194e76b 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -398,6 +398,17 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd)
 	}
 
 	genpd->status = GPD_STATE_POWER_OFF;
+	genpd->power_off_time = ktime_get();
+
+	/* Update PM QoS information for devices in the domain. */
+	list_for_each_entry_reverse(pdd, &genpd->dev_list, list_node) {
+		struct gpd_timing_data *td = &to_gpd_data(pdd)->td;
+
+		pm_runtime_update_max_time_suspended(pdd->dev,
+					td->start_latency_ns +
+					td->restore_state_latency_ns +
+					genpd->power_on_latency_ns);
+	}
 
 	list_for_each_entry(link, &genpd->slave_links, slave_node) {
 		genpd_sd_counter_dec(link->master);
@@ -1487,6 +1498,7 @@ void pm_genpd_init(struct generic_pm_domain *genpd,
 	genpd->resume_count = 0;
 	genpd->device_count = 0;
 	genpd->suspended_count = 0;
+	genpd->max_off_time_ns = -1;
 	genpd->domain.ops.runtime_suspend = pm_genpd_runtime_suspend;
 	genpd->domain.ops.runtime_resume = pm_genpd_runtime_resume;
 	genpd->domain.ops.runtime_idle = pm_generic_runtime_idle;
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index 97b21c10c496..da78540e9b40 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_qos.h>
+#include <linux/hrtimer.h>
 
 /**
  * default_stop_ok - Default PM domain governor routine for stopping devices.
@@ -28,6 +29,115 @@ bool default_stop_ok(struct device *dev)
 		&& td->break_even_ns < dev->power.max_time_suspended_ns;
 }
 
+/**
+ * default_power_down_ok - Default generic PM domain power off governor routine.
+ * @pd: PM domain to check.
+ *
+ * This routine must be executed under the PM domain's lock.
+ */
+static bool default_power_down_ok(struct dev_pm_domain *pd)
+{
+	struct generic_pm_domain *genpd = pd_to_genpd(pd);
+	struct gpd_link *link;
+	struct pm_domain_data *pdd;
+	s64 min_dev_off_time_ns;
+	s64 off_on_time_ns;
+	ktime_t time_now = ktime_get();
+
+	off_on_time_ns = genpd->power_off_latency_ns +
+				genpd->power_on_latency_ns;
+	/*
+	 * It doesn't make sense to remove power from the domain if saving
+	 * the state of all devices in it and the power off/power on operations
+	 * take too much time.
+	 *
+	 * All devices in this domain have been stopped already at this point.
+	 */
+	list_for_each_entry(pdd, &genpd->dev_list, list_node) {
+		if (pdd->dev->driver)
+			off_on_time_ns +=
+				to_gpd_data(pdd)->td.save_state_latency_ns;
+	}
+
+	/*
+	 * Check if subdomains can be off for enough time.
+	 *
+	 * All subdomains have been powered off already at this point.
+	 */
+	list_for_each_entry(link, &genpd->master_links, master_node) {
+		struct generic_pm_domain *sd = link->slave;
+		s64 sd_max_off_ns = sd->max_off_time_ns;
+
+		if (sd_max_off_ns < 0)
+			continue;
+
+		sd_max_off_ns -= ktime_to_ns(ktime_sub(time_now,
+						       sd->power_off_time));
+		/*
+		 * Check if the subdomain is allowed to be off long enough for
+		 * the current domain to turn off and on (that's how much time
+		 * it will have to wait worst case).
+		 */
+		if (sd_max_off_ns <= off_on_time_ns)
+			return false;
+	}
+
+	/*
+	 * Check if the devices in the domain can be off enough time.
+	 */
+	min_dev_off_time_ns = -1;
+	list_for_each_entry(pdd, &genpd->dev_list, list_node) {
+		struct gpd_timing_data *td;
+		struct device *dev = pdd->dev;
+		s64 dev_off_time_ns;
+
+		if (!dev->driver || dev->power.max_time_suspended_ns < 0)
+			continue;
+
+		td = &to_gpd_data(pdd)->td;
+		dev_off_time_ns = dev->power.max_time_suspended_ns -
+			(td->start_latency_ns + td->restore_state_latency_ns +
+				ktime_to_ns(ktime_sub(time_now,
+						dev->power.suspend_time)));
+		if (dev_off_time_ns <= off_on_time_ns)
+			return false;
+
+		if (min_dev_off_time_ns > dev_off_time_ns
+		    || min_dev_off_time_ns < 0)
+			min_dev_off_time_ns = dev_off_time_ns;
+	}
+
+	if (min_dev_off_time_ns < 0) {
+		/*
+		 * There are no latency constraints, so the domain can spend
+		 * arbitrary time in the "off" state.
+		 */
+		genpd->max_off_time_ns = -1;
+		return true;
+	}
+
+	/*
+	 * The difference between the computed minimum delta and the time needed
+	 * to turn the domain on is the maximum theoretical time this domain can
+	 * spend in the "off" state.
+	 */
+	min_dev_off_time_ns -= genpd->power_on_latency_ns;
+
+	/*
+	 * If the difference between the computed minimum delta and the time
+	 * needed to turn the domain off and back on on is smaller than the
+	 * domain's power break even time, removing power from the domain is not
+	 * worth it.
+	 */
+	if (genpd->break_even_ns >
+	    min_dev_off_time_ns - genpd->power_off_latency_ns)
+		return false;
+
+	genpd->max_off_time_ns = min_dev_off_time_ns;
+	return true;
+}
+
 struct dev_power_governor simple_qos_governor = {
 	.stop_ok = default_stop_ok,
+	.power_down_ok = default_power_down_ok,
 };
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index f6745c213a57..cc1a2450ff7b 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -61,8 +61,13 @@ struct generic_pm_domain {
 	bool suspend_power_off;	/* Power status before system suspend */
 	bool dev_irq_safe;	/* Device callbacks are IRQ-safe */
 	int (*power_off)(struct generic_pm_domain *domain);
+	s64 power_off_latency_ns;
 	int (*power_on)(struct generic_pm_domain *domain);
+	s64 power_on_latency_ns;
 	struct gpd_dev_ops dev_ops;
+	s64 break_even_ns;	/* Power break even for the entire domain. */
+	s64 max_off_time_ns;	/* Maximum allowed "suspended" time. */
+	ktime_t power_off_time;
 };
 
 static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd)
@@ -80,6 +85,8 @@ struct gpd_link {
 struct gpd_timing_data {
 	s64 stop_latency_ns;
 	s64 start_latency_ns;
+	s64 save_state_latency_ns;
+	s64 restore_state_latency_ns;
 	s64 break_even_ns;
 };
 
-- 
cgit v1.2.3


From 4f042cdad40e1566a53b7ae85e72b6945a4b0fde Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Thu, 1 Dec 2011 00:05:31 +0100
Subject: PM / Domains: fix compilation failure for CONFIG_PM_GENERIC_DOMAINS
 unset

Fix the following compalitaion breakage:

In file included from linux/drivers/sh/pm_runtime.c:15:
linux/include/linux/pm_domain.h: In function 'dev_to_genpd':
linux/include/linux/pm_domain.h:142: error: implicit declaration of function 'ERR_PTR'
linux/include/linux/pm_domain.h:142: warning: return makes pointer from integer without a cast
In file included from linux/include/linux/sh_clk.h:10,
                 from linux/drivers/sh/pm_runtime.c:19:
linux/include/linux/err.h: At top level:
linux/include/linux/err.h:22: error: conflicting types for 'ERR_PTR'
linux/include/linux/pm_domain.h:142: note: previous implicit declaration of 'ERR_PTR' was here
make[3]: *** [drivers/sh/pm_runtime.o] Error 1

Reported-by: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/pm_domain.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index cc1a2450ff7b..fbb81bc5065a 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -10,6 +10,7 @@
 #define _LINUX_PM_DOMAIN_H
 
 #include <linux/device.h>
+#include <linux/err.h>
 
 enum gpd_status {
 	GPD_STATE_ACTIVE = 0,	/* PM domain is active */
-- 
cgit v1.2.3


From faf02f8fee5563ea7f950b3f5f08c654aa6c4525 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 2 Dec 2011 17:44:50 +0900
Subject: serial: sh-sci: per-port modem control.

The bulk of the ports do not support any sort of modem control, so
blindly twiddling the MCE bit doesn't accomplish much. We now require
ports to manually specify which line supports modem control signals.

While at it, tidy up the RTS/CTSIO handling in SCSPTR parts so it's a bit
more obvious what's going on (and without clobbering other configurations
in the process).

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/tty/serial/sh-sci.c | 31 ++++++++++++++++++++-----------
 include/linux/serial_sci.h  | 10 ++++++++++
 2 files changed, 30 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 46deaaec836d..fd60d72eac89 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -474,8 +474,15 @@ static void sci_init_pins(struct uart_port *port, unsigned int cflag)
 	if (!reg->size)
 		return;
 
-	if (!(cflag & CRTSCTS))
-		sci_out(port, SCSPTR, 0x0080); /* Set RTS = 1 */
+	if ((s->cfg->capabilities & SCIx_HAVE_RTSCTS) &&
+	    ((!(cflag & CRTSCTS)))) {
+		unsigned short status;
+
+		status = sci_in(port, SCSPTR);
+		status &= ~SCSPTR_CTSIO;
+		status |= SCSPTR_RTSIO;
+		sci_out(port, SCSPTR, status); /* Set RTS = 1 */
+	}
 }
 
 static int sci_txfill(struct uart_port *port)
@@ -1764,16 +1771,18 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios,
 
 	sci_init_pins(port, termios->c_cflag);
 
-	reg = sci_getreg(port, SCFCR);
-	if (reg->size) {
-		unsigned short ctrl;
+	if (s->cfg->capabilities & SCIx_HAVE_RTSCTS) {
+		reg = sci_getreg(port, SCFCR);
+		if (reg->size) {
+			unsigned short ctrl;
 
-		ctrl = sci_in(port, SCFCR);
-		if (termios->c_cflag & CRTSCTS)
-			ctrl |= SCFCR_MCE;
-		else
-			ctrl &= ~SCFCR_MCE;
-		sci_out(port, SCFCR, ctrl);
+			ctrl = sci_in(port, SCFCR);
+			if (termios->c_cflag & CRTSCTS)
+				ctrl |= SCFCR_MCE;
+			else
+				ctrl &= ~SCFCR_MCE;
+			sci_out(port, SCFCR, ctrl);
+		}
 	}
 
 	sci_out(port, SCSCR, s->cfg->scscr);
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 369273a52679..15b1bdcaa9f5 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -49,6 +49,10 @@ enum {
 
 #define SCIF_DEFAULT_ERROR_MASK (SCIF_PER | SCIF_FER | SCIF_ER | SCIF_BRK)
 
+/* SCSPTR, optional */
+#define SCSPTR_RTSIO	(1 << 7)
+#define SCSPTR_CTSIO	(1 << 5)
+
 /* Offsets into the sci_port->irqs array */
 enum {
 	SCIx_ERI_IRQ,
@@ -108,6 +112,11 @@ struct plat_sci_port_ops {
 	void (*init_pins)(struct uart_port *, unsigned int cflag);
 };
 
+/*
+ * Port-specific capabilities
+ */
+#define SCIx_HAVE_RTSCTS	(1 << 0)
+
 /*
  * Platform device specific platform_data struct
  */
@@ -116,6 +125,7 @@ struct plat_sci_port {
 	unsigned int	irqs[SCIx_NR_IRQS];	/* ERI, RXI, TXI, BRI */
 	unsigned int	type;			/* SCI / SCIF / IRDA */
 	upf_t		flags;			/* UPF_* flags */
+	unsigned long	capabilities;		/* Port features/capabilities */
 
 	unsigned int	scbrr_algo_id;		/* SCBRR calculation algo */
 	unsigned int	scscr;			/* SCSCR initialization */
-- 
cgit v1.2.3


From 50f0959ad4f9ac1c5ee208bb820de299a1b3730b Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 2 Dec 2011 20:09:48 +0900
Subject: serial: sh-sci: Handle GPIO function requests.

This adds initial support for requesting the various GPIO functions
necessary for certain ports. This just plugs in dumb request/free logic,
but serves as a building block for migrating off of the ->init_pins mess
to a wholly gpiolib backed solution (primarily parts with external
RTS/CTS pins, but will also allow us to clean up RXD pin testing).

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/tty/serial/sh-sci.c | 71 +++++++++++++++++++++++++++++++++++++++++++--
 include/linux/serial_sci.h  | 12 ++++++++
 2 files changed, 81 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 761a800cb483..9e62349b3d9f 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -50,6 +50,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
+#include <linux/gpio.h>
 
 #ifdef CONFIG_SUPERH
 #include <asm/sh_bios.h>
@@ -73,6 +74,7 @@ struct sci_port {
 	struct clk		*fclk;
 
 	char			*irqstr[SCIx_NR_IRQS];
+	char			*gpiostr[SCIx_NR_FNS];
 
 	struct dma_chan			*chan_tx;
 	struct dma_chan			*chan_rx;
@@ -1105,6 +1107,67 @@ static void sci_free_irq(struct sci_port *port)
 	}
 }
 
+static const char *sci_gpio_names[SCIx_NR_FNS] = {
+	"sck", "rxd", "txd", "cts", "rts",
+};
+
+static const char *sci_gpio_str(unsigned int index)
+{
+	return sci_gpio_names[index];
+}
+
+static void __devinit sci_init_gpios(struct sci_port *port)
+{
+	struct uart_port *up = &port->port;
+	int i;
+
+	if (!port->cfg)
+		return;
+
+	for (i = 0; i < SCIx_NR_FNS; i++) {
+		const char *desc;
+		int ret;
+
+		if (!port->cfg->gpios[i])
+			continue;
+
+		desc = sci_gpio_str(i);
+
+		port->gpiostr[i] = kasprintf(GFP_KERNEL, "%s:%s",
+					     dev_name(up->dev), desc);
+
+		/*
+		 * If we've failed the allocation, we can still continue
+		 * on with a NULL string.
+		 */
+		if (!port->gpiostr[i])
+			dev_notice(up->dev, "%s string allocation failure\n",
+				   desc);
+
+		ret = gpio_request(port->cfg->gpios[i], port->gpiostr[i]);
+		if (unlikely(ret != 0)) {
+			dev_notice(up->dev, "failed %s gpio request\n", desc);
+
+			/*
+			 * If we can't get the GPIO for whatever reason,
+			 * no point in keeping the verbose string around.
+			 */
+			kfree(port->gpiostr[i]);
+		}
+	}
+}
+
+static void sci_free_gpios(struct sci_port *port)
+{
+	int i;
+
+	for (i = 0; i < SCIx_NR_FNS; i++)
+		if (port->cfg->gpios[i]) {
+			gpio_free(port->cfg->gpios[i]);
+			kfree(port->gpiostr[i]);
+		}
+}
+
 static unsigned int sci_tx_empty(struct uart_port *port)
 {
 	unsigned short status = sci_in(port, SCxSR);
@@ -1962,6 +2025,8 @@ static int __devinit sci_init_single(struct platform_device *dev,
 	struct uart_port *port = &sci_port->port;
 	int ret;
 
+	sci_port->cfg	= p;
+
 	port->ops	= &sci_uart_ops;
 	port->iotype	= UPIO_MEM;
 	port->line	= index;
@@ -2007,6 +2072,8 @@ static int __devinit sci_init_single(struct platform_device *dev,
 
 		port->dev = &dev->dev;
 
+		sci_init_gpios(sci_port);
+
 		pm_runtime_irq_safe(&dev->dev);
 		pm_runtime_enable(&dev->dev);
 	}
@@ -2041,8 +2108,6 @@ static int __devinit sci_init_single(struct platform_device *dev,
 		p->error_mask |= (1 << p->overrun_bit);
 	}
 
-	sci_port->cfg		= p;
-
 	port->mapbase		= p->mapbase;
 	port->type		= p->type;
 	port->flags		= p->flags;
@@ -2249,6 +2314,8 @@ static int sci_remove(struct platform_device *dev)
 	cpufreq_unregister_notifier(&port->freq_transition,
 				    CPUFREQ_TRANSITION_NOTIFIER);
 
+	sci_free_gpios(port);
+
 	uart_remove_one_port(&sci_uart_driver, &port->port);
 
 	clk_put(port->iclk);
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 15b1bdcaa9f5..78779074f6e8 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -64,6 +64,17 @@ enum {
 	SCIx_MUX_IRQ = SCIx_NR_IRQS,	/* special case */
 };
 
+/* Offsets into the sci_port->gpios array */
+enum {
+	SCIx_SCK,
+	SCIx_RXD,
+	SCIx_TXD,
+	SCIx_CTS,
+	SCIx_RTS,
+
+	SCIx_NR_FNS,
+};
+
 enum {
 	SCIx_PROBE_REGTYPE,
 
@@ -123,6 +134,7 @@ struct plat_sci_port_ops {
 struct plat_sci_port {
 	unsigned long	mapbase;		/* resource base */
 	unsigned int	irqs[SCIx_NR_IRQS];	/* ERI, RXI, TXI, BRI */
+	unsigned int	gpios[SCIx_NR_FNS];	/* SCK, RXD, TXD, CTS, RTS */
 	unsigned int	type;			/* SCI / SCIF / IRDA */
 	upf_t		flags;			/* UPF_* flags */
 	unsigned long	capabilities;		/* Port features/capabilities */
-- 
cgit v1.2.3


From 42b2aa86c6670347a2a07e6d7af0e0ecc8fdbff9 Mon Sep 17 00:00:00 2001
From: "Justin P. Mattock" <justinmattock@gmail.com>
Date: Mon, 28 Nov 2011 20:31:00 -0800
Subject: treewide: Fix typos in various parts of the kernel, and fix some
 comments.

The below patch fixes some typos in various parts of the kernel, as well as fixes some comments.
Please let me know if I missed anything, and I will try to get it changed and resent.

Signed-off-by: Justin P. Mattock <justinmattock@gmail.com>
Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 arch/arm/plat-omap/include/plat/serial.h                  | 2 +-
 arch/powerpc/include/asm/io.h                             | 2 +-
 arch/powerpc/include/asm/keylargo.h                       | 2 +-
 arch/powerpc/mm/numa.c                                    | 2 +-
 arch/sparc/kernel/smp_64.c                                | 2 +-
 drivers/acpi/acpica/hwxface.c                             | 2 +-
 drivers/block/xen-blkback/xenbus.c                        | 2 +-
 drivers/char/ipmi/ipmi_bt_sm.c                            | 2 +-
 drivers/edac/ppc4xx_edac.c                                | 2 +-
 drivers/media/video/zoran/zoran_driver.c                  | 2 +-
 drivers/message/fusion/lsi/mpi_ioc.h                      | 2 +-
 drivers/net/irda/nsc-ircc.c                               | 2 +-
 drivers/net/irda/via-ircc.c                               | 4 ++--
 drivers/net/irda/w83977af_ir.c                            | 2 +-
 drivers/net/wimax/i2400m/i2400m.h                         | 2 +-
 drivers/net/wireless/rtlwifi/rtl8192de/hw.c               | 4 ++--
 drivers/parport/parport_mfc3.c                            | 2 +-
 drivers/scsi/aic7xxx/aicasm/aicasm.c                      | 2 +-
 drivers/scsi/ips.c                                        | 2 +-
 drivers/scsi/qla4xxx/ql4_fw.h                             | 2 +-
 drivers/scsi/vmw_pvscsi.c                                 | 2 +-
 drivers/staging/comedi/drivers/addi-data/hwdrv_apci3200.c | 2 +-
 drivers/staging/cxt1e1/libsbew.h                          | 2 +-
 drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c          | 4 ++--
 drivers/staging/ft1000/ft1000-usb/ft1000_hw.c             | 2 +-
 drivers/staging/iio/industrialio-trigger.c                | 2 +-
 drivers/staging/sep/sep_driver.c                          | 2 +-
 drivers/staging/tidspbridge/Kconfig                       | 2 +-
 drivers/staging/tidspbridge/rmgr/dbdcd.c                  | 2 +-
 drivers/usb/host/hwa-hc.c                                 | 2 +-
 drivers/usb/host/imx21-hcd.c                              | 2 +-
 drivers/usb/otg/fsl_otg.c                                 | 2 +-
 drivers/uwb/i1480/dfu/usb.c                               | 2 +-
 fs/btrfs/inode.c                                          | 2 +-
 fs/ext3/inode.c                                           | 2 +-
 fs/ext4/inode.c                                           | 2 +-
 fs/nfsd/nfs4state.c                                       | 2 +-
 fs/ocfs2/file.c                                           | 2 +-
 fs/xfs/xfs_file.c                                         | 6 +++---
 fs/xfs/xfs_log_cil.c                                      | 2 +-
 include/drm/drmP.h                                        | 2 +-
 include/linux/wanrouter.h                                 | 2 +-
 include/net/mac80211.h                                    | 2 +-
 net/ipv4/ip_fragment.c                                    | 2 +-
 net/mac80211/work.c                                       | 2 +-
 net/sctp/endpointola.c                                    | 2 +-
 46 files changed, 51 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-omap/include/plat/serial.h b/arch/arm/plat-omap/include/plat/serial.h
index 1ab9fd6abe6d..ac44bde5d36d 100644
--- a/arch/arm/plat-omap/include/plat/serial.h
+++ b/arch/arm/plat-omap/include/plat/serial.h
@@ -2,7 +2,7 @@
  * arch/arm/plat-omap/include/mach/serial.h
  *
  * Copyright (C) 2009 Texas Instruments
- * Addded OMAP4 support- Santosh Shilimkar <santosh.shilimkar@ti.com>
+ * Added OMAP4 support- Santosh Shilimkar <santosh.shilimkar@ti.com>
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 45698d55cd6a..a3855b81eada 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -394,7 +394,7 @@ __do_out_asm(_rec_outl, "stwbrx")
 #endif /* CONFIG_PPC32 */
 
 /* The "__do_*" operations below provide the actual "base" implementation
- * for each of the defined acccessor. Some of them use the out_* functions
+ * for each of the defined accessors. Some of them use the out_* functions
  * directly, some of them still use EEH, though we might change that in the
  * future. Those macros below provide the necessary argument swapping and
  * handling of the IO base for PIO.
diff --git a/arch/powerpc/include/asm/keylargo.h b/arch/powerpc/include/asm/keylargo.h
index d8520ef121f9..fc195d0b3c34 100644
--- a/arch/powerpc/include/asm/keylargo.h
+++ b/arch/powerpc/include/asm/keylargo.h
@@ -51,7 +51,7 @@
 
 #define KL_GPIO_SOUND_POWER		(KEYLARGO_GPIO_0+0x05)
 
-/* Hrm... this one is only to be used on Pismo. It seeem to also
+/* Hrm... this one is only to be used on Pismo. It seems to also
  * control the timebase enable on other machines. Still to be
  * experimented... --BenH.
  */
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b22a83a91cb8..ae0a611f5741 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -521,7 +521,7 @@ static int of_get_assoc_arrays(struct device_node *memory,
 	aa->n_arrays = *prop++;
 	aa->array_sz = *prop++;
 
-	/* Now that we know the number of arrrays and size of each array,
+	/* Now that we know the number of arrays and size of each array,
 	 * revalidate the size of the property read in.
 	 */
 	if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int))
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 75607724d290..3b1bd7c50164 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -840,7 +840,7 @@ static void tsb_sync(void *info)
 	struct trap_per_cpu *tp = &trap_block[raw_smp_processor_id()];
 	struct mm_struct *mm = info;
 
-	/* It is not valid to test "currrent->active_mm == mm" here.
+	/* It is not valid to test "current->active_mm == mm" here.
 	 *
 	 * The value of "current" is not changed atomically with
 	 * switch_mm().  But that's OK, we just need to check the
diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c
index c2793a82f120..d707756228c2 100644
--- a/drivers/acpi/acpica/hwxface.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -356,7 +356,7 @@ ACPI_EXPORT_SYMBOL(acpi_read_bit_register)
  *
  * PARAMETERS:  register_id     - ID of ACPI Bit Register to access
  *              Value           - Value to write to the register, in bit
- *                                position zero. The bit is automaticallly
+ *                                position zero. The bit is automatically
  *                                shifted to the correct position.
  *
  * RETURN:      Status
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index f759ad4584c3..8069322e4c9e 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -613,7 +613,7 @@ static void frontend_changed(struct xenbus_device *dev,
 	case XenbusStateConnected:
 		/*
 		 * Ensure we connect even when two watches fire in
-		 * close successsion and we miss the intermediate value
+		 * close succession and we miss the intermediate value
 		 * of frontend_state.
 		 */
 		if (dev->state == XenbusStateConnected)
diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c
index 3ed20e8abc0d..cdd4c09fda96 100644
--- a/drivers/char/ipmi/ipmi_bt_sm.c
+++ b/drivers/char/ipmi/ipmi_bt_sm.c
@@ -560,7 +560,7 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time)
 		BT_CONTROL(BT_H_BUSY);		/* set */
 
 		/*
-		 * Uncached, ordered writes should just proceeed serially but
+		 * Uncached, ordered writes should just proceed serially but
 		 * some BMCs don't clear B2H_ATN with one hit.  Fast-path a
 		 * workaround without too much penalty to the general case.
 		 */
diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c
index 38400963e245..fc757069c6af 100644
--- a/drivers/edac/ppc4xx_edac.c
+++ b/drivers/edac/ppc4xx_edac.c
@@ -142,7 +142,7 @@
 
 /*
  * The ibm,sdram-4xx-ddr2 Device Control Registers (DCRs) are
- * indirectly acccessed and have a base and length defined by the
+ * indirectly accessed and have a base and length defined by the
  * device tree. The base can be anything; however, we expect the
  * length to be precisely two registers, the first for the address
  * window and the second for the data window.
diff --git a/drivers/media/video/zoran/zoran_driver.c b/drivers/media/video/zoran/zoran_driver.c
index d4d05d2ace65..f6d26419445e 100644
--- a/drivers/media/video/zoran/zoran_driver.c
+++ b/drivers/media/video/zoran/zoran_driver.c
@@ -1550,7 +1550,7 @@ static int zoran_enum_fmt(struct zoran *zr, struct v4l2_fmtdesc *fmt, int flag)
 		if (zoran_formats[i].flags & flag && num++ == fmt->index) {
 			strncpy(fmt->description, zoran_formats[i].name,
 				sizeof(fmt->description) - 1);
-			/* fmt struct pre-zeroed, so adding '\0' not neeed */
+			/* fmt struct pre-zeroed, so adding '\0' not needed */
 			fmt->pixelformat = zoran_formats[i].fourcc;
 			if (zoran_formats[i].flags & ZORAN_FORMAT_COMPRESSED)
 				fmt->flags |= V4L2_FMT_FLAG_COMPRESSED;
diff --git a/drivers/message/fusion/lsi/mpi_ioc.h b/drivers/message/fusion/lsi/mpi_ioc.h
index fd6222882a0e..19fb21b8f0ce 100644
--- a/drivers/message/fusion/lsi/mpi_ioc.h
+++ b/drivers/message/fusion/lsi/mpi_ioc.h
@@ -857,7 +857,7 @@ typedef struct _EVENT_DATA_SAS_DISCOVERY
 #define MPI_EVENT_SAS_DSCVRY_PHY_BITS_MASK                  (0xFFFF0000)
 #define MPI_EVENT_SAS_DSCVRY_PHY_BITS_SHIFT                 (16)
 
-/* SAS Discovery Errror Event data */
+/* SAS Discovery Error Event data */
 
 typedef struct _EVENT_DATA_DISCOVERY_ERROR
 {
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index b56636da6cc3..2a4f2f153244 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -1664,7 +1664,7 @@ static int nsc_ircc_dma_xmit_complete(struct nsc_ircc_cb *self)
 	switch_bank(iobase, BANK0);
         outb(inb(iobase+MCR) & ~MCR_DMA_EN, iobase+MCR);
 	
-	/* Check for underrrun! */
+	/* Check for underrun! */
 	if (inb(iobase+ASCR) & ASCR_TXUR) {
 		self->netdev->stats.tx_errors++;
 		self->netdev->stats.tx_fifo_errors++;
diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c
index 6d6479049aa1..2d456dd164fb 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/net/irda/via-ircc.c
@@ -942,14 +942,14 @@ static int via_ircc_dma_xmit_complete(struct via_ircc_cb *self)
 	iobase = self->io.fir_base;
 	/* Disable DMA */
 //      DisableDmaChannel(self->io.dma);
-	/* Check for underrrun! */
+	/* Check for underrun! */
 	/* Clear bit, by writing 1 into it */
 	Tx_status = GetTXStatus(iobase);
 	if (Tx_status & 0x08) {
 		self->netdev->stats.tx_errors++;
 		self->netdev->stats.tx_fifo_errors++;
 		hwreset(self);
-// how to clear underrrun ?
+	/* how to clear underrun? */
 	} else {
 		self->netdev->stats.tx_packets++;
 		ResetChip(iobase, 3);
diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index c4366601b067..7d43506c7032 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c
@@ -677,7 +677,7 @@ static void w83977af_dma_xmit_complete(struct w83977af_ir *self)
 	switch_bank(iobase, SET0);
 	outb(inb(iobase+HCR) & ~HCR_EN_DMA, iobase+HCR);
 	
-	/* Check for underrrun! */
+	/* Check for underrun! */
 	if (inb(iobase+AUDR) & AUDR_UNDR) {
 		IRDA_DEBUG(0, "%s(), Transmit underrun!\n", __func__ );
 		
diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h
index c421a6141854..c806d4550212 100644
--- a/drivers/net/wimax/i2400m/i2400m.h
+++ b/drivers/net/wimax/i2400m/i2400m.h
@@ -75,7 +75,7 @@
  *        device is up and running or shutdown (through ifconfig up /
  *        down). Bus-generic only.
  *
- *  - control ops: control.c - implements various commmands for
+ *  - control ops: control.c - implements various commands for
  *        controlling the device. bus-generic only.
  *
  *  - device model glue: driver.c - implements helpers for the
diff --git a/drivers/net/wireless/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/rtlwifi/rtl8192de/hw.c
index f5bd3a3cd34a..9d89d7ccdafb 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192de/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192de/hw.c
@@ -466,8 +466,8 @@ void rtl92de_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val)
 		bool int_migration = *(bool *) (val);
 
 		if (int_migration) {
-			/* Set interrrupt migration timer and
-			 * corresponging Tx/Rx counter.
+			/* Set interrupt migration timer and
+			 * corresponding Tx/Rx counter.
 			 * timer 25ns*0xfa0=100us for 0xf packets.
 			 * 0x306:Rx, 0x307:Tx */
 			rtl_write_dword(rtlpriv, REG_INT_MIG, 0xfe000fa0);
diff --git a/drivers/parport/parport_mfc3.c b/drivers/parport/parport_mfc3.c
index 362db31d8ca6..1c0c642b3e23 100644
--- a/drivers/parport/parport_mfc3.c
+++ b/drivers/parport/parport_mfc3.c
@@ -397,7 +397,7 @@ static void __exit parport_mfc3_exit(void)
 
 
 MODULE_AUTHOR("Joerg Dorchain <joerg@dorchain.net>");
-MODULE_DESCRIPTION("Parport Driver for Multiface 3 expansion cards Paralllel Port");
+MODULE_DESCRIPTION("Parport Driver for Multiface 3 expansion cards Parallel Port");
 MODULE_SUPPORTED_DEVICE("Multiface 3 Parallel Port");
 MODULE_LICENSE("GPL");
 
diff --git a/drivers/scsi/aic7xxx/aicasm/aicasm.c b/drivers/scsi/aic7xxx/aicasm/aicasm.c
index e4a778720301..2e3117aa382f 100644
--- a/drivers/scsi/aic7xxx/aicasm/aicasm.c
+++ b/drivers/scsi/aic7xxx/aicasm/aicasm.c
@@ -1,5 +1,5 @@
 /*
- * Aic7xxx SCSI host adapter firmware asssembler
+ * Aic7xxx SCSI host adapter firmware assembler
  *
  * Copyright (c) 1997, 1998, 2000, 2001 Justin T. Gibbs.
  * Copyright (c) 2001, 2002 Adaptec Inc.
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index 218f71a8726e..d77891e5683b 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -4494,7 +4494,7 @@ ips_init_scb(ips_ha_t * ha, ips_scb_t * scb)
 /*                                                                          */
 /*   Initialize a CCB to default values                                     */
 /*                                                                          */
-/* ASSUMED to be callled from within a lock                                 */
+/* ASSUMED to be called from within a lock                                 */
 /*                                                                          */
 /****************************************************************************/
 static ips_scb_t *
diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h
index cbd5a20dbbd1..866af45b3d6c 100644
--- a/drivers/scsi/qla4xxx/ql4_fw.h
+++ b/drivers/scsi/qla4xxx/ql4_fw.h
@@ -744,7 +744,7 @@ struct dev_db_entry {
 	uint8_t res4[0x36];	/* 8A-BF */
 	uint8_t iscsi_name[0xE0];	/* C0-19F : xxzzy Make this a
 					 * pointer to a string so we
-					 * don't have to reserve soooo
+					 * don't have to reserve so
 					 * much RAM */
 	uint8_t link_local_ipv6_addr[0x10]; /* 1A0-1AF */
 	uint8_t res5[0x10];	/* 1B0-1BF */
diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index a18996d24466..7264116185d5 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -1144,7 +1144,7 @@ static void pvscsi_release_resources(struct pvscsi_adapter *adapter)
  *
  * These are statically allocated.  Trying to be clever was not worth it.
  *
- * Dynamic allocation can fail, and we can't go deeep into the memory
+ * Dynamic allocation can fail, and we can't go deep into the memory
  * allocator, since we're a SCSI driver, and trying too hard to allocate
  * memory might generate disk I/O.  We also don't want to fail disk I/O
  * in that case because we can't get an allocation - the I/O could be
diff --git a/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3200.c b/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3200.c
index c75a1a1fd775..f9545b064eaf 100644
--- a/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3200.c
+++ b/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3200.c
@@ -3598,7 +3598,7 @@ int i_APCI3200_InterruptHandleEos(struct comedi_device *dev)
 			n = comedi_buf_write_alloc(s->async,
 				(7 + 12) * sizeof(unsigned int));
 
-			/*  If not enougth memory available, event is set to Comedi Buffer Errror */
+			/*  If not enough memory available, event is set to Comedi Buffer Error */
 			if (n > ((7 + 12) * sizeof(unsigned int))) {
 				printk("\ncomedi_buf_write_alloc n = %i", n);
 				s->async->events |= COMEDI_CB_ERROR;
diff --git a/drivers/staging/cxt1e1/libsbew.h b/drivers/staging/cxt1e1/libsbew.h
index 5c99646cd103..ae8f06d05bed 100644
--- a/drivers/staging/cxt1e1/libsbew.h
+++ b/drivers/staging/cxt1e1/libsbew.h
@@ -323,7 +323,7 @@ struct sbecom_port_param
 #define CFG_CH_DINV_TX      0x02
 
 
-/* Posssible resettable chipsets/functions */
+/* Possible resettable chipsets/functions */
 #define RESET_DEV_TEMUX     1
 #define RESET_DEV_TECT3     RESET_DEV_TEMUX
 #define RESET_DEV_PLL       2
diff --git a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c
index b3d743a3d308..917bbb082a6e 100644
--- a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c
+++ b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c
@@ -344,10 +344,10 @@ static void ft1000_reset_asic(struct net_device *dev)
 	}
 	mdelay(1);
 	if (info->AsicID == ELECTRABUZZ_ID) {
-		// set watermark to -1 in order to not generate an interrrupt
+		// set watermark to -1 in order to not generate an interrupt
 		ft1000_write_reg(dev, FT1000_REG_WATERMARK, 0xffff);
 	} else {
-		// set watermark to -1 in order to not generate an interrrupt
+		// set watermark to -1 in order to not generate an interrupt
 		ft1000_write_reg(dev, FT1000_REG_MAG_WATERMARK, 0xffff);
 	}
 	// clear interrupts
diff --git a/drivers/staging/ft1000/ft1000-usb/ft1000_hw.c b/drivers/staging/ft1000/ft1000-usb/ft1000_hw.c
index aaf44c359827..43b1d363107e 100644
--- a/drivers/staging/ft1000/ft1000-usb/ft1000_hw.c
+++ b/drivers/staging/ft1000/ft1000-usb/ft1000_hw.c
@@ -601,7 +601,7 @@ static void ft1000_reset_asic(struct net_device *dev)
 
 	mdelay(1);
 
-	/* set watermark to -1 in order to not generate an interrrupt */
+	/* set watermark to -1 in order to not generate an interrupt */
 	ft1000_write_register(ft1000dev, 0xffff, FT1000_REG_MAG_WATERMARK);
 
 	/* clear interrupts */
diff --git a/drivers/staging/iio/industrialio-trigger.c b/drivers/staging/iio/industrialio-trigger.c
index 2c626e0cb29c..68a4d4e8c635 100644
--- a/drivers/staging/iio/industrialio-trigger.c
+++ b/drivers/staging/iio/industrialio-trigger.c
@@ -295,7 +295,7 @@ void iio_dealloc_pollfunc(struct iio_poll_func *pf)
 EXPORT_SYMBOL_GPL(iio_dealloc_pollfunc);
 
 /**
- * iio_trigger_read_currrent() - trigger consumer sysfs query which trigger
+ * iio_trigger_read_current() - trigger consumer sysfs query which trigger
  *
  * For trigger consumers the current_trigger interface allows the trigger
  * used by the device to be queried.
diff --git a/drivers/staging/sep/sep_driver.c b/drivers/staging/sep/sep_driver.c
index 8ac3faea2d2f..f47571ea745d 100644
--- a/drivers/staging/sep/sep_driver.c
+++ b/drivers/staging/sep/sep_driver.c
@@ -1235,7 +1235,7 @@ static void sep_build_lli_table(struct sep_device *sep,
 	/* Counter of lli array entry */
 	u32 array_counter;
 
-	/* Init currrent table data size and lli array entry counter */
+	/* Init current table data size and lli array entry counter */
 	curr_table_data_size = 0;
 	array_counter = 0;
 	*num_table_entries_ptr = 1;
diff --git a/drivers/staging/tidspbridge/Kconfig b/drivers/staging/tidspbridge/Kconfig
index 93de4f2e8bf8..21a559ecbbb1 100644
--- a/drivers/staging/tidspbridge/Kconfig
+++ b/drivers/staging/tidspbridge/Kconfig
@@ -78,7 +78,7 @@ config TIDSPBRIDGE_NTFY_PWRERR
 	bool "Notify power errors"
 	depends on TIDSPBRIDGE
 	help
-	  Enable notifications to registered clients on the event of power errror
+	  Enable notifications to registered clients on the event of power error
 	  trying to suspend bridge driver. Say Y, to signal this event as a fatal
 	  error, this will require a bridge restart to recover.
 
diff --git a/drivers/staging/tidspbridge/rmgr/dbdcd.c b/drivers/staging/tidspbridge/rmgr/dbdcd.c
index a7e407e25187..fda240214cd6 100644
--- a/drivers/staging/tidspbridge/rmgr/dbdcd.c
+++ b/drivers/staging/tidspbridge/rmgr/dbdcd.c
@@ -285,7 +285,7 @@ int dcd_enumerate_object(s32 index, enum dsp_dcdobjtype obj_type,
 			enum_refs = 0;
 
 			/*
-			 * TODO: Revisit, this is not an errror case but code
+			 * TODO: Revisit, this is not an error case but code
 			 * expects non-zero value.
 			 */
 			status = ENODATA;
diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c
index 9bfac657572e..b5c56a99da02 100644
--- a/drivers/usb/host/hwa-hc.c
+++ b/drivers/usb/host/hwa-hc.c
@@ -481,7 +481,7 @@ static int __hwahc_op_set_ptk(struct wusbhc *wusbhc, u8 port_idx, u32 tkid,
 		encryption_value = 0;
 	}
 
-	/* Set the encryption type for commmunicating with the device */
+	/* Set the encryption type for communicating with the device */
 	result = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
 			USB_REQ_SET_ENCRYPTION,
 			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
diff --git a/drivers/usb/host/imx21-hcd.c b/drivers/usb/host/imx21-hcd.c
index 2ee18cfa1efe..dbf0f156ed9e 100644
--- a/drivers/usb/host/imx21-hcd.c
+++ b/drivers/usb/host/imx21-hcd.c
@@ -473,7 +473,7 @@ static void free_epdmem(struct imx21 *imx21, struct usb_host_endpoint *ep)
 /* End handling 				*/
 /* ===========================================	*/
 
-/* Endpoint now idle - release it's ETD(s) or asssign to queued request */
+/* Endpoint now idle - release its ETD(s) or assign to queued request */
 static void ep_idle(struct imx21 *imx21, struct ep_priv *ep_priv)
 {
 	int i;
diff --git a/drivers/usb/otg/fsl_otg.c b/drivers/usb/otg/fsl_otg.c
index 0f420b25e9a9..2d9cc445fc73 100644
--- a/drivers/usb/otg/fsl_otg.c
+++ b/drivers/usb/otg/fsl_otg.c
@@ -639,7 +639,7 @@ static int fsl_otg_set_power(struct otg_transceiver *otg_p, unsigned mA)
  * Delayed pin detect interrupt processing.
  *
  * When the Mini-A cable is disconnected from the board,
- * the pin-detect interrupt happens before the disconnnect
+ * the pin-detect interrupt happens before the disconnect
  * interrupts for the connected device(s).  In order to
  * process the disconnect interrupt(s) prior to switching
  * roles, the pin-detect interrupts are delayed, and handled
diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c
index ba8664328afa..a315d4d25cc0 100644
--- a/drivers/uwb/i1480/dfu/usb.c
+++ b/drivers/uwb/i1480/dfu/usb.c
@@ -104,7 +104,7 @@ void i1480_usb_destroy(struct i1480_usb *i1480_usb)
  *
  * Data buffers to USB cannot be on the stack or in vmalloc'ed areas,
  * so we copy it to the local i1480 buffer before proceeding. In any
- * case, we have a max size we can send, soooo.
+ * case, we have a max size we can send.
  */
 static
 int i1480_usb_write(struct i1480 *i1480, u32 memory_address,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 116ab67a06df..c3308c38ae75 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1943,7 +1943,7 @@ enum btrfs_orphan_cleanup_state {
 };
 
 /*
- * This is called in transaction commmit time. If there are no orphan
+ * This is called in transaction commit time. If there are no orphan
  * files in the subvolume, it removes orphan item and frees block_rsv
  * structure.
  */
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 85fe655fe3e0..15cb47088aac 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2490,7 +2490,7 @@ int ext3_can_truncate(struct inode *inode)
  * transaction, and VFS/VM ensures that ext3_truncate() cannot run
  * simultaneously on behalf of the same inode.
  *
- * As we work through the truncate and commmit bits of it to the journal there
+ * As we work through the truncate and commit bits of it to the journal there
  * is one core, guiding principle: the file's tree must always be consistent on
  * disk.  We must be able to restart the truncate after a crash.
  *
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 240f6e2dc7ee..b1c57bf43132 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3502,7 +3502,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
  * transaction, and VFS/VM ensures that ext4_truncate() cannot run
  * simultaneously on behalf of the same inode.
  *
- * As we work through the truncate and commmit bits of it to the journal there
+ * As we work through the truncate and commit bits of it to the journal there
  * is one core, guiding principle: the file's tree must always be consistent on
  * disk.  We must be able to restart the truncate after a crash.
  *
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 47e94e33a975..9ca16dc09e04 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -658,7 +658,7 @@ static int nfsd4_sanitize_slot_size(u32 size)
 /*
  * XXX: If we run out of reserved DRC memory we could (up to a point)
  * re-negotiate active sessions and reduce their slot usage to make
- * rooom for new connections. For now we just fail the create session.
+ * room for new connections. For now we just fail the create session.
  */
 static int nfsd4_get_drc_mem(int slotsize, u32 num)
 {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index de4ea1af041b..199c606c56a5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2108,7 +2108,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
 		 * remove_suid() calls ->setattr without any hint that
 		 * we may have already done our cluster locking. Since
 		 * ocfs2_setattr() *must* take cluster locks to
-		 * proceeed, this will lead us to recursively lock the
+		 * proceed, this will lead us to recursively lock the
 		 * inode. There's also the dinode i_size state which
 		 * can be lost via setattr during extending writes (we
 		 * set inode->i_size at the end of a write. */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 753ed9b5c70b..f675f3d9d7b3 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -209,10 +209,10 @@ xfs_file_fsync(
 
 	/*
 	 * First check if the VFS inode is marked dirty.  All the dirtying
-	 * of non-transactional updates no goes through mark_inode_dirty*,
-	 * which allows us to distinguish beteeen pure timestamp updates
+	 * of non-transactional updates do not go through mark_inode_dirty*,
+	 * which allows us to distinguish between pure timestamp updates
 	 * and i_size updates which need to be caught for fdatasync.
-	 * After that also theck for the dirty state in the XFS inode, which
+	 * After that also check for the dirty state in the XFS inode, which
 	 * might gets cleared when the inode gets written out via the AIL
 	 * or xfs_iflush_cluster.
 	 */
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index c7755d5a5fbe..3ba29b114323 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -256,7 +256,7 @@ xfs_cil_prepare_item(
  * Insert the log items into the CIL and calculate the difference in space
  * consumed by the item. Add the space to the checkpoint ticket and calculate
  * if the change requires additional log metadata. If it does, take that space
- * as well. Remove the amount of space we addded to the checkpoint ticket from
+ * as well. Remove the amount of space we added to the checkpoint ticket from
  * the current transaction ticket so that the accounting works out correctly.
  */
 static void
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 1f9e9516e2b7..e8acca892af0 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -820,7 +820,7 @@ struct drm_driver {
 	 * Specifically, the timestamp in @vblank_time should correspond as
 	 * closely as possible to the time when the first video scanline of
 	 * the video frame after the end of VBLANK will start scanning out,
-	 * the time immmediately after end of the VBLANK interval. If the
+	 * the time immediately after end of the VBLANK interval. If the
 	 * @crtc is currently inside VBLANK, this will be a time in the future.
 	 * If the @crtc is currently scanning out a frame, this will be the
 	 * past start time of the current scanout. This is meant to adhere
diff --git a/include/linux/wanrouter.h b/include/linux/wanrouter.h
index e0aa39612eba..3157cc1fada6 100644
--- a/include/linux/wanrouter.h
+++ b/include/linux/wanrouter.h
@@ -309,7 +309,7 @@ typedef struct wandev_conf
 #define WANOPT_EVEN	2
 
 /* CHDLC Protocol Options */
-/* DF Commmented out for now.
+/* DF Commented out for now.
 
 #define WANOPT_CHDLC_NO_DCD		IGNORE_DCD_FOR_LINK_STAT
 #define WANOPT_CHDLC_NO_CTS		IGNORE_CTS_FOR_LINK_STAT
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 72eddd1b410b..bd3487d5ac84 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1423,7 +1423,7 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw,
  * DOC: Beacon filter support
  *
  * Some hardware have beacon filter support to reduce host cpu wakeups
- * which will reduce system power consumption. It usuallly works so that
+ * which will reduce system power consumption. It usually works so that
  * the firmware creates a checksum of the beacon but omits all constantly
  * changing elements (TSF, TIM etc). Whenever the checksum changes the
  * beacon is forwarded to the host, otherwise it will be just dropped. That
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fdaabf2f2b68..1f23a57aa9e6 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -392,7 +392,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	/* Is this the final fragment? */
 	if ((flags & IP_MF) == 0) {
 		/* If we already have some bits beyond end
-		 * or have different end, the segment is corrrupted.
+		 * or have different end, the segment is corrupted.
 		 */
 		if (end < qp->q.len ||
 		    ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len))
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 6c53b6d1002b..2accea37742e 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -1094,7 +1094,7 @@ static void ieee80211_work_work(struct work_struct *work)
 		local->tmp_channel = NULL;
 		/* If tmp_channel wasn't operating channel, then
 		 * we need to go back on-channel.
-		 * NOTE:  If we can ever be here while scannning,
+		 * NOTE:  If we can ever be here while scanning,
 		 * or if the hw_config() channel config logic changes,
 		 * then we may need to do a more thorough check to see if
 		 * we still need to do a hardware config.  Currently,
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index c8cc24e282c3..68a385d7c3bd 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -415,7 +415,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work)
 	sctp_subtype_t subtype;
 	sctp_state_t state;
 	int error = 0;
-	int first_time = 1;	/* is this the first time through the looop */
+	int first_time = 1;	/* is this the first time through the loop */
 
 	if (ep->base.dead)
 		return;
-- 
cgit v1.2.3


From f528f0b8e53d73b18be71e96693cfab9322f33c7 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 26 Sep 2011 17:12:53 +0100
Subject: kmemleak: Handle percpu memory allocation

This patch adds kmemleak callbacks from the percpu allocator, reducing a
number of false positives caused by kmemleak not scanning such memory
blocks. The percpu chunks are never reported as leaks because of current
kmemleak limitations with the __percpu pointer not pointing directly to
the actual chunks.

Reported-by: Huajun Li <huajun.li.lee@gmail.com>
Acked-by: Christoph Lameter <cl@gentwo.org>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/kmemleak.txt |  3 ++
 include/linux/kmemleak.h   |  8 ++++++
 mm/kmemleak.c              | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/percpu.c                | 12 +++++++-
 4 files changed, 94 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
index 51063e681ca4..b6e39739a36d 100644
--- a/Documentation/kmemleak.txt
+++ b/Documentation/kmemleak.txt
@@ -127,7 +127,10 @@ See the include/linux/kmemleak.h header for the functions prototype.
 
 kmemleak_init		 - initialize kmemleak
 kmemleak_alloc		 - notify of a memory block allocation
+kmemleak_alloc_percpu	 - notify of a percpu memory block allocation
 kmemleak_free		 - notify of a memory block freeing
+kmemleak_free_part	 - notify of a partial memory block freeing
+kmemleak_free_percpu	 - notify of a percpu memory block freeing
 kmemleak_not_leak	 - mark an object as not a leak
 kmemleak_ignore		 - do not scan or report an object as leak
 kmemleak_scan_area	 - add scan areas inside a memory block
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 99d9a6766f7e..2a5e5548a1d2 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -26,8 +26,10 @@
 extern void kmemleak_init(void) __ref;
 extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
 			   gfp_t gfp) __ref;
+extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) __ref;
 extern void kmemleak_free(const void *ptr) __ref;
 extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
+extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
 extern void kmemleak_padding(const void *ptr, unsigned long offset,
 			     size_t size) __ref;
 extern void kmemleak_not_leak(const void *ptr) __ref;
@@ -68,6 +70,9 @@ static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
 					    gfp_t gfp)
 {
 }
+static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size)
+{
+}
 static inline void kmemleak_free(const void *ptr)
 {
 }
@@ -77,6 +82,9 @@ static inline void kmemleak_free_part(const void *ptr, size_t size)
 static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
 {
 }
+static inline void kmemleak_free_percpu(const void __percpu *ptr)
+{
+}
 static inline void kmemleak_not_leak(const void *ptr)
 {
 }
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index b4f4e6021c1b..15c50302ff93 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -230,8 +230,10 @@ static int kmemleak_skip_disable;
 /* kmemleak operation type for early logging */
 enum {
 	KMEMLEAK_ALLOC,
+	KMEMLEAK_ALLOC_PERCPU,
 	KMEMLEAK_FREE,
 	KMEMLEAK_FREE_PART,
+	KMEMLEAK_FREE_PERCPU,
 	KMEMLEAK_NOT_LEAK,
 	KMEMLEAK_IGNORE,
 	KMEMLEAK_SCAN_AREA,
@@ -852,6 +854,20 @@ out:
 	rcu_read_unlock();
 }
 
+/*
+ * Log an early allocated block and populate the stack trace.
+ */
+static void early_alloc_percpu(struct early_log *log)
+{
+	unsigned int cpu;
+	const void __percpu *ptr = log->ptr;
+
+	for_each_possible_cpu(cpu) {
+		log->ptr = per_cpu_ptr(ptr, cpu);
+		early_alloc(log);
+	}
+}
+
 /**
  * kmemleak_alloc - register a newly allocated object
  * @ptr:	pointer to beginning of the object
@@ -878,6 +894,34 @@ void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
 }
 EXPORT_SYMBOL_GPL(kmemleak_alloc);
 
+/**
+ * kmemleak_alloc_percpu - register a newly allocated __percpu object
+ * @ptr:	__percpu pointer to beginning of the object
+ * @size:	size of the object
+ *
+ * This function is called from the kernel percpu allocator when a new object
+ * (memory block) is allocated (alloc_percpu). It assumes GFP_KERNEL
+ * allocation.
+ */
+void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size)
+{
+	unsigned int cpu;
+
+	pr_debug("%s(0x%p, %zu)\n", __func__, ptr, size);
+
+	/*
+	 * Percpu allocations are only scanned and not reported as leaks
+	 * (min_count is set to 0).
+	 */
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		for_each_possible_cpu(cpu)
+			create_object((unsigned long)per_cpu_ptr(ptr, cpu),
+				      size, 0, GFP_KERNEL);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_ALLOC_PERCPU, ptr, size, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);
+
 /**
  * kmemleak_free - unregister a previously registered object
  * @ptr:	pointer to beginning of the object
@@ -916,6 +960,28 @@ void __ref kmemleak_free_part(const void *ptr, size_t size)
 }
 EXPORT_SYMBOL_GPL(kmemleak_free_part);
 
+/**
+ * kmemleak_free_percpu - unregister a previously registered __percpu object
+ * @ptr:	__percpu pointer to beginning of the object
+ *
+ * This function is called from the kernel percpu allocator when an object
+ * (memory block) is freed (free_percpu).
+ */
+void __ref kmemleak_free_percpu(const void __percpu *ptr)
+{
+	unsigned int cpu;
+
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		for_each_possible_cpu(cpu)
+			delete_object_full((unsigned long)per_cpu_ptr(ptr,
+								      cpu));
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_FREE_PERCPU, ptr, 0, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_free_percpu);
+
 /**
  * kmemleak_not_leak - mark an allocated object as false positive
  * @ptr:	pointer to beginning of the object
@@ -1727,12 +1793,18 @@ void __init kmemleak_init(void)
 		case KMEMLEAK_ALLOC:
 			early_alloc(log);
 			break;
+		case KMEMLEAK_ALLOC_PERCPU:
+			early_alloc_percpu(log);
+			break;
 		case KMEMLEAK_FREE:
 			kmemleak_free(log->ptr);
 			break;
 		case KMEMLEAK_FREE_PART:
 			kmemleak_free_part(log->ptr, log->size);
 			break;
+		case KMEMLEAK_FREE_PERCPU:
+			kmemleak_free_percpu(log->ptr);
+			break;
 		case KMEMLEAK_NOT_LEAK:
 			kmemleak_not_leak(log->ptr);
 			break;
diff --git a/mm/percpu.c b/mm/percpu.c
index 3bb810a72006..86c5bdbdc370 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -67,6 +67,7 @@
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
+#include <linux/kmemleak.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -710,6 +711,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
 	const char *err;
 	int slot, off, new_alloc;
 	unsigned long flags;
+	void __percpu *ptr;
 
 	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
 		WARN(true, "illegal size (%zu) or align (%zu) for "
@@ -802,7 +804,9 @@ area_found:
 	mutex_unlock(&pcpu_alloc_mutex);
 
 	/* return address relative to base address */
-	return __addr_to_pcpu_ptr(chunk->base_addr + off);
+	ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
+	kmemleak_alloc_percpu(ptr, size);
+	return ptr;
 
 fail_unlock:
 	spin_unlock_irqrestore(&pcpu_lock, flags);
@@ -916,6 +920,8 @@ void free_percpu(void __percpu *ptr)
 	if (!ptr)
 		return;
 
+	kmemleak_free_percpu(ptr);
+
 	addr = __pcpu_ptr_to_addr(ptr);
 
 	spin_lock_irqsave(&pcpu_lock, flags);
@@ -1637,6 +1643,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 			rc = -ENOMEM;
 			goto out_free_areas;
 		}
+		/* kmemleak tracks the percpu allocations separately */
+		kmemleak_free(ptr);
 		areas[group] = ptr;
 
 		base = min(ptr, base);
@@ -1751,6 +1759,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 					   "for cpu%u\n", psize_str, cpu);
 				goto enomem;
 			}
+			/* kmemleak tracks the percpu allocations separately */
+			kmemleak_free(ptr);
 			pages[j++] = virt_to_page(ptr);
 		}
 
-- 
cgit v1.2.3


From 86b1309c7e411b7c25dc0dc7a092582a4d291044 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Thu, 10 Nov 2011 19:14:51 -0800
Subject: genetlink: Add lockdep_genl_is_held().

Open vSwitch uses genl_mutex locking to protect datapath
data-structures like flow-table, flow-actions. Following patch adds
lockdep_genl_is_held() which is used for rcu annotation to prove
locking.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 include/linux/genetlink.h | 3 +++
 net/netlink/genetlink.c   | 8 ++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
index 61549b26ad6f..59311adfb0e0 100644
--- a/include/linux/genetlink.h
+++ b/include/linux/genetlink.h
@@ -85,6 +85,9 @@ enum {
 /* All generic netlink requests are serialized by a global lock.  */
 extern void genl_lock(void);
 extern void genl_unlock(void);
+#ifdef CONFIG_PROVE_LOCKING
+extern int lockdep_genl_is_held(void);
+#endif
 
 #endif /* __KERNEL__ */
 
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 8a36599d3555..28453ae2a97b 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -33,6 +33,14 @@ void genl_unlock(void)
 }
 EXPORT_SYMBOL(genl_unlock);
 
+#ifdef CONFIG_PROVE_LOCKING
+int lockdep_genl_is_held(void)
+{
+	return lockdep_is_held(&genl_mutex);
+}
+EXPORT_SYMBOL(lockdep_genl_is_held);
+#endif
+
 #define GENL_FAM_TAB_SIZE	16
 #define GENL_FAM_TAB_MASK	(GENL_FAM_TAB_SIZE - 1)
 
-- 
cgit v1.2.3


From b4e16611c4e1cd98765269c8fdaf43f96baa57b1 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sat, 19 Nov 2011 16:21:37 -0800
Subject: genetlink: Add rcu_dereference_genl and genl_dereference.

This adds rcu_dereference_genl and genl_dereference, which are genl
variants of the RTNL functions to enforce proper locking with lockdep
and sparse.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 include/linux/genetlink.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
index 59311adfb0e0..73c28dea10ae 100644
--- a/include/linux/genetlink.h
+++ b/include/linux/genetlink.h
@@ -89,6 +89,27 @@ extern void genl_unlock(void);
 extern int lockdep_genl_is_held(void);
 #endif
 
+/**
+ * rcu_dereference_genl - rcu_dereference with debug checking
+ * @p: The pointer to read, prior to dereferencing
+ *
+ * Do an rcu_dereference(p), but check caller either holds rcu_read_lock()
+ * or genl mutex. Note : Please prefer genl_dereference() or rcu_dereference()
+ */
+#define rcu_dereference_genl(p)					\
+	rcu_dereference_check(p, lockdep_genl_is_held())
+
+/**
+ * genl_dereference - fetch RCU pointer when updates are prevented by genl mutex
+ * @p: The pointer to read, prior to dereferencing
+ *
+ * Return the value of the specified RCU-protected pointer, but omit
+ * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
+ * caller holds genl mutex.
+ */
+#define genl_dereference(p)					\
+	rcu_dereference_protected(p, lockdep_genl_is_held())
+
 #endif /* __KERNEL__ */
 
 #endif	/* __LINUX_GENERIC_NETLINK_H */
-- 
cgit v1.2.3


From 396cf9430505cfba529a2f2a037d782719fa5844 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Fri, 18 Nov 2011 13:15:54 -0800
Subject: vlan: Move vlan_set_encap_proto() to vlan header file

Open vSwitch needs this function for vlan handling.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 include/linux/if_vlan.h | 34 ++++++++++++++++++++++++++++++++++
 net/8021q/vlan_core.c   | 33 ---------------------------------
 2 files changed, 34 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 12d5543b14f2..070ac50c1d2d 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -310,6 +310,40 @@ static inline __be16 vlan_get_protocol(const struct sk_buff *skb)
 
 	return protocol;
 }
+
+static inline void vlan_set_encap_proto(struct sk_buff *skb,
+					struct vlan_hdr *vhdr)
+{
+	__be16 proto;
+	unsigned char *rawp;
+
+	/*
+	 * Was a VLAN packet, grab the encapsulated protocol, which the layer
+	 * three protocols care about.
+	 */
+
+	proto = vhdr->h_vlan_encapsulated_proto;
+	if (ntohs(proto) >= 1536) {
+		skb->protocol = proto;
+		return;
+	}
+
+	rawp = skb->data;
+	if (*(unsigned short *) rawp == 0xFFFF)
+		/*
+		 * This is a magic hack to spot IPX packets. Older Novell
+		 * breaks the protocol design and runs IPX over 802.3 without
+		 * an 802.2 LLC layer. We look for FFFF which isn't a used
+		 * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
+		 * but does for the rest.
+		 */
+		skb->protocol = htons(ETH_P_802_3);
+	else
+		/*
+		 * Real 802.2 LLC
+		 */
+		skb->protocol = htons(ETH_P_802_2);
+}
 #endif /* __KERNEL__ */
 
 /* VLAN IOCTLs are found in sockios.h */
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index f5ffc02729d6..9c95e8e054f9 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -110,39 +110,6 @@ static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
 	return skb;
 }
 
-static void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr)
-{
-	__be16 proto;
-	unsigned char *rawp;
-
-	/*
-	 * Was a VLAN packet, grab the encapsulated protocol, which the layer
-	 * three protocols care about.
-	 */
-
-	proto = vhdr->h_vlan_encapsulated_proto;
-	if (ntohs(proto) >= 1536) {
-		skb->protocol = proto;
-		return;
-	}
-
-	rawp = skb->data;
-	if (*(unsigned short *) rawp == 0xFFFF)
-		/*
-		 * This is a magic hack to spot IPX packets. Older Novell
-		 * breaks the protocol design and runs IPX over 802.3 without
-		 * an 802.2 LLC layer. We look for FFFF which isn't a used
-		 * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
-		 * but does for the rest.
-		 */
-		skb->protocol = htons(ETH_P_802_3);
-	else
-		/*
-		 * Real 802.2 LLC
-		 */
-		skb->protocol = htons(ETH_P_802_2);
-}
-
 struct sk_buff *vlan_untag(struct sk_buff *skb)
 {
 	struct vlan_hdr *vhdr;
-- 
cgit v1.2.3


From ccb1352e76cff0524e7ccb2074826a092dd13016 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Tue, 25 Oct 2011 19:26:31 -0700
Subject: net: Add Open vSwitch kernel components.

Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments.  In addition to supporting a variety of features
expected in a traditional hardware switch, it enables fine-grained
programmatic extension and flow-based control of the network.
This control is useful in a wide variety of applications but is
particularly important in multi-server virtualization deployments,
which are often characterized by highly dynamic endpoints and the need
to maintain logical abstractions for multiple tenants.

The Open vSwitch datapath provides an in-kernel fast path for packet
forwarding.  It is complemented by a userspace daemon, ovs-vswitchd,
which is able to accept configuration from a variety of sources and
translate it into packet processing rules.

See http://openvswitch.org for more information and userspace
utilities.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 Documentation/networking/00-INDEX        |    2 +
 Documentation/networking/openvswitch.txt |  195 +++
 MAINTAINERS                              |    8 +
 include/linux/openvswitch.h              |  452 +++++++
 net/Kconfig                              |    1 +
 net/Makefile                             |    1 +
 net/openvswitch/Kconfig                  |   28 +
 net/openvswitch/Makefile                 |   14 +
 net/openvswitch/actions.c                |  415 +++++++
 net/openvswitch/datapath.c               | 1912 ++++++++++++++++++++++++++++++
 net/openvswitch/datapath.h               |  125 ++
 net/openvswitch/dp_notify.c              |   66 ++
 net/openvswitch/flow.c                   | 1346 +++++++++++++++++++++
 net/openvswitch/flow.h                   |  199 ++++
 net/openvswitch/vport-internal_dev.c     |  241 ++++
 net/openvswitch/vport-internal_dev.h     |   28 +
 net/openvswitch/vport-netdev.c           |  198 ++++
 net/openvswitch/vport-netdev.h           |   42 +
 net/openvswitch/vport.c                  |  396 +++++++
 net/openvswitch/vport.h                  |  205 ++++
 20 files changed, 5874 insertions(+)
 create mode 100644 Documentation/networking/openvswitch.txt
 create mode 100644 include/linux/openvswitch.h
 create mode 100644 net/openvswitch/Kconfig
 create mode 100644 net/openvswitch/Makefile
 create mode 100644 net/openvswitch/actions.c
 create mode 100644 net/openvswitch/datapath.c
 create mode 100644 net/openvswitch/datapath.h
 create mode 100644 net/openvswitch/dp_notify.c
 create mode 100644 net/openvswitch/flow.c
 create mode 100644 net/openvswitch/flow.h
 create mode 100644 net/openvswitch/vport-internal_dev.c
 create mode 100644 net/openvswitch/vport-internal_dev.h
 create mode 100644 net/openvswitch/vport-netdev.c
 create mode 100644 net/openvswitch/vport-netdev.h
 create mode 100644 net/openvswitch/vport.c
 create mode 100644 net/openvswitch/vport.h

(limited to 'include/linux')

diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index bbce1215434a..9ad9ddeb384c 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -144,6 +144,8 @@ nfc.txt
 	- The Linux Near Field Communication (NFS) subsystem.
 olympic.txt
 	- IBM PCI Pit/Pit-Phy/Olympic Token Ring driver info.
+openvswitch.txt
+	- Open vSwitch developer documentation.
 operstates.txt
 	- Overview of network interface operational states.
 packet_mmap.txt
diff --git a/Documentation/networking/openvswitch.txt b/Documentation/networking/openvswitch.txt
new file mode 100644
index 000000000000..b8a048b8df3a
--- /dev/null
+++ b/Documentation/networking/openvswitch.txt
@@ -0,0 +1,195 @@
+Open vSwitch datapath developer documentation
+=============================================
+
+The Open vSwitch kernel module allows flexible userspace control over
+flow-level packet processing on selected network devices.  It can be
+used to implement a plain Ethernet switch, network device bonding,
+VLAN processing, network access control, flow-based network control,
+and so on.
+
+The kernel module implements multiple "datapaths" (analogous to
+bridges), each of which can have multiple "vports" (analogous to ports
+within a bridge).  Each datapath also has associated with it a "flow
+table" that userspace populates with "flows" that map from keys based
+on packet headers and metadata to sets of actions.  The most common
+action forwards the packet to another vport; other actions are also
+implemented.
+
+When a packet arrives on a vport, the kernel module processes it by
+extracting its flow key and looking it up in the flow table.  If there
+is a matching flow, it executes the associated actions.  If there is
+no match, it queues the packet to userspace for processing (as part of
+its processing, userspace will likely set up a flow to handle further
+packets of the same type entirely in-kernel).
+
+
+Flow key compatibility
+----------------------
+
+Network protocols evolve over time.  New protocols become important
+and existing protocols lose their prominence.  For the Open vSwitch
+kernel module to remain relevant, it must be possible for newer
+versions to parse additional protocols as part of the flow key.  It
+might even be desirable, someday, to drop support for parsing
+protocols that have become obsolete.  Therefore, the Netlink interface
+to Open vSwitch is designed to allow carefully written userspace
+applications to work with any version of the flow key, past or future.
+
+To support this forward and backward compatibility, whenever the
+kernel module passes a packet to userspace, it also passes along the
+flow key that it parsed from the packet.  Userspace then extracts its
+own notion of a flow key from the packet and compares it against the
+kernel-provided version:
+
+    - If userspace's notion of the flow key for the packet matches the
+      kernel's, then nothing special is necessary.
+
+    - If the kernel's flow key includes more fields than the userspace
+      version of the flow key, for example if the kernel decoded IPv6
+      headers but userspace stopped at the Ethernet type (because it
+      does not understand IPv6), then again nothing special is
+      necessary.  Userspace can still set up a flow in the usual way,
+      as long as it uses the kernel-provided flow key to do it.
+
+    - If the userspace flow key includes more fields than the
+      kernel's, for example if userspace decoded an IPv6 header but
+      the kernel stopped at the Ethernet type, then userspace can
+      forward the packet manually, without setting up a flow in the
+      kernel.  This case is bad for performance because every packet
+      that the kernel considers part of the flow must go to userspace,
+      but the forwarding behavior is correct.  (If userspace can
+      determine that the values of the extra fields would not affect
+      forwarding behavior, then it could set up a flow anyway.)
+
+How flow keys evolve over time is important to making this work, so
+the following sections go into detail.
+
+
+Flow key format
+---------------
+
+A flow key is passed over a Netlink socket as a sequence of Netlink
+attributes.  Some attributes represent packet metadata, defined as any
+information about a packet that cannot be extracted from the packet
+itself, e.g. the vport on which the packet was received.  Most
+attributes, however, are extracted from headers within the packet,
+e.g. source and destination addresses from Ethernet, IP, or TCP
+headers.
+
+The <linux/openvswitch.h> header file defines the exact format of the
+flow key attributes.  For informal explanatory purposes here, we write
+them as comma-separated strings, with parentheses indicating arguments
+and nesting.  For example, the following could represent a flow key
+corresponding to a TCP packet that arrived on vport 1:
+
+    in_port(1), eth(src=e0:91:f5:21:d0:b2, dst=00:02:e3:0f:80:a4),
+    eth_type(0x0800), ipv4(src=172.16.0.20, dst=172.18.0.52, proto=17, tos=0,
+    frag=no), tcp(src=49163, dst=80)
+
+Often we ellipsize arguments not important to the discussion, e.g.:
+
+    in_port(1), eth(...), eth_type(0x0800), ipv4(...), tcp(...)
+
+
+Basic rule for evolving flow keys
+---------------------------------
+
+Some care is needed to really maintain forward and backward
+compatibility for applications that follow the rules listed under
+"Flow key compatibility" above.
+
+The basic rule is obvious:
+
+    ------------------------------------------------------------------
+    New network protocol support must only supplement existing flow
+    key attributes.  It must not change the meaning of already defined
+    flow key attributes.
+    ------------------------------------------------------------------
+
+This rule does have less-obvious consequences so it is worth working
+through a few examples.  Suppose, for example, that the kernel module
+did not already implement VLAN parsing.  Instead, it just interpreted
+the 802.1Q TPID (0x8100) as the Ethertype then stopped parsing the
+packet.  The flow key for any packet with an 802.1Q header would look
+essentially like this, ignoring metadata:
+
+    eth(...), eth_type(0x8100)
+
+Naively, to add VLAN support, it makes sense to add a new "vlan" flow
+key attribute to contain the VLAN tag, then continue to decode the
+encapsulated headers beyond the VLAN tag using the existing field
+definitions.  With this change, an TCP packet in VLAN 10 would have a
+flow key much like this:
+
+    eth(...), vlan(vid=10, pcp=0), eth_type(0x0800), ip(proto=6, ...), tcp(...)
+
+But this change would negatively affect a userspace application that
+has not been updated to understand the new "vlan" flow key attribute.
+The application could, following the flow compatibility rules above,
+ignore the "vlan" attribute that it does not understand and therefore
+assume that the flow contained IP packets.  This is a bad assumption
+(the flow only contains IP packets if one parses and skips over the
+802.1Q header) and it could cause the application's behavior to change
+across kernel versions even though it follows the compatibility rules.
+
+The solution is to use a set of nested attributes.  This is, for
+example, why 802.1Q support uses nested attributes.  A TCP packet in
+VLAN 10 is actually expressed as:
+
+    eth(...), eth_type(0x8100), vlan(vid=10, pcp=0), encap(eth_type(0x0800),
+    ip(proto=6, ...), tcp(...)))
+
+Notice how the "eth_type", "ip", and "tcp" flow key attributes are
+nested inside the "encap" attribute.  Thus, an application that does
+not understand the "vlan" key will not see either of those attributes
+and therefore will not misinterpret them.  (Also, the outer eth_type
+is still 0x8100, not changed to 0x0800.)
+
+Handling malformed packets
+--------------------------
+
+Don't drop packets in the kernel for malformed protocol headers, bad
+checksums, etc.  This would prevent userspace from implementing a
+simple Ethernet switch that forwards every packet.
+
+Instead, in such a case, include an attribute with "empty" content.
+It doesn't matter if the empty content could be valid protocol values,
+as long as those values are rarely seen in practice, because userspace
+can always forward all packets with those values to userspace and
+handle them individually.
+
+For example, consider a packet that contains an IP header that
+indicates protocol 6 for TCP, but which is truncated just after the IP
+header, so that the TCP header is missing.  The flow key for this
+packet would include a tcp attribute with all-zero src and dst, like
+this:
+
+    eth(...), eth_type(0x0800), ip(proto=6, ...), tcp(src=0, dst=0)
+
+As another example, consider a packet with an Ethernet type of 0x8100,
+indicating that a VLAN TCI should follow, but which is truncated just
+after the Ethernet type.  The flow key for this packet would include
+an all-zero-bits vlan and an empty encap attribute, like this:
+
+    eth(...), eth_type(0x8100), vlan(0), encap()
+
+Unlike a TCP packet with source and destination ports 0, an
+all-zero-bits VLAN TCI is not that rare, so the CFI bit (aka
+VLAN_TAG_PRESENT inside the kernel) is ordinarily set in a vlan
+attribute expressly to allow this situation to be distinguished.
+Thus, the flow key in this second example unambiguously indicates a
+missing or malformed VLAN TCI.
+
+Other rules
+-----------
+
+The other rules for flow keys are much less subtle:
+
+    - Duplicate attributes are not allowed at a given nesting level.
+
+    - Ordering of attributes is not significant.
+
+    - When the kernel sends a given flow key to userspace, it always
+      composes it the same way.  This allows userspace to hash and
+      compare entire flow keys that it may not be able to fully
+      interpret.
diff --git a/MAINTAINERS b/MAINTAINERS
index c88eb7bb3a69..209ad0695ba2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4868,6 +4868,14 @@ S:	Maintained
 T:	git git://openrisc.net/~jonas/linux
 F:	arch/openrisc
 
+OPENVSWITCH
+M:	Jesse Gross <jesse@nicira.com>
+L:	dev@openvswitch.org
+W:	http://openvswitch.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch.git
+S:	Maintained
+F:	net/openvswitch/
+
 OPL4 DRIVER
 M:	Clemens Ladisch <clemens@ladisch.de>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
new file mode 100644
index 000000000000..eb1efa54fe84
--- /dev/null
+++ b/include/linux/openvswitch.h
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef _LINUX_OPENVSWITCH_H
+#define _LINUX_OPENVSWITCH_H 1
+
+#include <linux/types.h>
+
+/**
+ * struct ovs_header - header for OVS Generic Netlink messages.
+ * @dp_ifindex: ifindex of local port for datapath (0 to make a request not
+ * specific to a datapath).
+ *
+ * Attributes following the header are specific to a particular OVS Generic
+ * Netlink family, but all of the OVS families use this header.
+ */
+
+struct ovs_header {
+	int dp_ifindex;
+};
+
+/* Datapaths. */
+
+#define OVS_DATAPATH_FAMILY  "ovs_datapath"
+#define OVS_DATAPATH_MCGROUP "ovs_datapath"
+#define OVS_DATAPATH_VERSION 0x1
+
+enum ovs_datapath_cmd {
+	OVS_DP_CMD_UNSPEC,
+	OVS_DP_CMD_NEW,
+	OVS_DP_CMD_DEL,
+	OVS_DP_CMD_GET,
+	OVS_DP_CMD_SET
+};
+
+/**
+ * enum ovs_datapath_attr - attributes for %OVS_DP_* commands.
+ * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local
+ * port".  This is the name of the network device whose dp_ifindex is given in
+ * the &struct ovs_header.  Always present in notifications.  Required in
+ * %OVS_DP_NEW requests.  May be used as an alternative to specifying
+ * dp_ifindex in other requests (with a dp_ifindex of 0).
+ * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially
+ * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
+ * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
+ * not be sent.
+ * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
+ * datapath.  Always present in notifications.
+ *
+ * These attributes follow the &struct ovs_header within the Generic Netlink
+ * payload for %OVS_DP_* commands.
+ */
+enum ovs_datapath_attr {
+	OVS_DP_ATTR_UNSPEC,
+	OVS_DP_ATTR_NAME,       /* name of dp_ifindex netdev */
+	OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */
+	OVS_DP_ATTR_STATS,      /* struct ovs_dp_stats */
+	__OVS_DP_ATTR_MAX
+};
+
+#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1)
+
+struct ovs_dp_stats {
+	__u64 n_hit;             /* Number of flow table matches. */
+	__u64 n_missed;          /* Number of flow table misses. */
+	__u64 n_lost;            /* Number of misses not sent to userspace. */
+	__u64 n_flows;           /* Number of flows present */
+};
+
+struct ovs_vport_stats {
+	__u64   rx_packets;		/* total packets received       */
+	__u64   tx_packets;		/* total packets transmitted    */
+	__u64   rx_bytes;		/* total bytes received         */
+	__u64   tx_bytes;		/* total bytes transmitted      */
+	__u64   rx_errors;		/* bad packets received         */
+	__u64   tx_errors;		/* packet transmit problems     */
+	__u64   rx_dropped;		/* no space in linux buffers    */
+	__u64   tx_dropped;		/* no space available in linux  */
+};
+
+/* Fixed logical ports. */
+#define OVSP_LOCAL      ((__u16)0)
+
+/* Packet transfer. */
+
+#define OVS_PACKET_FAMILY "ovs_packet"
+#define OVS_PACKET_VERSION 0x1
+
+enum ovs_packet_cmd {
+	OVS_PACKET_CMD_UNSPEC,
+
+	/* Kernel-to-user notifications. */
+	OVS_PACKET_CMD_MISS,    /* Flow table miss. */
+	OVS_PACKET_CMD_ACTION,  /* OVS_ACTION_ATTR_USERSPACE action. */
+
+	/* Userspace commands. */
+	OVS_PACKET_CMD_EXECUTE  /* Apply actions to a packet. */
+};
+
+/**
+ * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands.
+ * @OVS_PACKET_ATTR_PACKET: Present for all notifications.  Contains the entire
+ * packet as received, from the start of the Ethernet header onward.  For
+ * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by
+ * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is
+ * the flow key extracted from the packet as originally received.
+ * @OVS_PACKET_ATTR_KEY: Present for all notifications.  Contains the flow key
+ * extracted from the packet as nested %OVS_KEY_ATTR_* attributes.  This allows
+ * userspace to adapt its flow setup strategy by comparing its notion of the
+ * flow key against the kernel's.
+ * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet.  Used
+ * for %OVS_PACKET_CMD_EXECUTE.  It has nested %OVS_ACTION_ATTR_* attributes.
+ * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION
+ * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
+ * %OVS_USERSPACE_ATTR_USERDATA attribute.
+ *
+ * These attributes follow the &struct ovs_header within the Generic Netlink
+ * payload for %OVS_PACKET_* commands.
+ */
+enum ovs_packet_attr {
+	OVS_PACKET_ATTR_UNSPEC,
+	OVS_PACKET_ATTR_PACKET,      /* Packet data. */
+	OVS_PACKET_ATTR_KEY,         /* Nested OVS_KEY_ATTR_* attributes. */
+	OVS_PACKET_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
+	OVS_PACKET_ATTR_USERDATA,    /* u64 OVS_ACTION_ATTR_USERSPACE arg. */
+	__OVS_PACKET_ATTR_MAX
+};
+
+#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1)
+
+/* Virtual ports. */
+
+#define OVS_VPORT_FAMILY  "ovs_vport"
+#define OVS_VPORT_MCGROUP "ovs_vport"
+#define OVS_VPORT_VERSION 0x1
+
+enum ovs_vport_cmd {
+	OVS_VPORT_CMD_UNSPEC,
+	OVS_VPORT_CMD_NEW,
+	OVS_VPORT_CMD_DEL,
+	OVS_VPORT_CMD_GET,
+	OVS_VPORT_CMD_SET
+};
+
+enum ovs_vport_type {
+	OVS_VPORT_TYPE_UNSPEC,
+	OVS_VPORT_TYPE_NETDEV,   /* network device */
+	OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
+	__OVS_VPORT_TYPE_MAX
+};
+
+#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1)
+
+/**
+ * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands.
+ * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath.
+ * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type
+ * of vport.
+ * @OVS_VPORT_ATTR_NAME: Name of vport.  For a vport based on a network device
+ * this is the name of the network device.  Maximum length %IFNAMSIZ-1 bytes
+ * plus a null terminator.
+ * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
+ * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that
+ * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on
+ * this port.  A value of zero indicates that upcalls should not be sent.
+ * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
+ * packets sent or received through the vport.
+ *
+ * These attributes follow the &struct ovs_header within the Generic Netlink
+ * payload for %OVS_VPORT_* commands.
+ *
+ * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and
+ * %OVS_VPORT_ATTR_NAME attributes are required.  %OVS_VPORT_ATTR_PORT_NO is
+ * optional; if not specified a free port number is automatically selected.
+ * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type
+ * of vport.
+ * and other attributes are ignored.
+ *
+ * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to
+ * look up the vport to operate on; otherwise dp_idx from the &struct
+ * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport.
+ */
+enum ovs_vport_attr {
+	OVS_VPORT_ATTR_UNSPEC,
+	OVS_VPORT_ATTR_PORT_NO,	/* u32 port number within datapath */
+	OVS_VPORT_ATTR_TYPE,	/* u32 OVS_VPORT_TYPE_* constant. */
+	OVS_VPORT_ATTR_NAME,	/* string name, up to IFNAMSIZ bytes long */
+	OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */
+	OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */
+	OVS_VPORT_ATTR_STATS,	/* struct ovs_vport_stats */
+	__OVS_VPORT_ATTR_MAX
+};
+
+#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
+
+/* Flows. */
+
+#define OVS_FLOW_FAMILY  "ovs_flow"
+#define OVS_FLOW_MCGROUP "ovs_flow"
+#define OVS_FLOW_VERSION 0x1
+
+enum ovs_flow_cmd {
+	OVS_FLOW_CMD_UNSPEC,
+	OVS_FLOW_CMD_NEW,
+	OVS_FLOW_CMD_DEL,
+	OVS_FLOW_CMD_GET,
+	OVS_FLOW_CMD_SET
+};
+
+struct ovs_flow_stats {
+	__u64 n_packets;         /* Number of matched packets. */
+	__u64 n_bytes;           /* Number of matched bytes. */
+};
+
+enum ovs_key_attr {
+	OVS_KEY_ATTR_UNSPEC,
+	OVS_KEY_ATTR_ENCAP,	/* Nested set of encapsulated attributes. */
+	OVS_KEY_ATTR_PRIORITY,  /* u32 skb->priority */
+	OVS_KEY_ATTR_IN_PORT,   /* u32 OVS dp port number */
+	OVS_KEY_ATTR_ETHERNET,  /* struct ovs_key_ethernet */
+	OVS_KEY_ATTR_VLAN,	/* be16 VLAN TCI */
+	OVS_KEY_ATTR_ETHERTYPE,	/* be16 Ethernet type */
+	OVS_KEY_ATTR_IPV4,      /* struct ovs_key_ipv4 */
+	OVS_KEY_ATTR_IPV6,      /* struct ovs_key_ipv6 */
+	OVS_KEY_ATTR_TCP,       /* struct ovs_key_tcp */
+	OVS_KEY_ATTR_UDP,       /* struct ovs_key_udp */
+	OVS_KEY_ATTR_ICMP,      /* struct ovs_key_icmp */
+	OVS_KEY_ATTR_ICMPV6,    /* struct ovs_key_icmpv6 */
+	OVS_KEY_ATTR_ARP,       /* struct ovs_key_arp */
+	OVS_KEY_ATTR_ND,        /* struct ovs_key_nd */
+	__OVS_KEY_ATTR_MAX
+};
+
+#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
+
+/**
+ * enum ovs_frag_type - IPv4 and IPv6 fragment type
+ * @OVS_FRAG_TYPE_NONE: Packet is not a fragment.
+ * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0.
+ * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset.
+ *
+ * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag &struct
+ * ovs_key_ipv6.
+ */
+enum ovs_frag_type {
+	OVS_FRAG_TYPE_NONE,
+	OVS_FRAG_TYPE_FIRST,
+	OVS_FRAG_TYPE_LATER,
+	__OVS_FRAG_TYPE_MAX
+};
+
+#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
+
+struct ovs_key_ethernet {
+	__u8	 eth_src[6];
+	__u8	 eth_dst[6];
+};
+
+struct ovs_key_ipv4 {
+	__be32 ipv4_src;
+	__be32 ipv4_dst;
+	__u8   ipv4_proto;
+	__u8   ipv4_tos;
+	__u8   ipv4_ttl;
+	__u8   ipv4_frag;	/* One of OVS_FRAG_TYPE_*. */
+};
+
+struct ovs_key_ipv6 {
+	__be32 ipv6_src[4];
+	__be32 ipv6_dst[4];
+	__be32 ipv6_label;	/* 20-bits in least-significant bits. */
+	__u8   ipv6_proto;
+	__u8   ipv6_tclass;
+	__u8   ipv6_hlimit;
+	__u8   ipv6_frag;	/* One of OVS_FRAG_TYPE_*. */
+};
+
+struct ovs_key_tcp {
+	__be16 tcp_src;
+	__be16 tcp_dst;
+};
+
+struct ovs_key_udp {
+	__be16 udp_src;
+	__be16 udp_dst;
+};
+
+struct ovs_key_icmp {
+	__u8 icmp_type;
+	__u8 icmp_code;
+};
+
+struct ovs_key_icmpv6 {
+	__u8 icmpv6_type;
+	__u8 icmpv6_code;
+};
+
+struct ovs_key_arp {
+	__be32 arp_sip;
+	__be32 arp_tip;
+	__be16 arp_op;
+	__u8   arp_sha[6];
+	__u8   arp_tha[6];
+};
+
+struct ovs_key_nd {
+	__u32 nd_target[4];
+	__u8  nd_sll[6];
+	__u8  nd_tll[6];
+};
+
+/**
+ * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
+ * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
+ * key.  Always present in notifications.  Required for all requests (except
+ * dumps).
+ * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying
+ * the actions to take for packets that match the key.  Always present in
+ * notifications.  Required for %OVS_FLOW_CMD_NEW requests, optional for
+ * %OVS_FLOW_CMD_SET requests.
+ * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this
+ * flow.  Present in notifications if the stats would be nonzero.  Ignored in
+ * requests.
+ * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the
+ * TCP flags seen on packets in this flow.  Only present in notifications for
+ * TCP flows, and only if it would be nonzero.  Ignored in requests.
+ * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on
+ * the system monotonic clock, at which a packet was last processed for this
+ * flow.  Only present in notifications if a packet has been processed for this
+ * flow.  Ignored in requests.
+ * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
+ * last-used time, accumulated TCP flags, and statistics for this flow.
+ * Otherwise ignored in requests.  Never present in notifications.
+ *
+ * These attributes follow the &struct ovs_header within the Generic Netlink
+ * payload for %OVS_FLOW_* commands.
+ */
+enum ovs_flow_attr {
+	OVS_FLOW_ATTR_UNSPEC,
+	OVS_FLOW_ATTR_KEY,       /* Sequence of OVS_KEY_ATTR_* attributes. */
+	OVS_FLOW_ATTR_ACTIONS,   /* Nested OVS_ACTION_ATTR_* attributes. */
+	OVS_FLOW_ATTR_STATS,     /* struct ovs_flow_stats. */
+	OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
+	OVS_FLOW_ATTR_USED,      /* u64 msecs last used in monotonic time. */
+	OVS_FLOW_ATTR_CLEAR,     /* Flag to clear stats, tcp_flags, used. */
+	__OVS_FLOW_ATTR_MAX
+};
+
+#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
+
+/**
+ * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
+ * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
+ * @OVS_ACTION_ATTR_SAMPLE.  A value of 0 samples no packets, a value of
+ * %UINT32_MAX samples all packets and intermediate values sample intermediate
+ * fractions of packets.
+ * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
+ * Actions are passed as nested attributes.
+ *
+ * Executes the specified actions with the given probability on a per-packet
+ * basis.
+ */
+enum ovs_sample_attr {
+	OVS_SAMPLE_ATTR_UNSPEC,
+	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
+	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
+	__OVS_SAMPLE_ATTR_MAX,
+};
+
+#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
+
+/**
+ * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
+ * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
+ * message should be sent.  Required.
+ * @OVS_USERSPACE_ATTR_USERDATA: If present, its u64 argument is copied to the
+ * %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA,
+ */
+enum ovs_userspace_attr {
+	OVS_USERSPACE_ATTR_UNSPEC,
+	OVS_USERSPACE_ATTR_PID,	      /* u32 Netlink PID to receive upcalls. */
+	OVS_USERSPACE_ATTR_USERDATA,  /* u64 optional user-specified cookie. */
+	__OVS_USERSPACE_ATTR_MAX
+};
+
+#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
+
+/**
+ * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
+ * @vlan_tpid: Tag protocol identifier (TPID) to push.
+ * @vlan_tci: Tag control identifier (TCI) to push.  The CFI bit must be set
+ * (but it will not be set in the 802.1Q header that is pushed).
+ *
+ * The @vlan_tpid value is typically %ETH_P_8021Q.  The only acceptable TPID
+ * values are those that the kernel module also parses as 802.1Q headers, to
+ * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN
+ * from having surprising results.
+ */
+struct ovs_action_push_vlan {
+	__be16 vlan_tpid;	/* 802.1Q TPID. */
+	__be16 vlan_tci;	/* 802.1Q TCI (VLAN ID and priority). */
+};
+
+/**
+ * enum ovs_action_attr - Action types.
+ *
+ * @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
+ * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested
+ * %OVS_USERSPACE_ATTR_* attributes.
+ * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header.  The
+ * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
+ * value.
+ * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
+ * packet.
+ * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
+ * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in
+ * the nested %OVS_SAMPLE_ATTR_* attributes.
+ *
+ * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
+ * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
+ * type may not be changed.
+ */
+
+enum ovs_action_attr {
+	OVS_ACTION_ATTR_UNSPEC,
+	OVS_ACTION_ATTR_OUTPUT,	      /* u32 port number. */
+	OVS_ACTION_ATTR_USERSPACE,    /* Nested OVS_USERSPACE_ATTR_*. */
+	OVS_ACTION_ATTR_SET,          /* One nested OVS_KEY_ATTR_*. */
+	OVS_ACTION_ATTR_PUSH_VLAN,    /* struct ovs_action_push_vlan. */
+	OVS_ACTION_ATTR_POP_VLAN,     /* No argument. */
+	OVS_ACTION_ATTR_SAMPLE,       /* Nested OVS_SAMPLE_ATTR_*. */
+	__OVS_ACTION_ATTR_MAX
+};
+
+#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
+
+#endif /* _LINUX_OPENVSWITCH_H */
diff --git a/net/Kconfig b/net/Kconfig
index 2d998735c4d8..e07272d0bb2d 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -215,6 +215,7 @@ source "net/sched/Kconfig"
 source "net/dcb/Kconfig"
 source "net/dns_resolver/Kconfig"
 source "net/batman-adv/Kconfig"
+source "net/openvswitch/Kconfig"
 
 config RPS
 	boolean
diff --git a/net/Makefile b/net/Makefile
index acdde4950de4..ad432fa4d934 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -69,3 +69,4 @@ obj-$(CONFIG_DNS_RESOLVER)	+= dns_resolver/
 obj-$(CONFIG_CEPH_LIB)		+= ceph/
 obj-$(CONFIG_BATMAN_ADV)	+= batman-adv/
 obj-$(CONFIG_NFC)		+= nfc/
+obj-$(CONFIG_OPENVSWITCH)	+= openvswitch/
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
new file mode 100644
index 000000000000..d9ea33c361be
--- /dev/null
+++ b/net/openvswitch/Kconfig
@@ -0,0 +1,28 @@
+#
+# Open vSwitch
+#
+
+config OPENVSWITCH
+	tristate "Open vSwitch"
+	---help---
+	  Open vSwitch is a multilayer Ethernet switch targeted at virtualized
+	  environments.  In addition to supporting a variety of features
+	  expected in a traditional hardware switch, it enables fine-grained
+	  programmatic extension and flow-based control of the network.  This
+	  control is useful in a wide variety of applications but is
+	  particularly important in multi-server virtualization deployments,
+	  which are often characterized by highly dynamic endpoints and the
+	  need to maintain logical abstractions for multiple tenants.
+
+	  The Open vSwitch datapath provides an in-kernel fast path for packet
+	  forwarding.  It is complemented by a userspace daemon, ovs-vswitchd,
+	  which is able to accept configuration from a variety of sources and
+	  translate it into packet processing rules.
+
+	  See http://openvswitch.org for more information and userspace
+	  utilities.
+
+	  To compile this code as a module, choose M here: the module will be
+	  called openvswitch.
+
+	  If unsure, say N.
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
new file mode 100644
index 000000000000..15e7384745c1
--- /dev/null
+++ b/net/openvswitch/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for Open vSwitch.
+#
+
+obj-$(CONFIG_OPENVSWITCH) += openvswitch.o
+
+openvswitch-y := \
+	actions.o \
+	datapath.o \
+	dp_notify.o \
+	flow.o \
+	vport.o \
+	vport-internal_dev.o \
+	vport-netdev.o \
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
new file mode 100644
index 000000000000..2725d1bdf291
--- /dev/null
+++ b/net/openvswitch/actions.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/openvswitch.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/in6.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+
+#include "datapath.h"
+#include "vport.h"
+
+static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
+			const struct nlattr *attr, int len, bool keep_skb);
+
+static int make_writable(struct sk_buff *skb, int write_len)
+{
+	if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
+		return 0;
+
+	return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+}
+
+/* remove VLAN header from packet and update csum accrodingly. */
+static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
+{
+	struct vlan_hdr *vhdr;
+	int err;
+
+	err = make_writable(skb, VLAN_ETH_HLEN);
+	if (unlikely(err))
+		return err;
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_sub(skb->csum, csum_partial(skb->data
+					+ ETH_HLEN, VLAN_HLEN, 0));
+
+	vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
+	*current_tci = vhdr->h_vlan_TCI;
+
+	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
+	__skb_pull(skb, VLAN_HLEN);
+
+	vlan_set_encap_proto(skb, vhdr);
+	skb->mac_header += VLAN_HLEN;
+	skb_reset_mac_len(skb);
+
+	return 0;
+}
+
+static int pop_vlan(struct sk_buff *skb)
+{
+	__be16 tci;
+	int err;
+
+	if (likely(vlan_tx_tag_present(skb))) {
+		skb->vlan_tci = 0;
+	} else {
+		if (unlikely(skb->protocol != htons(ETH_P_8021Q) ||
+			     skb->len < VLAN_ETH_HLEN))
+			return 0;
+
+		err = __pop_vlan_tci(skb, &tci);
+		if (err)
+			return err;
+	}
+	/* move next vlan tag to hw accel tag */
+	if (likely(skb->protocol != htons(ETH_P_8021Q) ||
+		   skb->len < VLAN_ETH_HLEN))
+		return 0;
+
+	err = __pop_vlan_tci(skb, &tci);
+	if (unlikely(err))
+		return err;
+
+	__vlan_hwaccel_put_tag(skb, ntohs(tci));
+	return 0;
+}
+
+static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
+{
+	if (unlikely(vlan_tx_tag_present(skb))) {
+		u16 current_tag;
+
+		/* push down current VLAN tag */
+		current_tag = vlan_tx_tag_get(skb);
+
+		if (!__vlan_put_tag(skb, current_tag))
+			return -ENOMEM;
+
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->csum = csum_add(skb->csum, csum_partial(skb->data
+					+ ETH_HLEN, VLAN_HLEN, 0));
+
+	}
+	__vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
+	return 0;
+}
+
+static int set_eth_addr(struct sk_buff *skb,
+			const struct ovs_key_ethernet *eth_key)
+{
+	int err;
+	err = make_writable(skb, ETH_HLEN);
+	if (unlikely(err))
+		return err;
+
+	memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN);
+	memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN);
+
+	return 0;
+}
+
+static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
+				__be32 *addr, __be32 new_addr)
+{
+	int transport_len = skb->len - skb_transport_offset(skb);
+
+	if (nh->protocol == IPPROTO_TCP) {
+		if (likely(transport_len >= sizeof(struct tcphdr)))
+			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
+						 *addr, new_addr, 1);
+	} else if (nh->protocol == IPPROTO_UDP) {
+		if (likely(transport_len >= sizeof(struct udphdr)))
+			inet_proto_csum_replace4(&udp_hdr(skb)->check, skb,
+						 *addr, new_addr, 1);
+	}
+
+	csum_replace4(&nh->check, *addr, new_addr);
+	skb->rxhash = 0;
+	*addr = new_addr;
+}
+
+static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
+{
+	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
+	nh->ttl = new_ttl;
+}
+
+static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
+{
+	struct iphdr *nh;
+	int err;
+
+	err = make_writable(skb, skb_network_offset(skb) +
+				 sizeof(struct iphdr));
+	if (unlikely(err))
+		return err;
+
+	nh = ip_hdr(skb);
+
+	if (ipv4_key->ipv4_src != nh->saddr)
+		set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
+
+	if (ipv4_key->ipv4_dst != nh->daddr)
+		set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
+
+	if (ipv4_key->ipv4_tos != nh->tos)
+		ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
+
+	if (ipv4_key->ipv4_ttl != nh->ttl)
+		set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
+
+	return 0;
+}
+
+/* Must follow make_writable() since that can move the skb data. */
+static void set_tp_port(struct sk_buff *skb, __be16 *port,
+			 __be16 new_port, __sum16 *check)
+{
+	inet_proto_csum_replace2(check, skb, *port, new_port, 0);
+	*port = new_port;
+	skb->rxhash = 0;
+}
+
+static int set_udp_port(struct sk_buff *skb,
+			const struct ovs_key_udp *udp_port_key)
+{
+	struct udphdr *uh;
+	int err;
+
+	err = make_writable(skb, skb_transport_offset(skb) +
+				 sizeof(struct udphdr));
+	if (unlikely(err))
+		return err;
+
+	uh = udp_hdr(skb);
+	if (udp_port_key->udp_src != uh->source)
+		set_tp_port(skb, &uh->source, udp_port_key->udp_src, &uh->check);
+
+	if (udp_port_key->udp_dst != uh->dest)
+		set_tp_port(skb, &uh->dest, udp_port_key->udp_dst, &uh->check);
+
+	return 0;
+}
+
+static int set_tcp_port(struct sk_buff *skb,
+			const struct ovs_key_tcp *tcp_port_key)
+{
+	struct tcphdr *th;
+	int err;
+
+	err = make_writable(skb, skb_transport_offset(skb) +
+				 sizeof(struct tcphdr));
+	if (unlikely(err))
+		return err;
+
+	th = tcp_hdr(skb);
+	if (tcp_port_key->tcp_src != th->source)
+		set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
+
+	if (tcp_port_key->tcp_dst != th->dest)
+		set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
+
+	return 0;
+}
+
+static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
+{
+	struct vport *vport;
+
+	if (unlikely(!skb))
+		return -ENOMEM;
+
+	vport = rcu_dereference(dp->ports[out_port]);
+	if (unlikely(!vport)) {
+		kfree_skb(skb);
+		return -ENODEV;
+	}
+
+	ovs_vport_send(vport, skb);
+	return 0;
+}
+
+static int output_userspace(struct datapath *dp, struct sk_buff *skb,
+			    const struct nlattr *attr)
+{
+	struct dp_upcall_info upcall;
+	const struct nlattr *a;
+	int rem;
+
+	upcall.cmd = OVS_PACKET_CMD_ACTION;
+	upcall.key = &OVS_CB(skb)->flow->key;
+	upcall.userdata = NULL;
+	upcall.pid = 0;
+
+	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
+		 a = nla_next(a, &rem)) {
+		switch (nla_type(a)) {
+		case OVS_USERSPACE_ATTR_USERDATA:
+			upcall.userdata = a;
+			break;
+
+		case OVS_USERSPACE_ATTR_PID:
+			upcall.pid = nla_get_u32(a);
+			break;
+		}
+	}
+
+	return ovs_dp_upcall(dp, skb, &upcall);
+}
+
+static int sample(struct datapath *dp, struct sk_buff *skb,
+		  const struct nlattr *attr)
+{
+	const struct nlattr *acts_list = NULL;
+	const struct nlattr *a;
+	int rem;
+
+	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
+		 a = nla_next(a, &rem)) {
+		switch (nla_type(a)) {
+		case OVS_SAMPLE_ATTR_PROBABILITY:
+			if (net_random() >= nla_get_u32(a))
+				return 0;
+			break;
+
+		case OVS_SAMPLE_ATTR_ACTIONS:
+			acts_list = a;
+			break;
+		}
+	}
+
+	return do_execute_actions(dp, skb, nla_data(acts_list),
+						 nla_len(acts_list), true);
+}
+
+static int execute_set_action(struct sk_buff *skb,
+				 const struct nlattr *nested_attr)
+{
+	int err = 0;
+
+	switch (nla_type(nested_attr)) {
+	case OVS_KEY_ATTR_PRIORITY:
+		skb->priority = nla_get_u32(nested_attr);
+		break;
+
+	case OVS_KEY_ATTR_ETHERNET:
+		err = set_eth_addr(skb, nla_data(nested_attr));
+		break;
+
+	case OVS_KEY_ATTR_IPV4:
+		err = set_ipv4(skb, nla_data(nested_attr));
+		break;
+
+	case OVS_KEY_ATTR_TCP:
+		err = set_tcp_port(skb, nla_data(nested_attr));
+		break;
+
+	case OVS_KEY_ATTR_UDP:
+		err = set_udp_port(skb, nla_data(nested_attr));
+		break;
+	}
+
+	return err;
+}
+
+/* Execute a list of actions against 'skb'. */
+static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
+			const struct nlattr *attr, int len, bool keep_skb)
+{
+	/* Every output action needs a separate clone of 'skb', but the common
+	 * case is just a single output action, so that doing a clone and
+	 * then freeing the original skbuff is wasteful.  So the following code
+	 * is slightly obscure just to avoid that. */
+	int prev_port = -1;
+	const struct nlattr *a;
+	int rem;
+
+	for (a = attr, rem = len; rem > 0;
+	     a = nla_next(a, &rem)) {
+		int err = 0;
+
+		if (prev_port != -1) {
+			do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port);
+			prev_port = -1;
+		}
+
+		switch (nla_type(a)) {
+		case OVS_ACTION_ATTR_OUTPUT:
+			prev_port = nla_get_u32(a);
+			break;
+
+		case OVS_ACTION_ATTR_USERSPACE:
+			output_userspace(dp, skb, a);
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_VLAN:
+			err = push_vlan(skb, nla_data(a));
+			if (unlikely(err)) /* skb already freed. */
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_POP_VLAN:
+			err = pop_vlan(skb);
+			break;
+
+		case OVS_ACTION_ATTR_SET:
+			err = execute_set_action(skb, nla_data(a));
+			break;
+
+		case OVS_ACTION_ATTR_SAMPLE:
+			err = sample(dp, skb, a);
+			break;
+		}
+
+		if (unlikely(err)) {
+			kfree_skb(skb);
+			return err;
+		}
+	}
+
+	if (prev_port != -1) {
+		if (keep_skb)
+			skb = skb_clone(skb, GFP_ATOMIC);
+
+		do_output(dp, skb, prev_port);
+	} else if (!keep_skb)
+		consume_skb(skb);
+
+	return 0;
+}
+
+/* Execute a list of actions against 'skb'. */
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
+{
+	struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
+
+	return do_execute_actions(dp, skb, acts->actions,
+					 acts->actions_len, false);
+}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
new file mode 100644
index 000000000000..9a2725114e99
--- /dev/null
+++ b/net/openvswitch/datapath.c
@@ -0,0 +1,1912 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/jhash.h>
+#include <linux/delay.h>
+#include <linux/time.h>
+#include <linux/etherdevice.h>
+#include <linux/genetlink.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/version.h>
+#include <linux/ethtool.h>
+#include <linux/wait.h>
+#include <asm/system.h>
+#include <asm/div64.h>
+#include <linux/highmem.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/inetdevice.h>
+#include <linux/list.h>
+#include <linux/openvswitch.h>
+#include <linux/rculist.h>
+#include <linux/dmi.h>
+#include <linux/workqueue.h>
+#include <net/genetlink.h>
+
+#include "datapath.h"
+#include "flow.h"
+#include "vport-internal_dev.h"
+
+/**
+ * DOC: Locking:
+ *
+ * Writes to device state (add/remove datapath, port, set operations on vports,
+ * etc.) are protected by RTNL.
+ *
+ * Writes to other state (flow table modifications, set miscellaneous datapath
+ * parameters, etc.) are protected by genl_mutex.  The RTNL lock nests inside
+ * genl_mutex.
+ *
+ * Reads are protected by RCU.
+ *
+ * There are a few special cases (mostly stats) that have their own
+ * synchronization but they nest under all of above and don't interact with
+ * each other.
+ */
+
+/* Global list of datapaths to enable dumping them all out.
+ * Protected by genl_mutex.
+ */
+static LIST_HEAD(dps);
+
+#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
+static void rehash_flow_table(struct work_struct *work);
+static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
+
+static struct vport *new_vport(const struct vport_parms *);
+static int queue_gso_packets(int dp_ifindex, struct sk_buff *,
+			     const struct dp_upcall_info *);
+static int queue_userspace_packet(int dp_ifindex, struct sk_buff *,
+				  const struct dp_upcall_info *);
+
+/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
+static struct datapath *get_dp(int dp_ifindex)
+{
+	struct datapath *dp = NULL;
+	struct net_device *dev;
+
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
+	if (dev) {
+		struct vport *vport = ovs_internal_dev_get_vport(dev);
+		if (vport)
+			dp = vport->dp;
+	}
+	rcu_read_unlock();
+
+	return dp;
+}
+
+/* Must be called with rcu_read_lock or RTNL lock. */
+const char *ovs_dp_name(const struct datapath *dp)
+{
+	struct vport *vport = rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]);
+	return vport->ops->get_name(vport);
+}
+
+static int get_dpifindex(struct datapath *dp)
+{
+	struct vport *local;
+	int ifindex;
+
+	rcu_read_lock();
+
+	local = rcu_dereference(dp->ports[OVSP_LOCAL]);
+	if (local)
+		ifindex = local->ops->get_ifindex(local);
+	else
+		ifindex = 0;
+
+	rcu_read_unlock();
+
+	return ifindex;
+}
+
+static void destroy_dp_rcu(struct rcu_head *rcu)
+{
+	struct datapath *dp = container_of(rcu, struct datapath, rcu);
+
+	ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
+	free_percpu(dp->stats_percpu);
+	kfree(dp);
+}
+
+/* Called with RTNL lock and genl_lock. */
+static struct vport *new_vport(const struct vport_parms *parms)
+{
+	struct vport *vport;
+
+	vport = ovs_vport_add(parms);
+	if (!IS_ERR(vport)) {
+		struct datapath *dp = parms->dp;
+
+		rcu_assign_pointer(dp->ports[parms->port_no], vport);
+		list_add(&vport->node, &dp->port_list);
+	}
+
+	return vport;
+}
+
+/* Called with RTNL lock. */
+void ovs_dp_detach_port(struct vport *p)
+{
+	ASSERT_RTNL();
+
+	/* First drop references to device. */
+	list_del(&p->node);
+	rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
+
+	/* Then destroy it. */
+	ovs_vport_del(p);
+}
+
+/* Must be called with rcu_read_lock. */
+void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
+{
+	struct datapath *dp = p->dp;
+	struct sw_flow *flow;
+	struct dp_stats_percpu *stats;
+	struct sw_flow_key key;
+	u64 *stats_counter;
+	int error;
+	int key_len;
+
+	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+	/* Extract flow from 'skb' into 'key'. */
+	error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
+	if (unlikely(error)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	/* Look up flow. */
+	flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
+	if (unlikely(!flow)) {
+		struct dp_upcall_info upcall;
+
+		upcall.cmd = OVS_PACKET_CMD_MISS;
+		upcall.key = &key;
+		upcall.userdata = NULL;
+		upcall.pid = p->upcall_pid;
+		ovs_dp_upcall(dp, skb, &upcall);
+		consume_skb(skb);
+		stats_counter = &stats->n_missed;
+		goto out;
+	}
+
+	OVS_CB(skb)->flow = flow;
+
+	stats_counter = &stats->n_hit;
+	ovs_flow_used(OVS_CB(skb)->flow, skb);
+	ovs_execute_actions(dp, skb);
+
+out:
+	/* Update datapath statistics. */
+	u64_stats_update_begin(&stats->sync);
+	(*stats_counter)++;
+	u64_stats_update_end(&stats->sync);
+}
+
+static struct genl_family dp_packet_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_PACKET_FAMILY,
+	.version = OVS_PACKET_VERSION,
+	.maxattr = OVS_PACKET_ATTR_MAX
+};
+
+int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
+	      const struct dp_upcall_info *upcall_info)
+{
+	struct dp_stats_percpu *stats;
+	int dp_ifindex;
+	int err;
+
+	if (upcall_info->pid == 0) {
+		err = -ENOTCONN;
+		goto err;
+	}
+
+	dp_ifindex = get_dpifindex(dp);
+	if (!dp_ifindex) {
+		err = -ENODEV;
+		goto err;
+	}
+
+	if (!skb_is_gso(skb))
+		err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
+	else
+		err = queue_gso_packets(dp_ifindex, skb, upcall_info);
+	if (err)
+		goto err;
+
+	return 0;
+
+err:
+	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+	u64_stats_update_begin(&stats->sync);
+	stats->n_lost++;
+	u64_stats_update_end(&stats->sync);
+
+	return err;
+}
+
+static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
+			     const struct dp_upcall_info *upcall_info)
+{
+	struct dp_upcall_info later_info;
+	struct sw_flow_key later_key;
+	struct sk_buff *segs, *nskb;
+	int err;
+
+	segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	/* Queue all of the segments. */
+	skb = segs;
+	do {
+		err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
+		if (err)
+			break;
+
+		if (skb == segs && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) {
+			/* The initial flow key extracted by ovs_flow_extract()
+			 * in this case is for a first fragment, so we need to
+			 * properly mark later fragments.
+			 */
+			later_key = *upcall_info->key;
+			later_key.ip.frag = OVS_FRAG_TYPE_LATER;
+
+			later_info = *upcall_info;
+			later_info.key = &later_key;
+			upcall_info = &later_info;
+		}
+	} while ((skb = skb->next));
+
+	/* Free all of the segments. */
+	skb = segs;
+	do {
+		nskb = skb->next;
+		if (err)
+			kfree_skb(skb);
+		else
+			consume_skb(skb);
+	} while ((skb = nskb));
+	return err;
+}
+
+static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
+				  const struct dp_upcall_info *upcall_info)
+{
+	struct ovs_header *upcall;
+	struct sk_buff *nskb = NULL;
+	struct sk_buff *user_skb; /* to be queued to userspace */
+	struct nlattr *nla;
+	unsigned int len;
+	int err;
+
+	if (vlan_tx_tag_present(skb)) {
+		nskb = skb_clone(skb, GFP_ATOMIC);
+		if (!nskb)
+			return -ENOMEM;
+
+		nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb));
+		if (!skb)
+			return -ENOMEM;
+
+		nskb->vlan_tci = 0;
+		skb = nskb;
+	}
+
+	if (nla_attr_size(skb->len) > USHRT_MAX) {
+		err = -EFBIG;
+		goto out;
+	}
+
+	len = sizeof(struct ovs_header);
+	len += nla_total_size(skb->len);
+	len += nla_total_size(FLOW_BUFSIZE);
+	if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
+		len += nla_total_size(8);
+
+	user_skb = genlmsg_new(len, GFP_ATOMIC);
+	if (!user_skb) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
+			     0, upcall_info->cmd);
+	upcall->dp_ifindex = dp_ifindex;
+
+	nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
+	ovs_flow_to_nlattrs(upcall_info->key, user_skb);
+	nla_nest_end(user_skb, nla);
+
+	if (upcall_info->userdata)
+		nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
+			    nla_get_u64(upcall_info->userdata));
+
+	nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
+
+	skb_copy_and_csum_dev(skb, nla_data(nla));
+
+	err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
+
+out:
+	kfree_skb(nskb);
+	return err;
+}
+
+/* Called with genl_mutex. */
+static int flush_flows(int dp_ifindex)
+{
+	struct flow_table *old_table;
+	struct flow_table *new_table;
+	struct datapath *dp;
+
+	dp = get_dp(dp_ifindex);
+	if (!dp)
+		return -ENODEV;
+
+	old_table = genl_dereference(dp->table);
+	new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
+	if (!new_table)
+		return -ENOMEM;
+
+	rcu_assign_pointer(dp->table, new_table);
+
+	ovs_flow_tbl_deferred_destroy(old_table);
+	return 0;
+}
+
+static int validate_actions(const struct nlattr *attr,
+				const struct sw_flow_key *key, int depth);
+
+static int validate_sample(const struct nlattr *attr,
+				const struct sw_flow_key *key, int depth)
+{
+	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
+	const struct nlattr *probability, *actions;
+	const struct nlattr *a;
+	int rem;
+
+	memset(attrs, 0, sizeof(attrs));
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
+			return -EINVAL;
+		attrs[type] = a;
+	}
+	if (rem)
+		return -EINVAL;
+
+	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
+	if (!probability || nla_len(probability) != sizeof(u32))
+		return -EINVAL;
+
+	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
+	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
+		return -EINVAL;
+	return validate_actions(actions, key, depth + 1);
+}
+
+static int validate_set(const struct nlattr *a,
+			const struct sw_flow_key *flow_key)
+{
+	const struct nlattr *ovs_key = nla_data(a);
+	int key_type = nla_type(ovs_key);
+
+	/* There can be only one key in a action */
+	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
+		return -EINVAL;
+
+	if (key_type > OVS_KEY_ATTR_MAX ||
+	    nla_len(ovs_key) != ovs_key_lens[key_type])
+		return -EINVAL;
+
+	switch (key_type) {
+	const struct ovs_key_ipv4 *ipv4_key;
+
+	case OVS_KEY_ATTR_PRIORITY:
+	case OVS_KEY_ATTR_ETHERNET:
+		break;
+
+	case OVS_KEY_ATTR_IPV4:
+		if (flow_key->eth.type != htons(ETH_P_IP))
+			return -EINVAL;
+
+		if (!flow_key->ipv4.addr.src || !flow_key->ipv4.addr.dst)
+			return -EINVAL;
+
+		ipv4_key = nla_data(ovs_key);
+		if (ipv4_key->ipv4_proto != flow_key->ip.proto)
+			return -EINVAL;
+
+		if (ipv4_key->ipv4_frag != flow_key->ip.frag)
+			return -EINVAL;
+
+		break;
+
+	case OVS_KEY_ATTR_TCP:
+		if (flow_key->ip.proto != IPPROTO_TCP)
+			return -EINVAL;
+
+		if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst)
+			return -EINVAL;
+
+		break;
+
+	case OVS_KEY_ATTR_UDP:
+		if (flow_key->ip.proto != IPPROTO_UDP)
+			return -EINVAL;
+
+		if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst)
+			return -EINVAL;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int validate_userspace(const struct nlattr *attr)
+{
+	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] =	{
+		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
+		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 },
+	};
+	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
+	int error;
+
+	error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
+				 attr, userspace_policy);
+	if (error)
+		return error;
+
+	if (!a[OVS_USERSPACE_ATTR_PID] ||
+	    !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int validate_actions(const struct nlattr *attr,
+				const struct sw_flow_key *key,  int depth)
+{
+	const struct nlattr *a;
+	int rem, err;
+
+	if (depth >= SAMPLE_ACTION_DEPTH)
+		return -EOVERFLOW;
+
+	nla_for_each_nested(a, attr, rem) {
+		/* Expected argument lengths, (u32)-1 for variable length. */
+		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
+			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
+			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
+			[OVS_ACTION_ATTR_POP_VLAN] = 0,
+			[OVS_ACTION_ATTR_SET] = (u32)-1,
+			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1
+		};
+		const struct ovs_action_push_vlan *vlan;
+		int type = nla_type(a);
+
+		if (type > OVS_ACTION_ATTR_MAX ||
+		    (action_lens[type] != nla_len(a) &&
+		     action_lens[type] != (u32)-1))
+			return -EINVAL;
+
+		switch (type) {
+		case OVS_ACTION_ATTR_UNSPEC:
+			return -EINVAL;
+
+		case OVS_ACTION_ATTR_USERSPACE:
+			err = validate_userspace(a);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_OUTPUT:
+			if (nla_get_u32(a) >= DP_MAX_PORTS)
+				return -EINVAL;
+			break;
+
+
+		case OVS_ACTION_ATTR_POP_VLAN:
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_VLAN:
+			vlan = nla_data(a);
+			if (vlan->vlan_tpid != htons(ETH_P_8021Q))
+				return -EINVAL;
+			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
+				return -EINVAL;
+			break;
+
+		case OVS_ACTION_ATTR_SET:
+			err = validate_set(a, key);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_SAMPLE:
+			err = validate_sample(a, key, depth);
+			if (err)
+				return err;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	}
+
+	if (rem > 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void clear_stats(struct sw_flow *flow)
+{
+	flow->used = 0;
+	flow->tcp_flags = 0;
+	flow->packet_count = 0;
+	flow->byte_count = 0;
+}
+
+static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ovs_header *ovs_header = info->userhdr;
+	struct nlattr **a = info->attrs;
+	struct sw_flow_actions *acts;
+	struct sk_buff *packet;
+	struct sw_flow *flow;
+	struct datapath *dp;
+	struct ethhdr *eth;
+	int len;
+	int err;
+	int key_len;
+
+	err = -EINVAL;
+	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
+	    !a[OVS_PACKET_ATTR_ACTIONS] ||
+	    nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
+		goto err;
+
+	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
+	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
+	err = -ENOMEM;
+	if (!packet)
+		goto err;
+	skb_reserve(packet, NET_IP_ALIGN);
+
+	memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);
+
+	skb_reset_mac_header(packet);
+	eth = eth_hdr(packet);
+
+	/* Normally, setting the skb 'protocol' field would be handled by a
+	 * call to eth_type_trans(), but it assumes there's a sending
+	 * device, which we may not have. */
+	if (ntohs(eth->h_proto) >= 1536)
+		packet->protocol = eth->h_proto;
+	else
+		packet->protocol = htons(ETH_P_802_2);
+
+	/* Build an sw_flow for sending this packet. */
+	flow = ovs_flow_alloc();
+	err = PTR_ERR(flow);
+	if (IS_ERR(flow))
+		goto err_kfree_skb;
+
+	err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
+	if (err)
+		goto err_flow_free;
+
+	err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
+					     &flow->key.phy.in_port,
+					     a[OVS_PACKET_ATTR_KEY]);
+	if (err)
+		goto err_flow_free;
+
+	err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0);
+	if (err)
+		goto err_flow_free;
+
+	flow->hash = ovs_flow_hash(&flow->key, key_len);
+
+	acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
+	err = PTR_ERR(acts);
+	if (IS_ERR(acts))
+		goto err_flow_free;
+	rcu_assign_pointer(flow->sf_acts, acts);
+
+	OVS_CB(packet)->flow = flow;
+	packet->priority = flow->key.phy.priority;
+
+	rcu_read_lock();
+	dp = get_dp(ovs_header->dp_ifindex);
+	err = -ENODEV;
+	if (!dp)
+		goto err_unlock;
+
+	local_bh_disable();
+	err = ovs_execute_actions(dp, packet);
+	local_bh_enable();
+	rcu_read_unlock();
+
+	ovs_flow_free(flow);
+	return err;
+
+err_unlock:
+	rcu_read_unlock();
+err_flow_free:
+	ovs_flow_free(flow);
+err_kfree_skb:
+	kfree_skb(packet);
+err:
+	return err;
+}
+
+static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
+	[OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
+	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
+	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
+};
+
+static struct genl_ops dp_packet_genl_ops[] = {
+	{ .cmd = OVS_PACKET_CMD_EXECUTE,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = packet_policy,
+	  .doit = ovs_packet_cmd_execute
+	}
+};
+
+static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
+{
+	int i;
+	struct flow_table *table = genl_dereference(dp->table);
+
+	stats->n_flows = ovs_flow_tbl_count(table);
+
+	stats->n_hit = stats->n_missed = stats->n_lost = 0;
+	for_each_possible_cpu(i) {
+		const struct dp_stats_percpu *percpu_stats;
+		struct dp_stats_percpu local_stats;
+		unsigned int start;
+
+		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
+
+		do {
+			start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
+			local_stats = *percpu_stats;
+		} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
+
+		stats->n_hit += local_stats.n_hit;
+		stats->n_missed += local_stats.n_missed;
+		stats->n_lost += local_stats.n_lost;
+	}
+}
+
+static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
+	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
+	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
+	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+};
+
+static struct genl_family dp_flow_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_FLOW_FAMILY,
+	.version = OVS_FLOW_VERSION,
+	.maxattr = OVS_FLOW_ATTR_MAX
+};
+
+static struct genl_multicast_group ovs_dp_flow_multicast_group = {
+	.name = OVS_FLOW_MCGROUP
+};
+
+/* Called with genl_lock. */
+static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
+				  struct sk_buff *skb, u32 pid,
+				  u32 seq, u32 flags, u8 cmd)
+{
+	const int skb_orig_len = skb->len;
+	const struct sw_flow_actions *sf_acts;
+	struct ovs_flow_stats stats;
+	struct ovs_header *ovs_header;
+	struct nlattr *nla;
+	unsigned long used;
+	u8 tcp_flags;
+	int err;
+
+	sf_acts = rcu_dereference_protected(flow->sf_acts,
+					    lockdep_genl_is_held());
+
+	ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
+	if (!ovs_header)
+		return -EMSGSIZE;
+
+	ovs_header->dp_ifindex = get_dpifindex(dp);
+
+	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
+	if (!nla)
+		goto nla_put_failure;
+	err = ovs_flow_to_nlattrs(&flow->key, skb);
+	if (err)
+		goto error;
+	nla_nest_end(skb, nla);
+
+	spin_lock_bh(&flow->lock);
+	used = flow->used;
+	stats.n_packets = flow->packet_count;
+	stats.n_bytes = flow->byte_count;
+	tcp_flags = flow->tcp_flags;
+	spin_unlock_bh(&flow->lock);
+
+	if (used)
+		NLA_PUT_U64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used));
+
+	if (stats.n_packets)
+		NLA_PUT(skb, OVS_FLOW_ATTR_STATS,
+			sizeof(struct ovs_flow_stats), &stats);
+
+	if (tcp_flags)
+		NLA_PUT_U8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags);
+
+	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
+	 * this is the first flow to be dumped into 'skb'.  This is unusual for
+	 * Netlink but individual action lists can be longer than
+	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
+	 * The userspace caller can always fetch the actions separately if it
+	 * really wants them.  (Most userspace callers in fact don't care.)
+	 *
+	 * This can only fail for dump operations because the skb is always
+	 * properly sized for single flows.
+	 */
+	err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
+		      sf_acts->actions);
+	if (err < 0 && skb_orig_len)
+		goto error;
+
+	return genlmsg_end(skb, ovs_header);
+
+nla_put_failure:
+	err = -EMSGSIZE;
+error:
+	genlmsg_cancel(skb, ovs_header);
+	return err;
+}
+
+static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
+{
+	const struct sw_flow_actions *sf_acts;
+	int len;
+
+	sf_acts = rcu_dereference_protected(flow->sf_acts,
+					    lockdep_genl_is_held());
+
+	/* OVS_FLOW_ATTR_KEY */
+	len = nla_total_size(FLOW_BUFSIZE);
+	/* OVS_FLOW_ATTR_ACTIONS */
+	len += nla_total_size(sf_acts->actions_len);
+	/* OVS_FLOW_ATTR_STATS */
+	len += nla_total_size(sizeof(struct ovs_flow_stats));
+	/* OVS_FLOW_ATTR_TCP_FLAGS */
+	len += nla_total_size(1);
+	/* OVS_FLOW_ATTR_USED */
+	len += nla_total_size(8);
+
+	len += NLMSG_ALIGN(sizeof(struct ovs_header));
+
+	return genlmsg_new(len, GFP_KERNEL);
+}
+
+static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
+					       struct datapath *dp,
+					       u32 pid, u32 seq, u8 cmd)
+{
+	struct sk_buff *skb;
+	int retval;
+
+	skb = ovs_flow_cmd_alloc_info(flow);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
+	BUG_ON(retval < 0);
+	return skb;
+}
+
+static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sw_flow_key key;
+	struct sw_flow *flow;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	struct flow_table *table;
+	int error;
+	int key_len;
+
+	/* Extract key. */
+	error = -EINVAL;
+	if (!a[OVS_FLOW_ATTR_KEY])
+		goto error;
+	error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+	if (error)
+		goto error;
+
+	/* Validate actions. */
+	if (a[OVS_FLOW_ATTR_ACTIONS]) {
+		error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key,  0);
+		if (error)
+			goto error;
+	} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
+		error = -EINVAL;
+		goto error;
+	}
+
+	dp = get_dp(ovs_header->dp_ifindex);
+	error = -ENODEV;
+	if (!dp)
+		goto error;
+
+	table = genl_dereference(dp->table);
+	flow = ovs_flow_tbl_lookup(table, &key, key_len);
+	if (!flow) {
+		struct sw_flow_actions *acts;
+
+		/* Bail out if we're not allowed to create a new flow. */
+		error = -ENOENT;
+		if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
+			goto error;
+
+		/* Expand table, if necessary, to make room. */
+		if (ovs_flow_tbl_need_to_expand(table)) {
+			struct flow_table *new_table;
+
+			new_table = ovs_flow_tbl_expand(table);
+			if (!IS_ERR(new_table)) {
+				rcu_assign_pointer(dp->table, new_table);
+				ovs_flow_tbl_deferred_destroy(table);
+				table = genl_dereference(dp->table);
+			}
+		}
+
+		/* Allocate flow. */
+		flow = ovs_flow_alloc();
+		if (IS_ERR(flow)) {
+			error = PTR_ERR(flow);
+			goto error;
+		}
+		flow->key = key;
+		clear_stats(flow);
+
+		/* Obtain actions. */
+		acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
+		error = PTR_ERR(acts);
+		if (IS_ERR(acts))
+			goto error_free_flow;
+		rcu_assign_pointer(flow->sf_acts, acts);
+
+		/* Put flow in bucket. */
+		flow->hash = ovs_flow_hash(&key, key_len);
+		ovs_flow_tbl_insert(table, flow);
+
+		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+						info->snd_seq,
+						OVS_FLOW_CMD_NEW);
+	} else {
+		/* We found a matching flow. */
+		struct sw_flow_actions *old_acts;
+		struct nlattr *acts_attrs;
+
+		/* Bail out if we're not allowed to modify an existing flow.
+		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
+		 * because Generic Netlink treats the latter as a dump
+		 * request.  We also accept NLM_F_EXCL in case that bug ever
+		 * gets fixed.
+		 */
+		error = -EEXIST;
+		if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
+		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
+			goto error;
+
+		/* Update actions. */
+		old_acts = rcu_dereference_protected(flow->sf_acts,
+						     lockdep_genl_is_held());
+		acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
+		if (acts_attrs &&
+		   (old_acts->actions_len != nla_len(acts_attrs) ||
+		   memcmp(old_acts->actions, nla_data(acts_attrs),
+			  old_acts->actions_len))) {
+			struct sw_flow_actions *new_acts;
+
+			new_acts = ovs_flow_actions_alloc(acts_attrs);
+			error = PTR_ERR(new_acts);
+			if (IS_ERR(new_acts))
+				goto error;
+
+			rcu_assign_pointer(flow->sf_acts, new_acts);
+			ovs_flow_deferred_free_acts(old_acts);
+		}
+
+		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+					       info->snd_seq, OVS_FLOW_CMD_NEW);
+
+		/* Clear stats. */
+		if (a[OVS_FLOW_ATTR_CLEAR]) {
+			spin_lock_bh(&flow->lock);
+			clear_stats(flow);
+			spin_unlock_bh(&flow->lock);
+		}
+	}
+
+	if (!IS_ERR(reply))
+		genl_notify(reply, genl_info_net(info), info->snd_pid,
+			   ovs_dp_flow_multicast_group.id, info->nlhdr,
+			   GFP_KERNEL);
+	else
+		netlink_set_err(init_net.genl_sock, 0,
+				ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
+	return 0;
+
+error_free_flow:
+	ovs_flow_free(flow);
+error:
+	return error;
+}
+
+static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sw_flow_key key;
+	struct sk_buff *reply;
+	struct sw_flow *flow;
+	struct datapath *dp;
+	struct flow_table *table;
+	int err;
+	int key_len;
+
+	if (!a[OVS_FLOW_ATTR_KEY])
+		return -EINVAL;
+	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+	if (err)
+		return err;
+
+	dp = get_dp(ovs_header->dp_ifindex);
+	if (!dp)
+		return -ENODEV;
+
+	table = genl_dereference(dp->table);
+	flow = ovs_flow_tbl_lookup(table, &key, key_len);
+	if (!flow)
+		return -ENOENT;
+
+	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+					info->snd_seq, OVS_FLOW_CMD_NEW);
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	return genlmsg_reply(reply, info);
+}
+
+static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sw_flow_key key;
+	struct sk_buff *reply;
+	struct sw_flow *flow;
+	struct datapath *dp;
+	struct flow_table *table;
+	int err;
+	int key_len;
+
+	if (!a[OVS_FLOW_ATTR_KEY])
+		return flush_flows(ovs_header->dp_ifindex);
+	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+	if (err)
+		return err;
+
+	dp = get_dp(ovs_header->dp_ifindex);
+	if (!dp)
+		return -ENODEV;
+
+	table = genl_dereference(dp->table);
+	flow = ovs_flow_tbl_lookup(table, &key, key_len);
+	if (!flow)
+		return -ENOENT;
+
+	reply = ovs_flow_cmd_alloc_info(flow);
+	if (!reply)
+		return -ENOMEM;
+
+	ovs_flow_tbl_remove(table, flow);
+
+	err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
+				     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
+	BUG_ON(err < 0);
+
+	ovs_flow_deferred_free(flow);
+
+	genl_notify(reply, genl_info_net(info), info->snd_pid,
+		    ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
+	return 0;
+}
+
+static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+	struct datapath *dp;
+	struct flow_table *table;
+
+	dp = get_dp(ovs_header->dp_ifindex);
+	if (!dp)
+		return -ENODEV;
+
+	table = genl_dereference(dp->table);
+
+	for (;;) {
+		struct sw_flow *flow;
+		u32 bucket, obj;
+
+		bucket = cb->args[0];
+		obj = cb->args[1];
+		flow = ovs_flow_tbl_next(table, &bucket, &obj);
+		if (!flow)
+			break;
+
+		if (ovs_flow_cmd_fill_info(flow, dp, skb,
+					   NETLINK_CB(cb->skb).pid,
+					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					   OVS_FLOW_CMD_NEW) < 0)
+			break;
+
+		cb->args[0] = bucket;
+		cb->args[1] = obj;
+	}
+	return skb->len;
+}
+
+static struct genl_ops dp_flow_genl_ops[] = {
+	{ .cmd = OVS_FLOW_CMD_NEW,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = flow_policy,
+	  .doit = ovs_flow_cmd_new_or_set
+	},
+	{ .cmd = OVS_FLOW_CMD_DEL,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = flow_policy,
+	  .doit = ovs_flow_cmd_del
+	},
+	{ .cmd = OVS_FLOW_CMD_GET,
+	  .flags = 0,		    /* OK for unprivileged users. */
+	  .policy = flow_policy,
+	  .doit = ovs_flow_cmd_get,
+	  .dumpit = ovs_flow_cmd_dump
+	},
+	{ .cmd = OVS_FLOW_CMD_SET,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = flow_policy,
+	  .doit = ovs_flow_cmd_new_or_set,
+	},
+};
+
+static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
+	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+};
+
+static struct genl_family dp_datapath_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_DATAPATH_FAMILY,
+	.version = OVS_DATAPATH_VERSION,
+	.maxattr = OVS_DP_ATTR_MAX
+};
+
+static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
+	.name = OVS_DATAPATH_MCGROUP
+};
+
+static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
+				u32 pid, u32 seq, u32 flags, u8 cmd)
+{
+	struct ovs_header *ovs_header;
+	struct ovs_dp_stats dp_stats;
+	int err;
+
+	ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
+				   flags, cmd);
+	if (!ovs_header)
+		goto error;
+
+	ovs_header->dp_ifindex = get_dpifindex(dp);
+
+	rcu_read_lock();
+	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
+	rcu_read_unlock();
+	if (err)
+		goto nla_put_failure;
+
+	get_dp_stats(dp, &dp_stats);
+	NLA_PUT(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats);
+
+	return genlmsg_end(skb, ovs_header);
+
+nla_put_failure:
+	genlmsg_cancel(skb, ovs_header);
+error:
+	return -EMSGSIZE;
+}
+
+static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
+					     u32 seq, u8 cmd)
+{
+	struct sk_buff *skb;
+	int retval;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
+	if (retval < 0) {
+		kfree_skb(skb);
+		return ERR_PTR(retval);
+	}
+	return skb;
+}
+
+/* Called with genl_mutex and optionally with RTNL lock also. */
+static struct datapath *lookup_datapath(struct ovs_header *ovs_header,
+					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
+{
+	struct datapath *dp;
+
+	if (!a[OVS_DP_ATTR_NAME])
+		dp = get_dp(ovs_header->dp_ifindex);
+	else {
+		struct vport *vport;
+
+		rcu_read_lock();
+		vport = ovs_vport_locate(nla_data(a[OVS_DP_ATTR_NAME]));
+		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
+		rcu_read_unlock();
+	}
+	return dp ? dp : ERR_PTR(-ENODEV);
+}
+
+static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct vport_parms parms;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	struct vport *vport;
+	int err;
+
+	err = -EINVAL;
+	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
+		goto err;
+
+	rtnl_lock();
+	err = -ENODEV;
+	if (!try_module_get(THIS_MODULE))
+		goto err_unlock_rtnl;
+
+	err = -ENOMEM;
+	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+	if (dp == NULL)
+		goto err_put_module;
+	INIT_LIST_HEAD(&dp->port_list);
+
+	/* Allocate table. */
+	err = -ENOMEM;
+	rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
+	if (!dp->table)
+		goto err_free_dp;
+
+	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
+	if (!dp->stats_percpu) {
+		err = -ENOMEM;
+		goto err_destroy_table;
+	}
+
+	/* Set up our datapath device. */
+	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
+	parms.type = OVS_VPORT_TYPE_INTERNAL;
+	parms.options = NULL;
+	parms.dp = dp;
+	parms.port_no = OVSP_LOCAL;
+	parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
+
+	vport = new_vport(&parms);
+	if (IS_ERR(vport)) {
+		err = PTR_ERR(vport);
+		if (err == -EBUSY)
+			err = -EEXIST;
+
+		goto err_destroy_percpu;
+	}
+
+	reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+				      info->snd_seq, OVS_DP_CMD_NEW);
+	err = PTR_ERR(reply);
+	if (IS_ERR(reply))
+		goto err_destroy_local_port;
+
+	list_add_tail(&dp->list_node, &dps);
+	rtnl_unlock();
+
+	genl_notify(reply, genl_info_net(info), info->snd_pid,
+		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
+		    GFP_KERNEL);
+	return 0;
+
+err_destroy_local_port:
+	ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL]));
+err_destroy_percpu:
+	free_percpu(dp->stats_percpu);
+err_destroy_table:
+	ovs_flow_tbl_destroy(genl_dereference(dp->table));
+err_free_dp:
+	kfree(dp);
+err_put_module:
+	module_put(THIS_MODULE);
+err_unlock_rtnl:
+	rtnl_unlock();
+err:
+	return err;
+}
+
+static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct vport *vport, *next_vport;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	int err;
+
+	rtnl_lock();
+	dp = lookup_datapath(info->userhdr, info->attrs);
+	err = PTR_ERR(dp);
+	if (IS_ERR(dp))
+		goto exit_unlock;
+
+	reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+				      info->snd_seq, OVS_DP_CMD_DEL);
+	err = PTR_ERR(reply);
+	if (IS_ERR(reply))
+		goto exit_unlock;
+
+	list_for_each_entry_safe(vport, next_vport, &dp->port_list, node)
+		if (vport->port_no != OVSP_LOCAL)
+			ovs_dp_detach_port(vport);
+
+	list_del(&dp->list_node);
+	ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL]));
+
+	/* rtnl_unlock() will wait until all the references to devices that
+	 * are pending unregistration have been dropped.  We do it here to
+	 * ensure that any internal devices (which contain DP pointers) are
+	 * fully destroyed before freeing the datapath.
+	 */
+	rtnl_unlock();
+
+	call_rcu(&dp->rcu, destroy_dp_rcu);
+	module_put(THIS_MODULE);
+
+	genl_notify(reply, genl_info_net(info), info->snd_pid,
+		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
+		    GFP_KERNEL);
+
+	return 0;
+
+exit_unlock:
+	rtnl_unlock();
+	return err;
+}
+
+static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *reply;
+	struct datapath *dp;
+	int err;
+
+	dp = lookup_datapath(info->userhdr, info->attrs);
+	if (IS_ERR(dp))
+		return PTR_ERR(dp);
+
+	reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+				      info->snd_seq, OVS_DP_CMD_NEW);
+	if (IS_ERR(reply)) {
+		err = PTR_ERR(reply);
+		netlink_set_err(init_net.genl_sock, 0,
+				ovs_dp_datapath_multicast_group.id, err);
+		return 0;
+	}
+
+	genl_notify(reply, genl_info_net(info), info->snd_pid,
+		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
+		    GFP_KERNEL);
+
+	return 0;
+}
+
+static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *reply;
+	struct datapath *dp;
+
+	dp = lookup_datapath(info->userhdr, info->attrs);
+	if (IS_ERR(dp))
+		return PTR_ERR(dp);
+
+	reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+				      info->snd_seq, OVS_DP_CMD_NEW);
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	return genlmsg_reply(reply, info);
+}
+
+static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct datapath *dp;
+	int skip = cb->args[0];
+	int i = 0;
+
+	list_for_each_entry(dp, &dps, list_node) {
+		if (i < skip)
+			continue;
+		if (ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
+					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					 OVS_DP_CMD_NEW) < 0)
+			break;
+		i++;
+	}
+
+	cb->args[0] = i;
+
+	return skb->len;
+}
+
+static struct genl_ops dp_datapath_genl_ops[] = {
+	{ .cmd = OVS_DP_CMD_NEW,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = datapath_policy,
+	  .doit = ovs_dp_cmd_new
+	},
+	{ .cmd = OVS_DP_CMD_DEL,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = datapath_policy,
+	  .doit = ovs_dp_cmd_del
+	},
+	{ .cmd = OVS_DP_CMD_GET,
+	  .flags = 0,		    /* OK for unprivileged users. */
+	  .policy = datapath_policy,
+	  .doit = ovs_dp_cmd_get,
+	  .dumpit = ovs_dp_cmd_dump
+	},
+	{ .cmd = OVS_DP_CMD_SET,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = datapath_policy,
+	  .doit = ovs_dp_cmd_set,
+	},
+};
+
+static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
+	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
+	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
+};
+
+static struct genl_family dp_vport_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_VPORT_FAMILY,
+	.version = OVS_VPORT_VERSION,
+	.maxattr = OVS_VPORT_ATTR_MAX
+};
+
+struct genl_multicast_group ovs_dp_vport_multicast_group = {
+	.name = OVS_VPORT_MCGROUP
+};
+
+/* Called with RTNL lock or RCU read lock. */
+static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
+				   u32 pid, u32 seq, u32 flags, u8 cmd)
+{
+	struct ovs_header *ovs_header;
+	struct ovs_vport_stats vport_stats;
+	int err;
+
+	ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
+				 flags, cmd);
+	if (!ovs_header)
+		return -EMSGSIZE;
+
+	ovs_header->dp_ifindex = get_dpifindex(vport->dp);
+
+	NLA_PUT_U32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
+	NLA_PUT_U32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type);
+	NLA_PUT_STRING(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport));
+	NLA_PUT_U32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid);
+
+	ovs_vport_get_stats(vport, &vport_stats);
+	NLA_PUT(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
+		&vport_stats);
+
+	err = ovs_vport_get_options(vport, skb);
+	if (err == -EMSGSIZE)
+		goto error;
+
+	return genlmsg_end(skb, ovs_header);
+
+nla_put_failure:
+	err = -EMSGSIZE;
+error:
+	genlmsg_cancel(skb, ovs_header);
+	return err;
+}
+
+/* Called with RTNL lock or RCU read lock. */
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
+					 u32 seq, u8 cmd)
+{
+	struct sk_buff *skb;
+	int retval;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
+	if (retval < 0) {
+		kfree_skb(skb);
+		return ERR_PTR(retval);
+	}
+	return skb;
+}
+
+/* Called with RTNL lock or RCU read lock. */
+static struct vport *lookup_vport(struct ovs_header *ovs_header,
+				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
+{
+	struct datapath *dp;
+	struct vport *vport;
+
+	if (a[OVS_VPORT_ATTR_NAME]) {
+		vport = ovs_vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME]));
+		if (!vport)
+			return ERR_PTR(-ENODEV);
+		return vport;
+	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
+		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
+
+		if (port_no >= DP_MAX_PORTS)
+			return ERR_PTR(-EFBIG);
+
+		dp = get_dp(ovs_header->dp_ifindex);
+		if (!dp)
+			return ERR_PTR(-ENODEV);
+
+		vport = rcu_dereference_rtnl(dp->ports[port_no]);
+		if (!vport)
+			return ERR_PTR(-ENOENT);
+		return vport;
+	} else
+		return ERR_PTR(-EINVAL);
+}
+
+static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct vport_parms parms;
+	struct sk_buff *reply;
+	struct vport *vport;
+	struct datapath *dp;
+	u32 port_no;
+	int err;
+
+	err = -EINVAL;
+	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
+	    !a[OVS_VPORT_ATTR_UPCALL_PID])
+		goto exit;
+
+	rtnl_lock();
+	dp = get_dp(ovs_header->dp_ifindex);
+	err = -ENODEV;
+	if (!dp)
+		goto exit_unlock;
+
+	if (a[OVS_VPORT_ATTR_PORT_NO]) {
+		port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
+
+		err = -EFBIG;
+		if (port_no >= DP_MAX_PORTS)
+			goto exit_unlock;
+
+		vport = rtnl_dereference(dp->ports[port_no]);
+		err = -EBUSY;
+		if (vport)
+			goto exit_unlock;
+	} else {
+		for (port_no = 1; ; port_no++) {
+			if (port_no >= DP_MAX_PORTS) {
+				err = -EFBIG;
+				goto exit_unlock;
+			}
+			vport = rtnl_dereference(dp->ports[port_no]);
+			if (!vport)
+				break;
+		}
+	}
+
+	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
+	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
+	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
+	parms.dp = dp;
+	parms.port_no = port_no;
+	parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+
+	vport = new_vport(&parms);
+	err = PTR_ERR(vport);
+	if (IS_ERR(vport))
+		goto exit_unlock;
+
+	reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+					 OVS_VPORT_CMD_NEW);
+	if (IS_ERR(reply)) {
+		err = PTR_ERR(reply);
+		ovs_dp_detach_port(vport);
+		goto exit_unlock;
+	}
+	genl_notify(reply, genl_info_net(info), info->snd_pid,
+		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+
+exit_unlock:
+	rtnl_unlock();
+exit:
+	return err;
+}
+
+static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct sk_buff *reply;
+	struct vport *vport;
+	int err;
+
+	rtnl_lock();
+	vport = lookup_vport(info->userhdr, a);
+	err = PTR_ERR(vport);
+	if (IS_ERR(vport))
+		goto exit_unlock;
+
+	err = 0;
+	if (a[OVS_VPORT_ATTR_TYPE] &&
+	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type)
+		err = -EINVAL;
+
+	if (!err && a[OVS_VPORT_ATTR_OPTIONS])
+		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
+	if (!err && a[OVS_VPORT_ATTR_UPCALL_PID])
+		vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+
+	reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+					 OVS_VPORT_CMD_NEW);
+	if (IS_ERR(reply)) {
+		err = PTR_ERR(reply);
+		netlink_set_err(init_net.genl_sock, 0,
+				ovs_dp_vport_multicast_group.id, err);
+		return 0;
+	}
+
+	genl_notify(reply, genl_info_net(info), info->snd_pid,
+		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+
+exit_unlock:
+	rtnl_unlock();
+	return err;
+}
+
+static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct sk_buff *reply;
+	struct vport *vport;
+	int err;
+
+	rtnl_lock();
+	vport = lookup_vport(info->userhdr, a);
+	err = PTR_ERR(vport);
+	if (IS_ERR(vport))
+		goto exit_unlock;
+
+	if (vport->port_no == OVSP_LOCAL) {
+		err = -EINVAL;
+		goto exit_unlock;
+	}
+
+	reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+					 OVS_VPORT_CMD_DEL);
+	err = PTR_ERR(reply);
+	if (IS_ERR(reply))
+		goto exit_unlock;
+
+	ovs_dp_detach_port(vport);
+
+	genl_notify(reply, genl_info_net(info), info->snd_pid,
+		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+
+exit_unlock:
+	rtnl_unlock();
+	return err;
+}
+
+static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sk_buff *reply;
+	struct vport *vport;
+	int err;
+
+	rcu_read_lock();
+	vport = lookup_vport(ovs_header, a);
+	err = PTR_ERR(vport);
+	if (IS_ERR(vport))
+		goto exit_unlock;
+
+	reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+					 OVS_VPORT_CMD_NEW);
+	err = PTR_ERR(reply);
+	if (IS_ERR(reply))
+		goto exit_unlock;
+
+	rcu_read_unlock();
+
+	return genlmsg_reply(reply, info);
+
+exit_unlock:
+	rcu_read_unlock();
+	return err;
+}
+
+static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+	struct datapath *dp;
+	u32 port_no;
+	int retval;
+
+	dp = get_dp(ovs_header->dp_ifindex);
+	if (!dp)
+		return -ENODEV;
+
+	rcu_read_lock();
+	for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
+		struct vport *vport;
+
+		vport = rcu_dereference(dp->ports[port_no]);
+		if (!vport)
+			continue;
+
+		if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
+					    cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					    OVS_VPORT_CMD_NEW) < 0)
+			break;
+	}
+	rcu_read_unlock();
+
+	cb->args[0] = port_no;
+	retval = skb->len;
+
+	return retval;
+}
+
+static void rehash_flow_table(struct work_struct *work)
+{
+	struct datapath *dp;
+
+	genl_lock();
+
+	list_for_each_entry(dp, &dps, list_node) {
+		struct flow_table *old_table = genl_dereference(dp->table);
+		struct flow_table *new_table;
+
+		new_table = ovs_flow_tbl_rehash(old_table);
+		if (!IS_ERR(new_table)) {
+			rcu_assign_pointer(dp->table, new_table);
+			ovs_flow_tbl_deferred_destroy(old_table);
+		}
+	}
+
+	genl_unlock();
+
+	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
+}
+
+static struct genl_ops dp_vport_genl_ops[] = {
+	{ .cmd = OVS_VPORT_CMD_NEW,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = vport_policy,
+	  .doit = ovs_vport_cmd_new
+	},
+	{ .cmd = OVS_VPORT_CMD_DEL,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = vport_policy,
+	  .doit = ovs_vport_cmd_del
+	},
+	{ .cmd = OVS_VPORT_CMD_GET,
+	  .flags = 0,		    /* OK for unprivileged users. */
+	  .policy = vport_policy,
+	  .doit = ovs_vport_cmd_get,
+	  .dumpit = ovs_vport_cmd_dump
+	},
+	{ .cmd = OVS_VPORT_CMD_SET,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = vport_policy,
+	  .doit = ovs_vport_cmd_set,
+	},
+};
+
+struct genl_family_and_ops {
+	struct genl_family *family;
+	struct genl_ops *ops;
+	int n_ops;
+	struct genl_multicast_group *group;
+};
+
+static const struct genl_family_and_ops dp_genl_families[] = {
+	{ &dp_datapath_genl_family,
+	  dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
+	  &ovs_dp_datapath_multicast_group },
+	{ &dp_vport_genl_family,
+	  dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
+	  &ovs_dp_vport_multicast_group },
+	{ &dp_flow_genl_family,
+	  dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
+	  &ovs_dp_flow_multicast_group },
+	{ &dp_packet_genl_family,
+	  dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
+	  NULL },
+};
+
+static void dp_unregister_genl(int n_families)
+{
+	int i;
+
+	for (i = 0; i < n_families; i++)
+		genl_unregister_family(dp_genl_families[i].family);
+}
+
+static int dp_register_genl(void)
+{
+	int n_registered;
+	int err;
+	int i;
+
+	n_registered = 0;
+	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
+		const struct genl_family_and_ops *f = &dp_genl_families[i];
+
+		err = genl_register_family_with_ops(f->family, f->ops,
+						    f->n_ops);
+		if (err)
+			goto error;
+		n_registered++;
+
+		if (f->group) {
+			err = genl_register_mc_group(f->family, f->group);
+			if (err)
+				goto error;
+		}
+	}
+
+	return 0;
+
+error:
+	dp_unregister_genl(n_registered);
+	return err;
+}
+
+static int __init dp_init(void)
+{
+	struct sk_buff *dummy_skb;
+	int err;
+
+	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
+
+	pr_info("Open vSwitch switching datapath\n");
+
+	err = ovs_flow_init();
+	if (err)
+		goto error;
+
+	err = ovs_vport_init();
+	if (err)
+		goto error_flow_exit;
+
+	err = register_netdevice_notifier(&ovs_dp_device_notifier);
+	if (err)
+		goto error_vport_exit;
+
+	err = dp_register_genl();
+	if (err < 0)
+		goto error_unreg_notifier;
+
+	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
+
+	return 0;
+
+error_unreg_notifier:
+	unregister_netdevice_notifier(&ovs_dp_device_notifier);
+error_vport_exit:
+	ovs_vport_exit();
+error_flow_exit:
+	ovs_flow_exit();
+error:
+	return err;
+}
+
+static void dp_cleanup(void)
+{
+	cancel_delayed_work_sync(&rehash_flow_wq);
+	rcu_barrier();
+	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
+	unregister_netdevice_notifier(&ovs_dp_device_notifier);
+	ovs_vport_exit();
+	ovs_flow_exit();
+}
+
+module_init(dp_init);
+module_exit(dp_cleanup);
+
+MODULE_DESCRIPTION("Open vSwitch switching datapath");
+MODULE_LICENSE("GPL");
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
new file mode 100644
index 000000000000..5b9f884b7055
--- /dev/null
+++ b/net/openvswitch/datapath.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef DATAPATH_H
+#define DATAPATH_H 1
+
+#include <asm/page.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/version.h>
+
+#include "flow.h"
+
+struct vport;
+
+#define DP_MAX_PORTS 1024
+#define SAMPLE_ACTION_DEPTH 3
+
+/**
+ * struct dp_stats_percpu - per-cpu packet processing statistics for a given
+ * datapath.
+ * @n_hit: Number of received packets for which a matching flow was found in
+ * the flow table.
+ * @n_miss: Number of received packets that had no matching flow in the flow
+ * table.  The sum of @n_hit and @n_miss is the number of packets that have
+ * been received by the datapath.
+ * @n_lost: Number of received packets that had no matching flow in the flow
+ * table that could not be sent to userspace (normally due to an overflow in
+ * one of the datapath's queues).
+ */
+struct dp_stats_percpu {
+	u64 n_hit;
+	u64 n_missed;
+	u64 n_lost;
+	struct u64_stats_sync sync;
+};
+
+/**
+ * struct datapath - datapath for flow-based packet switching
+ * @rcu: RCU callback head for deferred destruction.
+ * @list_node: Element in global 'dps' list.
+ * @n_flows: Number of flows currently in flow table.
+ * @table: Current flow table.  Protected by genl_lock and RCU.
+ * @ports: Map from port number to &struct vport.  %OVSP_LOCAL port
+ * always exists, other ports may be %NULL.  Protected by RTNL and RCU.
+ * @port_list: List of all ports in @ports in arbitrary order.  RTNL required
+ * to iterate or modify.
+ * @stats_percpu: Per-CPU datapath statistics.
+ *
+ * Context: See the comment on locking at the top of datapath.c for additional
+ * locking information.
+ */
+struct datapath {
+	struct rcu_head rcu;
+	struct list_head list_node;
+
+	/* Flow table. */
+	struct flow_table __rcu *table;
+
+	/* Switch ports. */
+	struct vport __rcu *ports[DP_MAX_PORTS];
+	struct list_head port_list;
+
+	/* Stats. */
+	struct dp_stats_percpu __percpu *stats_percpu;
+};
+
+/**
+ * struct ovs_skb_cb - OVS data in skb CB
+ * @flow: The flow associated with this packet.  May be %NULL if no flow.
+ */
+struct ovs_skb_cb {
+	struct sw_flow		*flow;
+};
+#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
+
+/**
+ * struct dp_upcall - metadata to include with a packet to send to userspace
+ * @cmd: One of %OVS_PACKET_CMD_*.
+ * @key: Becomes %OVS_PACKET_ATTR_KEY.  Must be nonnull.
+ * @userdata: If nonnull, its u64 value is extracted and passed to userspace as
+ * %OVS_PACKET_ATTR_USERDATA.
+ * @pid: Netlink PID to which packet should be sent.  If @pid is 0 then no
+ * packet is sent and the packet is accounted in the datapath's @n_lost
+ * counter.
+ */
+struct dp_upcall_info {
+	u8 cmd;
+	const struct sw_flow_key *key;
+	const struct nlattr *userdata;
+	u32 pid;
+};
+
+extern struct notifier_block ovs_dp_device_notifier;
+extern struct genl_multicast_group ovs_dp_vport_multicast_group;
+
+void ovs_dp_process_received_packet(struct vport *, struct sk_buff *);
+void ovs_dp_detach_port(struct vport *);
+int ovs_dp_upcall(struct datapath *, struct sk_buff *,
+		  const struct dp_upcall_info *);
+
+const char *ovs_dp_name(const struct datapath *dp);
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
+					 u8 cmd);
+
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
+#endif /* datapath.h */
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
new file mode 100644
index 000000000000..46736518c453
--- /dev/null
+++ b/net/openvswitch/dp_notify.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/netdevice.h>
+#include <net/genetlink.h>
+
+#include "datapath.h"
+#include "vport-internal_dev.h"
+#include "vport-netdev.h"
+
+static int dp_device_event(struct notifier_block *unused, unsigned long event,
+			   void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct vport *vport;
+
+	if (ovs_is_internal_dev(dev))
+		vport = ovs_internal_dev_get_vport(dev);
+	else
+		vport = ovs_netdev_get_vport(dev);
+
+	if (!vport)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_UNREGISTER:
+		if (!ovs_is_internal_dev(dev)) {
+			struct sk_buff *notify;
+
+			notify = ovs_vport_cmd_build_info(vport, 0, 0,
+							  OVS_VPORT_CMD_DEL);
+			ovs_dp_detach_port(vport);
+			if (IS_ERR(notify)) {
+				netlink_set_err(init_net.genl_sock, 0,
+						ovs_dp_vport_multicast_group.id,
+						PTR_ERR(notify));
+				break;
+			}
+
+			genlmsg_multicast(notify, 0, ovs_dp_vport_multicast_group.id,
+					  GFP_KERNEL);
+		}
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+struct notifier_block ovs_dp_device_notifier = {
+	.notifier_call = dp_device_event
+};
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
new file mode 100644
index 000000000000..fe7f020a843e
--- /dev/null
+++ b/net/openvswitch/flow.c
@@ -0,0 +1,1346 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include "flow.h"
+#include "datapath.h"
+#include <linux/uaccess.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/llc_pdu.h>
+#include <linux/kernel.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/llc.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/rcupdate.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/rculist.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+
+static struct kmem_cache *flow_cache;
+
+static int check_header(struct sk_buff *skb, int len)
+{
+	if (unlikely(skb->len < len))
+		return -EINVAL;
+	if (unlikely(!pskb_may_pull(skb, len)))
+		return -ENOMEM;
+	return 0;
+}
+
+static bool arphdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_network_offset(skb) +
+				  sizeof(struct arp_eth_header));
+}
+
+static int check_iphdr(struct sk_buff *skb)
+{
+	unsigned int nh_ofs = skb_network_offset(skb);
+	unsigned int ip_len;
+	int err;
+
+	err = check_header(skb, nh_ofs + sizeof(struct iphdr));
+	if (unlikely(err))
+		return err;
+
+	ip_len = ip_hdrlen(skb);
+	if (unlikely(ip_len < sizeof(struct iphdr) ||
+		     skb->len < nh_ofs + ip_len))
+		return -EINVAL;
+
+	skb_set_transport_header(skb, nh_ofs + ip_len);
+	return 0;
+}
+
+static bool tcphdr_ok(struct sk_buff *skb)
+{
+	int th_ofs = skb_transport_offset(skb);
+	int tcp_len;
+
+	if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))))
+		return false;
+
+	tcp_len = tcp_hdrlen(skb);
+	if (unlikely(tcp_len < sizeof(struct tcphdr) ||
+		     skb->len < th_ofs + tcp_len))
+		return false;
+
+	return true;
+}
+
+static bool udphdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct udphdr));
+}
+
+static bool icmphdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct icmphdr));
+}
+
+u64 ovs_flow_used_time(unsigned long flow_jiffies)
+{
+	struct timespec cur_ts;
+	u64 cur_ms, idle_ms;
+
+	ktime_get_ts(&cur_ts);
+	idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
+	cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
+		 cur_ts.tv_nsec / NSEC_PER_MSEC;
+
+	return cur_ms - idle_ms;
+}
+
+#define SW_FLOW_KEY_OFFSET(field)		\
+	(offsetof(struct sw_flow_key, field) +	\
+	 FIELD_SIZEOF(struct sw_flow_key, field))
+
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
+			 int *key_lenp)
+{
+	unsigned int nh_ofs = skb_network_offset(skb);
+	unsigned int nh_len;
+	int payload_ofs;
+	struct ipv6hdr *nh;
+	uint8_t nexthdr;
+	__be16 frag_off;
+	int err;
+
+	*key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label);
+
+	err = check_header(skb, nh_ofs + sizeof(*nh));
+	if (unlikely(err))
+		return err;
+
+	nh = ipv6_hdr(skb);
+	nexthdr = nh->nexthdr;
+	payload_ofs = (u8 *)(nh + 1) - skb->data;
+
+	key->ip.proto = NEXTHDR_NONE;
+	key->ip.tos = ipv6_get_dsfield(nh);
+	key->ip.ttl = nh->hop_limit;
+	key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
+	key->ipv6.addr.src = nh->saddr;
+	key->ipv6.addr.dst = nh->daddr;
+
+	payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off);
+	if (unlikely(payload_ofs < 0))
+		return -EINVAL;
+
+	if (frag_off) {
+		if (frag_off & htons(~0x7))
+			key->ip.frag = OVS_FRAG_TYPE_LATER;
+		else
+			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+	}
+
+	nh_len = payload_ofs - nh_ofs;
+	skb_set_transport_header(skb, nh_ofs + nh_len);
+	key->ip.proto = nexthdr;
+	return nh_len;
+}
+
+static bool icmp6hdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct icmp6hdr));
+}
+
+#define TCP_FLAGS_OFFSET 13
+#define TCP_FLAG_MASK 0x3f
+
+void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
+{
+	u8 tcp_flags = 0;
+
+	if (flow->key.eth.type == htons(ETH_P_IP) &&
+	    flow->key.ip.proto == IPPROTO_TCP) {
+		u8 *tcp = (u8 *)tcp_hdr(skb);
+		tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
+	}
+
+	spin_lock(&flow->lock);
+	flow->used = jiffies;
+	flow->packet_count++;
+	flow->byte_count += skb->len;
+	flow->tcp_flags |= tcp_flags;
+	spin_unlock(&flow->lock);
+}
+
+struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions)
+{
+	int actions_len = nla_len(actions);
+	struct sw_flow_actions *sfa;
+
+	/* At least DP_MAX_PORTS actions are required to be able to flood a
+	 * packet to every port.  Factor of 2 allows for setting VLAN tags,
+	 * etc. */
+	if (actions_len > 2 * DP_MAX_PORTS * nla_total_size(4))
+		return ERR_PTR(-EINVAL);
+
+	sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL);
+	if (!sfa)
+		return ERR_PTR(-ENOMEM);
+
+	sfa->actions_len = actions_len;
+	memcpy(sfa->actions, nla_data(actions), actions_len);
+	return sfa;
+}
+
+struct sw_flow *ovs_flow_alloc(void)
+{
+	struct sw_flow *flow;
+
+	flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
+	if (!flow)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&flow->lock);
+	flow->sf_acts = NULL;
+
+	return flow;
+}
+
+static struct hlist_head *find_bucket(struct flow_table *table, u32 hash)
+{
+	hash = jhash_1word(hash, table->hash_seed);
+	return flex_array_get(table->buckets,
+				(hash & (table->n_buckets - 1)));
+}
+
+static struct flex_array *alloc_buckets(unsigned int n_buckets)
+{
+	struct flex_array *buckets;
+	int i, err;
+
+	buckets = flex_array_alloc(sizeof(struct hlist_head *),
+				   n_buckets, GFP_KERNEL);
+	if (!buckets)
+		return NULL;
+
+	err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
+	if (err) {
+		flex_array_free(buckets);
+		return NULL;
+	}
+
+	for (i = 0; i < n_buckets; i++)
+		INIT_HLIST_HEAD((struct hlist_head *)
+					flex_array_get(buckets, i));
+
+	return buckets;
+}
+
+static void free_buckets(struct flex_array *buckets)
+{
+	flex_array_free(buckets);
+}
+
+struct flow_table *ovs_flow_tbl_alloc(int new_size)
+{
+	struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);
+
+	if (!table)
+		return NULL;
+
+	table->buckets = alloc_buckets(new_size);
+
+	if (!table->buckets) {
+		kfree(table);
+		return NULL;
+	}
+	table->n_buckets = new_size;
+	table->count = 0;
+	table->node_ver = 0;
+	table->keep_flows = false;
+	get_random_bytes(&table->hash_seed, sizeof(u32));
+
+	return table;
+}
+
+void ovs_flow_tbl_destroy(struct flow_table *table)
+{
+	int i;
+
+	if (!table)
+		return;
+
+	if (table->keep_flows)
+		goto skip_flows;
+
+	for (i = 0; i < table->n_buckets; i++) {
+		struct sw_flow *flow;
+		struct hlist_head *head = flex_array_get(table->buckets, i);
+		struct hlist_node *node, *n;
+		int ver = table->node_ver;
+
+		hlist_for_each_entry_safe(flow, node, n, head, hash_node[ver]) {
+			hlist_del_rcu(&flow->hash_node[ver]);
+			ovs_flow_free(flow);
+		}
+	}
+
+skip_flows:
+	free_buckets(table->buckets);
+	kfree(table);
+}
+
+static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
+{
+	struct flow_table *table = container_of(rcu, struct flow_table, rcu);
+
+	ovs_flow_tbl_destroy(table);
+}
+
+void ovs_flow_tbl_deferred_destroy(struct flow_table *table)
+{
+	if (!table)
+		return;
+
+	call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
+}
+
+struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last)
+{
+	struct sw_flow *flow;
+	struct hlist_head *head;
+	struct hlist_node *n;
+	int ver;
+	int i;
+
+	ver = table->node_ver;
+	while (*bucket < table->n_buckets) {
+		i = 0;
+		head = flex_array_get(table->buckets, *bucket);
+		hlist_for_each_entry_rcu(flow, n, head, hash_node[ver]) {
+			if (i < *last) {
+				i++;
+				continue;
+			}
+			*last = i + 1;
+			return flow;
+		}
+		(*bucket)++;
+		*last = 0;
+	}
+
+	return NULL;
+}
+
+static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new)
+{
+	int old_ver;
+	int i;
+
+	old_ver = old->node_ver;
+	new->node_ver = !old_ver;
+
+	/* Insert in new table. */
+	for (i = 0; i < old->n_buckets; i++) {
+		struct sw_flow *flow;
+		struct hlist_head *head;
+		struct hlist_node *n;
+
+		head = flex_array_get(old->buckets, i);
+
+		hlist_for_each_entry(flow, n, head, hash_node[old_ver])
+			ovs_flow_tbl_insert(new, flow);
+	}
+	old->keep_flows = true;
+}
+
+static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets)
+{
+	struct flow_table *new_table;
+
+	new_table = ovs_flow_tbl_alloc(n_buckets);
+	if (!new_table)
+		return ERR_PTR(-ENOMEM);
+
+	flow_table_copy_flows(table, new_table);
+
+	return new_table;
+}
+
+struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table)
+{
+	return __flow_tbl_rehash(table, table->n_buckets);
+}
+
+struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
+{
+	return __flow_tbl_rehash(table, table->n_buckets * 2);
+}
+
+void ovs_flow_free(struct sw_flow *flow)
+{
+	if (unlikely(!flow))
+		return;
+
+	kfree((struct sf_flow_acts __force *)flow->sf_acts);
+	kmem_cache_free(flow_cache, flow);
+}
+
+/* RCU callback used by ovs_flow_deferred_free. */
+static void rcu_free_flow_callback(struct rcu_head *rcu)
+{
+	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
+
+	ovs_flow_free(flow);
+}
+
+/* Schedules 'flow' to be freed after the next RCU grace period.
+ * The caller must hold rcu_read_lock for this to be sensible. */
+void ovs_flow_deferred_free(struct sw_flow *flow)
+{
+	call_rcu(&flow->rcu, rcu_free_flow_callback);
+}
+
+/* RCU callback used by ovs_flow_deferred_free_acts. */
+static void rcu_free_acts_callback(struct rcu_head *rcu)
+{
+	struct sw_flow_actions *sf_acts = container_of(rcu,
+			struct sw_flow_actions, rcu);
+	kfree(sf_acts);
+}
+
+/* Schedules 'sf_acts' to be freed after the next RCU grace period.
+ * The caller must hold rcu_read_lock for this to be sensible. */
+void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
+{
+	call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
+}
+
+static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	struct qtag_prefix {
+		__be16 eth_type; /* ETH_P_8021Q */
+		__be16 tci;
+	};
+	struct qtag_prefix *qp;
+
+	if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
+		return 0;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
+					 sizeof(__be16))))
+		return -ENOMEM;
+
+	qp = (struct qtag_prefix *) skb->data;
+	key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT);
+	__skb_pull(skb, sizeof(struct qtag_prefix));
+
+	return 0;
+}
+
+static __be16 parse_ethertype(struct sk_buff *skb)
+{
+	struct llc_snap_hdr {
+		u8  dsap;  /* Always 0xAA */
+		u8  ssap;  /* Always 0xAA */
+		u8  ctrl;
+		u8  oui[3];
+		__be16 ethertype;
+	};
+	struct llc_snap_hdr *llc;
+	__be16 proto;
+
+	proto = *(__be16 *) skb->data;
+	__skb_pull(skb, sizeof(__be16));
+
+	if (ntohs(proto) >= 1536)
+		return proto;
+
+	if (skb->len < sizeof(struct llc_snap_hdr))
+		return htons(ETH_P_802_2);
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr))))
+		return htons(0);
+
+	llc = (struct llc_snap_hdr *) skb->data;
+	if (llc->dsap != LLC_SAP_SNAP ||
+	    llc->ssap != LLC_SAP_SNAP ||
+	    (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0)
+		return htons(ETH_P_802_2);
+
+	__skb_pull(skb, sizeof(struct llc_snap_hdr));
+	return llc->ethertype;
+}
+
+static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
+			int *key_lenp, int nh_len)
+{
+	struct icmp6hdr *icmp = icmp6_hdr(skb);
+	int error = 0;
+	int key_len;
+
+	/* The ICMPv6 type and code fields use the 16-bit transport port
+	 * fields, so we need to store them in 16-bit network byte order.
+	 */
+	key->ipv6.tp.src = htons(icmp->icmp6_type);
+	key->ipv6.tp.dst = htons(icmp->icmp6_code);
+	key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+
+	if (icmp->icmp6_code == 0 &&
+	    (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+	     icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+		int icmp_len = skb->len - skb_transport_offset(skb);
+		struct nd_msg *nd;
+		int offset;
+
+		key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
+
+		/* In order to process neighbor discovery options, we need the
+		 * entire packet.
+		 */
+		if (unlikely(icmp_len < sizeof(*nd)))
+			goto out;
+		if (unlikely(skb_linearize(skb))) {
+			error = -ENOMEM;
+			goto out;
+		}
+
+		nd = (struct nd_msg *)skb_transport_header(skb);
+		key->ipv6.nd.target = nd->target;
+		key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
+
+		icmp_len -= sizeof(*nd);
+		offset = 0;
+		while (icmp_len >= 8) {
+			struct nd_opt_hdr *nd_opt =
+				 (struct nd_opt_hdr *)(nd->opt + offset);
+			int opt_len = nd_opt->nd_opt_len * 8;
+
+			if (unlikely(!opt_len || opt_len > icmp_len))
+				goto invalid;
+
+			/* Store the link layer address if the appropriate
+			 * option is provided.  It is considered an error if
+			 * the same link layer option is specified twice.
+			 */
+			if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR
+			    && opt_len == 8) {
+				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
+					goto invalid;
+				memcpy(key->ipv6.nd.sll,
+				    &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+			} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
+				   && opt_len == 8) {
+				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
+					goto invalid;
+				memcpy(key->ipv6.nd.tll,
+				    &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+			}
+
+			icmp_len -= opt_len;
+			offset += opt_len;
+		}
+	}
+
+	goto out;
+
+invalid:
+	memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
+	memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
+	memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
+
+out:
+	*key_lenp = key_len;
+	return error;
+}
+
+/**
+ * ovs_flow_extract - extracts a flow key from an Ethernet frame.
+ * @skb: sk_buff that contains the frame, with skb->data pointing to the
+ * Ethernet header
+ * @in_port: port number on which @skb was received.
+ * @key: output flow key
+ * @key_lenp: length of output flow key
+ *
+ * The caller must ensure that skb->len >= ETH_HLEN.
+ *
+ * Returns 0 if successful, otherwise a negative errno value.
+ *
+ * Initializes @skb header pointers as follows:
+ *
+ *    - skb->mac_header: the Ethernet header.
+ *
+ *    - skb->network_header: just past the Ethernet header, or just past the
+ *      VLAN header, to the first byte of the Ethernet payload.
+ *
+ *    - skb->transport_header: If key->dl_type is ETH_P_IP or ETH_P_IPV6
+ *      on output, then just past the IP header, if one is present and
+ *      of a correct length, otherwise the same as skb->network_header.
+ *      For other key->dl_type values it is left untouched.
+ */
+int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
+		 int *key_lenp)
+{
+	int error = 0;
+	int key_len = SW_FLOW_KEY_OFFSET(eth);
+	struct ethhdr *eth;
+
+	memset(key, 0, sizeof(*key));
+
+	key->phy.priority = skb->priority;
+	key->phy.in_port = in_port;
+
+	skb_reset_mac_header(skb);
+
+	/* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
+	 * header in the linear data area.
+	 */
+	eth = eth_hdr(skb);
+	memcpy(key->eth.src, eth->h_source, ETH_ALEN);
+	memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
+
+	__skb_pull(skb, 2 * ETH_ALEN);
+
+	if (vlan_tx_tag_present(skb))
+		key->eth.tci = htons(skb->vlan_tci);
+	else if (eth->h_proto == htons(ETH_P_8021Q))
+		if (unlikely(parse_vlan(skb, key)))
+			return -ENOMEM;
+
+	key->eth.type = parse_ethertype(skb);
+	if (unlikely(key->eth.type == htons(0)))
+		return -ENOMEM;
+
+	skb_reset_network_header(skb);
+	__skb_push(skb, skb->data - skb_mac_header(skb));
+
+	/* Network layer. */
+	if (key->eth.type == htons(ETH_P_IP)) {
+		struct iphdr *nh;
+		__be16 offset;
+
+		key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
+
+		error = check_iphdr(skb);
+		if (unlikely(error)) {
+			if (error == -EINVAL) {
+				skb->transport_header = skb->network_header;
+				error = 0;
+			}
+			goto out;
+		}
+
+		nh = ip_hdr(skb);
+		key->ipv4.addr.src = nh->saddr;
+		key->ipv4.addr.dst = nh->daddr;
+
+		key->ip.proto = nh->protocol;
+		key->ip.tos = nh->tos;
+		key->ip.ttl = nh->ttl;
+
+		offset = nh->frag_off & htons(IP_OFFSET);
+		if (offset) {
+			key->ip.frag = OVS_FRAG_TYPE_LATER;
+			goto out;
+		}
+		if (nh->frag_off & htons(IP_MF) ||
+			 skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+
+		/* Transport layer. */
+		if (key->ip.proto == IPPROTO_TCP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+			if (tcphdr_ok(skb)) {
+				struct tcphdr *tcp = tcp_hdr(skb);
+				key->ipv4.tp.src = tcp->source;
+				key->ipv4.tp.dst = tcp->dest;
+			}
+		} else if (key->ip.proto == IPPROTO_UDP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+			if (udphdr_ok(skb)) {
+				struct udphdr *udp = udp_hdr(skb);
+				key->ipv4.tp.src = udp->source;
+				key->ipv4.tp.dst = udp->dest;
+			}
+		} else if (key->ip.proto == IPPROTO_ICMP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+			if (icmphdr_ok(skb)) {
+				struct icmphdr *icmp = icmp_hdr(skb);
+				/* The ICMP type and code fields use the 16-bit
+				 * transport port fields, so we need to store
+				 * them in 16-bit network byte order. */
+				key->ipv4.tp.src = htons(icmp->type);
+				key->ipv4.tp.dst = htons(icmp->code);
+			}
+		}
+
+	} else if (key->eth.type == htons(ETH_P_ARP) && arphdr_ok(skb)) {
+		struct arp_eth_header *arp;
+
+		arp = (struct arp_eth_header *)skb_network_header(skb);
+
+		if (arp->ar_hrd == htons(ARPHRD_ETHER)
+				&& arp->ar_pro == htons(ETH_P_IP)
+				&& arp->ar_hln == ETH_ALEN
+				&& arp->ar_pln == 4) {
+
+			/* We only match on the lower 8 bits of the opcode. */
+			if (ntohs(arp->ar_op) <= 0xff)
+				key->ip.proto = ntohs(arp->ar_op);
+
+			if (key->ip.proto == ARPOP_REQUEST
+					|| key->ip.proto == ARPOP_REPLY) {
+				memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
+				memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
+				memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
+				memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
+				key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
+			}
+		}
+	} else if (key->eth.type == htons(ETH_P_IPV6)) {
+		int nh_len;             /* IPv6 Header + Extensions */
+
+		nh_len = parse_ipv6hdr(skb, key, &key_len);
+		if (unlikely(nh_len < 0)) {
+			if (nh_len == -EINVAL)
+				skb->transport_header = skb->network_header;
+			else
+				error = nh_len;
+			goto out;
+		}
+
+		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
+			goto out;
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+
+		/* Transport layer. */
+		if (key->ip.proto == NEXTHDR_TCP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+			if (tcphdr_ok(skb)) {
+				struct tcphdr *tcp = tcp_hdr(skb);
+				key->ipv6.tp.src = tcp->source;
+				key->ipv6.tp.dst = tcp->dest;
+			}
+		} else if (key->ip.proto == NEXTHDR_UDP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+			if (udphdr_ok(skb)) {
+				struct udphdr *udp = udp_hdr(skb);
+				key->ipv6.tp.src = udp->source;
+				key->ipv6.tp.dst = udp->dest;
+			}
+		} else if (key->ip.proto == NEXTHDR_ICMP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+			if (icmp6hdr_ok(skb)) {
+				error = parse_icmpv6(skb, key, &key_len, nh_len);
+				if (error < 0)
+					goto out;
+			}
+		}
+	}
+
+out:
+	*key_lenp = key_len;
+	return error;
+}
+
+u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len)
+{
+	return jhash2((u32 *)key, DIV_ROUND_UP(key_len, sizeof(u32)), 0);
+}
+
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
+				struct sw_flow_key *key, int key_len)
+{
+	struct sw_flow *flow;
+	struct hlist_node *n;
+	struct hlist_head *head;
+	u32 hash;
+
+	hash = ovs_flow_hash(key, key_len);
+
+	head = find_bucket(table, hash);
+	hlist_for_each_entry_rcu(flow, n, head, hash_node[table->node_ver]) {
+
+		if (flow->hash == hash &&
+		    !memcmp(&flow->key, key, key_len)) {
+			return flow;
+		}
+	}
+	return NULL;
+}
+
+void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow)
+{
+	struct hlist_head *head;
+
+	head = find_bucket(table, flow->hash);
+	hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
+	table->count++;
+}
+
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
+{
+	hlist_del_rcu(&flow->hash_node[table->node_ver]);
+	table->count--;
+	BUG_ON(table->count < 0);
+}
+
+/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
+const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
+	[OVS_KEY_ATTR_ENCAP] = -1,
+	[OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
+	[OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
+	[OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
+	[OVS_KEY_ATTR_VLAN] = sizeof(__be16),
+	[OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
+	[OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
+	[OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
+	[OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
+	[OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
+	[OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
+	[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
+	[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
+	[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
+};
+
+static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
+				  const struct nlattr *a[], u32 *attrs)
+{
+	const struct ovs_key_icmp *icmp_key;
+	const struct ovs_key_tcp *tcp_key;
+	const struct ovs_key_udp *udp_key;
+
+	switch (swkey->ip.proto) {
+	case IPPROTO_TCP:
+		if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+
+		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
+		swkey->ipv4.tp.src = tcp_key->tcp_src;
+		swkey->ipv4.tp.dst = tcp_key->tcp_dst;
+		break;
+
+	case IPPROTO_UDP:
+		if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+
+		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
+		swkey->ipv4.tp.src = udp_key->udp_src;
+		swkey->ipv4.tp.dst = udp_key->udp_dst;
+		break;
+
+	case IPPROTO_ICMP:
+		if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP)))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
+
+		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
+		swkey->ipv4.tp.src = htons(icmp_key->icmp_type);
+		swkey->ipv4.tp.dst = htons(icmp_key->icmp_code);
+		break;
+	}
+
+	return 0;
+}
+
+static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
+				  const struct nlattr *a[], u32 *attrs)
+{
+	const struct ovs_key_icmpv6 *icmpv6_key;
+	const struct ovs_key_tcp *tcp_key;
+	const struct ovs_key_udp *udp_key;
+
+	switch (swkey->ip.proto) {
+	case IPPROTO_TCP:
+		if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+
+		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
+		swkey->ipv6.tp.src = tcp_key->tcp_src;
+		swkey->ipv6.tp.dst = tcp_key->tcp_dst;
+		break;
+
+	case IPPROTO_UDP:
+		if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+
+		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
+		swkey->ipv6.tp.src = udp_key->udp_src;
+		swkey->ipv6.tp.dst = udp_key->udp_dst;
+		break;
+
+	case IPPROTO_ICMPV6:
+		if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6)))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
+
+		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
+		swkey->ipv6.tp.src = htons(icmpv6_key->icmpv6_type);
+		swkey->ipv6.tp.dst = htons(icmpv6_key->icmpv6_code);
+
+		if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+		    swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+			const struct ovs_key_nd *nd_key;
+
+			if (!(*attrs & (1 << OVS_KEY_ATTR_ND)))
+				return -EINVAL;
+			*attrs &= ~(1 << OVS_KEY_ATTR_ND);
+
+			*key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
+			nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
+			memcpy(&swkey->ipv6.nd.target, nd_key->nd_target,
+			       sizeof(swkey->ipv6.nd.target));
+			memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN);
+			memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN);
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static int parse_flow_nlattrs(const struct nlattr *attr,
+			      const struct nlattr *a[], u32 *attrsp)
+{
+	const struct nlattr *nla;
+	u32 attrs;
+	int rem;
+
+	attrs = 0;
+	nla_for_each_nested(nla, attr, rem) {
+		u16 type = nla_type(nla);
+		int expected_len;
+
+		if (type > OVS_KEY_ATTR_MAX || attrs & (1 << type))
+			return -EINVAL;
+
+		expected_len = ovs_key_lens[type];
+		if (nla_len(nla) != expected_len && expected_len != -1)
+			return -EINVAL;
+
+		attrs |= 1 << type;
+		a[type] = nla;
+	}
+	if (rem)
+		return -EINVAL;
+
+	*attrsp = attrs;
+	return 0;
+}
+
+/**
+ * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key.
+ * @swkey: receives the extracted flow key.
+ * @key_lenp: number of bytes used in @swkey.
+ * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence.
+ */
+int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
+		      const struct nlattr *attr)
+{
+	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+	const struct ovs_key_ethernet *eth_key;
+	int key_len;
+	u32 attrs;
+	int err;
+
+	memset(swkey, 0, sizeof(struct sw_flow_key));
+	key_len = SW_FLOW_KEY_OFFSET(eth);
+
+	err = parse_flow_nlattrs(attr, a, &attrs);
+	if (err)
+		return err;
+
+	/* Metadata attributes. */
+	if (attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
+		swkey->phy.priority = nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]);
+		attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
+	}
+	if (attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
+		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
+		if (in_port >= DP_MAX_PORTS)
+			return -EINVAL;
+		swkey->phy.in_port = in_port;
+		attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
+	} else {
+		swkey->phy.in_port = USHRT_MAX;
+	}
+
+	/* Data attributes. */
+	if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
+		return -EINVAL;
+	attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
+
+	eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
+	memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN);
+	memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN);
+
+	if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) &&
+	    nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) {
+		const struct nlattr *encap;
+		__be16 tci;
+
+		if (attrs != ((1 << OVS_KEY_ATTR_VLAN) |
+			      (1 << OVS_KEY_ATTR_ETHERTYPE) |
+			      (1 << OVS_KEY_ATTR_ENCAP)))
+			return -EINVAL;
+
+		encap = a[OVS_KEY_ATTR_ENCAP];
+		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+		if (tci & htons(VLAN_TAG_PRESENT)) {
+			swkey->eth.tci = tci;
+
+			err = parse_flow_nlattrs(encap, a, &attrs);
+			if (err)
+				return err;
+		} else if (!tci) {
+			/* Corner case for truncated 802.1Q header. */
+			if (nla_len(encap))
+				return -EINVAL;
+
+			swkey->eth.type = htons(ETH_P_8021Q);
+			*key_lenp = key_len;
+			return 0;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
+		swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+		if (ntohs(swkey->eth.type) < 1536)
+			return -EINVAL;
+		attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+	} else {
+		swkey->eth.type = htons(ETH_P_802_2);
+	}
+
+	if (swkey->eth.type == htons(ETH_P_IP)) {
+		const struct ovs_key_ipv4 *ipv4_key;
+
+		if (!(attrs & (1 << OVS_KEY_ATTR_IPV4)))
+			return -EINVAL;
+		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
+
+		key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
+		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
+		if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX)
+			return -EINVAL;
+		swkey->ip.proto = ipv4_key->ipv4_proto;
+		swkey->ip.tos = ipv4_key->ipv4_tos;
+		swkey->ip.ttl = ipv4_key->ipv4_ttl;
+		swkey->ip.frag = ipv4_key->ipv4_frag;
+		swkey->ipv4.addr.src = ipv4_key->ipv4_src;
+		swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;
+
+		if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
+			err = ipv4_flow_from_nlattrs(swkey, &key_len, a, &attrs);
+			if (err)
+				return err;
+		}
+	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+		const struct ovs_key_ipv6 *ipv6_key;
+
+		if (!(attrs & (1 << OVS_KEY_ATTR_IPV6)))
+			return -EINVAL;
+		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
+
+		key_len = SW_FLOW_KEY_OFFSET(ipv6.label);
+		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
+		if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX)
+			return -EINVAL;
+		swkey->ipv6.label = ipv6_key->ipv6_label;
+		swkey->ip.proto = ipv6_key->ipv6_proto;
+		swkey->ip.tos = ipv6_key->ipv6_tclass;
+		swkey->ip.ttl = ipv6_key->ipv6_hlimit;
+		swkey->ip.frag = ipv6_key->ipv6_frag;
+		memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
+		       sizeof(swkey->ipv6.addr.src));
+		memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
+		       sizeof(swkey->ipv6.addr.dst));
+
+		if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
+			err = ipv6_flow_from_nlattrs(swkey, &key_len, a, &attrs);
+			if (err)
+				return err;
+		}
+	} else if (swkey->eth.type == htons(ETH_P_ARP)) {
+		const struct ovs_key_arp *arp_key;
+
+		if (!(attrs & (1 << OVS_KEY_ATTR_ARP)))
+			return -EINVAL;
+		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
+
+		key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
+		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
+		swkey->ipv4.addr.src = arp_key->arp_sip;
+		swkey->ipv4.addr.dst = arp_key->arp_tip;
+		if (arp_key->arp_op & htons(0xff00))
+			return -EINVAL;
+		swkey->ip.proto = ntohs(arp_key->arp_op);
+		memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
+		memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
+	}
+
+	if (attrs)
+		return -EINVAL;
+	*key_lenp = key_len;
+
+	return 0;
+}
+
+/**
+ * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
+ * @in_port: receives the extracted input port.
+ * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence.
+ *
+ * This parses a series of Netlink attributes that form a flow key, which must
+ * take the same form accepted by flow_from_nlattrs(), but only enough of it to
+ * get the metadata, that is, the parts of the flow key that cannot be
+ * extracted from the packet itself.
+ */
+int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
+			       const struct nlattr *attr)
+{
+	const struct nlattr *nla;
+	int rem;
+
+	*in_port = USHRT_MAX;
+	*priority = 0;
+
+	nla_for_each_nested(nla, attr, rem) {
+		int type = nla_type(nla);
+
+		if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) {
+			if (nla_len(nla) != ovs_key_lens[type])
+				return -EINVAL;
+
+			switch (type) {
+			case OVS_KEY_ATTR_PRIORITY:
+				*priority = nla_get_u32(nla);
+				break;
+
+			case OVS_KEY_ATTR_IN_PORT:
+				if (nla_get_u32(nla) >= DP_MAX_PORTS)
+					return -EINVAL;
+				*in_port = nla_get_u32(nla);
+				break;
+			}
+		}
+	}
+	if (rem)
+		return -EINVAL;
+	return 0;
+}
+
+int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
+{
+	struct ovs_key_ethernet *eth_key;
+	struct nlattr *nla, *encap;
+
+	if (swkey->phy.priority)
+		NLA_PUT_U32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority);
+
+	if (swkey->phy.in_port != USHRT_MAX)
+		NLA_PUT_U32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port);
+
+	nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
+	if (!nla)
+		goto nla_put_failure;
+	eth_key = nla_data(nla);
+	memcpy(eth_key->eth_src, swkey->eth.src, ETH_ALEN);
+	memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN);
+
+	if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
+		NLA_PUT_BE16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q));
+		NLA_PUT_BE16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci);
+		encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+		if (!swkey->eth.tci)
+			goto unencap;
+	} else {
+		encap = NULL;
+	}
+
+	if (swkey->eth.type == htons(ETH_P_802_2))
+		goto unencap;
+
+	NLA_PUT_BE16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type);
+
+	if (swkey->eth.type == htons(ETH_P_IP)) {
+		struct ovs_key_ipv4 *ipv4_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
+		if (!nla)
+			goto nla_put_failure;
+		ipv4_key = nla_data(nla);
+		ipv4_key->ipv4_src = swkey->ipv4.addr.src;
+		ipv4_key->ipv4_dst = swkey->ipv4.addr.dst;
+		ipv4_key->ipv4_proto = swkey->ip.proto;
+		ipv4_key->ipv4_tos = swkey->ip.tos;
+		ipv4_key->ipv4_ttl = swkey->ip.ttl;
+		ipv4_key->ipv4_frag = swkey->ip.frag;
+	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+		struct ovs_key_ipv6 *ipv6_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
+		if (!nla)
+			goto nla_put_failure;
+		ipv6_key = nla_data(nla);
+		memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src,
+				sizeof(ipv6_key->ipv6_src));
+		memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst,
+				sizeof(ipv6_key->ipv6_dst));
+		ipv6_key->ipv6_label = swkey->ipv6.label;
+		ipv6_key->ipv6_proto = swkey->ip.proto;
+		ipv6_key->ipv6_tclass = swkey->ip.tos;
+		ipv6_key->ipv6_hlimit = swkey->ip.ttl;
+		ipv6_key->ipv6_frag = swkey->ip.frag;
+	} else if (swkey->eth.type == htons(ETH_P_ARP)) {
+		struct ovs_key_arp *arp_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
+		if (!nla)
+			goto nla_put_failure;
+		arp_key = nla_data(nla);
+		memset(arp_key, 0, sizeof(struct ovs_key_arp));
+		arp_key->arp_sip = swkey->ipv4.addr.src;
+		arp_key->arp_tip = swkey->ipv4.addr.dst;
+		arp_key->arp_op = htons(swkey->ip.proto);
+		memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN);
+		memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
+	}
+
+	if ((swkey->eth.type == htons(ETH_P_IP) ||
+	     swkey->eth.type == htons(ETH_P_IPV6)) &&
+	     swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
+
+		if (swkey->ip.proto == IPPROTO_TCP) {
+			struct ovs_key_tcp *tcp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
+			if (!nla)
+				goto nla_put_failure;
+			tcp_key = nla_data(nla);
+			if (swkey->eth.type == htons(ETH_P_IP)) {
+				tcp_key->tcp_src = swkey->ipv4.tp.src;
+				tcp_key->tcp_dst = swkey->ipv4.tp.dst;
+			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+				tcp_key->tcp_src = swkey->ipv6.tp.src;
+				tcp_key->tcp_dst = swkey->ipv6.tp.dst;
+			}
+		} else if (swkey->ip.proto == IPPROTO_UDP) {
+			struct ovs_key_udp *udp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
+			if (!nla)
+				goto nla_put_failure;
+			udp_key = nla_data(nla);
+			if (swkey->eth.type == htons(ETH_P_IP)) {
+				udp_key->udp_src = swkey->ipv4.tp.src;
+				udp_key->udp_dst = swkey->ipv4.tp.dst;
+			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+				udp_key->udp_src = swkey->ipv6.tp.src;
+				udp_key->udp_dst = swkey->ipv6.tp.dst;
+			}
+		} else if (swkey->eth.type == htons(ETH_P_IP) &&
+			   swkey->ip.proto == IPPROTO_ICMP) {
+			struct ovs_key_icmp *icmp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
+			if (!nla)
+				goto nla_put_failure;
+			icmp_key = nla_data(nla);
+			icmp_key->icmp_type = ntohs(swkey->ipv4.tp.src);
+			icmp_key->icmp_code = ntohs(swkey->ipv4.tp.dst);
+		} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
+			   swkey->ip.proto == IPPROTO_ICMPV6) {
+			struct ovs_key_icmpv6 *icmpv6_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
+						sizeof(*icmpv6_key));
+			if (!nla)
+				goto nla_put_failure;
+			icmpv6_key = nla_data(nla);
+			icmpv6_key->icmpv6_type = ntohs(swkey->ipv6.tp.src);
+			icmpv6_key->icmpv6_code = ntohs(swkey->ipv6.tp.dst);
+
+			if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+			    icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
+				struct ovs_key_nd *nd_key;
+
+				nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
+				if (!nla)
+					goto nla_put_failure;
+				nd_key = nla_data(nla);
+				memcpy(nd_key->nd_target, &swkey->ipv6.nd.target,
+							sizeof(nd_key->nd_target));
+				memcpy(nd_key->nd_sll, swkey->ipv6.nd.sll, ETH_ALEN);
+				memcpy(nd_key->nd_tll, swkey->ipv6.nd.tll, ETH_ALEN);
+			}
+		}
+	}
+
+unencap:
+	if (encap)
+		nla_nest_end(skb, encap);
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+/* Initializes the flow module.
+ * Returns zero if successful or a negative error code. */
+int ovs_flow_init(void)
+{
+	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
+					0, NULL);
+	if (flow_cache == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* Uninitializes the flow module. */
+void ovs_flow_exit(void)
+{
+	kmem_cache_destroy(flow_cache);
+}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
new file mode 100644
index 000000000000..2747dc2c4ac1
--- /dev/null
+++ b/net/openvswitch/flow.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef FLOW_H
+#define FLOW_H 1
+
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <linux/flex_array.h>
+#include <net/inet_ecn.h>
+
+struct sk_buff;
+
+struct sw_flow_actions {
+	struct rcu_head rcu;
+	u32 actions_len;
+	struct nlattr actions[];
+};
+
+struct sw_flow_key {
+	struct {
+		u32	priority;	/* Packet QoS priority. */
+		u16	in_port;	/* Input switch port (or USHRT_MAX). */
+	} phy;
+	struct {
+		u8     src[ETH_ALEN];	/* Ethernet source address. */
+		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
+		__be16 tci;		/* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+		__be16 type;		/* Ethernet frame type. */
+	} eth;
+	struct {
+		u8     proto;		/* IP protocol or lower 8 bits of ARP opcode. */
+		u8     tos;		/* IP ToS. */
+		u8     ttl;		/* IP TTL/hop limit. */
+		u8     frag;		/* One of OVS_FRAG_TYPE_*. */
+	} ip;
+	union {
+		struct {
+			struct {
+				__be32 src;	/* IP source address. */
+				__be32 dst;	/* IP destination address. */
+			} addr;
+			union {
+				struct {
+					__be16 src;		/* TCP/UDP source port. */
+					__be16 dst;		/* TCP/UDP destination port. */
+				} tp;
+				struct {
+					u8 sha[ETH_ALEN];	/* ARP source hardware address. */
+					u8 tha[ETH_ALEN];	/* ARP target hardware address. */
+				} arp;
+			};
+		} ipv4;
+		struct {
+			struct {
+				struct in6_addr src;	/* IPv6 source address. */
+				struct in6_addr dst;	/* IPv6 destination address. */
+			} addr;
+			__be32 label;			/* IPv6 flow label. */
+			struct {
+				__be16 src;		/* TCP/UDP source port. */
+				__be16 dst;		/* TCP/UDP destination port. */
+			} tp;
+			struct {
+				struct in6_addr target;	/* ND target address. */
+				u8 sll[ETH_ALEN];	/* ND source link layer address. */
+				u8 tll[ETH_ALEN];	/* ND target link layer address. */
+			} nd;
+		} ipv6;
+	};
+};
+
+struct sw_flow {
+	struct rcu_head rcu;
+	struct hlist_node hash_node[2];
+	u32 hash;
+
+	struct sw_flow_key key;
+	struct sw_flow_actions __rcu *sf_acts;
+
+	spinlock_t lock;	/* Lock for values below. */
+	unsigned long used;	/* Last used time (in jiffies). */
+	u64 packet_count;	/* Number of packets matched. */
+	u64 byte_count;		/* Number of bytes matched. */
+	u8 tcp_flags;		/* Union of seen TCP flags. */
+};
+
+struct arp_eth_header {
+	__be16      ar_hrd;	/* format of hardware address   */
+	__be16      ar_pro;	/* format of protocol address   */
+	unsigned char   ar_hln;	/* length of hardware address   */
+	unsigned char   ar_pln;	/* length of protocol address   */
+	__be16      ar_op;	/* ARP opcode (command)     */
+
+	/* Ethernet+IPv4 specific members. */
+	unsigned char       ar_sha[ETH_ALEN];	/* sender hardware address  */
+	unsigned char       ar_sip[4];		/* sender IP address        */
+	unsigned char       ar_tha[ETH_ALEN];	/* target hardware address  */
+	unsigned char       ar_tip[4];		/* target IP address        */
+} __packed;
+
+int ovs_flow_init(void);
+void ovs_flow_exit(void);
+
+struct sw_flow *ovs_flow_alloc(void);
+void ovs_flow_deferred_free(struct sw_flow *);
+void ovs_flow_free(struct sw_flow *flow);
+
+struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *);
+void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
+
+int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
+		     int *key_lenp);
+void ovs_flow_used(struct sw_flow *, struct sk_buff *);
+u64 ovs_flow_used_time(unsigned long flow_jiffies);
+
+/* Upper bound on the length of a nlattr-formatted flow key.  The longest
+ * nlattr-formatted flow key would be:
+ *
+ *                         struct  pad  nl hdr  total
+ *                         ------  ---  ------  -----
+ *  OVS_KEY_ATTR_PRIORITY      4    --     4      8
+ *  OVS_KEY_ATTR_IN_PORT       4    --     4      8
+ *  OVS_KEY_ATTR_ETHERNET     12    --     4     16
+ *  OVS_KEY_ATTR_8021Q         4    --     4      8
+ *  OVS_KEY_ATTR_ETHERTYPE     2     2     4      8
+ *  OVS_KEY_ATTR_IPV6         40    --     4     44
+ *  OVS_KEY_ATTR_ICMPV6        2     2     4      8
+ *  OVS_KEY_ATTR_ND           28    --     4     32
+ *  -------------------------------------------------
+ *  total                                       132
+ */
+#define FLOW_BUFSIZE 132
+
+int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
+int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
+		      const struct nlattr *);
+int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
+			       const struct nlattr *);
+
+#define TBL_MIN_BUCKETS		1024
+
+struct flow_table {
+	struct flex_array *buckets;
+	unsigned int count, n_buckets;
+	struct rcu_head rcu;
+	int node_ver;
+	u32 hash_seed;
+	bool keep_flows;
+};
+
+static inline int ovs_flow_tbl_count(struct flow_table *table)
+{
+	return table->count;
+}
+
+static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table)
+{
+	return (table->count > table->n_buckets);
+}
+
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
+				    struct sw_flow_key *key, int len);
+void ovs_flow_tbl_destroy(struct flow_table *table);
+void ovs_flow_tbl_deferred_destroy(struct flow_table *table);
+struct flow_table *ovs_flow_tbl_alloc(int new_size);
+struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
+struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
+void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow);
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
+u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len);
+
+struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx);
+extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
+
+#endif /* flow.h */
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
new file mode 100644
index 000000000000..8fc28b86f2b3
--- /dev/null
+++ b/net/openvswitch/vport-internal_dev.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/hardirq.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/skbuff.h>
+#include <linux/version.h>
+
+#include "datapath.h"
+#include "vport-internal_dev.h"
+#include "vport-netdev.h"
+
+struct internal_dev {
+	struct vport *vport;
+};
+
+static struct internal_dev *internal_dev_priv(struct net_device *netdev)
+{
+	return netdev_priv(netdev);
+}
+
+/* This function is only called by the kernel network layer.*/
+static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netdev,
+							struct rtnl_link_stats64 *stats)
+{
+	struct vport *vport = ovs_internal_dev_get_vport(netdev);
+	struct ovs_vport_stats vport_stats;
+
+	ovs_vport_get_stats(vport, &vport_stats);
+
+	/* The tx and rx stats need to be swapped because the
+	 * switch and host OS have opposite perspectives. */
+	stats->rx_packets	= vport_stats.tx_packets;
+	stats->tx_packets	= vport_stats.rx_packets;
+	stats->rx_bytes		= vport_stats.tx_bytes;
+	stats->tx_bytes		= vport_stats.rx_bytes;
+	stats->rx_errors	= vport_stats.tx_errors;
+	stats->tx_errors	= vport_stats.rx_errors;
+	stats->rx_dropped	= vport_stats.tx_dropped;
+	stats->tx_dropped	= vport_stats.rx_dropped;
+
+	return stats;
+}
+
+static int internal_dev_mac_addr(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+	return 0;
+}
+
+/* Called with rcu_read_lock_bh. */
+static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	rcu_read_lock();
+	ovs_vport_receive(internal_dev_priv(netdev)->vport, skb);
+	rcu_read_unlock();
+	return 0;
+}
+
+static int internal_dev_open(struct net_device *netdev)
+{
+	netif_start_queue(netdev);
+	return 0;
+}
+
+static int internal_dev_stop(struct net_device *netdev)
+{
+	netif_stop_queue(netdev);
+	return 0;
+}
+
+static void internal_dev_getinfo(struct net_device *netdev,
+				 struct ethtool_drvinfo *info)
+{
+	strcpy(info->driver, "openvswitch");
+}
+
+static const struct ethtool_ops internal_dev_ethtool_ops = {
+	.get_drvinfo	= internal_dev_getinfo,
+	.get_link	= ethtool_op_get_link,
+};
+
+static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	if (new_mtu < 68)
+		return -EINVAL;
+
+	netdev->mtu = new_mtu;
+	return 0;
+}
+
+static void internal_dev_destructor(struct net_device *dev)
+{
+	struct vport *vport = ovs_internal_dev_get_vport(dev);
+
+	ovs_vport_free(vport);
+	free_netdev(dev);
+}
+
+static const struct net_device_ops internal_dev_netdev_ops = {
+	.ndo_open = internal_dev_open,
+	.ndo_stop = internal_dev_stop,
+	.ndo_start_xmit = internal_dev_xmit,
+	.ndo_set_mac_address = internal_dev_mac_addr,
+	.ndo_change_mtu = internal_dev_change_mtu,
+	.ndo_get_stats64 = internal_dev_get_stats,
+};
+
+static void do_setup(struct net_device *netdev)
+{
+	ether_setup(netdev);
+
+	netdev->netdev_ops = &internal_dev_netdev_ops;
+
+	netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	netdev->destructor = internal_dev_destructor;
+	SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops);
+	netdev->tx_queue_len = 0;
+
+	netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
+				NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
+
+	netdev->vlan_features = netdev->features;
+	netdev->features |= NETIF_F_HW_VLAN_TX;
+	netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
+	random_ether_addr(netdev->dev_addr);
+}
+
+static struct vport *internal_dev_create(const struct vport_parms *parms)
+{
+	struct vport *vport;
+	struct netdev_vport *netdev_vport;
+	struct internal_dev *internal_dev;
+	int err;
+
+	vport = ovs_vport_alloc(sizeof(struct netdev_vport),
+				&ovs_internal_vport_ops, parms);
+	if (IS_ERR(vport)) {
+		err = PTR_ERR(vport);
+		goto error;
+	}
+
+	netdev_vport = netdev_vport_priv(vport);
+
+	netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev),
+					 parms->name, do_setup);
+	if (!netdev_vport->dev) {
+		err = -ENOMEM;
+		goto error_free_vport;
+	}
+
+	internal_dev = internal_dev_priv(netdev_vport->dev);
+	internal_dev->vport = vport;
+
+	err = register_netdevice(netdev_vport->dev);
+	if (err)
+		goto error_free_netdev;
+
+	dev_set_promiscuity(netdev_vport->dev, 1);
+	netif_start_queue(netdev_vport->dev);
+
+	return vport;
+
+error_free_netdev:
+	free_netdev(netdev_vport->dev);
+error_free_vport:
+	ovs_vport_free(vport);
+error:
+	return ERR_PTR(err);
+}
+
+static void internal_dev_destroy(struct vport *vport)
+{
+	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+
+	netif_stop_queue(netdev_vport->dev);
+	dev_set_promiscuity(netdev_vport->dev, -1);
+
+	/* unregister_netdevice() waits for an RCU grace period. */
+	unregister_netdevice(netdev_vport->dev);
+}
+
+static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
+{
+	struct net_device *netdev = netdev_vport_priv(vport)->dev;
+	int len;
+
+	len = skb->len;
+	skb->dev = netdev;
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, netdev);
+
+	netif_rx(skb);
+
+	return len;
+}
+
+const struct vport_ops ovs_internal_vport_ops = {
+	.type		= OVS_VPORT_TYPE_INTERNAL,
+	.create		= internal_dev_create,
+	.destroy	= internal_dev_destroy,
+	.get_name	= ovs_netdev_get_name,
+	.get_ifindex	= ovs_netdev_get_ifindex,
+	.send		= internal_dev_recv,
+};
+
+int ovs_is_internal_dev(const struct net_device *netdev)
+{
+	return netdev->netdev_ops == &internal_dev_netdev_ops;
+}
+
+struct vport *ovs_internal_dev_get_vport(struct net_device *netdev)
+{
+	if (!ovs_is_internal_dev(netdev))
+		return NULL;
+
+	return internal_dev_priv(netdev)->vport;
+}
diff --git a/net/openvswitch/vport-internal_dev.h b/net/openvswitch/vport-internal_dev.h
new file mode 100644
index 000000000000..3454447c5f11
--- /dev/null
+++ b/net/openvswitch/vport-internal_dev.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef VPORT_INTERNAL_DEV_H
+#define VPORT_INTERNAL_DEV_H 1
+
+#include "datapath.h"
+#include "vport.h"
+
+int ovs_is_internal_dev(const struct net_device *);
+struct vport *ovs_internal_dev_get_vport(struct net_device *);
+
+#endif /* vport-internal_dev.h */
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
new file mode 100644
index 000000000000..c1068aed03d1
--- /dev/null
+++ b/net/openvswitch/vport-netdev.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if_arp.h>
+#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/llc.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+
+#include <net/llc.h>
+
+#include "datapath.h"
+#include "vport-internal_dev.h"
+#include "vport-netdev.h"
+
+/* Must be called with rcu_read_lock. */
+static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
+{
+	if (unlikely(!vport)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	/* Make our own copy of the packet.  Otherwise we will mangle the
+	 * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
+	 * (No one comes after us, since we tell handle_bridge() that we took
+	 * the packet.) */
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		return;
+
+	skb_push(skb, ETH_HLEN);
+	ovs_vport_receive(vport, skb);
+}
+
+/* Called with rcu_read_lock and bottom-halves disabled. */
+static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	struct vport *vport;
+
+	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
+		return RX_HANDLER_PASS;
+
+	vport = ovs_netdev_get_vport(skb->dev);
+
+	netdev_port_receive(vport, skb);
+
+	return RX_HANDLER_CONSUMED;
+}
+
+static struct vport *netdev_create(const struct vport_parms *parms)
+{
+	struct vport *vport;
+	struct netdev_vport *netdev_vport;
+	int err;
+
+	vport = ovs_vport_alloc(sizeof(struct netdev_vport),
+				&ovs_netdev_vport_ops, parms);
+	if (IS_ERR(vport)) {
+		err = PTR_ERR(vport);
+		goto error;
+	}
+
+	netdev_vport = netdev_vport_priv(vport);
+
+	netdev_vport->dev = dev_get_by_name(&init_net, parms->name);
+	if (!netdev_vport->dev) {
+		err = -ENODEV;
+		goto error_free_vport;
+	}
+
+	if (netdev_vport->dev->flags & IFF_LOOPBACK ||
+	    netdev_vport->dev->type != ARPHRD_ETHER ||
+	    ovs_is_internal_dev(netdev_vport->dev)) {
+		err = -EINVAL;
+		goto error_put;
+	}
+
+	err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
+					 vport);
+	if (err)
+		goto error_put;
+
+	dev_set_promiscuity(netdev_vport->dev, 1);
+	netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
+
+	return vport;
+
+error_put:
+	dev_put(netdev_vport->dev);
+error_free_vport:
+	ovs_vport_free(vport);
+error:
+	return ERR_PTR(err);
+}
+
+static void netdev_destroy(struct vport *vport)
+{
+	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+
+	netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
+	netdev_rx_handler_unregister(netdev_vport->dev);
+	dev_set_promiscuity(netdev_vport->dev, -1);
+
+	synchronize_rcu();
+
+	dev_put(netdev_vport->dev);
+	ovs_vport_free(vport);
+}
+
+const char *ovs_netdev_get_name(const struct vport *vport)
+{
+	const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+	return netdev_vport->dev->name;
+}
+
+int ovs_netdev_get_ifindex(const struct vport *vport)
+{
+	const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+	return netdev_vport->dev->ifindex;
+}
+
+static unsigned packet_length(const struct sk_buff *skb)
+{
+	unsigned length = skb->len - ETH_HLEN;
+
+	if (skb->protocol == htons(ETH_P_8021Q))
+		length -= VLAN_HLEN;
+
+	return length;
+}
+
+static int netdev_send(struct vport *vport, struct sk_buff *skb)
+{
+	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+	int mtu = netdev_vport->dev->mtu;
+	int len;
+
+	if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
+		if (net_ratelimit())
+			pr_warn("%s: dropped over-mtu packet: %d > %d\n",
+				ovs_dp_name(vport->dp), packet_length(skb), mtu);
+		goto error;
+	}
+
+	if (unlikely(skb_warn_if_lro(skb)))
+		goto error;
+
+	skb->dev = netdev_vport->dev;
+	len = skb->len;
+	dev_queue_xmit(skb);
+
+	return len;
+
+error:
+	kfree_skb(skb);
+	ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
+	return 0;
+}
+
+/* Returns null if this device is not attached to a datapath. */
+struct vport *ovs_netdev_get_vport(struct net_device *dev)
+{
+	if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
+		return (struct vport *)
+			rcu_dereference_rtnl(dev->rx_handler_data);
+	else
+		return NULL;
+}
+
+const struct vport_ops ovs_netdev_vport_ops = {
+	.type		= OVS_VPORT_TYPE_NETDEV,
+	.create		= netdev_create,
+	.destroy	= netdev_destroy,
+	.get_name	= ovs_netdev_get_name,
+	.get_ifindex	= ovs_netdev_get_ifindex,
+	.send		= netdev_send,
+};
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
new file mode 100644
index 000000000000..fd9b008a0e6e
--- /dev/null
+++ b/net/openvswitch/vport-netdev.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef VPORT_NETDEV_H
+#define VPORT_NETDEV_H 1
+
+#include <linux/netdevice.h>
+
+#include "vport.h"
+
+struct vport *ovs_netdev_get_vport(struct net_device *dev);
+
+struct netdev_vport {
+	struct net_device *dev;
+};
+
+static inline struct netdev_vport *
+netdev_vport_priv(const struct vport *vport)
+{
+	return vport_priv(vport);
+}
+
+const char *ovs_netdev_get_name(const struct vport *);
+const char *ovs_netdev_get_config(const struct vport *);
+int ovs_netdev_get_ifindex(const struct vport *);
+
+#endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
new file mode 100644
index 000000000000..6cd760131f15
--- /dev/null
+++ b/net/openvswitch/vport.c
@@ -0,0 +1,396 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/dcache.h>
+#include <linux/etherdevice.h>
+#include <linux/if.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
+#include <linux/compat.h>
+#include <linux/version.h>
+
+#include "vport.h"
+#include "vport-internal_dev.h"
+
+/* List of statically compiled vport implementations.  Don't forget to also
+ * add yours to the list at the bottom of vport.h. */
+static const struct vport_ops *vport_ops_list[] = {
+	&ovs_netdev_vport_ops,
+	&ovs_internal_vport_ops,
+};
+
+/* Protected by RCU read lock for reading, RTNL lock for writing. */
+static struct hlist_head *dev_table;
+#define VPORT_HASH_BUCKETS 1024
+
+/**
+ *	ovs_vport_init - initialize vport subsystem
+ *
+ * Called at module load time to initialize the vport subsystem.
+ */
+int ovs_vport_init(void)
+{
+	dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
+			    GFP_KERNEL);
+	if (!dev_table)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ *	ovs_vport_exit - shutdown vport subsystem
+ *
+ * Called at module exit time to shutdown the vport subsystem.
+ */
+void ovs_vport_exit(void)
+{
+	kfree(dev_table);
+}
+
+static struct hlist_head *hash_bucket(const char *name)
+{
+	unsigned int hash = full_name_hash(name, strlen(name));
+	return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
+}
+
+/**
+ *	ovs_vport_locate - find a port that has already been created
+ *
+ * @name: name of port to find
+ *
+ * Must be called with RTNL or RCU read lock.
+ */
+struct vport *ovs_vport_locate(const char *name)
+{
+	struct hlist_head *bucket = hash_bucket(name);
+	struct vport *vport;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_rcu(vport, node, bucket, hash_node)
+		if (!strcmp(name, vport->ops->get_name(vport)))
+			return vport;
+
+	return NULL;
+}
+
+/**
+ *	ovs_vport_alloc - allocate and initialize new vport
+ *
+ * @priv_size: Size of private data area to allocate.
+ * @ops: vport device ops
+ *
+ * Allocate and initialize a new vport defined by @ops.  The vport will contain
+ * a private data area of size @priv_size that can be accessed using
+ * vport_priv().  vports that are no longer needed should be released with
+ * vport_free().
+ */
+struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
+			  const struct vport_parms *parms)
+{
+	struct vport *vport;
+	size_t alloc_size;
+
+	alloc_size = sizeof(struct vport);
+	if (priv_size) {
+		alloc_size = ALIGN(alloc_size, VPORT_ALIGN);
+		alloc_size += priv_size;
+	}
+
+	vport = kzalloc(alloc_size, GFP_KERNEL);
+	if (!vport)
+		return ERR_PTR(-ENOMEM);
+
+	vport->dp = parms->dp;
+	vport->port_no = parms->port_no;
+	vport->upcall_pid = parms->upcall_pid;
+	vport->ops = ops;
+
+	vport->percpu_stats = alloc_percpu(struct vport_percpu_stats);
+	if (!vport->percpu_stats)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&vport->stats_lock);
+
+	return vport;
+}
+
+/**
+ *	ovs_vport_free - uninitialize and free vport
+ *
+ * @vport: vport to free
+ *
+ * Frees a vport allocated with vport_alloc() when it is no longer needed.
+ *
+ * The caller must ensure that an RCU grace period has passed since the last
+ * time @vport was in a datapath.
+ */
+void ovs_vport_free(struct vport *vport)
+{
+	free_percpu(vport->percpu_stats);
+	kfree(vport);
+}
+
+/**
+ *	ovs_vport_add - add vport device (for kernel callers)
+ *
+ * @parms: Information about new vport.
+ *
+ * Creates a new vport with the specified configuration (which is dependent on
+ * device type).  RTNL lock must be held.
+ */
+struct vport *ovs_vport_add(const struct vport_parms *parms)
+{
+	struct vport *vport;
+	int err = 0;
+	int i;
+
+	ASSERT_RTNL();
+
+	for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
+		if (vport_ops_list[i]->type == parms->type) {
+			vport = vport_ops_list[i]->create(parms);
+			if (IS_ERR(vport)) {
+				err = PTR_ERR(vport);
+				goto out;
+			}
+
+			hlist_add_head_rcu(&vport->hash_node,
+					   hash_bucket(vport->ops->get_name(vport)));
+			return vport;
+		}
+	}
+
+	err = -EAFNOSUPPORT;
+
+out:
+	return ERR_PTR(err);
+}
+
+/**
+ *	ovs_vport_set_options - modify existing vport device (for kernel callers)
+ *
+ * @vport: vport to modify.
+ * @port: New configuration.
+ *
+ * Modifies an existing device with the specified configuration (which is
+ * dependent on device type).  RTNL lock must be held.
+ */
+int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
+{
+	ASSERT_RTNL();
+
+	if (!vport->ops->set_options)
+		return -EOPNOTSUPP;
+	return vport->ops->set_options(vport, options);
+}
+
+/**
+ *	ovs_vport_del - delete existing vport device
+ *
+ * @vport: vport to delete.
+ *
+ * Detaches @vport from its datapath and destroys it.  It is possible to fail
+ * for reasons such as lack of memory.  RTNL lock must be held.
+ */
+void ovs_vport_del(struct vport *vport)
+{
+	ASSERT_RTNL();
+
+	hlist_del_rcu(&vport->hash_node);
+
+	vport->ops->destroy(vport);
+}
+
+/**
+ *	ovs_vport_get_stats - retrieve device stats
+ *
+ * @vport: vport from which to retrieve the stats
+ * @stats: location to store stats
+ *
+ * Retrieves transmit, receive, and error stats for the given device.
+ *
+ * Must be called with RTNL lock or rcu_read_lock.
+ */
+void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
+{
+	int i;
+
+	memset(stats, 0, sizeof(*stats));
+
+	/* We potentially have 2 sources of stats that need to be combined:
+	 * those we have collected (split into err_stats and percpu_stats) from
+	 * set_stats() and device error stats from netdev->get_stats() (for
+	 * errors that happen  downstream and therefore aren't reported through
+	 * our vport_record_error() function).
+	 * Stats from first source are reported by ovs (OVS_VPORT_ATTR_STATS).
+	 * netdev-stats can be directly read over netlink-ioctl.
+	 */
+
+	spin_lock_bh(&vport->stats_lock);
+
+	stats->rx_errors	= vport->err_stats.rx_errors;
+	stats->tx_errors	= vport->err_stats.tx_errors;
+	stats->tx_dropped	= vport->err_stats.tx_dropped;
+	stats->rx_dropped	= vport->err_stats.rx_dropped;
+
+	spin_unlock_bh(&vport->stats_lock);
+
+	for_each_possible_cpu(i) {
+		const struct vport_percpu_stats *percpu_stats;
+		struct vport_percpu_stats local_stats;
+		unsigned int start;
+
+		percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
+
+		do {
+			start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
+			local_stats = *percpu_stats;
+		} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
+
+		stats->rx_bytes		+= local_stats.rx_bytes;
+		stats->rx_packets	+= local_stats.rx_packets;
+		stats->tx_bytes		+= local_stats.tx_bytes;
+		stats->tx_packets	+= local_stats.tx_packets;
+	}
+}
+
+/**
+ *	ovs_vport_get_options - retrieve device options
+ *
+ * @vport: vport from which to retrieve the options.
+ * @skb: sk_buff where options should be appended.
+ *
+ * Retrieves the configuration of the given device, appending an
+ * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested
+ * vport-specific attributes to @skb.
+ *
+ * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another
+ * negative error code if a real error occurred.  If an error occurs, @skb is
+ * left unmodified.
+ *
+ * Must be called with RTNL lock or rcu_read_lock.
+ */
+int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
+{
+	struct nlattr *nla;
+
+	nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
+	if (!nla)
+		return -EMSGSIZE;
+
+	if (vport->ops->get_options) {
+		int err = vport->ops->get_options(vport, skb);
+		if (err) {
+			nla_nest_cancel(skb, nla);
+			return err;
+		}
+	}
+
+	nla_nest_end(skb, nla);
+	return 0;
+}
+
+/**
+ *	ovs_vport_receive - pass up received packet to the datapath for processing
+ *
+ * @vport: vport that received the packet
+ * @skb: skb that was received
+ *
+ * Must be called with rcu_read_lock.  The packet cannot be shared and
+ * skb->data should point to the Ethernet header.  The caller must have already
+ * called compute_ip_summed() to initialize the checksumming fields.
+ */
+void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
+{
+	struct vport_percpu_stats *stats;
+
+	stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+
+	u64_stats_update_begin(&stats->sync);
+	stats->rx_packets++;
+	stats->rx_bytes += skb->len;
+	u64_stats_update_end(&stats->sync);
+
+	ovs_dp_process_received_packet(vport, skb);
+}
+
+/**
+ *	ovs_vport_send - send a packet on a device
+ *
+ * @vport: vport on which to send the packet
+ * @skb: skb to send
+ *
+ * Sends the given packet and returns the length of data sent.  Either RTNL
+ * lock or rcu_read_lock must be held.
+ */
+int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
+{
+	int sent = vport->ops->send(vport, skb);
+
+	if (likely(sent)) {
+		struct vport_percpu_stats *stats;
+
+		stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+
+		u64_stats_update_begin(&stats->sync);
+		stats->tx_packets++;
+		stats->tx_bytes += sent;
+		u64_stats_update_end(&stats->sync);
+	}
+	return sent;
+}
+
+/**
+ *	ovs_vport_record_error - indicate device error to generic stats layer
+ *
+ * @vport: vport that encountered the error
+ * @err_type: one of enum vport_err_type types to indicate the error type
+ *
+ * If using the vport generic stats layer indicate that an error of the given
+ * type has occured.
+ */
+void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type)
+{
+	spin_lock(&vport->stats_lock);
+
+	switch (err_type) {
+	case VPORT_E_RX_DROPPED:
+		vport->err_stats.rx_dropped++;
+		break;
+
+	case VPORT_E_RX_ERROR:
+		vport->err_stats.rx_errors++;
+		break;
+
+	case VPORT_E_TX_DROPPED:
+		vport->err_stats.tx_dropped++;
+		break;
+
+	case VPORT_E_TX_ERROR:
+		vport->err_stats.tx_errors++;
+		break;
+	};
+
+	spin_unlock(&vport->stats_lock);
+}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
new file mode 100644
index 000000000000..19609629dabd
--- /dev/null
+++ b/net/openvswitch/vport.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef VPORT_H
+#define VPORT_H 1
+
+#include <linux/list.h>
+#include <linux/openvswitch.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/u64_stats_sync.h>
+
+#include "datapath.h"
+
+struct vport;
+struct vport_parms;
+
+/* The following definitions are for users of the vport subsytem: */
+
+int ovs_vport_init(void);
+void ovs_vport_exit(void);
+
+struct vport *ovs_vport_add(const struct vport_parms *);
+void ovs_vport_del(struct vport *);
+
+struct vport *ovs_vport_locate(const char *name);
+
+void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
+
+int ovs_vport_set_options(struct vport *, struct nlattr *options);
+int ovs_vport_get_options(const struct vport *, struct sk_buff *);
+
+int ovs_vport_send(struct vport *, struct sk_buff *);
+
+/* The following definitions are for implementers of vport devices: */
+
+struct vport_percpu_stats {
+	u64 rx_bytes;
+	u64 rx_packets;
+	u64 tx_bytes;
+	u64 tx_packets;
+	struct u64_stats_sync sync;
+};
+
+struct vport_err_stats {
+	u64 rx_dropped;
+	u64 rx_errors;
+	u64 tx_dropped;
+	u64 tx_errors;
+};
+
+/**
+ * struct vport - one port within a datapath
+ * @rcu: RCU callback head for deferred destruction.
+ * @port_no: Index into @dp's @ports array.
+ * @dp: Datapath to which this port belongs.
+ * @node: Element in @dp's @port_list.
+ * @upcall_pid: The Netlink port to use for packets received on this port that
+ * miss the flow table.
+ * @hash_node: Element in @dev_table hash table in vport.c.
+ * @ops: Class structure.
+ * @percpu_stats: Points to per-CPU statistics used and maintained by vport
+ * @stats_lock: Protects @err_stats;
+ * @err_stats: Points to error statistics used and maintained by vport
+ */
+struct vport {
+	struct rcu_head rcu;
+	u16 port_no;
+	struct datapath	*dp;
+	struct list_head node;
+	u32 upcall_pid;
+
+	struct hlist_node hash_node;
+	const struct vport_ops *ops;
+
+	struct vport_percpu_stats __percpu *percpu_stats;
+
+	spinlock_t stats_lock;
+	struct vport_err_stats err_stats;
+};
+
+/**
+ * struct vport_parms - parameters for creating a new vport
+ *
+ * @name: New vport's name.
+ * @type: New vport's type.
+ * @options: %OVS_VPORT_ATTR_OPTIONS attribute from Netlink message, %NULL if
+ * none was supplied.
+ * @dp: New vport's datapath.
+ * @port_no: New vport's port number.
+ */
+struct vport_parms {
+	const char *name;
+	enum ovs_vport_type type;
+	struct nlattr *options;
+
+	/* For ovs_vport_alloc(). */
+	struct datapath *dp;
+	u16 port_no;
+	u32 upcall_pid;
+};
+
+/**
+ * struct vport_ops - definition of a type of virtual port
+ *
+ * @type: %OVS_VPORT_TYPE_* value for this type of virtual port.
+ * @create: Create a new vport configured as specified.  On success returns
+ * a new vport allocated with ovs_vport_alloc(), otherwise an ERR_PTR() value.
+ * @destroy: Destroys a vport.  Must call vport_free() on the vport but not
+ * before an RCU grace period has elapsed.
+ * @set_options: Modify the configuration of an existing vport.  May be %NULL
+ * if modification is not supported.
+ * @get_options: Appends vport-specific attributes for the configuration of an
+ * existing vport to a &struct sk_buff.  May be %NULL for a vport that does not
+ * have any configuration.
+ * @get_name: Get the device's name.
+ * @get_config: Get the device's configuration.
+ * @get_ifindex: Get the system interface index associated with the device.
+ * May be null if the device does not have an ifindex.
+ * @send: Send a packet on the device.  Returns the length of the packet sent.
+ */
+struct vport_ops {
+	enum ovs_vport_type type;
+
+	/* Called with RTNL lock. */
+	struct vport *(*create)(const struct vport_parms *);
+	void (*destroy)(struct vport *);
+
+	int (*set_options)(struct vport *, struct nlattr *);
+	int (*get_options)(const struct vport *, struct sk_buff *);
+
+	/* Called with rcu_read_lock or RTNL lock. */
+	const char *(*get_name)(const struct vport *);
+	void (*get_config)(const struct vport *, void *);
+	int (*get_ifindex)(const struct vport *);
+
+	int (*send)(struct vport *, struct sk_buff *);
+};
+
+enum vport_err_type {
+	VPORT_E_RX_DROPPED,
+	VPORT_E_RX_ERROR,
+	VPORT_E_TX_DROPPED,
+	VPORT_E_TX_ERROR,
+};
+
+struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
+			      const struct vport_parms *);
+void ovs_vport_free(struct vport *);
+
+#define VPORT_ALIGN 8
+
+/**
+ *	vport_priv - access private data area of vport
+ *
+ * @vport: vport to access
+ *
+ * If a nonzero size was passed in priv_size of vport_alloc() a private data
+ * area was allocated on creation.  This allows that area to be accessed and
+ * used for any purpose needed by the vport implementer.
+ */
+static inline void *vport_priv(const struct vport *vport)
+{
+	return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
+}
+
+/**
+ *	vport_from_priv - lookup vport from private data pointer
+ *
+ * @priv: Start of private data area.
+ *
+ * It is sometimes useful to translate from a pointer to the private data
+ * area to the vport, such as in the case where the private data pointer is
+ * the result of a hash table lookup.  @priv must point to the start of the
+ * private data area.
+ */
+static inline struct vport *vport_from_priv(const void *priv)
+{
+	return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
+}
+
+void ovs_vport_receive(struct vport *, struct sk_buff *);
+void ovs_vport_record_error(struct vport *, enum vport_err_type err_type);
+
+/* List of statically compiled vport implementations.  Don't forget to also
+ * add yours to the list at the top of vport.c. */
+extern const struct vport_ops ovs_netdev_vport_ops;
+extern const struct vport_ops ovs_internal_vport_ops;
+
+#endif /* vport.h */
-- 
cgit v1.2.3


From a67ba43d30bf8c1cfdc2615439455302d2408453 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 1 Dec 2011 12:54:31 -0500
Subject: asm-generic/unistd.h: support new process_vm_{readv,write} syscalls

Also prototype the "compat" functions so they can be referenced
from C code.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/unistd.h | 8 +++++++-
 include/linux/compat.h       | 9 +++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index f4c38d8c6674..2292d1af9d70 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -685,9 +685,15 @@ __SYSCALL(__NR_syncfs, sys_syncfs)
 __SYSCALL(__NR_setns, sys_setns)
 #define __NR_sendmmsg 269
 __SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg)
+#define __NR_process_vm_readv 270
+__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \
+          compat_sys_process_vm_readv)
+#define __NR_process_vm_writev 271
+__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
+          compat_sys_process_vm_writev)
 
 #undef __NR_syscalls
-#define __NR_syscalls 270
+#define __NR_syscalls 272
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 154bf5683015..66ed067fb729 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -552,5 +552,14 @@ extern ssize_t compat_rw_copy_check_uvector(int type,
 
 extern void __user *compat_alloc_user_space(unsigned long len);
 
+asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid,
+		const struct compat_iovec __user *lvec,
+		unsigned long liovcnt, const struct compat_iovec __user *rvec,
+		unsigned long riovcnt, unsigned long flags);
+asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
+		const struct compat_iovec __user *lvec,
+		unsigned long liovcnt, const struct compat_iovec __user *rvec,
+		unsigned long riovcnt, unsigned long flags);
+
 #endif /* CONFIG_COMPAT */
 #endif /* _LINUX_COMPAT_H */
-- 
cgit v1.2.3


From 117632e64d2a5f464e491fe221d7169a3814a77b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 3 Dec 2011 21:39:53 +0000
Subject: tcp: take care of misalignments

We discovered that TCP stack could retransmit misaligned skbs if a
malicious peer acknowledged sub MSS frame. This currently can happen
only if output interface is non SG enabled : If SG is enabled, tcp
builds headless skbs (all payload is included in fragments), so the tcp
trimming process only removes parts of skb fragments, header stay
aligned.

Some arches cant handle misalignments, so force a head reallocation and
shrink headroom to MAX_TCP_HEADER.

Dont care about misaligments on x86 and PPC (or other arches setting
NET_IP_ALIGN to 0)

This patch introduces __pskb_copy() which can specify the headroom of
new head, and pskb_copy() becomes a wrapper on top of __pskb_copy()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 11 +++++++++--
 net/core/skbuff.c      | 11 ++++++-----
 net/ipv4/tcp_output.c  | 10 +++++++++-
 3 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cec0657d0d32..12e6fed73f8e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -568,8 +568,9 @@ extern struct sk_buff *skb_clone(struct sk_buff *skb,
 				 gfp_t priority);
 extern struct sk_buff *skb_copy(const struct sk_buff *skb,
 				gfp_t priority);
-extern struct sk_buff *pskb_copy(struct sk_buff *skb,
-				 gfp_t gfp_mask);
+extern struct sk_buff *__pskb_copy(struct sk_buff *skb,
+				 int headroom, gfp_t gfp_mask);
+
 extern int	       pskb_expand_head(struct sk_buff *skb,
 					int nhead, int ntail,
 					gfp_t gfp_mask);
@@ -1799,6 +1800,12 @@ static inline dma_addr_t skb_frag_dma_map(struct device *dev,
 			    frag->page_offset + offset, size, dir);
 }
 
+static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
+					gfp_t gfp_mask)
+{
+	return __pskb_copy(skb, skb_headroom(skb), gfp_mask);
+}
+
 /**
  *	skb_clone_writable - is the header of a clone writable
  *	@skb: buffer to check
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 678ae4e783aa..fd3646209b65 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -840,8 +840,9 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 EXPORT_SYMBOL(skb_copy);
 
 /**
- *	pskb_copy	-	create copy of an sk_buff with private head.
+ *	__pskb_copy	-	create copy of an sk_buff with private head.
  *	@skb: buffer to copy
+ *	@headroom: headroom of new skb
  *	@gfp_mask: allocation priority
  *
  *	Make a copy of both an &sk_buff and part of its data, located
@@ -852,16 +853,16 @@ EXPORT_SYMBOL(skb_copy);
  *	The returned buffer has a reference count of 1.
  */
 
-struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
+struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
 {
-	unsigned int size = skb_end_pointer(skb) - skb->head;
+	unsigned int size = skb_headlen(skb) + headroom;
 	struct sk_buff *n = alloc_skb(size, gfp_mask);
 
 	if (!n)
 		goto out;
 
 	/* Set the data pointer */
-	skb_reserve(n, skb_headroom(skb));
+	skb_reserve(n, headroom);
 	/* Set the tail pointer and length */
 	skb_put(n, skb_headlen(skb));
 	/* Copy the bytes */
@@ -897,7 +898,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
 out:
 	return n;
 }
-EXPORT_SYMBOL(pskb_copy);
+EXPORT_SYMBOL(__pskb_copy);
 
 /**
  *	pskb_expand_head - reallocate header of &sk_buff
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 58f69acd3d22..50788d67bdb7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2147,7 +2147,15 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-	err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+	/* make sure skb->data is aligned on arches that require it */
+	if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
+		struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
+						   GFP_ATOMIC);
+		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+			     -ENOBUFS;
+	} else {
+		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+	}
 
 	if (err == 0) {
 		/* Update global TCP statistics. */
-- 
cgit v1.2.3


From 8f97339d3feb662037b86a925e692017c0b32323 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 4 Jul 2011 22:48:10 +0100
Subject: netfilter: add ipv4 reverse path filter match

This tries to do the same thing as fib_validate_source(), but differs
in several aspects.

The most important difference is that the reverse path filter built into
fib_validate_source uses the oif as iif when performing the reverse
lookup.  We do not do this, as the oif is not yet known by the time the
PREROUTING hook is invoked.

We can't wait until FORWARD chain because by the time FORWARD is invoked
ipv4 forward path may have already sent icmp messages is response
to to-be-discarded-via-rpfilter packets.

To avoid the such an additional lookup in PREROUTING, Patrick McHardy
suggested to attach the path information directly in the match
(i.e., just do what the standard ipv4 path does a bit earlier in PREROUTING).

This works, but it also has a few caveats. Most importantly, when using
marks in PREROUTING to re-route traffic based on the nfmark, -m rpfilter
would have to be used after the nfmark has been set; otherwise the nfmark
would have no effect (because the route is already attached).

Another problem would be interaction with -j TPROXY, as this target sets an
nfmark and uses ACCEPT instead of continue, i.e. such a version of
-m rpfilter cannot be used for the initial to-be-intercepted packets.

In case in turns out that the oif is required, we can add Patricks
suggestion with a new match option (e.g. --rpf-use-oif) to keep ruleset
compatibility.

Another difference to current builtin ipv4 rpfilter is that packets subject to ipsec
transformation are not automatically excluded. If you want this, simply
combine -m rpfilter with the policy match.

Packets arriving on loopback interfaces always match.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/xt_rpfilter.h |  23 ++++++
 net/ipv4/netfilter/Kconfig            |  10 +++
 net/ipv4/netfilter/Makefile           |   1 +
 net/ipv4/netfilter/ipt_rpfilter.c     | 141 ++++++++++++++++++++++++++++++++++
 4 files changed, 175 insertions(+)
 create mode 100644 include/linux/netfilter/xt_rpfilter.h
 create mode 100644 net/ipv4/netfilter/ipt_rpfilter.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_rpfilter.h b/include/linux/netfilter/xt_rpfilter.h
new file mode 100644
index 000000000000..8358d4f71952
--- /dev/null
+++ b/include/linux/netfilter/xt_rpfilter.h
@@ -0,0 +1,23 @@
+#ifndef _XT_RPATH_H
+#define _XT_RPATH_H
+
+#include <linux/types.h>
+
+enum {
+	XT_RPFILTER_LOOSE = 1 << 0,
+	XT_RPFILTER_VALID_MARK = 1 << 1,
+	XT_RPFILTER_ACCEPT_LOCAL = 1 << 2,
+	XT_RPFILTER_INVERT = 1 << 3,
+#ifdef __KERNEL__
+	XT_RPFILTER_OPTION_MASK = XT_RPFILTER_LOOSE |
+				  XT_RPFILTER_VALID_MARK |
+				  XT_RPFILTER_ACCEPT_LOCAL |
+				  XT_RPFILTER_INVERT,
+#endif
+};
+
+struct xt_rpfilter_info {
+	__u8 flags;
+};
+
+#endif
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index f19f2182894c..7e1f5cdaf11e 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -82,6 +82,16 @@ config IP_NF_MATCH_ECN
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config IP_NF_MATCH_RPFILTER
+	tristate '"rpfilter" reverse path filter match support'
+	depends on NETFILTER_ADVANCED
+	---help---
+	  This option allows you to match packets whose replies would
+	  go out via the interface the packet came in.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+	  The module will be called ipt_rpfilter.
+
 config IP_NF_MATCH_TTL
 	tristate '"ttl" match support'
 	depends on NETFILTER_ADVANCED
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index dca2082ec683..123dd88cea53 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
 # matches
 obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
+obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o
 
 # targets
 obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
new file mode 100644
index 000000000000..31371be8174b
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2011 Florian Westphal <fw@strlen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <net/ip_fib.h>
+#include <net/route.h>
+
+#include <linux/netfilter/xt_rpfilter.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("iptables: ipv4 reverse path filter match");
+
+/* don't try to find route from mcast/bcast/zeronet */
+static __be32 rpfilter_get_saddr(__be32 addr)
+{
+	if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
+	    ipv4_is_zeronet(addr))
+		return 0;
+	return addr;
+}
+
+static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
+				const struct net_device *dev, u8 flags)
+{
+	struct fib_result res;
+	bool dev_match;
+	struct net *net = dev_net(dev);
+	int ret __maybe_unused;
+
+	if (fib_lookup(net, fl4, &res))
+		return false;
+
+	if (res.type != RTN_UNICAST) {
+		if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL))
+			return false;
+	}
+	dev_match = false;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	for (ret = 0; ret < res.fi->fib_nhs; ret++) {
+		struct fib_nh *nh = &res.fi->fib_nh[ret];
+
+		if (nh->nh_dev == dev) {
+			dev_match = true;
+			break;
+		}
+	}
+#else
+	if (FIB_RES_DEV(res) == dev)
+		dev_match = true;
+#endif
+	if (dev_match || flags & XT_RPFILTER_LOOSE)
+		return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
+	return dev_match;
+}
+
+static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_rpfilter_info *info;
+	const struct iphdr *iph;
+	struct flowi4 flow;
+	bool invert;
+
+	info = par->matchinfo;
+	invert = info->flags & XT_RPFILTER_INVERT;
+
+	if (par->in->flags & IFF_LOOPBACK)
+		return true ^ invert;
+
+	iph = ip_hdr(skb);
+	if (ipv4_is_multicast(iph->daddr)) {
+		if (ipv4_is_zeronet(iph->saddr))
+			return ipv4_is_local_multicast(iph->daddr) ^ invert;
+		flow.flowi4_iif = 0;
+	} else {
+		flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex;
+	}
+
+	flow.daddr = iph->saddr;
+	flow.saddr = rpfilter_get_saddr(iph->daddr);
+	flow.flowi4_oif = 0;
+	flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
+	flow.flowi4_tos = RT_TOS(iph->tos);
+	flow.flowi4_scope = RT_SCOPE_UNIVERSE;
+
+	return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert;
+}
+
+static int rpfilter_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_rpfilter_info *info = par->matchinfo;
+	unsigned int options = ~XT_RPFILTER_OPTION_MASK;
+	if (info->flags & options) {
+		pr_info("unknown options encountered");
+		return -EINVAL;
+	}
+
+	if (strcmp(par->table, "mangle") != 0 &&
+	    strcmp(par->table, "raw") != 0) {
+		pr_info("match only valid in the \'raw\' "
+			"or \'mangle\' tables, not \'%s\'.\n", par->table);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct xt_match rpfilter_mt_reg __read_mostly = {
+	.name		= "rpfilter",
+	.family		= NFPROTO_IPV4,
+	.checkentry	= rpfilter_check,
+	.match		= rpfilter_mt,
+	.matchsize	= sizeof(struct xt_rpfilter_info),
+	.hooks		= (1 << NF_INET_PRE_ROUTING),
+	.me		= THIS_MODULE
+};
+
+static int __init rpfilter_mt_init(void)
+{
+	return xt_register_match(&rpfilter_mt_reg);
+}
+
+static void __exit rpfilter_mt_exit(void)
+{
+	xt_unregister_match(&rpfilter_mt_reg);
+}
+
+module_init(rpfilter_mt_init);
+module_exit(rpfilter_mt_exit);
-- 
cgit v1.2.3


From 10c6db110d0eb4466b59812c49088ab56218fc2e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 26 Nov 2011 02:47:31 +0100
Subject: perf: Fix loss of notification with multi-event

When you do:
        $ perf record -e cycles,cycles,cycles noploop 10

You expect about 10,000 samples for each event, i.e., 10s at
1000samples/sec. However, this is not what's happening. You
get much fewer samples, maybe 3700 samples/event:

$ perf report -D | tail -15
Aggregated stats:
           TOTAL events:      10998
            MMAP events:         66
            COMM events:          2
          SAMPLE events:      10930
cycles stats:
           TOTAL events:       3644
          SAMPLE events:       3644
cycles stats:
           TOTAL events:       3642
          SAMPLE events:       3642
cycles stats:
           TOTAL events:       3644
          SAMPLE events:       3644

On a Intel Nehalem or even AMD64, there are 4 counters capable
of measuring cycles, so there is plenty of space to measure those
events without multiplexing (even with the NMI watchdog active).
And even with multiplexing, we'd expect roughly the same number
of samples per event.

The root of the problem was that when the event that caused the buffer
to become full was not the first event passed on the cmdline, the user
notification would get lost. The notification was sent to the file
descriptor of the overflowed event but the perf tool was not polling
on it.  The perf tool aggregates all samples into a single buffer,
i.e., the buffer of the first event. Consequently, it assumes
notifications for any event will come via that descriptor.

The seemingly straight forward solution of moving the waitq into the
ringbuffer object doesn't work because of life-time issues. One could
perf_event_set_output() on a fd that you're also blocking on and cause
the old rb object to be freed while its waitq would still be
referenced by the blocked thread -> FAIL.

Therefore link all events to the ringbuffer and broadcast the wakeup
from the ringbuffer object to all possible events that could be waited
upon. This is rather ugly, and we're open to better solutions but it
works for now.

Reported-by: Stephane Eranian <eranian@google.com>
Finished-by: Stephane Eranian <eranian@google.com>
Reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20111126014731.GA7030@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h  |  1 +
 kernel/events/core.c        | 86 +++++++++++++++++++++++++++++++++++++++++++--
 kernel/events/internal.h    |  3 ++
 kernel/events/ring_buffer.c |  3 ++
 4 files changed, 91 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1e9ebe5e0091..b1f89122bf6a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -822,6 +822,7 @@ struct perf_event {
 	int				mmap_locked;
 	struct user_struct		*mmap_user;
 	struct ring_buffer		*rb;
+	struct list_head		rb_entry;
 
 	/* poll related */
 	wait_queue_head_t		waitq;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b0c1186fd97b..600c1629b64d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -185,6 +185,9 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
 static void update_context_time(struct perf_event_context *ctx);
 static u64 perf_event_time(struct perf_event *event);
 
+static void ring_buffer_attach(struct perf_event *event,
+			       struct ring_buffer *rb);
+
 void __weak perf_event_print_debug(void)	{ }
 
 extern __weak const char *perf_pmu_name(void)
@@ -3191,12 +3194,33 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	struct ring_buffer *rb;
 	unsigned int events = POLL_HUP;
 
+	/*
+	 * Race between perf_event_set_output() and perf_poll(): perf_poll()
+	 * grabs the rb reference but perf_event_set_output() overrides it.
+	 * Here is the timeline for two threads T1, T2:
+	 * t0: T1, rb = rcu_dereference(event->rb)
+	 * t1: T2, old_rb = event->rb
+	 * t2: T2, event->rb = new rb
+	 * t3: T2, ring_buffer_detach(old_rb)
+	 * t4: T1, ring_buffer_attach(rb1)
+	 * t5: T1, poll_wait(event->waitq)
+	 *
+	 * To avoid this problem, we grab mmap_mutex in perf_poll()
+	 * thereby ensuring that the assignment of the new ring buffer
+	 * and the detachment of the old buffer appear atomic to perf_poll()
+	 */
+	mutex_lock(&event->mmap_mutex);
+
 	rcu_read_lock();
 	rb = rcu_dereference(event->rb);
-	if (rb)
+	if (rb) {
+		ring_buffer_attach(event, rb);
 		events = atomic_xchg(&rb->poll, 0);
+	}
 	rcu_read_unlock();
 
+	mutex_unlock(&event->mmap_mutex);
+
 	poll_wait(file, &event->waitq, wait);
 
 	return events;
@@ -3497,6 +3521,49 @@ unlock:
 	return ret;
 }
 
+static void ring_buffer_attach(struct perf_event *event,
+			       struct ring_buffer *rb)
+{
+	unsigned long flags;
+
+	if (!list_empty(&event->rb_entry))
+		return;
+
+	spin_lock_irqsave(&rb->event_lock, flags);
+	if (!list_empty(&event->rb_entry))
+		goto unlock;
+
+	list_add(&event->rb_entry, &rb->event_list);
+unlock:
+	spin_unlock_irqrestore(&rb->event_lock, flags);
+}
+
+static void ring_buffer_detach(struct perf_event *event,
+			       struct ring_buffer *rb)
+{
+	unsigned long flags;
+
+	if (list_empty(&event->rb_entry))
+		return;
+
+	spin_lock_irqsave(&rb->event_lock, flags);
+	list_del_init(&event->rb_entry);
+	wake_up_all(&event->waitq);
+	spin_unlock_irqrestore(&rb->event_lock, flags);
+}
+
+static void ring_buffer_wakeup(struct perf_event *event)
+{
+	struct ring_buffer *rb;
+
+	rcu_read_lock();
+	rb = rcu_dereference(event->rb);
+	list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
+		wake_up_all(&event->waitq);
+	}
+	rcu_read_unlock();
+}
+
 static void rb_free_rcu(struct rcu_head *rcu_head)
 {
 	struct ring_buffer *rb;
@@ -3522,9 +3589,19 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)
 
 static void ring_buffer_put(struct ring_buffer *rb)
 {
+	struct perf_event *event, *n;
+	unsigned long flags;
+
 	if (!atomic_dec_and_test(&rb->refcount))
 		return;
 
+	spin_lock_irqsave(&rb->event_lock, flags);
+	list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
+		list_del_init(&event->rb_entry);
+		wake_up_all(&event->waitq);
+	}
+	spin_unlock_irqrestore(&rb->event_lock, flags);
+
 	call_rcu(&rb->rcu_head, rb_free_rcu);
 }
 
@@ -3547,6 +3624,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
 		vma->vm_mm->pinned_vm -= event->mmap_locked;
 		rcu_assign_pointer(event->rb, NULL);
+		ring_buffer_detach(event, rb);
 		mutex_unlock(&event->mmap_mutex);
 
 		ring_buffer_put(rb);
@@ -3701,7 +3779,7 @@ static const struct file_operations perf_fops = {
 
 void perf_event_wakeup(struct perf_event *event)
 {
-	wake_up_all(&event->waitq);
+	ring_buffer_wakeup(event);
 
 	if (event->pending_kill) {
 		kill_fasync(&event->fasync, SIGIO, event->pending_kill);
@@ -5823,6 +5901,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	INIT_LIST_HEAD(&event->group_entry);
 	INIT_LIST_HEAD(&event->event_entry);
 	INIT_LIST_HEAD(&event->sibling_list);
+	INIT_LIST_HEAD(&event->rb_entry);
+
 	init_waitqueue_head(&event->waitq);
 	init_irq_work(&event->pending, perf_pending_event);
 
@@ -6029,6 +6109,8 @@ set:
 
 	old_rb = event->rb;
 	rcu_assign_pointer(event->rb, rb);
+	if (old_rb)
+		ring_buffer_detach(event, old_rb);
 	ret = 0;
 unlock:
 	mutex_unlock(&event->mmap_mutex);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 09097dd8116c..64568a699375 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -22,6 +22,9 @@ struct ring_buffer {
 	local_t				lost;		/* nr records lost   */
 
 	long				watermark;	/* wakeup watermark  */
+	/* poll crap */
+	spinlock_t			event_lock;
+	struct list_head		event_list;
 
 	struct perf_event_mmap_page	*user_page;
 	void				*data_pages[0];
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index a2a29205cc0f..7f3011c6b57f 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -209,6 +209,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 		rb->writable = 1;
 
 	atomic_set(&rb->refcount, 1);
+
+	INIT_LIST_HEAD(&rb->event_list);
+	spin_lock_init(&rb->event_lock);
 }
 
 #ifndef CONFIG_PERF_USE_VMALLOC
-- 
cgit v1.2.3


From 55af77969fbd7a841838220ea2287432e0da8ae5 Mon Sep 17 00:00:00 2001
From: Mitsuo Hayasaka <mitsuo.hayasaka.hu@hitachi.com>
Date: Tue, 29 Nov 2011 15:08:36 +0900
Subject: x86: Panic on detection of stack overflow

Currently, messages are just output on the detection of stack
overflow, which is not sufficient for systems that need a
high reliability. This is because in general the overflow may
corrupt data, and the additional corruption may occur due to
reading them unless systems stop.

This patch adds the sysctl parameter
kernel.panic_on_stackoverflow and causes a panic when detecting
the overflows of kernel, IRQ and exception stacks except user
stack according to the parameter. It is disabled by default.

Signed-off-by: Mitsuo Hayasaka <mitsuo.hayasaka.hu@hitachi.com>
Cc: yrl.pp-manager.tt@hitachi.com
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: http://lkml.kernel.org/r/20111129060836.11076.12323.stgit@ltc219.sdl.hitachi.co.jp
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/sysctl/kernel.txt | 14 ++++++++++++++
 arch/x86/kernel/irq_32.c        |  2 ++
 arch/x86/kernel/irq_64.c        |  5 +++++
 include/linux/kernel.h          |  1 +
 kernel/sysctl.c                 |  9 +++++++++
 5 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 1f2463671a1a..6d8cd8b2c30d 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -49,6 +49,7 @@ show up in /proc/sys/kernel:
 - panic
 - panic_on_oops
 - panic_on_unrecovered_nmi
+- panic_on_stackoverflow
 - pid_max
 - powersave-nap               [ PPC only ]
 - printk
@@ -393,6 +394,19 @@ Controls the kernel's behaviour when an oops or BUG is encountered.
 
 ==============================================================
 
+panic_on_stackoverflow:
+
+Controls the kernel's behavior when detecting the overflows of
+kernel, IRQ and exception stacks except a user stack.
+This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
+
+0: try to continue operation.
+
+1: panic immediately.
+
+==============================================================
+
+
 pid_max:
 
 PID allocation wrap value.  When the kernel's next PID value
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 72090705a656..e16e99ebd7ad 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -43,6 +43,8 @@ static void print_stack_overflow(void)
 {
 	printk(KERN_WARNING "low stack detected by irq handler\n");
 	dump_stack();
+	if (sysctl_panic_on_stackoverflow)
+		panic("low stack detected by irq handler - check messages\n");
 }
 
 #else
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 928a7e909619..42552b0dce6a 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -26,6 +26,8 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
 DEFINE_PER_CPU(struct pt_regs *, irq_regs);
 EXPORT_PER_CPU_SYMBOL(irq_regs);
 
+int sysctl_panic_on_stackoverflow;
+
 /*
  * Probabilistic stack overflow check:
  *
@@ -65,6 +67,9 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 		current->comm, curbase, regs->sp,
 		irq_stack_top, irq_stack_bottom,
 		estack_top, estack_bottom);
+
+	if (sysctl_panic_on_stackoverflow)
+		panic("low stack detected by irq handler - check messages\n");
 #endif
 }
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e8b1597b5cf2..ff83683c0b9d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -341,6 +341,7 @@ extern int panic_timeout;
 extern int panic_on_oops;
 extern int panic_on_unrecovered_nmi;
 extern int panic_on_io_nmi;
+extern int sysctl_panic_on_stackoverflow;
 extern const char *print_tainted(void);
 extern void add_taint(unsigned flag);
 extern int test_taint(unsigned flag);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ae2719643854..f487f257e05e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -803,6 +803,15 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	{
+		.procname	= "panic_on_stackoverflow",
+		.data		= &sysctl_panic_on_stackoverflow,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
 	{
 		.procname	= "bootloader_type",
 		.data		= &bootloader_type,
-- 
cgit v1.2.3


From bf315173359b2f3b8b8ccca4264815e91f30be12 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 3 Dec 2011 17:06:20 +0000
Subject: regmap: Allow drivers to reinitialise the register cache at runtime

Sometimes the register map information may change in ways that drivers can
discover at runtime. For example, new revisions of a device may add new
registers. Support runtime discovery by drivers by allowing the register
cache to be reinitialised with a new function regmap_reinit_cache() which
discards the existing cache and creates a new one.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/regmap.c | 33 +++++++++++++++++++++++++++++++++
 include/linux/regmap.h       |  2 ++
 2 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 3aca18dbf367..579e85b8a684 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -230,6 +230,39 @@ err:
 }
 EXPORT_SYMBOL_GPL(regmap_init);
 
+/**
+ * regmap_reinit_cache(): Reinitialise the current register cache
+ *
+ * @map: Register map to operate on.
+ * @config: New configuration.  Only the cache data will be used.
+ *
+ * Discard any existing register cache for the map and initialize a
+ * new cache.  This can be used to restore the cache to defaults or to
+ * update the cache configuration to reflect runtime discovery of the
+ * hardware.
+ */
+int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config)
+{
+	int ret;
+
+	mutex_lock(&map->lock);
+
+	regcache_exit(map);
+
+	map->max_register = config->max_register;
+	map->writeable_reg = config->writeable_reg;
+	map->readable_reg = config->readable_reg;
+	map->volatile_reg = config->volatile_reg;
+	map->precious_reg = config->precious_reg;
+	map->cache_type = config->cache_type;
+
+	ret = regcache_init(map, config);
+
+	mutex_unlock(&map->lock);
+
+	return ret;
+}
+
 /**
  * regmap_exit(): Free a previously allocated register map
  */
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index bebda1481f23..86923a98a766 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -129,6 +129,8 @@ struct regmap *regmap_init_spi(struct spi_device *dev,
 			       const struct regmap_config *config);
 
 void regmap_exit(struct regmap *map);
+int regmap_reinit_cache(struct regmap *map,
+			const struct regmap_config *config);
 int regmap_write(struct regmap *map, unsigned int reg, unsigned int val);
 int regmap_raw_write(struct regmap *map, unsigned int reg,
 		     const void *val, size_t val_len);
-- 
cgit v1.2.3


From 209a600623cf13a8168b2f6b83643db7825abb9a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 5 Dec 2011 16:10:15 +0000
Subject: regmap: Add irq_base accessor to regmap_irq

Allows devices to discover their own interrupt without having to remember
it themselves.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/regmap-irq.c | 13 +++++++++++++
 include/linux/regmap.h           |  1 +
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index 6b8a74c3ed18..428836fc5835 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -287,3 +287,16 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d)
 	kfree(d);
 }
 EXPORT_SYMBOL_GPL(regmap_del_irq_chip);
+
+/**
+ * regmap_irq_chip_get_base(): Retrieve interrupt base for a regmap IRQ chip
+ *
+ * Useful for drivers to request their own IRQs.
+ *
+ * @data: regmap_irq controller to operate on.
+ */
+int regmap_irq_chip_get_base(struct regmap_irq_chip_data *data)
+{
+	return data->irq_base;
+}
+EXPORT_SYMBOL_GPL(regmap_irq_chip_get_base);
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index bd54cecdfdf8..e7e8953e8c2a 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -190,5 +190,6 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
 			int irq_base, struct regmap_irq_chip *chip,
 			struct regmap_irq_chip_data **data);
 void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *data);
+int regmap_irq_chip_get_base(struct regmap_irq_chip_data *data);
 
 #endif
-- 
cgit v1.2.3


From f62ef5f3e9cff065aa845e2b7f487e1810b8e57e Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Fri, 2 Dec 2011 08:21:43 +0100
Subject: x86, amd: Fix up numa_node information for AMD CPU family 15h model
 0-0fh northbridge functions

I've received complaints that the numa_node attribute for family
15h model 00-0fh (e.g. Interlagos) northbridge functions shows
-1 instead of the proper node ID.

Correct this with attached quirks (similar to quirks for other
AMD CPU families used in multi-socket systems).

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Frank Arnold <frank.arnold@amd.com>
Cc: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/20111202072143.GA31916@alberich.amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/quirks.c | 13 +++++++++++++
 include/linux/pci_ids.h  |  4 ++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index b78643d0f9a5..03920a15a632 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -553,4 +553,17 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC,
 			quirk_amd_nb_node);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK,
 			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F0,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F1,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F2,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F5,
+			quirk_amd_nb_node);
+
 #endif
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 172ba70306d1..2aaee0ca9da8 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -517,8 +517,12 @@
 #define PCI_DEVICE_ID_AMD_11H_NB_DRAM	0x1302
 #define PCI_DEVICE_ID_AMD_11H_NB_MISC	0x1303
 #define PCI_DEVICE_ID_AMD_11H_NB_LINK	0x1304
+#define PCI_DEVICE_ID_AMD_15H_NB_F0	0x1600
+#define PCI_DEVICE_ID_AMD_15H_NB_F1	0x1601
+#define PCI_DEVICE_ID_AMD_15H_NB_F2	0x1602
 #define PCI_DEVICE_ID_AMD_15H_NB_F3	0x1603
 #define PCI_DEVICE_ID_AMD_15H_NB_F4	0x1604
+#define PCI_DEVICE_ID_AMD_15H_NB_F5	0x1605
 #define PCI_DEVICE_ID_AMD_CNB17H_F3	0x1703
 #define PCI_DEVICE_ID_AMD_LANCE		0x2000
 #define PCI_DEVICE_ID_AMD_LANCE_HOME	0x2001
-- 
cgit v1.2.3


From b50cac55bf859d5b2fdcc1803a553a251b703456 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Thu, 6 Oct 2011 14:08:18 -0400
Subject: PCI/sysfs: add per pci device msi[x] irq listing (v5)

This patch adds a per-pci-device subdirectory in sysfs called:
/sys/bus/pci/devices/<device>/msi_irqs

This sub-directory exports the set of msi vectors allocated by a given
pci device, by creating a numbered sub-directory for each vector beneath
msi_irqs.  For each vector various attributes can be exported.
Currently the only attribute is called mode, which tracks the
operational mode of that vector (msi vs. msix)

Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/ABI/testing/sysfs-bus-pci |  18 ++++++
 drivers/pci/msi.c                       | 111 ++++++++++++++++++++++++++++++++
 include/linux/msi.h                     |   3 +
 include/linux/pci.h                     |   1 +
 4 files changed, 133 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 349ecf26ce10..34f51100f029 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -66,6 +66,24 @@ Description:
 		re-discover previously removed devices.
 		Depends on CONFIG_HOTPLUG.
 
+What:		/sys/bus/pci/devices/.../msi_irqs/
+Date:		September, 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		The /sys/devices/.../msi_irqs directory contains a variable set
+		of sub-directories, with each sub-directory being named after a
+		corresponding msi irq vector allocated to that device.  Each
+		numbered sub-directory N contains attributes of that irq.
+		Note that this directory is not created for device drivers which
+		do not support msi irqs
+
+What:		/sys/bus/pci/devices/.../msi_irqs/<N>/mode
+Date:		September 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		This attribute indicates the mode that the irq vector named by
+		the parent directory is in (msi vs. msix)
+
 What:		/sys/bus/pci/devices/.../remove
 Date:		January 2009
 Contact:	Linux PCI developers <linux-pci@vger.kernel.org>
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 0e6d04d7ba4f..e6b6b9c67023 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -323,6 +323,8 @@ static void free_msi_irqs(struct pci_dev *dev)
 			if (list_is_last(&entry->list, &dev->msi_list))
 				iounmap(entry->mask_base);
 		}
+		kobject_del(&entry->kobj);
+		kobject_put(&entry->kobj);
 		list_del(&entry->list);
 		kfree(entry);
 	}
@@ -403,6 +405,98 @@ void pci_restore_msi_state(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_restore_msi_state);
 
+
+#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
+#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)
+
+struct msi_attribute {
+	struct attribute        attr;
+	ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
+			 const char *buf, size_t count);
+};
+
+static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
+			     char *buf)
+{
+	return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
+}
+
+static ssize_t msi_irq_attr_show(struct kobject *kobj,
+				 struct attribute *attr, char *buf)
+{
+	struct msi_attribute *attribute = to_msi_attr(attr);
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(entry, attribute, buf);
+}
+
+static const struct sysfs_ops msi_irq_sysfs_ops = {
+	.show = msi_irq_attr_show,
+};
+
+static struct msi_attribute mode_attribute =
+	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);
+
+
+struct attribute *msi_irq_default_attrs[] = {
+	&mode_attribute.attr,
+	NULL
+};
+
+void msi_kobj_release(struct kobject *kobj)
+{
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	pci_dev_put(entry->dev);
+}
+
+static struct kobj_type msi_irq_ktype = {
+	.release = msi_kobj_release,
+	.sysfs_ops = &msi_irq_sysfs_ops,
+	.default_attrs = msi_irq_default_attrs,
+};
+
+static int populate_msi_sysfs(struct pci_dev *pdev)
+{
+	struct msi_desc *entry;
+	struct kobject *kobj;
+	int ret;
+	int count = 0;
+
+	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
+	if (!pdev->msi_kset)
+		return -ENOMEM;
+
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		kobj = &entry->kobj;
+		kobj->kset = pdev->msi_kset;
+		pci_dev_get(pdev);
+		ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
+				     "%u", entry->irq);
+		if (ret)
+			goto out_unroll;
+
+		count++;
+	}
+
+	return 0;
+
+out_unroll:
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		if (!count)
+			break;
+		kobject_del(&entry->kobj);
+		kobject_put(&entry->kobj);
+		count--;
+	}
+	return ret;
+}
+
 /**
  * msi_capability_init - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
@@ -454,6 +548,13 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 		return ret;
 	}
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		msi_mask_irq(entry, mask, ~mask);
+		free_msi_irqs(dev);
+		return ret;
+	}
+
 	/* Set MSI enabled bits	 */
 	pci_intx_for_msi(dev, 0);
 	msi_set_enable(dev, pos, 1);
@@ -574,6 +675,12 @@ static int msix_capability_init(struct pci_dev *dev,
 
 	msix_program_entries(dev, entries);
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		ret = 0;
+		goto error;
+	}
+
 	/* Set MSI-X enabled bits and unmask the function */
 	pci_intx_for_msi(dev, 0);
 	dev->msix_enabled = 1;
@@ -732,6 +839,8 @@ void pci_disable_msi(struct pci_dev *dev)
 
 	pci_msi_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msi);
 
@@ -830,6 +939,8 @@ void pci_disable_msix(struct pci_dev *dev)
 
 	pci_msix_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msix);
 
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 05acced439a3..ce93a341337d 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -1,6 +1,7 @@
 #ifndef LINUX_MSI_H
 #define LINUX_MSI_H
 
+#include <linux/kobject.h>
 #include <linux/list.h>
 
 struct msi_msg {
@@ -44,6 +45,8 @@ struct msi_desc {
 
 	/* Last set MSI message */
 	struct msi_msg msg;
+
+	struct kobject kobj;
 };
 
 /*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7cda65b5f798..84225c756bd1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -336,6 +336,7 @@ struct pci_dev {
 	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
 #ifdef CONFIG_PCI_MSI
 	struct list_head msi_list;
+	struct kset *msi_kset;
 #endif
 	struct pci_vpd *vpd;
 #ifdef CONFIG_PCI_ATS
-- 
cgit v1.2.3


From 69166fbf02c7a21745013f2de037bf7af26e4279 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 2 Nov 2011 14:07:15 -0600
Subject: PCI: Fix PRI and PASID consistency

These are extended capabilities, rename and move to proper
group for consistency.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/ats.c        | 20 ++++++++++----------
 include/linux/pci_regs.h |  4 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 7ec56fb0bd78..831e1920386c 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -174,7 +174,7 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
 	u32 max_requests;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
 
@@ -205,7 +205,7 @@ void pci_disable_pri(struct pci_dev *pdev)
 	u16 control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return;
 
@@ -226,7 +226,7 @@ bool pci_pri_enabled(struct pci_dev *pdev)
 	u16 control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return false;
 
@@ -248,7 +248,7 @@ int pci_reset_pri(struct pci_dev *pdev)
 	u16 control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
 
@@ -281,7 +281,7 @@ bool pci_pri_stopped(struct pci_dev *pdev)
 	u16 control, status;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return true;
 
@@ -310,7 +310,7 @@ int pci_pri_status(struct pci_dev *pdev)
 	u16 status, control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
 
@@ -341,7 +341,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 	u16 control, supported;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
 
@@ -375,7 +375,7 @@ void pci_disable_pasid(struct pci_dev *pdev)
 	u16 control = 0;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return;
 
@@ -399,7 +399,7 @@ int pci_pasid_features(struct pci_dev *pdev)
 	u16 supported;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
 
@@ -425,7 +425,7 @@ int pci_max_pasids(struct pci_dev *pdev)
 	u16 supported;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
 
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index b5d9657f3100..090d3a9f5b26 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -537,7 +537,9 @@
 #define PCI_EXT_CAP_ID_ARI	14
 #define PCI_EXT_CAP_ID_ATS	15
 #define PCI_EXT_CAP_ID_SRIOV	16
+#define PCI_EXT_CAP_ID_PRI	19
 #define PCI_EXT_CAP_ID_LTR	24
+#define PCI_EXT_CAP_ID_PASID	27
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
@@ -664,7 +666,6 @@
 #define  PCI_ATS_MIN_STU	12	/* shift of minimum STU block */
 
 /* Page Request Interface */
-#define PCI_PRI_CAP		0x13    /* PRI capability ID */
 #define PCI_PRI_CONTROL_OFF	0x04	/* Offset of control register */
 #define PCI_PRI_STATUS_OFF	0x06	/* Offset of status register */
 #define PCI_PRI_ENABLE		0x0001	/* Enable mask */
@@ -676,7 +677,6 @@
 #define PCI_PRI_ALLOC_REQ_OFF	0x0c	/* Cap offset for max reqs allowed */
 
 /* PASID capability */
-#define PCI_PASID_CAP		0x1b    /* PASID capability ID */
 #define PCI_PASID_CAP_OFF	0x04    /* PASID feature register */
 #define PCI_PASID_CONTROL_OFF   0x06    /* PASID control register */
 #define PCI_PASID_ENABLE	0x01	/* Enable/Supported bit */
-- 
cgit v1.2.3


From 3c076351c4027a56d5005a39a0b518a4ba393ce2 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Thu, 10 Nov 2011 16:38:33 -0500
Subject: PCI: Rework ASPM disable code

Right now we forcibly clear ASPM state on all devices if the BIOS indicates
that the feature isn't supported. Based on the Microsoft presentation
"PCI Express In Depth for Windows Vista and Beyond", I'm starting to think
that this may be an error. The implication is that unless the platform
grants full control via _OSC, Windows will not touch any PCIe features -
including ASPM. In that case clearing ASPM state would be an error unless
the platform has granted us that control.

This patch reworks the ASPM disabling code such that the actual clearing
of state is triggered by a successful handoff of PCIe control to the OS.
The general ASPM code undergoes some changes in order to ensure that the
ability to clear the bits isn't overridden by ASPM having already been
disabled. Further, this theoretically now allows for situations where
only a subset of PCIe roots hand over control, leaving the others in the
BIOS state.

It's difficult to know for sure that this is the right thing to do -
there's zero public documentation on the interaction between all of these
components. But enough vendors enable ASPM on platforms and then set this
bit that it seems likely that they're expecting the OS to leave them alone.

Measured to save around 5W on an idle Thinkpad X220.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/acpi/pci_root.c  |  7 ++++++
 drivers/pci/pci-acpi.c   |  1 -
 drivers/pci/pcie/aspm.c  | 58 ++++++++++++++++++++++++++++++------------------
 include/linux/pci-aspm.h |  4 ++--
 4 files changed, 46 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 2672c798272f..7aff6312ce7c 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -596,6 +596,13 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 		if (ACPI_SUCCESS(status)) {
 			dev_info(root->bus->bridge,
 				"ACPI _OSC control (0x%02x) granted\n", flags);
+			if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
+				/*
+				 * We have ASPM control, but the FADT indicates
+				 * that it's unsupported. Clear it.
+				 */
+				pcie_clear_aspm(root->bus);
+			}
 		} else {
 			dev_info(root->bus->bridge,
 				"ACPI _OSC request failed (%s), "
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 4ecb6408b0d6..c8e75851a314 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -395,7 +395,6 @@ static int __init acpi_pci_init(void)
 
 	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
 		printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
-		pcie_clear_aspm();
 		pcie_no_aspm();
 	}
 
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index cbfbab18be91..1cfbf228fbb1 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -68,7 +68,7 @@ struct pcie_link_state {
 	struct aspm_latency acceptable[8];
 };
 
-static int aspm_disabled, aspm_force, aspm_clear_state;
+static int aspm_disabled, aspm_force;
 static bool aspm_support_enabled = true;
 static DEFINE_MUTEX(aspm_lock);
 static LIST_HEAD(link_list);
@@ -500,9 +500,6 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
 	int pos;
 	u32 reg32;
 
-	if (aspm_clear_state)
-		return -EINVAL;
-
 	/*
 	 * Some functions in a slot might not all be PCIe functions,
 	 * very strange. Disable ASPM for the whole slot
@@ -574,9 +571,6 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
 	    pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
 		return;
 
-	if (aspm_disabled && !aspm_clear_state)
-		return;
-
 	/* VIA has a strange chipset, root port is under a bridge */
 	if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT &&
 	    pdev->bus->self)
@@ -608,7 +602,7 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
 	 * the BIOS's expectation, we'll do so once pci_enable_device() is
 	 * called.
 	 */
-	if (aspm_policy != POLICY_POWERSAVE || aspm_clear_state) {
+	if (aspm_policy != POLICY_POWERSAVE) {
 		pcie_config_aspm_path(link);
 		pcie_set_clkpm(link, policy_to_clkpm_state(link));
 	}
@@ -649,8 +643,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
 	struct pci_dev *parent = pdev->bus->self;
 	struct pcie_link_state *link, *root, *parent_link;
 
-	if ((aspm_disabled && !aspm_clear_state) || !pci_is_pcie(pdev) ||
-	    !parent || !parent->link_state)
+	if (!pci_is_pcie(pdev) || !parent || !parent->link_state)
 		return;
 	if ((parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT) &&
 	    (parent->pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
@@ -734,13 +727,18 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
  * pci_disable_link_state - disable pci device's link state, so the link will
  * never enter specific states
  */
-static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
+static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem,
+				     bool force)
 {
 	struct pci_dev *parent = pdev->bus->self;
 	struct pcie_link_state *link;
 
-	if (aspm_disabled || !pci_is_pcie(pdev))
+	if (aspm_disabled && !force)
+		return;
+
+	if (!pci_is_pcie(pdev))
 		return;
+
 	if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
 	    pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)
 		parent = pdev;
@@ -768,16 +766,31 @@ static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
 
 void pci_disable_link_state_locked(struct pci_dev *pdev, int state)
 {
-	__pci_disable_link_state(pdev, state, false);
+	__pci_disable_link_state(pdev, state, false, false);
 }
 EXPORT_SYMBOL(pci_disable_link_state_locked);
 
 void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
-	__pci_disable_link_state(pdev, state, true);
+	__pci_disable_link_state(pdev, state, true, false);
 }
 EXPORT_SYMBOL(pci_disable_link_state);
 
+void pcie_clear_aspm(struct pci_bus *bus)
+{
+	struct pci_dev *child;
+
+	/*
+	 * Clear any ASPM setup that the firmware has carried out on this bus
+	 */
+	list_for_each_entry(child, &bus->devices, bus_list) {
+		__pci_disable_link_state(child, PCIE_LINK_STATE_L0S |
+					 PCIE_LINK_STATE_L1 |
+					 PCIE_LINK_STATE_CLKPM,
+					 false, true);
+	}
+}
+
 static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
 {
 	int i;
@@ -935,6 +948,7 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
 static int __init pcie_aspm_disable(char *str)
 {
 	if (!strcmp(str, "off")) {
+		aspm_policy = POLICY_DEFAULT;
 		aspm_disabled = 1;
 		aspm_support_enabled = false;
 		printk(KERN_INFO "PCIe ASPM is disabled\n");
@@ -947,16 +961,18 @@ static int __init pcie_aspm_disable(char *str)
 
 __setup("pcie_aspm=", pcie_aspm_disable);
 
-void pcie_clear_aspm(void)
-{
-	if (!aspm_force)
-		aspm_clear_state = 1;
-}
-
 void pcie_no_aspm(void)
 {
-	if (!aspm_force)
+	/*
+	 * Disabling ASPM is intended to prevent the kernel from modifying
+	 * existing hardware state, not to clear existing state. To that end:
+	 * (a) set policy to POLICY_DEFAULT in order to avoid changing state
+	 * (b) prevent userspace from changing policy
+	 */
+	if (!aspm_force) {
+		aspm_policy = POLICY_DEFAULT;
 		aspm_disabled = 1;
+	}
 }
 
 /**
diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h
index 7cea7b6c1413..c8320144fe79 100644
--- a/include/linux/pci-aspm.h
+++ b/include/linux/pci-aspm.h
@@ -29,7 +29,7 @@ extern void pcie_aspm_pm_state_change(struct pci_dev *pdev);
 extern void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
 extern void pci_disable_link_state(struct pci_dev *pdev, int state);
 extern void pci_disable_link_state_locked(struct pci_dev *pdev, int state);
-extern void pcie_clear_aspm(void);
+extern void pcie_clear_aspm(struct pci_bus *bus);
 extern void pcie_no_aspm(void);
 #else
 static inline void pcie_aspm_init_link_state(struct pci_dev *pdev)
@@ -47,7 +47,7 @@ static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
 static inline void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
 }
-static inline void pcie_clear_aspm(void)
+static inline void pcie_clear_aspm(struct pci_bus *bus)
 {
 }
 static inline void pcie_no_aspm(void)
-- 
cgit v1.2.3


From d90116ea38f7768dac0349f01ffbc2663d63b7e9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 6 Nov 2011 23:11:28 +0100
Subject: PCI/ACPI: Make acpiphp ignore root bridges using SHPC native hotplug

If the kernel has requested control of the SHPC native hotplug
feature for a given root bridge, the acpiphp driver should not try
to handle that root bridge and it should leave it to shpchp.
Failing to do so causes problems to happen if shpchp is loaded
and unloaded before loading acpiphp (ACPI-based hotplug won't work
in that case anyway).

To address this issue make find_root_bridges() ignore PCI root
bridges with SHPC native hotplug enabled and make add_bridge()
return error code if SHPC native hotplug is enabled for the given
root bridge.  This causes acpiphp to refuse to load if SHPC native
hotplug is enabled for all root bridges and to refuse binding to
the root bridges with SHPC native hotplug enabled.

Reviewed-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/acpiphp_glue.c | 4 ++--
 include/linux/acpi.h               | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index fce1c54a0c8d..ba43c037de80 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -467,7 +467,7 @@ static int add_bridge(acpi_handle handle)
 	 * granted by the BIOS for it.
 	 */
 	root = acpi_pci_find_root(handle);
-	if (root && (root->osc_control_set & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL))
+	if (root && (root->osc_control_set & OSC_PCI_NATIVE_HOTPLUG))
 		return -ENODEV;
 
 	/* if the bridge doesn't have _STA, we assume it is always there */
@@ -1395,7 +1395,7 @@ find_root_bridges(acpi_handle handle, u32 lvl, void *context, void **rv)
 	if (!root)
 		return AE_OK;
 
-	if (root->osc_control_set & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL)
+	if (root->osc_control_set & OSC_PCI_NATIVE_HOTPLUG)
 		return AE_OK;
 
 	(*count)++;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6001b4da39dd..627a3a42e4d8 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -302,6 +302,10 @@ extern bool osc_sb_apei_support_acked;
 				OSC_PCI_EXPRESS_PME_CONTROL |		\
 				OSC_PCI_EXPRESS_AER_CONTROL |		\
 				OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
+
+#define OSC_PCI_NATIVE_HOTPLUG	(OSC_PCI_EXPRESS_NATIVE_HP_CONTROL |	\
+				OSC_SHPC_NATIVE_HP_CONTROL)
+
 extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
 					     u32 *mask, u32 req);
 extern void acpi_early_init(void);
-- 
cgit v1.2.3


From 91f57d5e1be3db1e079c8696f1eab214f1c7922d Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 11 Nov 2011 10:07:36 -0700
Subject: PCI: More PRI/PASID cleanup

More consistency cleanups.  Drop the _OFF, separate and indent
CTRL/CAP/STATUS bit definitions.  This helped find the previous
mis-use of bit 0 in the PASID capability register.

Reviewed-by: Joerg Roedel <joerg.roedel@amd.com>
Tested-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/ats.c        | 69 ++++++++++++++++++++++++------------------------
 include/linux/pci_regs.h | 30 +++++++++++----------
 2 files changed, 51 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 8e95a123d37a..2df49af6cc90 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -178,17 +178,18 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-	pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF,  &status);
-	if ((control & PCI_PRI_ENABLE) || !(status & PCI_PRI_STATUS_STOPPED))
+	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+	pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
+	if ((control & PCI_PRI_CTRL_ENABLE) ||
+	    !(status & PCI_PRI_STATUS_STOPPED))
 		return -EBUSY;
 
-	pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ_OFF, &max_requests);
+	pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests);
 	reqs = min(max_requests, reqs);
-	pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ_OFF, reqs);
+	pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs);
 
-	control |= PCI_PRI_ENABLE;
-	pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
+	control |= PCI_PRI_CTRL_ENABLE;
+	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 
 	return 0;
 }
@@ -209,9 +210,9 @@ void pci_disable_pri(struct pci_dev *pdev)
 	if (!pos)
 		return;
 
-	pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-	control &= ~PCI_PRI_ENABLE;
-	pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
+	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+	control &= ~PCI_PRI_CTRL_ENABLE;
+	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 }
 EXPORT_SYMBOL_GPL(pci_disable_pri);
 
@@ -230,9 +231,9 @@ bool pci_pri_enabled(struct pci_dev *pdev)
 	if (!pos)
 		return false;
 
-	pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
+	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
 
-	return (control & PCI_PRI_ENABLE) ? true : false;
+	return (control & PCI_PRI_CTRL_ENABLE) ? true : false;
 }
 EXPORT_SYMBOL_GPL(pci_pri_enabled);
 
@@ -252,13 +253,13 @@ int pci_reset_pri(struct pci_dev *pdev)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-	if (control & PCI_PRI_ENABLE)
+	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+	if (control & PCI_PRI_CTRL_ENABLE)
 		return -EBUSY;
 
-	control |= PCI_PRI_RESET;
+	control |= PCI_PRI_CTRL_RESET;
 
-	pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
+	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 
 	return 0;
 }
@@ -285,10 +286,10 @@ bool pci_pri_stopped(struct pci_dev *pdev)
 	if (!pos)
 		return true;
 
-	pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-	pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF,  &status);
+	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+	pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
 
-	if (control & PCI_PRI_ENABLE)
+	if (control & PCI_PRI_CTRL_ENABLE)
 		return false;
 
 	return (status & PCI_PRI_STATUS_STOPPED) ? true : false;
@@ -314,11 +315,11 @@ int pci_pri_status(struct pci_dev *pdev)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-	pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF,  &status);
+	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+	pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
 
 	/* Stopped bit is undefined when enable == 1, so clear it */
-	if (control & PCI_PRI_ENABLE)
+	if (control & PCI_PRI_CTRL_ENABLE)
 		status &= ~PCI_PRI_STATUS_STOPPED;
 
 	return status;
@@ -345,21 +346,21 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, &control);
-	pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF,     &supported);
+	pci_read_config_word(pdev, pos + PCI_PASID_CTRL, &control);
+	pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
 
-	if (control & PCI_PASID_ENABLE)
+	if (control & PCI_PASID_CTRL_ENABLE)
 		return -EINVAL;
 
-	supported &= PCI_PASID_EXEC | PCI_PASID_PRIV;
+	supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
 
 	/* User wants to enable anything unsupported? */
 	if ((supported & features) != features)
 		return -EINVAL;
 
-	control = PCI_PASID_ENABLE | features;
+	control = PCI_PASID_CTRL_ENABLE | features;
 
-	pci_write_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, control);
+	pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
 
 	return 0;
 }
@@ -379,7 +380,7 @@ void pci_disable_pasid(struct pci_dev *pdev)
 	if (!pos)
 		return;
 
-	pci_write_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, control);
+	pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
 }
 EXPORT_SYMBOL_GPL(pci_disable_pasid);
 
@@ -390,8 +391,8 @@ EXPORT_SYMBOL_GPL(pci_disable_pasid);
  * Returns a negative value when no PASI capability is present.
  * Otherwise is returns a bitmask with supported features. Current
  * features reported are:
- * PCI_PASID_EXEC - Execute permission supported
- * PCI_PASID_PRIV - Priviledged mode supported
+ * PCI_PASID_CAP_EXEC - Execute permission supported
+ * PCI_PASID_CAP_PRIV - Priviledged mode supported
  */
 int pci_pasid_features(struct pci_dev *pdev)
 {
@@ -402,9 +403,9 @@ int pci_pasid_features(struct pci_dev *pdev)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported);
+	pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
 
-	supported &= PCI_PASID_EXEC | PCI_PASID_PRIV;
+	supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
 
 	return supported;
 }
@@ -428,7 +429,7 @@ int pci_max_pasids(struct pci_dev *pdev)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported);
+	pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
 
 	supported = (supported & PASID_NUMBER_MASK) >> PASID_NUMBER_SHIFT;
 
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 090d3a9f5b26..28fe380cb19d 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -666,22 +666,24 @@
 #define  PCI_ATS_MIN_STU	12	/* shift of minimum STU block */
 
 /* Page Request Interface */
-#define PCI_PRI_CONTROL_OFF	0x04	/* Offset of control register */
-#define PCI_PRI_STATUS_OFF	0x06	/* Offset of status register */
-#define PCI_PRI_ENABLE		0x0001	/* Enable mask */
-#define PCI_PRI_RESET		0x0002	/* Reset bit mask */
-#define PCI_PRI_STATUS_RF	0x0001  /* Request Failure */
-#define PCI_PRI_STATUS_UPRGI	0x0002  /* Unexpected PRG index */
-#define PCI_PRI_STATUS_STOPPED	0x0100  /* PRI Stopped */
-#define PCI_PRI_MAX_REQ_OFF	0x08	/* Cap offset for max reqs supported */
-#define PCI_PRI_ALLOC_REQ_OFF	0x0c	/* Cap offset for max reqs allowed */
+#define PCI_PRI_CTRL		0x04	/* PRI control register */
+#define  PCI_PRI_CTRL_ENABLE	0x01	/* Enable */
+#define  PCI_PRI_CTRL_RESET	0x02	/* Reset */
+#define PCI_PRI_STATUS		0x06	/* PRI status register */
+#define  PCI_PRI_STATUS_RF	0x001	/* Response Failure */
+#define  PCI_PRI_STATUS_UPRGI	0x002	/* Unexpected PRG index */
+#define  PCI_PRI_STATUS_STOPPED	0x100	/* PRI Stopped */
+#define PCI_PRI_MAX_REQ		0x08	/* PRI max reqs supported */
+#define PCI_PRI_ALLOC_REQ	0x0c	/* PRI max reqs allowed */
 
 /* PASID capability */
-#define PCI_PASID_CAP_OFF	0x04    /* PASID feature register */
-#define PCI_PASID_CONTROL_OFF   0x06    /* PASID control register */
-#define PCI_PASID_ENABLE	0x01	/* Enable/Supported bit */
-#define PCI_PASID_EXEC		0x02	/* Exec permissions Enable/Supported */
-#define PCI_PASID_PRIV		0x04	/* Priviledge Mode Enable/Support */
+#define PCI_PASID_CAP		0x04    /* PASID feature register */
+#define  PCI_PASID_CAP_EXEC	0x02	/* Exec permissions Supported */
+#define  PCI_PASID_CAP_PRIV	0x04	/* Priviledge Mode Supported */
+#define PCI_PASID_CTRL		0x06    /* PASID control register */
+#define  PCI_PASID_CTRL_ENABLE	0x01	/* Enable bit */
+#define  PCI_PASID_CTRL_EXEC	0x02	/* Exec permissions Enable */
+#define  PCI_PASID_CTRL_PRIV	0x04	/* Priviledge Mode Enable */
 
 /* Single Root I/O Virtualization */
 #define PCI_SRIOV_CAP		0x04	/* SR-IOV Capabilities */
-- 
cgit v1.2.3


From 27b14b56af081ec7edeefb3a38b2c9577cc5ef48 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 1 Nov 2011 09:09:35 +0800
Subject: tracing: Restore system filter behavior

Though not all events have field 'prev_pid', it was allowed to do this:

  # echo 'prev_pid == 100' > events/sched/filter

but commit 75b8e98263fdb0bfbdeba60d4db463259f1fe8a2 (tracing/filter: Swap
entire filter of events) broke it without any reason.

Link: http://lkml.kernel.org/r/4EAF46CF.8040408@cn.fujitsu.com

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h       | 2 ++
 kernel/trace/trace_events_filter.c | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 96efa6794ea5..c3da42dd22ba 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -172,6 +172,7 @@ enum {
 	TRACE_EVENT_FL_FILTERED_BIT,
 	TRACE_EVENT_FL_RECORDED_CMD_BIT,
 	TRACE_EVENT_FL_CAP_ANY_BIT,
+	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
 };
 
 enum {
@@ -179,6 +180,7 @@ enum {
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
 	TRACE_EVENT_FL_RECORDED_CMD	= (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
 	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
+	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
 };
 
 struct ftrace_event_call {
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index d6e7926dcd26..95dc31efd6dd 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1649,7 +1649,9 @@ static int replace_system_preds(struct event_subsystem *system,
 		 */
 		err = replace_preds(call, NULL, ps, filter_string, true);
 		if (err)
-			goto fail;
+			call->flags |= TRACE_EVENT_FL_NO_SET_FILTER;
+		else
+			call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER;
 	}
 
 	list_for_each_entry(call, &ftrace_events, list) {
@@ -1658,6 +1660,9 @@ static int replace_system_preds(struct event_subsystem *system,
 		if (strcmp(call->class->system, system->name) != 0)
 			continue;
 
+		if (call->flags & TRACE_EVENT_FL_NO_SET_FILTER)
+			continue;
+
 		filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
 		if (!filter_item)
 			goto fail_mem;
-- 
cgit v1.2.3


From d9a861cce10596ae1f10cffefe1ad4519a253475 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Thu, 1 Dec 2011 17:21:06 +0800
Subject: regulator: pass device_node to of_get_regulator_init_data()

It's not always true that the device_node of regulator can be found
at dev->of_node at the time when of_get_regulator_init_data() is being
called, because in some cases the regulator nodes in device tree do
not have 'struct device' behind them until regulator_dev gets created
for it by core function regulator_register().

The patch adds device_node as a new parameter to
of_get_regulator_init_data(), so that caller can pass in the node of
regulator directly.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: Liam Girdwood <lrg@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/fixed.c              | 2 +-
 drivers/regulator/of_regulator.c       | 7 ++++---
 include/linux/regulator/of_regulator.h | 6 ++++--
 3 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index ebec5e06dfa1..12d08c694ae2 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -64,7 +64,7 @@ of_get_fixed_voltage_config(struct device *dev)
 	if (!config)
 		return NULL;
 
-	config->init_data = of_get_regulator_init_data(dev);
+	config->init_data = of_get_regulator_init_data(dev, dev->of_node);
 	if (!config->init_data)
 		return NULL;
 
diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index b7b3fc3b09e2..f1651eb69648 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -69,18 +69,19 @@ static void of_get_regulation_constraints(struct device_node *np,
  * tree node, returns a pointer to the populated struture or NULL if memory
  * alloc fails.
  */
-struct regulator_init_data *of_get_regulator_init_data(struct device *dev)
+struct regulator_init_data *of_get_regulator_init_data(struct device *dev,
+						struct device_node *node)
 {
 	struct regulator_init_data *init_data;
 
-	if (!dev->of_node)
+	if (!node)
 		return NULL;
 
 	init_data = devm_kzalloc(dev, sizeof(*init_data), GFP_KERNEL);
 	if (!init_data)
 		return NULL; /* Out of memory? */
 
-	of_get_regulation_constraints(dev->of_node, &init_data);
+	of_get_regulation_constraints(node, &init_data);
 	return init_data;
 }
 EXPORT_SYMBOL_GPL(of_get_regulator_init_data);
diff --git a/include/linux/regulator/of_regulator.h b/include/linux/regulator/of_regulator.h
index d83a98d3e3fd..769704f296e5 100644
--- a/include/linux/regulator/of_regulator.h
+++ b/include/linux/regulator/of_regulator.h
@@ -8,10 +8,12 @@
 
 #if defined(CONFIG_OF)
 extern struct regulator_init_data
-	*of_get_regulator_init_data(struct device *dev);
+	*of_get_regulator_init_data(struct device *dev,
+				    struct device_node *node);
 #else
 static inline struct regulator_init_data
-	*of_get_regulator_init_data(struct device *dev)
+	*of_get_regulator_init_data(struct device *dev,
+				    struct device_node *node)
 {
 	return NULL;
 }
-- 
cgit v1.2.3


From 63afe12f4be3b08597ae41ce7c0837bfc106b0ac Mon Sep 17 00:00:00 2001
From: "sjur.brandeland@stericsson.com" <sjur.brandeland@stericsson.com>
Date: Sun, 4 Dec 2011 11:22:52 +0000
Subject: if_ether.h: Add IEEE 802.1 Local Experimental Ethertype 1.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add EthType 0x88b5.
This Ethertype value is available for public use for prototype and
vendor-specific protocol development,as defined in Amendment 802a
to IEEE Std 802.

Signed-off-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index e473003e4bda..56d907a2c804 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -79,6 +79,7 @@
 #define ETH_P_PAE	0x888E		/* Port Access Entity (IEEE 802.1X) */
 #define ETH_P_AOE	0x88A2		/* ATA over Ethernet		*/
 #define ETH_P_8021AD	0x88A8          /* 802.1ad Service VLAN		*/
+#define ETH_P_802_EX1	0x88B5		/* 802.1 Local Experimental 1.  */
 #define ETH_P_TIPC	0x88CA		/* TIPC 			*/
 #define ETH_P_8021AH	0x88E7          /* 802.1ah Backbone Service Tag */
 #define ETH_P_1588	0x88F7		/* IEEE 1588 Timesync */
-- 
cgit v1.2.3


From 0f5a2601284237e2ba089389fd75d67f77626cef Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 16 Nov 2011 14:38:16 +0100
Subject: perf: Avoid a useless pmu_disable() in the perf-tick

Gleb writes:

 > Currently pmu is disabled and re-enabled on each timer interrupt even
 > when no rotation or frequency adjustment is needed. On Intel CPU this
 > results in two writes into PERF_GLOBAL_CTRL MSR per tick. On bare metal
 > it does not cause significant slowdown, but when running perf in a virtual
 > machine it leads to 20% slowdown on my machine.

Cure this by keeping a perf_event_context::nr_freq counter that counts the
number of active events that require frequency adjustments and use this in a
similar fashion to the already existing nr_events != nr_active test in
perf_rotate_context().

By being able to exclude both rotation and frequency adjustments a-priory for
the common case we can avoid the otherwise superfluous PMU disable.

Suggested-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-515yhoatehd3gza7we9fapaa@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |  1 +
 kernel/events/core.c       | 48 ++++++++++++++++++++++++++++++----------------
 2 files changed, 33 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b1f89122bf6a..cb44c9e75660 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -890,6 +890,7 @@ struct perf_event_context {
 	int				nr_active;
 	int				is_active;
 	int				nr_stat;
+	int				nr_freq;
 	int				rotate_disable;
 	atomic_t			refcount;
 	struct task_struct		*task;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a355ffb0b28f..b3fed52aaf20 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1130,6 +1130,8 @@ event_sched_out(struct perf_event *event,
 	if (!is_software_event(event))
 		cpuctx->active_oncpu--;
 	ctx->nr_active--;
+	if (event->attr.freq && event->attr.sample_freq)
+		ctx->nr_freq--;
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 }
@@ -1407,6 +1409,8 @@ event_sched_in(struct perf_event *event,
 	if (!is_software_event(event))
 		cpuctx->active_oncpu++;
 	ctx->nr_active++;
+	if (event->attr.freq && event->attr.sample_freq)
+		ctx->nr_freq++;
 
 	if (event->attr.exclusive)
 		cpuctx->exclusive = 1;
@@ -2329,6 +2333,9 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 	u64 interrupts, now;
 	s64 delta;
 
+	if (!ctx->nr_freq)
+		return;
+
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (event->state != PERF_EVENT_STATE_ACTIVE)
 			continue;
@@ -2384,12 +2391,14 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
 	u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
 	struct perf_event_context *ctx = NULL;
-	int rotate = 0, remove = 1;
+	int rotate = 0, remove = 1, freq = 0;
 
 	if (cpuctx->ctx.nr_events) {
 		remove = 0;
 		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
 			rotate = 1;
+		if (cpuctx->ctx.nr_freq)
+			freq = 1;
 	}
 
 	ctx = cpuctx->task_ctx;
@@ -2397,33 +2406,40 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 		remove = 0;
 		if (ctx->nr_events != ctx->nr_active)
 			rotate = 1;
+		if (ctx->nr_freq)
+			freq = 1;
 	}
 
+	if (!rotate && !freq)
+		goto done;
+
 	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 	perf_pmu_disable(cpuctx->ctx.pmu);
-	perf_ctx_adjust_freq(&cpuctx->ctx, interval);
-	if (ctx)
-		perf_ctx_adjust_freq(ctx, interval);
 
-	if (!rotate)
-		goto done;
+	if (freq) {
+		perf_ctx_adjust_freq(&cpuctx->ctx, interval);
+		if (ctx)
+			perf_ctx_adjust_freq(ctx, interval);
+	}
 
-	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-	if (ctx)
-		ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+	if (rotate) {
+		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+		if (ctx)
+			ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
 
-	rotate_ctx(&cpuctx->ctx);
-	if (ctx)
-		rotate_ctx(ctx);
+		rotate_ctx(&cpuctx->ctx);
+		if (ctx)
+			rotate_ctx(ctx);
+
+		perf_event_sched_in(cpuctx, ctx, current);
+	}
 
-	perf_event_sched_in(cpuctx, ctx, current);
+	perf_pmu_enable(cpuctx->ctx.pmu);
+	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 
 done:
 	if (remove)
 		list_del_init(&cpuctx->rotation_list);
-
-	perf_pmu_enable(cpuctx->ctx.pmu);
-	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 }
 
 void perf_event_task_tick(void)
-- 
cgit v1.2.3


From 1e2ad28f80b4e155678259238f51edebc19e4014 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Fri, 18 Nov 2011 12:35:21 +0100
Subject: perf, x86: Implement event scheduler helper functions

This patch introduces x86 perf scheduler code helper functions. We
need this to later add more complex functionality to support
overlapping counter constraints (next patch).

The algorithm is modified so that the range of weight values is now
generated from the constraints. There shouldn't be other functional
changes.

With the helper functions the scheduler is controlled. There are
functions to initialize, traverse the event list, find unused counters
etc. The scheduler keeps its own state.

V3:
* Added macro for_each_set_bit_cont().
* Changed functions interfaces of perf_sched_find_counter() and
  perf_sched_next_event() to use bool as return value.
* Added some comments to make code better understandable.

V4:
* Fix broken event assignment if weight of the first event is not
  wmin (perf_sched_init()).

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1321616122-1533-2-git-send-email-robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 185 ++++++++++++++++++++++++++++-----------
 include/linux/bitops.h           |  10 ++-
 2 files changed, 140 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bda212a0010..5a469d3d0c66 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,18 +484,145 @@ static inline int is_x86_event(struct perf_event *event)
 	return event->pmu == &pmu;
 }
 
+/*
+ * Event scheduler state:
+ *
+ * Assign events iterating over all events and counters, beginning
+ * with events with least weights first. Keep the current iterator
+ * state in struct sched_state.
+ */
+struct sched_state {
+	int	weight;
+	int	event;		/* event index */
+	int	counter;	/* counter index */
+	int	unassigned;	/* number of events to be assigned left */
+	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+};
+
+struct perf_sched {
+	int			max_weight;
+	int			max_events;
+	struct event_constraint	**constraints;
+	struct sched_state	state;
+};
+
+/*
+ * Initialize interator that runs through all events and counters.
+ */
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+			    int num, int wmin, int wmax)
+{
+	int idx;
+
+	memset(sched, 0, sizeof(*sched));
+	sched->max_events	= num;
+	sched->max_weight	= wmax;
+	sched->constraints	= c;
+
+	for (idx = 0; idx < num; idx++) {
+		if (c[idx]->weight == wmin)
+			break;
+	}
+
+	sched->state.event	= idx;		/* start with min weight */
+	sched->state.weight	= wmin;
+	sched->state.unassigned	= num;
+}
+
+/*
+ * Select a counter for the current event to schedule. Return true on
+ * success.
+ */
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+	struct event_constraint *c;
+	int idx;
+
+	if (!sched->state.unassigned)
+		return false;
+
+	if (sched->state.event >= sched->max_events)
+		return false;
+
+	c = sched->constraints[sched->state.event];
+
+	/* Grab the first unused counter starting with idx */
+	idx = sched->state.counter;
+	for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+		if (!__test_and_set_bit(idx, sched->state.used))
+			break;
+	}
+	sched->state.counter = idx;
+
+	if (idx >= X86_PMC_IDX_MAX)
+		return false;
+
+	return true;
+}
+
+/*
+ * Go through all unassigned events and find the next one to schedule.
+ * Take events with the least weight first. Return true on success.
+ */
+static bool perf_sched_next_event(struct perf_sched *sched)
+{
+	struct event_constraint *c;
+
+	if (!sched->state.unassigned || !--sched->state.unassigned)
+		return false;
+
+	do {
+		/* next event */
+		sched->state.event++;
+		if (sched->state.event >= sched->max_events) {
+			/* next weight */
+			sched->state.event = 0;
+			sched->state.weight++;
+			if (sched->state.weight > sched->max_weight)
+				return false;
+		}
+		c = sched->constraints[sched->state.event];
+	} while (c->weight != sched->state.weight);
+
+	sched->state.counter = 0;	/* start with first counter */
+
+	return true;
+}
+
+/*
+ * Assign a counter for each event.
+ */
+static int perf_assign_events(struct event_constraint **constraints, int n,
+			      int wmin, int wmax, int *assign)
+{
+	struct perf_sched sched;
+
+	perf_sched_init(&sched, constraints, n, wmin, wmax);
+
+	do {
+		if (!perf_sched_find_counter(&sched))
+			break;	/* failed */
+		if (assign)
+			assign[sched.state.event] = sched.state.counter;
+	} while (perf_sched_next_event(&sched));
+
+	return sched.state.unassigned;
+}
+
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
 	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	int i, j, w, wmax, num = 0;
+	int i, wmin, wmax, num = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
-	for (i = 0; i < n; i++) {
+	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
 		constraints[i] = c;
+		wmin = min(wmin, c->weight);
+		wmax = max(wmax, c->weight);
 	}
 
 	/*
@@ -521,59 +648,11 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		if (assign)
 			assign[i] = hwc->idx;
 	}
-	if (i == n)
-		goto done;
 
-	/*
-	 * begin slow path
-	 */
+	/* slow path */
+	if (i != n)
+		num = perf_assign_events(constraints, n, wmin, wmax, assign);
 
-	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
-
-	/*
-	 * weight = number of possible counters
-	 *
-	 * 1    = most constrained, only works on one counter
-	 * wmax = least constrained, works on any counter
-	 *
-	 * assign events to counters starting with most
-	 * constrained events.
-	 */
-	wmax = x86_pmu.num_counters;
-
-	/*
-	 * when fixed event counters are present,
-	 * wmax is incremented by 1 to account
-	 * for one more choice
-	 */
-	if (x86_pmu.num_counters_fixed)
-		wmax++;
-
-	for (w = 1, num = n; num && w <= wmax; w++) {
-		/* for each event */
-		for (i = 0; num && i < n; i++) {
-			c = constraints[i];
-			hwc = &cpuc->event_list[i]->hw;
-
-			if (c->weight != w)
-				continue;
-
-			for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
-				if (!test_bit(j, used_mask))
-					break;
-			}
-
-			if (j == X86_PMC_IDX_MAX)
-				break;
-
-			__set_bit(j, used_mask);
-
-			if (assign)
-				assign[i] = j;
-			num--;
-		}
-	}
-done:
 	/*
 	 * scheduling failed or is just a simulation,
 	 * free resources if necessary
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a3ef66a2a083..3c1063acb2ab 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -22,8 +22,14 @@ extern unsigned long __sw_hweight64(__u64 w);
 #include <asm/bitops.h>
 
 #define for_each_set_bit(bit, addr, size) \
-	for ((bit) = find_first_bit((addr), (size)); \
-	     (bit) < (size); \
+	for ((bit) = find_first_bit((addr), (size));		\
+	     (bit) < (size);					\
+	     (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_cont(bit, addr, size) \
+	for ((bit) = find_next_bit((addr), (size), (bit));	\
+	     (bit) < (size);					\
 	     (bit) = find_next_bit((addr), (size), (bit) + 1))
 
 static __inline__ int get_bitmask_order(unsigned int count)
-- 
cgit v1.2.3


From b202952075f62603bea9bfb6ebc6b0420db11949 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 27 Nov 2011 17:59:09 +0200
Subject: perf, core: Rate limit perf_sched_events jump_label patching

jump_lable patching is very expensive operation that involves pausing all
cpus. The patching of perf_sched_events jump_label is easily controllable
from userspace by unprivileged user.

When te user runs a loop like this:

  "while true; do perf stat -e cycles true; done"

... the performance of my test application that just increments a counter
for one second drops by 4%.

This is on a 16 cpu box with my test application using only one of
them. An impact on a real server doing real work will be worse.

Performance of KVM PMU drops nearly 50% due to jump_lable for "perf
record" since KVM PMU implementation creates and destroys perf event
frequently.

This patch introduces a way to rate limit jump_label patching and uses
it to fix the above problem.

I believe that as jump_label use will spread the problem will become more
common and thus solving it in a generic code is appropriate. Also fixing
it in the perf code would result in moving jump_label accounting logic to
perf code with all the ifdefs in case of JUMP_LABEL=n kernel. With this
patch all details are nicely hidden inside jump_label code.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20111127155909.GO2557@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/jump_label.h | 24 ++++++++++++++++++++++++
 include/linux/perf_event.h |  6 +++---
 kernel/events/core.c       | 13 ++++++++-----
 kernel/jump_label.c        | 35 +++++++++++++++++++++++++++++++++--
 4 files changed, 68 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 388b0d425b50..a1e7f909c801 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/compiler.h>
+#include <linux/workqueue.h>
 
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
 
@@ -14,6 +15,12 @@ struct jump_label_key {
 #endif
 };
 
+struct jump_label_key_deferred {
+	struct jump_label_key key;
+	unsigned long timeout;
+	struct delayed_work work;
+};
+
 # include <asm/jump_label.h>
 # define HAVE_JUMP_LABEL
 #endif	/* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */
@@ -51,8 +58,11 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry,
 extern int jump_label_text_reserved(void *start, void *end);
 extern void jump_label_inc(struct jump_label_key *key);
 extern void jump_label_dec(struct jump_label_key *key);
+extern void jump_label_dec_deferred(struct jump_label_key_deferred *key);
 extern bool jump_label_enabled(struct jump_label_key *key);
 extern void jump_label_apply_nops(struct module *mod);
+extern void jump_label_rate_limit(struct jump_label_key_deferred *key,
+		unsigned long rl);
 
 #else  /* !HAVE_JUMP_LABEL */
 
@@ -68,6 +78,10 @@ static __always_inline void jump_label_init(void)
 {
 }
 
+struct jump_label_key_deferred {
+	struct jump_label_key  key;
+};
+
 static __always_inline bool static_branch(struct jump_label_key *key)
 {
 	if (unlikely(atomic_read(&key->enabled)))
@@ -85,6 +99,11 @@ static inline void jump_label_dec(struct jump_label_key *key)
 	atomic_dec(&key->enabled);
 }
 
+static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key)
+{
+	jump_label_dec(&key->key);
+}
+
 static inline int jump_label_text_reserved(void *start, void *end)
 {
 	return 0;
@@ -102,6 +121,11 @@ static inline int jump_label_apply_nops(struct module *mod)
 {
 	return 0;
 }
+
+static inline void jump_label_rate_limit(struct jump_label_key_deferred *key,
+		unsigned long rl)
+{
+}
 #endif	/* HAVE_JUMP_LABEL */
 
 #endif	/* _LINUX_JUMP_LABEL_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index cb44c9e75660..564769cdb473 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1064,12 +1064,12 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
 	}
 }
 
-extern struct jump_label_key perf_sched_events;
+extern struct jump_label_key_deferred perf_sched_events;
 
 static inline void perf_event_task_sched_in(struct task_struct *prev,
 					    struct task_struct *task)
 {
-	if (static_branch(&perf_sched_events))
+	if (static_branch(&perf_sched_events.key))
 		__perf_event_task_sched_in(prev, task);
 }
 
@@ -1078,7 +1078,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
 {
 	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
 
-	if (static_branch(&perf_sched_events))
+	if (static_branch(&perf_sched_events.key))
 		__perf_event_task_sched_out(prev, next);
 }
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3c1541d7a53d..3a3b1a18f490 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -128,7 +128,7 @@ enum event_type_t {
  * perf_sched_events : >0 events exist
  * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
  */
-struct jump_label_key perf_sched_events __read_mostly;
+struct jump_label_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 
 static atomic_t nr_mmap_events __read_mostly;
@@ -2748,7 +2748,7 @@ static void free_event(struct perf_event *event)
 
 	if (!event->parent) {
 		if (event->attach_state & PERF_ATTACH_TASK)
-			jump_label_dec(&perf_sched_events);
+			jump_label_dec_deferred(&perf_sched_events);
 		if (event->attr.mmap || event->attr.mmap_data)
 			atomic_dec(&nr_mmap_events);
 		if (event->attr.comm)
@@ -2759,7 +2759,7 @@ static void free_event(struct perf_event *event)
 			put_callchain_buffers();
 		if (is_cgroup_event(event)) {
 			atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
-			jump_label_dec(&perf_sched_events);
+			jump_label_dec_deferred(&perf_sched_events);
 		}
 	}
 
@@ -5784,7 +5784,7 @@ done:
 
 	if (!event->parent) {
 		if (event->attach_state & PERF_ATTACH_TASK)
-			jump_label_inc(&perf_sched_events);
+			jump_label_inc(&perf_sched_events.key);
 		if (event->attr.mmap || event->attr.mmap_data)
 			atomic_inc(&nr_mmap_events);
 		if (event->attr.comm)
@@ -6022,7 +6022,7 @@ SYSCALL_DEFINE5(perf_event_open,
 		 * - that may need work on context switch
 		 */
 		atomic_inc(&per_cpu(perf_cgroup_events, event->cpu));
-		jump_label_inc(&perf_sched_events);
+		jump_label_inc(&perf_sched_events.key);
 	}
 
 	/*
@@ -6868,6 +6868,9 @@ void __init perf_event_init(void)
 
 	ret = init_hw_breakpoint();
 	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
+
+	/* do not patch jump label more than once per second */
+	jump_label_rate_limit(&perf_sched_events, HZ);
 }
 
 static int __init perf_event_sysfs_init(void)
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 66ff7109f697..51a175ab0a03 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -72,15 +72,46 @@ void jump_label_inc(struct jump_label_key *key)
 	jump_label_unlock();
 }
 
-void jump_label_dec(struct jump_label_key *key)
+static void __jump_label_dec(struct jump_label_key *key,
+		unsigned long rate_limit, struct delayed_work *work)
 {
 	if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex))
 		return;
 
-	jump_label_update(key, JUMP_LABEL_DISABLE);
+	if (rate_limit) {
+		atomic_inc(&key->enabled);
+		schedule_delayed_work(work, rate_limit);
+	} else
+		jump_label_update(key, JUMP_LABEL_DISABLE);
+
 	jump_label_unlock();
 }
 
+static void jump_label_update_timeout(struct work_struct *work)
+{
+	struct jump_label_key_deferred *key =
+		container_of(work, struct jump_label_key_deferred, work.work);
+	__jump_label_dec(&key->key, 0, NULL);
+}
+
+void jump_label_dec(struct jump_label_key *key)
+{
+	__jump_label_dec(key, 0, NULL);
+}
+
+void jump_label_dec_deferred(struct jump_label_key_deferred *key)
+{
+	__jump_label_dec(&key->key, key->timeout, &key->work);
+}
+
+
+void jump_label_rate_limit(struct jump_label_key_deferred *key,
+		unsigned long rl)
+{
+	key->timeout = rl;
+	INIT_DELAYED_WORK(&key->work, jump_label_update_timeout);
+}
+
 static int addr_conflict(struct jump_entry *entry, void *start, void *end)
 {
 	if (entry->code <= (unsigned long)end &&
-- 
cgit v1.2.3


From 69e1e811dcc436a6b129dbef273ad9ec22d095ce Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Thu, 1 Dec 2011 17:07:33 -0800
Subject: sched, nohz: Track nr_busy_cpus in the sched_group_power

Introduce nr_busy_cpus in the struct sched_group_power [Not in sched_group
because sched groups are duplicated for the SD_OVERLAP scheduler domain]
and for each cpu that enters and exits idle, this parameter will
be updated in each scheduler group of the scheduler domain that this cpu
belongs to.

To avoid the frequent update of this state as the cpu enters
and exits idle, the update of the stat during idle exit is
delayed to the first timer tick that happens after the cpu becomes busy.
This is done using NOHZ_IDLE flag in the struct rq's nohz_flags.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20111202010832.555984323@sbsiddha-desk.sc.intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h    |  6 ++++++
 kernel/sched/core.c      |  1 +
 kernel/sched/fair.c      | 31 +++++++++++++++++++++++++++++++
 kernel/sched/sched.h     |  1 +
 kernel/time/tick-sched.c |  9 +++++++++
 5 files changed, 48 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8db17b7622ec..295666cb5b86 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -273,9 +273,11 @@ extern int runqueue_is_locked(int cpu);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern void select_nohz_load_balancer(int stop_tick);
+extern void set_cpu_sd_state_idle(void);
 extern int get_nohz_timer_target(void);
 #else
 static inline void select_nohz_load_balancer(int stop_tick) { }
+static inline void set_cpu_sd_state_idle(void);
 #endif
 
 /*
@@ -901,6 +903,10 @@ struct sched_group_power {
 	 * single CPU.
 	 */
 	unsigned int power, power_orig;
+	/*
+	 * Number of busy cpus in this group.
+	 */
+	atomic_t nr_busy_cpus;
 };
 
 struct sched_group {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7f1da77b83f3..699ff1499a8a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6024,6 +6024,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 		return;
 
 	update_group_power(sd, cpu);
+	atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
 }
 
 int __weak arch_sd_sibling_asym_packing(void)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 50c06b0e9fab..e050563e97a4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4901,6 +4901,36 @@ static void nohz_balancer_kick(int cpu)
 	return;
 }
 
+static inline void set_cpu_sd_state_busy(void)
+{
+	struct sched_domain *sd;
+	int cpu = smp_processor_id();
+
+	if (!test_bit(NOHZ_IDLE, nohz_flags(cpu)))
+		return;
+	clear_bit(NOHZ_IDLE, nohz_flags(cpu));
+
+	rcu_read_lock();
+	for_each_domain(cpu, sd)
+		atomic_inc(&sd->groups->sgp->nr_busy_cpus);
+	rcu_read_unlock();
+}
+
+void set_cpu_sd_state_idle(void)
+{
+	struct sched_domain *sd;
+	int cpu = smp_processor_id();
+
+	if (test_bit(NOHZ_IDLE, nohz_flags(cpu)))
+		return;
+	set_bit(NOHZ_IDLE, nohz_flags(cpu));
+
+	rcu_read_lock();
+	for_each_domain(cpu, sd)
+		atomic_dec(&sd->groups->sgp->nr_busy_cpus);
+	rcu_read_unlock();
+}
+
 /*
  * This routine will try to nominate the ilb (idle load balancing)
  * owner among the cpus whose ticks are stopped. ilb owner will do the idle
@@ -5135,6 +5165,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 	* We may be recently in ticked or tickless idle mode. At the first
 	* busy tick after returning from idle, we will update the busy stats.
 	*/
+	set_cpu_sd_state_busy();
 	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))))
 		clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index cf7d02662bc2..91810f0ee3af 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1069,6 +1069,7 @@ extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
 enum rq_nohz_flag_bits {
 	NOHZ_TICK_STOPPED,
 	NOHZ_BALANCE_KICK,
+	NOHZ_IDLE,
 };
 
 #define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 40420644d0ba..31cc06163ed5 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -296,6 +296,15 @@ void tick_nohz_stop_sched_tick(int inidle)
 	cpu = smp_processor_id();
 	ts = &per_cpu(tick_cpu_sched, cpu);
 
+	/*
+ 	 * Update the idle state in the scheduler domain hierarchy
+ 	 * when tick_nohz_stop_sched_tick() is called from the idle loop.
+ 	 * State will be updated to busy during the first busy tick after
+ 	 * exiting idle.
+ 	 */
+	if (inidle)
+		set_cpu_sd_state_idle();
+
 	/*
 	 * Call to tick_nohz_start_idle stops the last_update_time from being
 	 * updated. Thus, it must not be called in the event we are called from
-- 
cgit v1.2.3


From 3292beb340c76884427faa1f5d6085719477d889 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Mon, 28 Nov 2011 14:45:17 -0200
Subject: sched/accounting: Change cpustat fields to an array

This patch changes fields in cpustat from a structure, to an
u64 array. Math gets easier, and the code is more flexible.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Tuner <pjt@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1322498719-2255-2-git-send-email-glommer@parallels.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/s390/appldata/appldata_os.c       | 16 +++----
 arch/x86/include/asm/i387.h            |  2 +-
 drivers/cpufreq/cpufreq_conservative.c | 38 ++++++++---------
 drivers/cpufreq/cpufreq_ondemand.c     | 38 ++++++++---------
 drivers/macintosh/rack-meter.c         |  8 ++--
 fs/proc/stat.c                         | 63 +++++++++++++--------------
 fs/proc/uptime.c                       |  4 +-
 include/linux/kernel_stat.h            | 36 ++++++++++------
 kernel/sched/core.c                    | 78 +++++++++++++++++-----------------
 9 files changed, 142 insertions(+), 141 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 92f1cb745d69..4de031d6b76c 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -115,21 +115,21 @@ static void appldata_get_os_data(void *data)
 	j = 0;
 	for_each_online_cpu(i) {
 		os_data->os_cpu[j].per_cpu_user =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.user);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
 		os_data->os_cpu[j].per_cpu_nice =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.nice);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
 		os_data->os_cpu[j].per_cpu_system =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.system);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
 		os_data->os_cpu[j].per_cpu_idle =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.idle);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
 		os_data->os_cpu[j].per_cpu_irq =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.irq);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
 		os_data->os_cpu[j].per_cpu_softirq =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.softirq);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
 		os_data->os_cpu[j].per_cpu_iowait =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.iowait);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
 		os_data->os_cpu[j].per_cpu_steal =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.steal);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
 		os_data->os_cpu[j].cpu_id = i;
 		j++;
 	}
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index c9e09ea05644..6919e936345b 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -218,7 +218,7 @@ static inline void fpu_fxsave(struct fpu *fpu)
 #ifdef CONFIG_SMP
 #define safe_address (__per_cpu_offset[0])
 #else
-#define safe_address (kstat_cpu(0).cpustat.user)
+#define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER])
 #endif
 
 /*
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index c97b468ee9f7..118bff73fed3 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -95,27 +95,26 @@ static struct dbs_tuners {
 	.freq_step = 5,
 };
 
-static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
-							cputime64_t *wall)
+static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 {
-	cputime64_t idle_time;
+	u64 idle_time;
 	cputime64_t cur_wall_time;
-	cputime64_t busy_time;
+	u64 busy_time;
 
 	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
-	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
-			kstat_cpu(cpu).cpustat.system);
+	busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER] +
+		    kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
 
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
 
 	idle_time = cputime64_sub(cur_wall_time, busy_time);
 	if (wall)
-		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
+		*wall = jiffies_to_usecs(cur_wall_time);
 
-	return (cputime64_t)jiffies_to_usecs(idle_time);
+	return jiffies_to_usecs(idle_time);
 }
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
@@ -272,7 +271,7 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
 		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&dbs_info->prev_cpu_wall);
 		if (dbs_tuners_ins.ignore_nice)
-			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 	}
 	return count;
 }
@@ -362,11 +361,11 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 		j_dbs_info->prev_cpu_idle = cur_idle_time;
 
 		if (dbs_tuners_ins.ignore_nice) {
-			cputime64_t cur_nice;
+			u64 cur_nice;
 			unsigned long cur_nice_jiffies;
 
-			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
-					 j_dbs_info->prev_cpu_nice);
+			cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
+					 j_dbs_info->prev_cpu_nice;
 			/*
 			 * Assumption: nice time between sampling periods will
 			 * be less than 2^32 jiffies for 32 bit sys
@@ -374,7 +373,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 			cur_nice_jiffies = (unsigned long)
 					cputime64_to_jiffies64(cur_nice);
 
-			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 			idle_time += jiffies_to_usecs(cur_nice_jiffies);
 		}
 
@@ -501,10 +500,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 
 			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&j_dbs_info->prev_cpu_wall);
-			if (dbs_tuners_ins.ignore_nice) {
+			if (dbs_tuners_ins.ignore_nice)
 				j_dbs_info->prev_cpu_nice =
-						kstat_cpu(j).cpustat.nice;
-			}
+						kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 		}
 		this_dbs_info->down_skip = 0;
 		this_dbs_info->requested_freq = policy->cur;
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index fa8af4ebb1d6..f3d327cee43f 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -119,27 +119,26 @@ static struct dbs_tuners {
 	.powersave_bias = 0,
 };
 
-static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
-							cputime64_t *wall)
+static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 {
-	cputime64_t idle_time;
+	u64 idle_time;
 	cputime64_t cur_wall_time;
-	cputime64_t busy_time;
+	u64 busy_time;
 
 	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
-	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
-			kstat_cpu(cpu).cpustat.system);
+	busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER] +
+		    kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
 
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
 
 	idle_time = cputime64_sub(cur_wall_time, busy_time);
 	if (wall)
-		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
+		*wall = jiffies_to_usecs(cur_wall_time);
 
-	return (cputime64_t)jiffies_to_usecs(idle_time);
+	return jiffies_to_usecs(idle_time);
 }
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
@@ -345,7 +344,7 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
 		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&dbs_info->prev_cpu_wall);
 		if (dbs_tuners_ins.ignore_nice)
-			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 
 	}
 	return count;
@@ -455,11 +454,11 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 		j_dbs_info->prev_cpu_iowait = cur_iowait_time;
 
 		if (dbs_tuners_ins.ignore_nice) {
-			cputime64_t cur_nice;
+			u64 cur_nice;
 			unsigned long cur_nice_jiffies;
 
-			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
-					 j_dbs_info->prev_cpu_nice);
+			cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
+					 j_dbs_info->prev_cpu_nice;
 			/*
 			 * Assumption: nice time between sampling periods will
 			 * be less than 2^32 jiffies for 32 bit sys
@@ -467,7 +466,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 			cur_nice_jiffies = (unsigned long)
 					cputime64_to_jiffies64(cur_nice);
 
-			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 			idle_time += jiffies_to_usecs(cur_nice_jiffies);
 		}
 
@@ -646,10 +645,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 
 			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&j_dbs_info->prev_cpu_wall);
-			if (dbs_tuners_ins.ignore_nice) {
+			if (dbs_tuners_ins.ignore_nice)
 				j_dbs_info->prev_cpu_nice =
-						kstat_cpu(j).cpustat.nice;
-			}
+						kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 		}
 		this_dbs_info->cpu = cpu;
 		this_dbs_info->rate_mult = 1;
diff --git a/drivers/macintosh/rack-meter.c b/drivers/macintosh/rack-meter.c
index 2637c139777b..66d7f1c7baa1 100644
--- a/drivers/macintosh/rack-meter.c
+++ b/drivers/macintosh/rack-meter.c
@@ -81,13 +81,13 @@ static int rackmeter_ignore_nice;
  */
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
 {
-	cputime64_t retval;
+	u64 retval;
 
-	retval = cputime64_add(kstat_cpu(cpu).cpustat.idle,
-			kstat_cpu(cpu).cpustat.iowait);
+	retval = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE] +
+		 kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
 
 	if (rackmeter_ignore_nice)
-		retval = cputime64_add(retval, kstat_cpu(cpu).cpustat.nice);
+		retval += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
 
 	return retval;
 }
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 42b274da92c3..8a6ab666e9f8 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -22,29 +22,27 @@
 #define arch_idle_time(cpu) 0
 #endif
 
-static cputime64_t get_idle_time(int cpu)
+static u64 get_idle_time(int cpu)
 {
-	u64 idle_time = get_cpu_idle_time_us(cpu, NULL);
-	cputime64_t idle;
+	u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
 
 	if (idle_time == -1ULL) {
 		/* !NO_HZ so we can rely on cpustat.idle */
-		idle = kstat_cpu(cpu).cpustat.idle;
-		idle = cputime64_add(idle, arch_idle_time(cpu));
+		idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
+		idle += arch_idle_time(cpu);
 	} else
 		idle = usecs_to_cputime(idle_time);
 
 	return idle;
 }
 
-static cputime64_t get_iowait_time(int cpu)
+static u64 get_iowait_time(int cpu)
 {
-	u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL);
-	cputime64_t iowait;
+	u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL);
 
 	if (iowait_time == -1ULL)
 		/* !NO_HZ so we can rely on cpustat.iowait */
-		iowait = kstat_cpu(cpu).cpustat.iowait;
+		iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
 	else
 		iowait = usecs_to_cputime(iowait_time);
 
@@ -55,33 +53,30 @@ static int show_stat(struct seq_file *p, void *v)
 {
 	int i, j;
 	unsigned long jif;
-	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
-	cputime64_t guest, guest_nice;
+	u64 user, nice, system, idle, iowait, irq, softirq, steal;
+	u64 guest, guest_nice;
 	u64 sum = 0;
 	u64 sum_softirq = 0;
 	unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
 	struct timespec boottime;
 
 	user = nice = system = idle = iowait =
-		irq = softirq = steal = cputime64_zero;
-	guest = guest_nice = cputime64_zero;
+		irq = softirq = steal = 0;
+	guest = guest_nice = 0;
 	getboottime(&boottime);
 	jif = boottime.tv_sec;
 
 	for_each_possible_cpu(i) {
-		user = cputime64_add(user, kstat_cpu(i).cpustat.user);
-		nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
-		system = cputime64_add(system, kstat_cpu(i).cpustat.system);
-		idle = cputime64_add(idle, get_idle_time(i));
-		iowait = cputime64_add(iowait, get_iowait_time(i));
-		irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
-		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
-		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
-		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
-		guest_nice = cputime64_add(guest_nice,
-			kstat_cpu(i).cpustat.guest_nice);
-		sum += kstat_cpu_irqs_sum(i);
-		sum += arch_irq_stat_cpu(i);
+		user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
+		nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
+		system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
+		idle += get_idle_time(i);
+		iowait += get_iowait_time(i);
+		irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
+		softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
+		steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
+		guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
+		guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
 
 		for (j = 0; j < NR_SOFTIRQS; j++) {
 			unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
@@ -106,16 +101,16 @@ static int show_stat(struct seq_file *p, void *v)
 		(unsigned long long)cputime64_to_clock_t(guest_nice));
 	for_each_online_cpu(i) {
 		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
-		user = kstat_cpu(i).cpustat.user;
-		nice = kstat_cpu(i).cpustat.nice;
-		system = kstat_cpu(i).cpustat.system;
+		user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
+		nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
+		system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
 		idle = get_idle_time(i);
 		iowait = get_iowait_time(i);
-		irq = kstat_cpu(i).cpustat.irq;
-		softirq = kstat_cpu(i).cpustat.softirq;
-		steal = kstat_cpu(i).cpustat.steal;
-		guest = kstat_cpu(i).cpustat.guest;
-		guest_nice = kstat_cpu(i).cpustat.guest_nice;
+		irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
+		softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
+		steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
+		guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
+		guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
 		seq_printf(p,
 			"cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
 			"%llu\n",
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 766b1d456050..0fb22e464e72 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -12,10 +12,10 @@ static int uptime_proc_show(struct seq_file *m, void *v)
 	struct timespec uptime;
 	struct timespec idle;
 	int i;
-	cputime_t idletime = cputime_zero;
+	u64 idletime = 0;
 
 	for_each_possible_cpu(i)
-		idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
+		idletime += kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
 
 	do_posix_clock_monotonic_gettime(&uptime);
 	monotonic_to_bootbased(&uptime);
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 0cce2db580c3..2fbd9053c2df 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -6,6 +6,7 @@
 #include <linux/percpu.h>
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
+#include <linux/sched.h>
 #include <asm/irq.h>
 #include <asm/cputime.h>
 
@@ -15,21 +16,25 @@
  * used by rstatd/perfmeter
  */
 
-struct cpu_usage_stat {
-	cputime64_t user;
-	cputime64_t nice;
-	cputime64_t system;
-	cputime64_t softirq;
-	cputime64_t irq;
-	cputime64_t idle;
-	cputime64_t iowait;
-	cputime64_t steal;
-	cputime64_t guest;
-	cputime64_t guest_nice;
+enum cpu_usage_stat {
+	CPUTIME_USER,
+	CPUTIME_NICE,
+	CPUTIME_SYSTEM,
+	CPUTIME_SOFTIRQ,
+	CPUTIME_IRQ,
+	CPUTIME_IDLE,
+	CPUTIME_IOWAIT,
+	CPUTIME_STEAL,
+	CPUTIME_GUEST,
+	CPUTIME_GUEST_NICE,
+	NR_STATS,
+};
+
+struct kernel_cpustat {
+	u64 cpustat[NR_STATS];
 };
 
 struct kernel_stat {
-	struct cpu_usage_stat	cpustat;
 #ifndef CONFIG_GENERIC_HARDIRQS
        unsigned int irqs[NR_IRQS];
 #endif
@@ -38,10 +43,13 @@ struct kernel_stat {
 };
 
 DECLARE_PER_CPU(struct kernel_stat, kstat);
+DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
 
-#define kstat_cpu(cpu)	per_cpu(kstat, cpu)
 /* Must have preemption disabled for this to be meaningful. */
-#define kstat_this_cpu	__get_cpu_var(kstat)
+#define kstat_this_cpu (&__get_cpu_var(kstat))
+#define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat))
+#define kstat_cpu(cpu) per_cpu(kstat, cpu)
+#define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu)
 
 extern unsigned long long nr_context_switches(void);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 699ff1499a8a..dbbe35ff93fc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -896,14 +896,14 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 static int irqtime_account_hi_update(void)
 {
-	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+	u64 *cpustat = kcpustat_this_cpu->cpustat;
 	unsigned long flags;
 	u64 latest_ns;
 	int ret = 0;
 
 	local_irq_save(flags);
 	latest_ns = this_cpu_read(cpu_hardirq_time);
-	if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq))
+	if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat[CPUTIME_IRQ]))
 		ret = 1;
 	local_irq_restore(flags);
 	return ret;
@@ -911,14 +911,14 @@ static int irqtime_account_hi_update(void)
 
 static int irqtime_account_si_update(void)
 {
-	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+	u64 *cpustat = kcpustat_this_cpu->cpustat;
 	unsigned long flags;
 	u64 latest_ns;
 	int ret = 0;
 
 	local_irq_save(flags);
 	latest_ns = this_cpu_read(cpu_softirq_time);
-	if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq))
+	if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat[CPUTIME_SOFTIRQ]))
 		ret = 1;
 	local_irq_restore(flags);
 	return ret;
@@ -2500,8 +2500,10 @@ unlock:
 #endif
 
 DEFINE_PER_CPU(struct kernel_stat, kstat);
+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
 
 EXPORT_PER_CPU_SYMBOL(kstat);
+EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
 
 /*
  * Return any ns on the sched_clock that have not yet been accounted in
@@ -2563,8 +2565,9 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 void account_user_time(struct task_struct *p, cputime_t cputime,
 		       cputime_t cputime_scaled)
 {
-	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	cputime64_t tmp;
+	u64 *cpustat = kcpustat_this_cpu->cpustat;
+	u64 tmp;
+	int index;
 
 	/* Add user time to process. */
 	p->utime = cputime_add(p->utime, cputime);
@@ -2573,10 +2576,9 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
-	if (TASK_NICE(p) > 0)
-		cpustat->nice = cputime64_add(cpustat->nice, tmp);
-	else
-		cpustat->user = cputime64_add(cpustat->user, tmp);
+
+	index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
+	cpustat[index] += tmp;
 
 	cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
 	/* Account for user time used */
@@ -2592,8 +2594,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 static void account_guest_time(struct task_struct *p, cputime_t cputime,
 			       cputime_t cputime_scaled)
 {
-	cputime64_t tmp;
-	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+	u64 tmp;
+	u64 *cpustat = kcpustat_this_cpu->cpustat;
 
 	tmp = cputime_to_cputime64(cputime);
 
@@ -2605,11 +2607,11 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
 
 	/* Add guest time to cpustat. */
 	if (TASK_NICE(p) > 0) {
-		cpustat->nice = cputime64_add(cpustat->nice, tmp);
-		cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp);
+		cpustat[CPUTIME_NICE] += tmp;
+		cpustat[CPUTIME_GUEST_NICE] += tmp;
 	} else {
-		cpustat->user = cputime64_add(cpustat->user, tmp);
-		cpustat->guest = cputime64_add(cpustat->guest, tmp);
+		cpustat[CPUTIME_USER] += tmp;
+		cpustat[CPUTIME_GUEST] += tmp;
 	}
 }
 
@@ -2622,9 +2624,10 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
  */
 static inline
 void __account_system_time(struct task_struct *p, cputime_t cputime,
-			cputime_t cputime_scaled, cputime64_t *target_cputime64)
+			cputime_t cputime_scaled, int index)
 {
-	cputime64_t tmp = cputime_to_cputime64(cputime);
+	u64 tmp = cputime_to_cputime64(cputime);
+	u64 *cpustat = kcpustat_this_cpu->cpustat;
 
 	/* Add system time to process. */
 	p->stime = cputime_add(p->stime, cputime);
@@ -2632,7 +2635,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
 	account_group_system_time(p, cputime);
 
 	/* Add system time to cpustat. */
-	*target_cputime64 = cputime64_add(*target_cputime64, tmp);
+	cpustat[index] += tmp;
 	cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
 
 	/* Account for system time used */
@@ -2649,8 +2652,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
 void account_system_time(struct task_struct *p, int hardirq_offset,
 			 cputime_t cputime, cputime_t cputime_scaled)
 {
-	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	cputime64_t *target_cputime64;
+	int index;
 
 	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
 		account_guest_time(p, cputime, cputime_scaled);
@@ -2658,13 +2660,13 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 	}
 
 	if (hardirq_count() - hardirq_offset)
-		target_cputime64 = &cpustat->irq;
+		index = CPUTIME_IRQ;
 	else if (in_serving_softirq())
-		target_cputime64 = &cpustat->softirq;
+		index = CPUTIME_SOFTIRQ;
 	else
-		target_cputime64 = &cpustat->system;
+		index = CPUTIME_SYSTEM;
 
-	__account_system_time(p, cputime, cputime_scaled, target_cputime64);
+	__account_system_time(p, cputime, cputime_scaled, index);
 }
 
 /*
@@ -2673,10 +2675,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
  */
 void account_steal_time(cputime_t cputime)
 {
-	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	cputime64_t cputime64 = cputime_to_cputime64(cputime);
+	u64 *cpustat = kcpustat_this_cpu->cpustat;
+	u64 cputime64 = cputime_to_cputime64(cputime);
 
-	cpustat->steal = cputime64_add(cpustat->steal, cputime64);
+	cpustat[CPUTIME_STEAL] += cputime64;
 }
 
 /*
@@ -2685,14 +2687,14 @@ void account_steal_time(cputime_t cputime)
  */
 void account_idle_time(cputime_t cputime)
 {
-	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	cputime64_t cputime64 = cputime_to_cputime64(cputime);
+	u64 *cpustat = kcpustat_this_cpu->cpustat;
+	u64 cputime64 = cputime_to_cputime64(cputime);
 	struct rq *rq = this_rq();
 
 	if (atomic_read(&rq->nr_iowait) > 0)
-		cpustat->iowait = cputime64_add(cpustat->iowait, cputime64);
+		cpustat[CPUTIME_IOWAIT] += cputime64;
 	else
-		cpustat->idle = cputime64_add(cpustat->idle, cputime64);
+		cpustat[CPUTIME_IDLE] += cputime64;
 }
 
 static __always_inline bool steal_account_process_tick(void)
@@ -2742,16 +2744,16 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 						struct rq *rq)
 {
 	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
-	cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
-	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+	u64 tmp = cputime_to_cputime64(cputime_one_jiffy);
+	u64 *cpustat = kcpustat_this_cpu->cpustat;
 
 	if (steal_account_process_tick())
 		return;
 
 	if (irqtime_account_hi_update()) {
-		cpustat->irq = cputime64_add(cpustat->irq, tmp);
+		cpustat[CPUTIME_IRQ] += tmp;
 	} else if (irqtime_account_si_update()) {
-		cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
+		cpustat[CPUTIME_SOFTIRQ] += tmp;
 	} else if (this_cpu_ksoftirqd() == p) {
 		/*
 		 * ksoftirqd time do not get accounted in cpu_softirq_time.
@@ -2759,7 +2761,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 		 * Also, p->stime needs to be updated for ksoftirqd.
 		 */
 		__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
-					&cpustat->softirq);
+					CPUTIME_SOFTIRQ);
 	} else if (user_tick) {
 		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
 	} else if (p == rq->idle) {
@@ -2768,7 +2770,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 		account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
 	} else {
 		__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
-					&cpustat->system);
+					CPUTIME_SYSTEM);
 	}
 }
 
-- 
cgit v1.2.3


From 54c29c635ae91f5d75ced7bffeaa77ba37ca02bb Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Tue, 29 Nov 2011 17:05:11 +0100
Subject: mm, x86: Remove debug_pagealloc_enabled

When (no)bootmem finish operation, it pass pages to buddy
allocator. Since debug_pagealloc_enabled is not set, we will do
not protect pages, what is not what we want with
CONFIG_DEBUG_PAGEALLOC=y.

To fix remove debug_pagealloc_enabled. That variable was
introduced by commit 12d6f21e "x86: do not PSE on
CONFIG_DEBUG_PAGEALLOC=y" to get more CPA (change page
attribude) code testing. But currently we have CONFIG_CPA_DEBUG,
which test CPA.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/1322582711-14571-1-git-send-email-sgruszka@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr.c |  6 ------
 include/linux/mm.h     | 10 ----------
 init/main.c            |  5 -----
 mm/debug-pagealloc.c   |  3 ---
 4 files changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f9e526742fa1..5031eefa051f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1333,12 +1333,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 					   numpages * PAGE_SIZE);
 	}
 
-	/*
-	 * If page allocator is not up yet then do not call c_p_a():
-	 */
-	if (!debug_pagealloc_enabled)
-		return;
-
 	/*
 	 * The return value is ignored as the calls cannot fail.
 	 * Large pages for identity mappings are not used at boot time
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3dc3a8c2c485..0a22db144753 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1537,23 +1537,13 @@ static inline void vm_stat_account(struct mm_struct *mm,
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
-extern int debug_pagealloc_enabled;
-
 extern void kernel_map_pages(struct page *page, int numpages, int enable);
-
-static inline void enable_debug_pagealloc(void)
-{
-	debug_pagealloc_enabled = 1;
-}
 #ifdef CONFIG_HIBERNATION
 extern bool kernel_page_present(struct page *page);
 #endif /* CONFIG_HIBERNATION */
 #else
 static inline void
 kernel_map_pages(struct page *page, int numpages, int enable) {}
-static inline void enable_debug_pagealloc(void)
-{
-}
 #ifdef CONFIG_HIBERNATION
 static inline bool kernel_page_present(struct page *page) { return true; }
 #endif /* CONFIG_HIBERNATION */
diff --git a/init/main.c b/init/main.c
index 217ed23e9487..99c4ba30ba7e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -282,10 +282,6 @@ static int __init unknown_bootoption(char *param, char *val)
 	return 0;
 }
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-int __read_mostly debug_pagealloc_enabled = 0;
-#endif
-
 static int __init init_setup(char *str)
 {
 	unsigned int i;
@@ -597,7 +593,6 @@ asmlinkage void __init start_kernel(void)
 	}
 #endif
 	page_cgroup_init();
-	enable_debug_pagealloc();
 	debug_objects_mem_init();
 	kmemleak_init();
 	setup_per_cpu_pageset();
diff --git a/mm/debug-pagealloc.c b/mm/debug-pagealloc.c
index 7cea557407f4..789ff70c8a4a 100644
--- a/mm/debug-pagealloc.c
+++ b/mm/debug-pagealloc.c
@@ -95,9 +95,6 @@ static void unpoison_pages(struct page *page, int n)
 
 void kernel_map_pages(struct page *page, int numpages, int enable)
 {
-	if (!debug_pagealloc_enabled)
-		return;
-
 	if (enable)
 		unpoison_pages(page, numpages);
 	else
-- 
cgit v1.2.3


From f21ffe9f6da6d3a69c518b7345c198d48d941c34 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 11 Aug 2011 16:50:56 -0400
Subject: swiotlb: Expose swiotlb_nr_tlb function to modules

As a mechanism to detect whether SWIOTLB is enabled or not.
We also fix the spelling - it was swioltb instead of
swiotlb.

CC: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
[v1: Ripped out swiotlb_enabled]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/xen/swiotlb-xen.c | 2 +-
 include/linux/swiotlb.h   | 2 +-
 lib/swiotlb.c             | 5 +++--
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 8e964b91c447..4864e5d72e72 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -153,7 +153,7 @@ void __init xen_swiotlb_init(int verbose)
 	char *m = NULL;
 	unsigned int repeat = 3;
 
-	nr_tbl = swioltb_nr_tbl();
+	nr_tbl = swiotlb_nr_tbl();
 	if (nr_tbl)
 		xen_io_tlb_nslabs = nr_tbl;
 	else {
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 445702c60d04..e872526fdc5f 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -24,7 +24,7 @@ extern int swiotlb_force;
 
 extern void swiotlb_init(int verbose);
 extern void swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
-extern unsigned long swioltb_nr_tbl(void);
+extern unsigned long swiotlb_nr_tbl(void);
 
 /*
  * Enumeration for sync targets
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 99093b396145..058935ef3975 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -110,11 +110,11 @@ setup_io_tlb_npages(char *str)
 __setup("swiotlb=", setup_io_tlb_npages);
 /* make io_tlb_overflow tunable too? */
 
-unsigned long swioltb_nr_tbl(void)
+unsigned long swiotlb_nr_tbl(void)
 {
 	return io_tlb_nslabs;
 }
-
+EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
 /* Note that this doesn't work with highmem page */
 static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 				      volatile void *address)
@@ -321,6 +321,7 @@ void __init swiotlb_free(void)
 		free_bootmem_late(__pa(io_tlb_start),
 				  PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	}
+	io_tlb_nslabs = 0;
 }
 
 static int is_swiotlb_buffer(phys_addr_t paddr)
-- 
cgit v1.2.3


From 7f1fb60c4fc9fb29fbb406ac8c4cfb4e59e168d6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 6 Dec 2011 07:56:43 +0000
Subject: inet_diag: Partly rename inet_ to sock_

The ultimate goal is to get the sock_diag module, that works in
family+protocol terms. Currently this is suitable to do on the
inet_diag basis, so rename parts of the code. It will be moved
to sock_diag.c later.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  |  4 +++-
 net/dccp/diag.c          |  2 +-
 net/ipv4/inet_diag.c     | 33 +++++++++++++++++++--------------
 net/ipv4/tcp_diag.c      |  2 +-
 security/selinux/hooks.c |  2 +-
 5 files changed, 25 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 8374d2967362..52e48959cfa1 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -8,7 +8,7 @@
 #define NETLINK_UNUSED		1	/* Unused number				*/
 #define NETLINK_USERSOCK	2	/* Reserved for user mode socket protocols 	*/
 #define NETLINK_FIREWALL	3	/* Firewalling hook				*/
-#define NETLINK_INET_DIAG	4	/* INET socket monitoring			*/
+#define NETLINK_SOCK_DIAG	4	/* socket monitoring				*/
 #define NETLINK_NFLOG		5	/* netfilter/iptables ULOG */
 #define NETLINK_XFRM		6	/* ipsec */
 #define NETLINK_SELINUX		7	/* SELinux event notifications */
@@ -27,6 +27,8 @@
 #define NETLINK_RDMA		20
 #define NETLINK_CRYPTO		21	/* Crypto layer */
 
+#define NETLINK_INET_DIAG	NETLINK_SOCK_DIAG
+
 #define MAX_LINKS 32		
 
 struct sockaddr_nl {
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index b21f261da75e..d92ba7d1c351 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -71,4 +71,4 @@ module_exit(dccp_diag_fini);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
 MODULE_DESCRIPTION("DCCP inet_diag handler");
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, DCCPDIAG_GETSOCK);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, DCCPDIAG_GETSOCK);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 0a46c541b477..a5f3c40ac3c5 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -45,7 +45,7 @@ struct inet_diag_entry {
 	u16 userlocks;
 };
 
-static struct sock *idiagnl;
+static struct sock *sdiagnl;
 
 #define INET_DIAG_PUT(skb, attrtype, attrlen) \
 	RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
@@ -56,7 +56,7 @@ static const struct inet_diag_handler *inet_diag_lock_handler(int type)
 {
 	if (!inet_diag_table[type])
 		request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
-			       NETLINK_INET_DIAG, type);
+			       NETLINK_SOCK_DIAG, type);
 
 	mutex_lock(&inet_diag_table_mutex);
 	if (!inet_diag_table[type])
@@ -312,7 +312,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 		kfree_skb(rep);
 		goto out;
 	}
-	err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid,
+	err = netlink_unicast(sdiagnl, rep, NETLINK_CB(in_skb).pid,
 			      MSG_DONTWAIT);
 	if (err > 0)
 		err = 0;
@@ -870,20 +870,25 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 				return -EINVAL;
 		}
 
-		return netlink_dump_start(idiagnl, skb, nlh,
+		return netlink_dump_start(sdiagnl, skb, nlh,
 					  inet_diag_dump, NULL, 0);
 	}
 
 	return inet_diag_get_exact(skb, nlh);
 }
 
-static DEFINE_MUTEX(inet_diag_mutex);
+static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	return inet_diag_rcv_msg(skb, nlh);
+}
+
+static DEFINE_MUTEX(sock_diag_mutex);
 
-static void inet_diag_rcv(struct sk_buff *skb)
+static void sock_diag_rcv(struct sk_buff *skb)
 {
-	mutex_lock(&inet_diag_mutex);
-	netlink_rcv_skb(skb, &inet_diag_rcv_msg);
-	mutex_unlock(&inet_diag_mutex);
+	mutex_lock(&sock_diag_mutex);
+	netlink_rcv_skb(skb, &sock_diag_rcv_msg);
+	mutex_unlock(&sock_diag_mutex);
 }
 
 int inet_diag_register(const struct inet_diag_handler *h)
@@ -929,9 +934,9 @@ static int __init inet_diag_init(void)
 	if (!inet_diag_table)
 		goto out;
 
-	idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0,
-					inet_diag_rcv, NULL, THIS_MODULE);
-	if (idiagnl == NULL)
+	sdiagnl = netlink_kernel_create(&init_net, NETLINK_SOCK_DIAG, 0,
+					sock_diag_rcv, NULL, THIS_MODULE);
+	if (sdiagnl == NULL)
 		goto out_free_table;
 	err = 0;
 out:
@@ -943,11 +948,11 @@ out_free_table:
 
 static void __exit inet_diag_exit(void)
 {
-	netlink_kernel_release(idiagnl);
+	netlink_kernel_release(sdiagnl);
 	kfree(inet_diag_table);
 }
 
 module_init(inet_diag_init);
 module_exit(inet_diag_exit);
 MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_INET_DIAG);
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_SOCK_DIAG);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 939edb3b8e4d..9e276b868ce8 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -54,4 +54,4 @@ static void __exit tcp_diag_exit(void)
 module_init(tcp_diag_init);
 module_exit(tcp_diag_exit);
 MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, TCPDIAG_GETSOCK);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, TCPDIAG_GETSOCK);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index cca09bb46502..86305c2f555a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1090,7 +1090,7 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc
 			return SECCLASS_NETLINK_ROUTE_SOCKET;
 		case NETLINK_FIREWALL:
 			return SECCLASS_NETLINK_FIREWALL_SOCKET;
-		case NETLINK_INET_DIAG:
+		case NETLINK_SOCK_DIAG:
 			return SECCLASS_NETLINK_TCPDIAG_SOCKET;
 		case NETLINK_NFLOG:
 			return SECCLASS_NETLINK_NFLOG_SOCKET;
-- 
cgit v1.2.3


From 8d34172dfdb762a306cdf58b547aa10d798622ec Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 6 Dec 2011 07:57:06 +0000
Subject: sock_diag: Introduce new message type

This type will run the family+protocol based socket dumping.
Also prepare the stub function for it.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h |  1 +
 net/ipv4/inet_diag.c      | 17 +++++++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index abf5028db981..f7baaf637426 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -6,6 +6,7 @@
 /* Just some random number */
 #define TCPDIAG_GETSOCK 18
 #define DCCPDIAG_GETSOCK 19
+#define SOCK_DIAG_BY_FAMILY 20
 
 #define INET_DIAG_GETSOCK_MAX 24
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index a5f3c40ac3c5..eb6bdfa9480c 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -850,7 +850,7 @@ unlock:
 	return skb->len;
 }
 
-static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	int hdrlen = sizeof(struct inet_diag_req);
 
@@ -877,9 +877,22 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	return inet_diag_get_exact(skb, nlh);
 }
 
+static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	return -EOPNOTSUPP;
+}
+
 static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	return inet_diag_rcv_msg(skb, nlh);
+	switch (nlh->nlmsg_type) {
+	case TCPDIAG_GETSOCK:
+	case DCCPDIAG_GETSOCK:
+		return inet_diag_rcv_msg_compat(skb, nlh);
+	case SOCK_DIAG_BY_FAMILY:
+		return __sock_diag_rcv_msg(skb, nlh);
+	default:
+		return -EINVAL;
+	}
 }
 
 static DEFINE_MUTEX(sock_diag_mutex);
-- 
cgit v1.2.3


From d366477a52f1df29fa066ffb18e4e6101ee2ad04 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 6 Dec 2011 07:58:03 +0000
Subject: sock_diag: Initial skeleton

When receiving the SOCK_DIAG_BY_FAMILY message we have to find the
handler for provided family and pass the nl message to it.

This patch describes an infrastructure to work with such nandlers
and implements stubs for AF_INET(6) ones.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sock_diag.h |  23 +++++++++++
 net/ipv4/inet_diag.c      | 102 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 123 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/sock_diag.h

(limited to 'include/linux')

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
new file mode 100644
index 000000000000..ba4933b1213b
--- /dev/null
+++ b/include/linux/sock_diag.h
@@ -0,0 +1,23 @@
+#ifndef __SOCK_DIAG_H__
+#define __SOCK_DIAG_H__
+struct sk_buff;
+struct nlmsghdr;
+
+struct sock_diag_req {
+	__u8	sdiag_family;
+	__u8	sdiag_protocol;
+};
+
+struct sock_diag_handler {
+	__u8 family;
+	int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
+};
+
+int sock_diag_register(struct sock_diag_handler *h);
+void sock_diag_unregister(struct sock_diag_handler *h);
+
+void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
+void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
+
+extern struct sock *sock_diag_nlsk;
+#endif
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 58caecc343b1..877875ea3d71 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -33,6 +33,7 @@
 #include <linux/stddef.h>
 
 #include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
 
 static const struct inet_diag_handler **inet_diag_table;
 
@@ -887,9 +888,91 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 	return inet_diag_get_exact(skb, nlh);
 }
 
+static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+	int hdrlen = sizeof(struct inet_diag_req);
+
+	if (nlmsg_len(h) < hdrlen)
+		return -EINVAL;
+
+	if (h->nlmsg_flags & NLM_F_DUMP) {
+		return -EAFNOSUPPORT;
+	}
+
+	return -EAFNOSUPPORT;
+}
+
+static struct sock_diag_handler inet_diag_handler = {
+	.family = AF_INET,
+	.dump = inet_diag_handler_dump,
+};
+
+static struct sock_diag_handler inet6_diag_handler = {
+	.family = AF_INET6,
+	.dump = inet_diag_handler_dump,
+};
+
+static struct sock_diag_handler *sock_diag_handlers[AF_MAX];
+static DEFINE_MUTEX(sock_diag_table_mutex);
+
+int sock_diag_register(struct sock_diag_handler *hndl)
+{
+	int err = 0;
+
+	if (hndl->family > AF_MAX)
+		return -EINVAL;
+
+	mutex_lock(&sock_diag_table_mutex);
+	if (sock_diag_handlers[hndl->family])
+		err = -EBUSY;
+	else
+		sock_diag_handlers[hndl->family] = hndl;
+	mutex_unlock(&sock_diag_table_mutex);
+
+	return err;
+}
+
+void sock_diag_unregister(struct sock_diag_handler *hnld)
+{
+	int family = hnld->family;
+
+	if (family > AF_MAX)
+		return;
+
+	mutex_lock(&sock_diag_table_mutex);
+	BUG_ON(sock_diag_handlers[family] != hnld);
+	sock_diag_handlers[family] = NULL;
+	mutex_unlock(&sock_diag_table_mutex);
+}
+
+static inline struct sock_diag_handler *sock_diag_lock_handler(int family)
+{
+	mutex_lock(&sock_diag_table_mutex);
+	return sock_diag_handlers[family];
+}
+
+static inline void sock_diag_unlock_handler(struct sock_diag_handler *h)
+{
+	mutex_unlock(&sock_diag_table_mutex);
+}
+
 static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	return -EOPNOTSUPP;
+	int err;
+	struct sock_diag_req *req = NLMSG_DATA(nlh);
+	struct sock_diag_handler *hndl;
+
+	if (nlmsg_len(nlh) < sizeof(*req))
+		return -EINVAL;
+
+	hndl = sock_diag_lock_handler(req->sdiag_family);
+	if (hndl == NULL)
+		err = -ENOENT;
+	else
+		err = hndl->dump(skb, nlh);
+	sock_diag_unlock_handler(hndl);
+
+	return err;
 }
 
 static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -961,9 +1044,22 @@ static int __init inet_diag_init(void)
 					sock_diag_rcv, NULL, THIS_MODULE);
 	if (sdiagnl == NULL)
 		goto out_free_table;
-	err = 0;
+
+	err = sock_diag_register(&inet_diag_handler);
+	if (err)
+		goto out_free_nl;
+
+	err = sock_diag_register(&inet6_diag_handler);
+	if (err)
+		goto out_free_inet;
+
 out:
 	return err;
+
+out_free_inet:
+	sock_diag_unregister(&inet_diag_handler);
+out_free_nl:
+	netlink_kernel_release(sdiagnl);
 out_free_table:
 	kfree(inet_diag_table);
 	goto out;
@@ -971,6 +1067,8 @@ out_free_table:
 
 static void __exit inet_diag_exit(void)
 {
+	sock_diag_unregister(&inet6_diag_handler);
+	sock_diag_unregister(&inet_diag_handler);
 	netlink_kernel_release(sdiagnl);
 	kfree(inet_diag_table);
 }
-- 
cgit v1.2.3


From 126fdc3249c9ced2a0d20f916858fec26a445f61 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 6 Dec 2011 07:58:21 +0000
Subject: inet_diag: Introduce new inet_diag_req header

This one coinsides with the sock_diag_req in the beginning and
contains only used fields from its previous analogue.

The existing code is patched to use the _compat version of it
for now.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 11 ++++++++++-
 net/ipv4/inet_diag.c      | 14 +++++++-------
 2 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index f7baaf637426..defe8ff36df8 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -23,7 +23,7 @@ struct inet_diag_sockid {
 
 /* Request structure */
 
-struct inet_diag_req {
+struct inet_diag_req_compat {
 	__u8	idiag_family;		/* Family of addresses. */
 	__u8	idiag_src_len;
 	__u8	idiag_dst_len;
@@ -35,6 +35,15 @@ struct inet_diag_req {
 	__u32	idiag_dbs;		/* Tables to dump (NI) */
 };
 
+struct inet_diag_req {
+	__u8	sdiag_family;
+	__u8	sdiag_protocol;
+	__u8	idiag_ext;
+	__u8	pad;
+	__u32	idiag_states;
+	struct inet_diag_sockid id;
+};
+
 enum {
 	INET_DIAG_REQ_NONE,
 	INET_DIAG_REQ_BYTECODE,
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 877875ea3d71..f37b1284b46b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -265,7 +265,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 {
 	int err;
 	struct sock *sk;
-	struct inet_diag_req *req = NLMSG_DATA(nlh);
+	struct inet_diag_req_compat *req = NLMSG_DATA(nlh);
 	struct sk_buff *rep;
 	struct inet_hashinfo *hashinfo;
 	const struct inet_diag_handler *handler;
@@ -504,7 +504,7 @@ static int inet_csk_diag_dump(struct sock *sk,
 			      struct netlink_callback *cb,
 			      const struct nlattr *bc)
 {
-	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+	struct inet_diag_req_compat *r = NLMSG_DATA(cb->nlh);
 
 	if (bc != NULL) {
 		struct inet_diag_entry entry;
@@ -541,7 +541,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
 			       struct netlink_callback *cb,
 			       const struct nlattr *bc)
 {
-	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+	struct inet_diag_req_compat *r = NLMSG_DATA(cb->nlh);
 
 	if (bc != NULL) {
 		struct inet_diag_entry entry;
@@ -629,7 +629,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 			       const struct nlattr *bc)
 {
 	struct inet_diag_entry entry;
-	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+	struct inet_diag_req_compat *r = NLMSG_DATA(cb->nlh);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct listen_sock *lopt;
 	struct inet_sock *inet = inet_sk(sk);
@@ -712,12 +712,12 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int i, num;
 	int s_i, s_num;
-	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+	struct inet_diag_req_compat *r = NLMSG_DATA(cb->nlh);
 	const struct inet_diag_handler *handler;
 	struct inet_hashinfo *hashinfo;
 	const struct nlattr *bc = NULL;
 
-	if (nlmsg_attrlen(cb->nlh, sizeof(struct inet_diag_req)))
+	if (nlmsg_attrlen(cb->nlh, sizeof(struct inet_diag_req_compat)))
 		bc = nlmsg_find_attr(cb->nlh, sizeof(*r), INET_DIAG_REQ_BYTECODE);
 
 	handler = inet_diag_lock_handler(inet_diag_type2proto(cb->nlh->nlmsg_type));
@@ -863,7 +863,7 @@ unlock:
 
 static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	int hdrlen = sizeof(struct inet_diag_req);
+	int hdrlen = sizeof(struct inet_diag_req_compat);
 
 	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
 	    nlmsg_len(nlh) < hdrlen)
-- 
cgit v1.2.3


From ac99b862fb98a36929831791da31714f709c2aa8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 6 Jul 2011 14:20:14 +0200
Subject: jump_label: Provide jump_label_key initializers

Provide two initializers for jump_label_key that initialize it enabled
or disabled. Also modify all jump_label code to allow for jump_labels to be
initialized enabled.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Jason Baron <jbaron@redhat.com>
Link: http://lkml.kernel.org/n/tip-p40e3yj21b68y03z1yv825e7@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/jump_label.h |  3 +++
 kernel/jump_label.c        | 12 ++++++++----
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index a1e7f909c801..5ce8b140428f 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -128,4 +128,7 @@ static inline void jump_label_rate_limit(struct jump_label_key_deferred *key,
 }
 #endif	/* HAVE_JUMP_LABEL */
 
+#define jump_label_key_enabled	((struct jump_label_key){ .enabled = ATOMIC_INIT(1), })
+#define jump_label_key_disabled	((struct jump_label_key){ .enabled = ATOMIC_INIT(0), })
+
 #endif	/* _LINUX_JUMP_LABEL_H */
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 3fb7b79c86fd..30c3c7708132 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -248,8 +248,13 @@ void jump_label_apply_nops(struct module *mod)
 	if (iter_start == iter_stop)
 		return;
 
-	for (iter = iter_start; iter < iter_stop; iter++)
-		arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE);
+	for (iter = iter_start; iter < iter_stop; iter++) {
+		struct jump_label_key *iterk;
+
+		iterk = (struct jump_label_key *)(unsigned long)iter->key;
+		arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ?
+				JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE);
+	}
 }
 
 static int jump_label_add_module(struct module *mod)
@@ -289,8 +294,7 @@ static int jump_label_add_module(struct module *mod)
 		key->next = jlm;
 
 		if (jump_label_enabled(key))
-			__jump_label_update(key, iter, iter_stop,
-					    JUMP_LABEL_ENABLE);
+			__jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From fdaabd800bdd60652a448994eeb77442180db6c0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 6 Dec 2011 12:47:55 +0100
Subject: sched: Fix compile error for UP,!NOHZ

Commit 69e1e811 ("sched, nohz: Track nr_busy_cpus in the
sched_group_power") messed up the static inline function definition.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/n/tip-abjah8ctq5qrjjtdiabe8lph@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 295666cb5b86..64527c499624 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -277,7 +277,7 @@ extern void set_cpu_sd_state_idle(void);
 extern int get_nohz_timer_target(void);
 #else
 static inline void select_nohz_load_balancer(int stop_tick) { }
-static inline void set_cpu_sd_state_idle(void);
+static inline void set_cpu_sd_state_idle(void) { }
 #endif
 
 /*
-- 
cgit v1.2.3


From 54858ee5bf659f80a784303e41ee8898fd163f98 Mon Sep 17 00:00:00 2001
From: Alexander Simon <an.alexsimon@googlemail.com>
Date: Wed, 30 Nov 2011 16:56:32 +0100
Subject: nl80211: Parse channel type attribute in an ibss join request

Prepare cfg80211 for IBSS HT:
 * extend cfg80211 ibss struct with channel_type
 * Check if extension channel can be used
 * Export can_beacon_sec_chan for use in mac80211 (will be called
   from ibss.c later).

Signed-off-by: Alexander Simon <an.alexsimon@googlemail.com>
[siwu@hrz.tu-chemnitz.de: Updates]
* fix cfg80211_can_beacon_ext_chan comment
* remove implicit channel_type enum assumptions
* remove radar channel flags check
* add HT IBSS feature flag
* reword commit message

Signed-off-by: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
Signed-off-by: Mathias Kretschmer <mathias.kretschmer@fokus.fraunhofer.de>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  2 ++
 include/net/cfg80211.h  | 11 +++++++++++
 net/wireless/chan.c     | 12 +++++++-----
 net/wireless/nl80211.c  | 32 ++++++++++++++++++++++++++++++--
 4 files changed, 50 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index f51e3bf93a96..a18760684fc9 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -2785,9 +2785,11 @@ enum nl80211_ap_sme_features {
  * @NL80211_FEATURE_SK_TX_STATUS: This driver supports reflecting back
  *	TX status to the socket error queue when requested with the
  *	socket option.
+ * @NL80211_FEATURE_HT_IBSS: This driver supports IBSS with HT datarates.
  */
 enum nl80211_feature_flags {
 	NL80211_FEATURE_SK_TX_STATUS	= 1 << 0,
+	NL80211_FEATURE_HT_IBSS		= 1 << 1,
 };
 
 /**
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f0e82b2e4227..3de1c39d03e5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1149,6 +1149,7 @@ struct cfg80211_ibss_params {
 	u8 *ssid;
 	u8 *bssid;
 	struct ieee80211_channel *channel;
+	enum nl80211_channel_type channel_type;
 	u8 *ie;
 	u8 ssid_len, ie_len;
 	u16 beacon_interval;
@@ -3267,6 +3268,16 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
 				 const u8 *frame, size_t len,
 				 int freq, gfp_t gfp);
 
+/*
+ * cfg80211_can_beacon_sec_chan - test if ht40 on extension channel can be used
+ * @wiphy: the wiphy
+ * @chan: main channel
+ * @channel_type: HT mode
+ */
+int cfg80211_can_beacon_sec_chan(struct wiphy *wiphy,
+				 struct ieee80211_channel *chan,
+				 enum nl80211_channel_type channel_type);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 17cd0c04d139..2fcfe0993ca2 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -6,6 +6,7 @@
  * Copyright 2009	Johannes Berg <johannes@sipsolutions.net>
  */
 
+#include <linux/export.h>
 #include <net/cfg80211.h>
 #include "core.h"
 
@@ -44,9 +45,9 @@ rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
 	return chan;
 }
 
-static bool can_beacon_sec_chan(struct wiphy *wiphy,
-				struct ieee80211_channel *chan,
-				enum nl80211_channel_type channel_type)
+int cfg80211_can_beacon_sec_chan(struct wiphy *wiphy,
+				  struct ieee80211_channel *chan,
+				  enum nl80211_channel_type channel_type)
 {
 	struct ieee80211_channel *sec_chan;
 	int diff;
@@ -75,6 +76,7 @@ static bool can_beacon_sec_chan(struct wiphy *wiphy,
 
 	return true;
 }
+EXPORT_SYMBOL(cfg80211_can_beacon_sec_chan);
 
 int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
 		      struct wireless_dev *wdev, int freq,
@@ -109,8 +111,8 @@ int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
 		switch (channel_type) {
 		case NL80211_CHAN_HT40PLUS:
 		case NL80211_CHAN_HT40MINUS:
-			if (!can_beacon_sec_chan(&rdev->wiphy, chan,
-						 channel_type)) {
+			if (!cfg80211_can_beacon_sec_chan(&rdev->wiphy, chan,
+							  channel_type)) {
 				printk(KERN_DEBUG
 				       "cfg80211: Secondary channel not "
 				       "allowed to initiate communication\n");
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8a9b4d817ae6..ba439664c2e0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4682,13 +4682,41 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 		ibss.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
 	}
 
-	ibss.channel = ieee80211_get_channel(wiphy,
-		nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
+	if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
+		enum nl80211_channel_type channel_type;
+
+		channel_type = nla_get_u32(
+				info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
+		if (channel_type != NL80211_CHAN_NO_HT &&
+		    channel_type != NL80211_CHAN_HT20 &&
+		    channel_type != NL80211_CHAN_HT40MINUS &&
+		    channel_type != NL80211_CHAN_HT40PLUS)
+			return -EINVAL;
+
+		if (channel_type != NL80211_CHAN_NO_HT &&
+		    !(wiphy->features & NL80211_FEATURE_HT_IBSS))
+			return -EINVAL;
+
+		ibss.channel_type = channel_type;
+	} else {
+		ibss.channel_type = NL80211_CHAN_NO_HT;
+	}
+
+	ibss.channel = rdev_freq_to_chan(rdev,
+		nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]),
+		ibss.channel_type);
 	if (!ibss.channel ||
 	    ibss.channel->flags & IEEE80211_CHAN_NO_IBSS ||
 	    ibss.channel->flags & IEEE80211_CHAN_DISABLED)
 		return -EINVAL;
 
+	/* Both channels should be able to initiate communication */
+	if ((ibss.channel_type == NL80211_CHAN_HT40PLUS ||
+	     ibss.channel_type == NL80211_CHAN_HT40MINUS) &&
+	    !cfg80211_can_beacon_sec_chan(&rdev->wiphy, ibss.channel,
+					  ibss.channel_type))
+		return -EINVAL;
+
 	ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED];
 	ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY];
 
-- 
cgit v1.2.3


From 3df6eaea76a9e1351b539541c0314129a0e4b10c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 6 Dec 2011 10:39:40 +0100
Subject: mac80211: accept public action frames with mismatched BSSID

Arik's patch "mac80211: allow action frames with unknown
BSSID in GO mode" allowed any action frames in P2P mode
to go through, but only to cooked monitor interfaces as
the IEEE80211_RX_RA_MATCH was still cleared. As a result
my no-monitor patches broke invitation responses.

Instead of allowing any action frames in P2P GO mode to
go through with a wrong BSSID like that patch did, allow
all public action frames. They will never be processed
by mac80211, but can be reported via nl80211 then.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 17 +++++++++++++++++
 net/mac80211/rx.c         | 13 ++++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 66cedf6eb5c2..17f2a768e2ad 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1694,6 +1694,23 @@ static inline bool ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr)
 	return false;
 }
 
+/**
+ * ieee80211_is_public_action - check if frame is a public action frame
+ * @hdr: the frame
+ * @len: length of the frame
+ */
+static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr,
+					      size_t len)
+{
+	struct ieee80211_mgmt *mgmt = (void *)hdr;
+
+	if (len < IEEE80211_MIN_ACTION_SIZE)
+		return false;
+	if (!ieee80211_is_action(hdr->frame_control))
+		return false;
+	return mgmt->u.action.category == WLAN_CATEGORY_PUBLIC;
+}
+
 /**
  * ieee80211_fhss_chan_to_freq - get channel frequency
  * @channel: the FHSS channel
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2a85fdfebde2..7d226417ef46 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2797,10 +2797,17 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
 				return 0;
 		} else if (!ieee80211_bssid_match(bssid,
 					sdata->vif.addr)) {
+			/*
+			 * Accept public action frames even when the
+			 * BSSID doesn't match, this is used for P2P
+			 * and location updates. Note that mac80211
+			 * itself never looks at these frames.
+			 */
+			if (!(status->rx_flags & IEEE80211_RX_IN_SCAN) &&
+			    ieee80211_is_public_action(hdr, skb->len))
+				return 1;
 			if (!(status->rx_flags & IEEE80211_RX_IN_SCAN) &&
-			    !ieee80211_is_beacon(hdr->frame_control) &&
-			    !(ieee80211_is_action(hdr->frame_control) &&
-			      sdata->vif.p2p))
+			    !ieee80211_is_beacon(hdr->frame_control))
 				return 0;
 			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
 		}
-- 
cgit v1.2.3


From d310310cbff18ec385c6ab4d58f33b100192a96a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 1 Dec 2011 22:44:39 +0100
Subject: Freezer / sunrpc / NFS: don't allow TASK_KILLABLE sleeps to block the
 freezer

Allow the freezer to skip wait_on_bit_killable sleeps in the sunrpc
layer. This should allow suspend and hibernate events to proceed, even
when there are RPC's pending on the wire.

Also, wrap the TASK_KILLABLE sleeps in NFS layer in freezer_do_not_count
and freezer_count calls. This allows the freezer to skip tasks that are
sleeping while looping on EJUKEBOX or NFS4ERR_DELAY sorts of errors.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 fs/nfs/inode.c          |  3 ++-
 fs/nfs/nfs3proc.c       |  3 ++-
 fs/nfs/nfs4proc.c       |  5 +++--
 fs/nfs/proc.c           |  3 ++-
 include/linux/freezer.h | 28 ++++++++++++++++++++++++++++
 net/sunrpc/sched.c      |  3 ++-
 6 files changed, 39 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 50a15fa8cf98..bf3a57bbbfcf 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -38,6 +38,7 @@
 #include <linux/nfs_xdr.h>
 #include <linux/slab.h>
 #include <linux/compat.h>
+#include <linux/freezer.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -77,7 +78,7 @@ int nfs_wait_bit_killable(void *word)
 {
 	if (fatal_signal_pending(current))
 		return -ERESTARTSYS;
-	schedule();
+	freezable_schedule();
 	return 0;
 }
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d4bc9ed91748..91943953a370 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -17,6 +17,7 @@
 #include <linux/nfs_page.h>
 #include <linux/lockd/bind.h>
 #include <linux/nfs_mount.h>
+#include <linux/freezer.h>
 
 #include "iostat.h"
 #include "internal.h"
@@ -32,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 		res = rpc_call_sync(clnt, msg, flags);
 		if (res != -EJUKEBOX && res != -EKEYEXPIRED)
 			break;
-		schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
+		freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
 		res = -ERESTARTSYS;
 	} while (!fatal_signal_pending(current));
 	return res;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index be2bbac13817..b28bb19b04f0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -53,6 +53,7 @@
 #include <linux/sunrpc/bc_xprt.h>
 #include <linux/xattr.h>
 #include <linux/utsname.h>
+#include <linux/freezer.h>
 
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -241,7 +242,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
 		*timeout = NFS4_POLL_RETRY_MIN;
 	if (*timeout > NFS4_POLL_RETRY_MAX)
 		*timeout = NFS4_POLL_RETRY_MAX;
-	schedule_timeout_killable(*timeout);
+	freezable_schedule_timeout_killable(*timeout);
 	if (fatal_signal_pending(current))
 		res = -ERESTARTSYS;
 	*timeout <<= 1;
@@ -3950,7 +3951,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
 static unsigned long
 nfs4_set_lock_task_retry(unsigned long timeout)
 {
-	schedule_timeout_killable(timeout);
+	freezable_schedule_timeout_killable(timeout);
 	timeout <<= 1;
 	if (timeout > NFS4_LOCK_MAXTIMEOUT)
 		return NFS4_LOCK_MAXTIMEOUT;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f48125da198a..0c672588fe5a 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -41,6 +41,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
 #include <linux/lockd/bind.h>
+#include <linux/freezer.h>
 #include "internal.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
@@ -59,7 +60,7 @@ nfs_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 		res = rpc_call_sync(clnt, msg, flags);
 		if (res != -EKEYEXPIRED)
 			break;
-		schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
+		freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
 		res = -ERESTARTSYS;
 	} while (!fatal_signal_pending(current));
 	return res;
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index c1ee2833655e..30f06c220467 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -104,6 +104,29 @@ static inline int freezer_should_skip(struct task_struct *p)
 	return !!(p->flags & PF_FREEZER_SKIP);
 }
 
+/*
+ * These macros are intended to be used whenever you want allow a task that's
+ * sleeping in TASK_UNINTERRUPTIBLE or TASK_KILLABLE state to be frozen. Note
+ * that neither return any clear indication of whether a freeze event happened
+ * while in this function.
+ */
+
+/* Like schedule(), but should not block the freezer. */
+#define freezable_schedule()						\
+({									\
+	freezer_do_not_count();						\
+	schedule();							\
+	freezer_count();						\
+})
+
+/* Like schedule_timeout_killable(), but should not block the freezer. */
+#define freezable_schedule_timeout_killable(timeout)			\
+({									\
+	freezer_do_not_count();						\
+	schedule_timeout_killable(timeout);				\
+	freezer_count();						\
+})
+
 /*
  * Freezer-friendly wrappers around wait_event_interruptible(),
  * wait_event_killable() and wait_event_interruptible_timeout(), originally
@@ -163,6 +186,11 @@ static inline void freezer_count(void) {}
 static inline int freezer_should_skip(struct task_struct *p) { return 0; }
 static inline void set_freezable(void) {}
 
+#define freezable_schedule()  schedule()
+
+#define freezable_schedule_timeout_killable(timeout)			\
+	schedule_timeout_killable(timeout)
+
 #define wait_event_freezable(wq, condition)				\
 		wait_event_interruptible(wq, condition)
 
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index d12ffa545811..5317b9341b53 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -18,6 +18,7 @@
 #include <linux/smp.h>
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
+#include <linux/freezer.h>
 
 #include <linux/sunrpc/clnt.h>
 
@@ -231,7 +232,7 @@ static int rpc_wait_bit_killable(void *word)
 {
 	if (fatal_signal_pending(current))
 		return -ERESTARTSYS;
-	schedule();
+	freezable_schedule();
 	return 0;
 }
 
-- 
cgit v1.2.3


From 7710ec36b6f516e026f9e29e50e67d2547c2a79b Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 25 Nov 2011 18:44:05 -0500
Subject: svcrpc: make svc_delete_xprt static

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_xprt.h | 1 -
 net/sunrpc/svc_xprt.c           | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 8620f79658d4..5488e593160a 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -118,7 +118,6 @@ void	svc_xprt_received(struct svc_xprt *);
 void	svc_xprt_put(struct svc_xprt *xprt);
 void	svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
 void	svc_close_xprt(struct svc_xprt *xprt);
-void	svc_delete_xprt(struct svc_xprt *xprt);
 int	svc_port_is_privileged(struct sockaddr *sin);
 int	svc_print_xprts(char *buf, int maxlen);
 struct	svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 447cd0eb415c..8046c6d1f9c2 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -22,6 +22,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static void svc_age_temp_xprts(unsigned long closure);
+static void svc_delete_xprt(struct svc_xprt *xprt);
 
 /* apparently the "standard" is that clients close
  * idle connections after 5 minutes, servers after
@@ -878,7 +879,7 @@ static void call_xpt_users(struct svc_xprt *xprt)
 /*
  * Remove a dead transport
  */
-void svc_delete_xprt(struct svc_xprt *xprt)
+static void svc_delete_xprt(struct svc_xprt *xprt)
 {
 	struct svc_serv	*serv = xprt->xpt_server;
 	struct svc_deferred_req *dr;
-- 
cgit v1.2.3


From 2fefb8a09e7ed251ae8996e0c69066e74c5aa560 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 29 Nov 2011 11:35:35 -0500
Subject: svcrpc: destroy server sockets all at once

There's no reason I can see that we need to call sv_shutdown between
closing the two lists of sockets.

Cc: stable@kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svcsock.h |  2 +-
 net/sunrpc/svc.c               |  7 +------
 net/sunrpc/svc_xprt.c          | 11 ++++++++++-
 3 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 85c50b40759d..c84e9741cb2a 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -34,7 +34,7 @@ struct svc_sock {
 /*
  * Function prototypes.
  */
-void		svc_close_all(struct list_head *);
+void		svc_close_all(struct svc_serv *);
 int		svc_recv(struct svc_rqst *, long);
 int		svc_send(struct svc_rqst *);
 void		svc_drop(struct svc_rqst *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6e038884ae0c..60babf0a9847 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -528,16 +528,11 @@ svc_destroy(struct svc_serv *serv)
 
 	del_timer_sync(&serv->sv_temptimer);
 
-	svc_close_all(&serv->sv_tempsocks);
+	svc_close_all(serv);
 
 	if (serv->sv_shutdown)
 		serv->sv_shutdown(serv);
 
-	svc_close_all(&serv->sv_permsocks);
-
-	BUG_ON(!list_empty(&serv->sv_permsocks));
-	BUG_ON(!list_empty(&serv->sv_tempsocks));
-
 	cache_clean_deferred(serv);
 
 	if (svc_serv_is_pooled(serv))
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 8046c6d1f9c2..099ddf99d2a1 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -929,7 +929,7 @@ void svc_close_xprt(struct svc_xprt *xprt)
 }
 EXPORT_SYMBOL_GPL(svc_close_xprt);
 
-void svc_close_all(struct list_head *xprt_list)
+static void svc_close_list(struct list_head *xprt_list)
 {
 	struct svc_xprt *xprt;
 	struct svc_xprt *tmp;
@@ -947,6 +947,15 @@ void svc_close_all(struct list_head *xprt_list)
 	}
 }
 
+void svc_close_all(struct svc_serv *serv)
+{
+	svc_close_list(&serv->sv_tempsocks);
+	svc_close_list(&serv->sv_permsocks);
+	BUG_ON(!list_empty(&serv->sv_permsocks));
+	BUG_ON(!list_empty(&serv->sv_tempsocks));
+
+}
+
 /*
  * Handle defer and revisit of requests
  */
-- 
cgit v1.2.3


From e84b2c202771bbd538866207efcb1f7dbab8045b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 6 Dec 2011 22:19:54 +0100
Subject: PM / Domains: Make it possible to assign names to generic PM domains

Add a name member pointer to struct generic_pm_domain and use it in
diagnostic messages regarding the domain power-off and power-on
latencies.  Update the ARM shmobile SH7372 code to assign names to
the PM domains used by it.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Magnus Damm <damm@opensource.se>
---
 arch/arm/mach-shmobile/pm-sh7372.c | 16 ++++++++++++----
 drivers/base/power/domain.c        | 14 ++++++++++++--
 include/linux/pm_domain.h          |  1 +
 3 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c
index adf1765e69c6..8d9ea8924ed3 100644
--- a/arch/arm/mach-shmobile/pm-sh7372.c
+++ b/arch/arm/mach-shmobile/pm-sh7372.c
@@ -101,8 +101,8 @@ static int pd_power_down(struct generic_pm_domain *genpd)
 	}
 
 	if (!sh7372_pd->no_debug)
-		pr_debug("sh7372 power domain down 0x%08x -> PSTR = 0x%08x\n",
-			 mask, __raw_readl(PSTR));
+		pr_debug("%s: Power off, 0x%08x -> PSTR = 0x%08x\n",
+			 genpd->name, mask, __raw_readl(PSTR));
 
 	return 0;
 }
@@ -133,8 +133,8 @@ static int __pd_power_up(struct sh7372_pm_domain *sh7372_pd, bool do_resume)
 		ret = -EIO;
 
 	if (!sh7372_pd->no_debug)
-		pr_debug("sh7372 power domain up 0x%08x -> PSTR = 0x%08x\n",
-			 mask, __raw_readl(PSTR));
+		pr_debug("%s: Power on, 0x%08x -> PSTR = 0x%08x\n",
+			 sh7372_pd->genpd.name, mask, __raw_readl(PSTR));
 
  out:
 	if (ret == 0 && sh7372_pd->resume && do_resume)
@@ -233,18 +233,22 @@ void sh7372_pm_add_subdomain(struct sh7372_pm_domain *sh7372_pd,
 }
 
 struct sh7372_pm_domain sh7372_a4lc = {
+	.genpd.name = "A4LC",
 	.bit_shift = 1,
 };
 
 struct sh7372_pm_domain sh7372_a4mp = {
+	.genpd.name = "A4MP",
 	.bit_shift = 2,
 };
 
 struct sh7372_pm_domain sh7372_d4 = {
+	.genpd.name = "D4",
 	.bit_shift = 3,
 };
 
 struct sh7372_pm_domain sh7372_a4r = {
+	.genpd.name = "A4R",
 	.bit_shift = 5,
 	.gov = &sh7372_always_on_gov,
 	.suspend = sh7372_a4r_suspend,
@@ -253,14 +257,17 @@ struct sh7372_pm_domain sh7372_a4r = {
 };
 
 struct sh7372_pm_domain sh7372_a3rv = {
+	.genpd.name = "A3RV",
 	.bit_shift = 6,
 };
 
 struct sh7372_pm_domain sh7372_a3ri = {
+	.genpd.name = "A3RI",
 	.bit_shift = 8,
 };
 
 struct sh7372_pm_domain sh7372_a3sp = {
+	.genpd.name = "A3SP",
 	.bit_shift = 11,
 	.gov = &sh7372_always_on_gov,
 	.no_debug = true,
@@ -275,6 +282,7 @@ static void sh7372_a3sp_init(void)
 }
 
 struct sh7372_pm_domain sh7372_a3sg = {
+	.genpd.name = "A3SG",
 	.bit_shift = 13,
 };
 
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 5a8d67d51f0e..ad6ba2e04677 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -209,8 +209,13 @@ int __pm_genpd_poweron(struct generic_pm_domain *genpd)
 			goto err;
 
 		elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
-		if (elapsed_ns > genpd->power_on_latency_ns)
+		if (elapsed_ns > genpd->power_on_latency_ns) {
 			genpd->power_on_latency_ns = elapsed_ns;
+			if (genpd->name)
+				pr_warning("%s: Power-on latency exceeded, "
+					"new value %lld ns\n", genpd->name,
+					elapsed_ns);
+		}
 	}
 
 	genpd_set_active(genpd);
@@ -428,8 +433,13 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd)
 		}
 
 		elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
-		if (elapsed_ns > genpd->power_off_latency_ns)
+		if (elapsed_ns > genpd->power_off_latency_ns) {
 			genpd->power_off_latency_ns = elapsed_ns;
+			if (genpd->name)
+				pr_warning("%s: Power-off latency exceeded, "
+					"new value %lld ns\n", genpd->name,
+					elapsed_ns);
+		}
 	}
 
 	genpd->status = GPD_STATE_POWER_OFF;
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index fbb81bc5065a..fb809b904891 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -50,6 +50,7 @@ struct generic_pm_domain {
 	struct mutex lock;
 	struct dev_power_governor *gov;
 	struct work_struct power_off_work;
+	char *name;
 	unsigned int in_progress;	/* Number of devices being suspended now */
 	atomic_t sd_count;	/* Number of subdomains with power "on" */
 	enum gpd_status status;	/* Current state of the domain */
-- 
cgit v1.2.3


From bd4620ddf6d6eb3d9e7d073ad601fa4299d46ba9 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Tue, 6 Dec 2011 14:19:10 +0300
Subject: SUNRPC: create svc_xprt in proper network namespace

This patch makes svc_xprt inherit network namespace link from its socket.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_xprt.h          | 2 +-
 net/sunrpc/svc_xprt.c                    | 6 +++---
 net/sunrpc/svcsock.c                     | 8 +++++---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +-
 4 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 5488e593160a..dfa900948af7 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -109,7 +109,7 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u
 
 int	svc_reg_xprt_class(struct svc_xprt_class *);
 void	svc_unreg_xprt_class(struct svc_xprt_class *);
-void	svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
+void	svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
 		      struct svc_serv *);
 int	svc_create_xprt(struct svc_serv *, const char *, struct net *,
 			const int, const unsigned short, int);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 0d80c064e634..0633c7e2fe63 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -148,8 +148,8 @@ EXPORT_SYMBOL_GPL(svc_xprt_put);
  * Called by transport drivers to initialize the transport independent
  * portion of the transport instance.
  */
-void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
-		   struct svc_serv *serv)
+void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
+		   struct svc_xprt *xprt, struct svc_serv *serv)
 {
 	memset(xprt, 0, sizeof(*xprt));
 	xprt->xpt_class = xcl;
@@ -164,7 +164,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
 	spin_lock_init(&xprt->xpt_lock);
 	set_bit(XPT_BUSY, &xprt->xpt_flags);
 	rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
-	xprt->xpt_net = get_net(&init_net);
+	xprt->xpt_net = get_net(net);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 71bed1c1c77a..277909e651ed 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -739,7 +739,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 {
 	int err, level, optname, one = 1;
 
-	svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
+	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
+		      &svsk->sk_xprt, serv);
 	clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
 	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
 	svsk->sk_sk->sk_write_space = svc_write_space;
@@ -1343,7 +1344,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
 {
 	struct sock	*sk = svsk->sk_sk;
 
-	svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv);
+	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class,
+		      &svsk->sk_xprt, serv);
 	set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
 	if (sk->sk_state == TCP_LISTEN) {
 		dprintk("setting up TCP socket for listening\n");
@@ -1659,7 +1661,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
 		return ERR_PTR(-ENOMEM);
 
 	xprt = &svsk->sk_xprt;
-	svc_xprt_init(&svc_tcp_bc_class, xprt, serv);
+	svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv);
 
 	serv->sv_bc_xprt = xprt;
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index ba1296d88de0..894cb42db91d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -453,7 +453,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 
 	if (!cma_xprt)
 		return NULL;
-	svc_xprt_init(&svc_rdma_class, &cma_xprt->sc_xprt, serv);
+	svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
 	INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
-- 
cgit v1.2.3


From 02125a826459a6ad142f8d91c5b6357562f96615 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 5 Dec 2011 08:43:34 -0500
Subject: fix apparmor dereferencing potentially freed dentry, sanitize
 __d_path() API

__d_path() API is asking for trouble and in case of apparmor d_namespace_path()
getting just that.  The root cause is that when __d_path() misses the root
it had been told to look for, it stores the location of the most remote ancestor
in *root.  Without grabbing references.  Sure, at the moment of call it had
been pinned down by what we have in *path.  And if we raced with umount -l, we
could have very well stopped at vfsmount/dentry that got freed as soon as
prepend_path() dropped vfsmount_lock.

It is safe to compare these pointers with pre-existing (and known to be still
alive) vfsmount and dentry, as long as all we are asking is "is it the same
address?".  Dereferencing is not safe and apparmor ended up stepping into
that.  d_namespace_path() really wants to examine the place where we stopped,
even if it's not connected to our namespace.  As the result, it looked
at ->d_sb->s_magic of a dentry that might've been already freed by that point.
All other callers had been careful enough to avoid that, but it's really
a bad interface - it invites that kind of trouble.

The fix is fairly straightforward, even though it's bigger than I'd like:
	* prepend_path() root argument becomes const.
	* __d_path() is never called with NULL/NULL root.  It was a kludge
to start with.  Instead, we have an explicit function - d_absolute_root().
Same as __d_path(), except that it doesn't get root passed and stops where
it stops.  apparmor and tomoyo are using it.
	* __d_path() returns NULL on path outside of root.  The main
caller is show_mountinfo() and that's precisely what we pass root for - to
skip those outside chroot jail.  Those who don't want that can (and do)
use d_path().
	* __d_path() root argument becomes const.  Everyone agrees, I hope.
	* apparmor does *NOT* try to use __d_path() or any of its variants
when it sees that path->mnt is an internal vfsmount.  In that case it's
definitely not mounted anywhere and dentry_path() is exactly what we want
there.  Handling of sysctl()-triggered weirdness is moved to that place.
	* if apparmor is asked to do pathname relative to chroot jail
and __d_path() tells it we it's not in that jail, the sucker just calls
d_absolute_path() instead.  That's the other remaining caller of __d_path(),
BTW.
        * seq_path_root() does _NOT_ return -ENAMETOOLONG (it's stupid anyway -
the normal seq_file logics will take care of growing the buffer and redoing
the call of ->show() just fine).  However, if it gets path not reachable
from root, it returns SEQ_SKIP.  The only caller adjusted (i.e. stopped
ignoring the return value as it used to do).

Reviewed-by: John Johansen <john.johansen@canonical.com>
ACKed-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable@vger.kernel.org
---
 fs/dcache.c                | 71 ++++++++++++++++++++++++++++------------------
 fs/namespace.c             | 20 +++++++------
 fs/seq_file.c              |  6 ++--
 include/linux/dcache.h     |  3 +-
 include/linux/fs.h         |  1 +
 security/apparmor/path.c   | 65 ++++++++++++++++++++++++------------------
 security/tomoyo/realpath.c |  3 +-
 7 files changed, 100 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 10ba92def3f6..89509b5a090e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2439,16 +2439,14 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
 /**
  * prepend_path - Prepend path string to a buffer
  * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry (may be modified by this function)
+ * @root: root vfsmnt/dentry
  * @buffer: pointer to the end of the buffer
  * @buflen: pointer to buffer length
  *
  * Caller holds the rename_lock.
- *
- * If path is not reachable from the supplied root, then the value of
- * root is changed (without modifying refcounts).
  */
-static int prepend_path(const struct path *path, struct path *root,
+static int prepend_path(const struct path *path,
+			const struct path *root,
 			char **buffer, int *buflen)
 {
 	struct dentry *dentry = path->dentry;
@@ -2483,10 +2481,10 @@ static int prepend_path(const struct path *path, struct path *root,
 		dentry = parent;
 	}
 
-out:
 	if (!error && !slash)
 		error = prepend(buffer, buflen, "/", 1);
 
+out:
 	br_read_unlock(vfsmount_lock);
 	return error;
 
@@ -2500,15 +2498,17 @@ global_root:
 		WARN(1, "Root dentry has weird name <%.*s>\n",
 		     (int) dentry->d_name.len, dentry->d_name.name);
 	}
-	root->mnt = vfsmnt;
-	root->dentry = dentry;
+	if (!slash)
+		error = prepend(buffer, buflen, "/", 1);
+	if (!error)
+		error = vfsmnt->mnt_ns ? 1 : 2;
 	goto out;
 }
 
 /**
  * __d_path - return the path of a dentry
  * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry (may be modified by this function)
+ * @root: root vfsmnt/dentry
  * @buf: buffer to return value in
  * @buflen: buffer length
  *
@@ -2519,10 +2519,10 @@ global_root:
  *
  * "buflen" should be positive.
  *
- * If path is not reachable from the supplied root, then the value of
- * root is changed (without modifying refcounts).
+ * If the path is not reachable from the supplied root, return %NULL.
  */
-char *__d_path(const struct path *path, struct path *root,
+char *__d_path(const struct path *path,
+	       const struct path *root,
 	       char *buf, int buflen)
 {
 	char *res = buf + buflen;
@@ -2533,7 +2533,28 @@ char *__d_path(const struct path *path, struct path *root,
 	error = prepend_path(path, root, &res, &buflen);
 	write_sequnlock(&rename_lock);
 
-	if (error)
+	if (error < 0)
+		return ERR_PTR(error);
+	if (error > 0)
+		return NULL;
+	return res;
+}
+
+char *d_absolute_path(const struct path *path,
+	       char *buf, int buflen)
+{
+	struct path root = {};
+	char *res = buf + buflen;
+	int error;
+
+	prepend(&res, &buflen, "\0", 1);
+	write_seqlock(&rename_lock);
+	error = prepend_path(path, &root, &res, &buflen);
+	write_sequnlock(&rename_lock);
+
+	if (error > 1)
+		error = -EINVAL;
+	if (error < 0)
 		return ERR_PTR(error);
 	return res;
 }
@@ -2541,8 +2562,9 @@ char *__d_path(const struct path *path, struct path *root,
 /*
  * same as __d_path but appends "(deleted)" for unlinked files.
  */
-static int path_with_deleted(const struct path *path, struct path *root,
-				 char **buf, int *buflen)
+static int path_with_deleted(const struct path *path,
+			     const struct path *root,
+			     char **buf, int *buflen)
 {
 	prepend(buf, buflen, "\0", 1);
 	if (d_unlinked(path->dentry)) {
@@ -2579,7 +2601,6 @@ char *d_path(const struct path *path, char *buf, int buflen)
 {
 	char *res = buf + buflen;
 	struct path root;
-	struct path tmp;
 	int error;
 
 	/*
@@ -2594,9 +2615,8 @@ char *d_path(const struct path *path, char *buf, int buflen)
 
 	get_fs_root(current->fs, &root);
 	write_seqlock(&rename_lock);
-	tmp = root;
-	error = path_with_deleted(path, &tmp, &res, &buflen);
-	if (error)
+	error = path_with_deleted(path, &root, &res, &buflen);
+	if (error < 0)
 		res = ERR_PTR(error);
 	write_sequnlock(&rename_lock);
 	path_put(&root);
@@ -2617,7 +2637,6 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
 {
 	char *res = buf + buflen;
 	struct path root;
-	struct path tmp;
 	int error;
 
 	if (path->dentry->d_op && path->dentry->d_op->d_dname)
@@ -2625,9 +2644,8 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
 
 	get_fs_root(current->fs, &root);
 	write_seqlock(&rename_lock);
-	tmp = root;
-	error = path_with_deleted(path, &tmp, &res, &buflen);
-	if (!error && !path_equal(&tmp, &root))
+	error = path_with_deleted(path, &root, &res, &buflen);
+	if (error > 0)
 		error = prepend_unreachable(&res, &buflen);
 	write_sequnlock(&rename_lock);
 	path_put(&root);
@@ -2758,19 +2776,18 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 	write_seqlock(&rename_lock);
 	if (!d_unlinked(pwd.dentry)) {
 		unsigned long len;
-		struct path tmp = root;
 		char *cwd = page + PAGE_SIZE;
 		int buflen = PAGE_SIZE;
 
 		prepend(&cwd, &buflen, "\0", 1);
-		error = prepend_path(&pwd, &tmp, &cwd, &buflen);
+		error = prepend_path(&pwd, &root, &cwd, &buflen);
 		write_sequnlock(&rename_lock);
 
-		if (error)
+		if (error < 0)
 			goto out;
 
 		/* Unreachable from current root */
-		if (!path_equal(&tmp, &root)) {
+		if (error > 0) {
 			error = prepend_unreachable(&cwd, &buflen);
 			if (error)
 				goto out;
diff --git a/fs/namespace.c b/fs/namespace.c
index 6d3a1963879b..cfc6d4448aa5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1048,15 +1048,12 @@ static int show_mountinfo(struct seq_file *m, void *v)
 	if (err)
 		goto out;
 	seq_putc(m, ' ');
-	seq_path_root(m, &mnt_path, &root, " \t\n\\");
-	if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) {
-		/*
-		 * Mountpoint is outside root, discard that one.  Ugly,
-		 * but less so than trying to do that in iterator in a
-		 * race-free way (due to renames).
-		 */
-		return SEQ_SKIP;
-	}
+
+	/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
+	err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
+	if (err)
+		goto out;
+
 	seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
 	show_mnt_opts(m, mnt);
 
@@ -2776,3 +2773,8 @@ void kern_unmount(struct vfsmount *mnt)
 	}
 }
 EXPORT_SYMBOL(kern_unmount);
+
+bool our_mnt(struct vfsmount *mnt)
+{
+	return check_mnt(mnt);
+}
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 05d6b0e78c95..dba43c3ea3af 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -449,8 +449,6 @@ EXPORT_SYMBOL(seq_path);
 
 /*
  * Same as seq_path, but relative to supplied root.
- *
- * root may be changed, see __d_path().
  */
 int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 		  char *esc)
@@ -463,6 +461,8 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 		char *p;
 
 		p = __d_path(path, root, buf, size);
+		if (!p)
+			return SEQ_SKIP;
 		res = PTR_ERR(p);
 		if (!IS_ERR(p)) {
 			char *end = mangle_path(buf, p, esc);
@@ -474,7 +474,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 	}
 	seq_commit(m, res);
 
-	return res < 0 ? res : 0;
+	return res < 0 && res != -ENAMETOOLONG ? res : 0;
 }
 
 /*
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 4df926199369..ed9f74f6c519 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -339,7 +339,8 @@ extern int d_validate(struct dentry *, struct dentry *);
  */
 extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
 
-extern char *__d_path(const struct path *path, struct path *root, char *, int);
+extern char *__d_path(const struct path *, const struct path *, char *, int);
+extern char *d_absolute_path(const struct path *, char *, int);
 extern char *d_path(const struct path *, char *, int);
 extern char *d_path_with_unreachable(const struct path *, char *, int);
 extern char *dentry_path_raw(struct dentry *, char *, int);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e3130220ce3e..019dc558df1a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1942,6 +1942,7 @@ extern int fd_statfs(int, struct kstatfs *);
 extern int statfs_by_dentry(struct dentry *, struct kstatfs *);
 extern int freeze_super(struct super_block *super);
 extern int thaw_super(struct super_block *super);
+extern bool our_mnt(struct vfsmount *mnt);
 
 extern int current_umask(void);
 
diff --git a/security/apparmor/path.c b/security/apparmor/path.c
index 36cc0cc39e78..b566eba4a65c 100644
--- a/security/apparmor/path.c
+++ b/security/apparmor/path.c
@@ -57,23 +57,44 @@ static int prepend(char **buffer, int buflen, const char *str, int namelen)
 static int d_namespace_path(struct path *path, char *buf, int buflen,
 			    char **name, int flags)
 {
-	struct path root, tmp;
 	char *res;
-	int connected, error = 0;
+	int error = 0;
+	int connected = 1;
+
+	if (path->mnt->mnt_flags & MNT_INTERNAL) {
+		/* it's not mounted anywhere */
+		res = dentry_path(path->dentry, buf, buflen);
+		*name = res;
+		if (IS_ERR(res)) {
+			*name = buf;
+			return PTR_ERR(res);
+		}
+		if (path->dentry->d_sb->s_magic == PROC_SUPER_MAGIC &&
+		    strncmp(*name, "/sys/", 5) == 0) {
+			/* TODO: convert over to using a per namespace
+			 * control instead of hard coded /proc
+			 */
+			return prepend(name, *name - buf, "/proc", 5);
+		}
+		return 0;
+	}
 
-	/* Get the root we want to resolve too, released below */
+	/* resolve paths relative to chroot?*/
 	if (flags & PATH_CHROOT_REL) {
-		/* resolve paths relative to chroot */
+		struct path root;
 		get_fs_root(current->fs, &root);
-	} else {
-		/* resolve paths relative to namespace */
-		root.mnt = current->nsproxy->mnt_ns->root;
-		root.dentry = root.mnt->mnt_root;
-		path_get(&root);
+		res = __d_path(path, &root, buf, buflen);
+		if (res && !IS_ERR(res)) {
+			/* everything's fine */
+			*name = res;
+			path_put(&root);
+			goto ok;
+		}
+		path_put(&root);
+		connected = 0;
 	}
 
-	tmp = root;
-	res = __d_path(path, &tmp, buf, buflen);
+	res = d_absolute_path(path, buf, buflen);
 
 	*name = res;
 	/* handle error conditions - and still allow a partial path to
@@ -84,7 +105,10 @@ static int d_namespace_path(struct path *path, char *buf, int buflen,
 		*name = buf;
 		goto out;
 	}
+	if (!our_mnt(path->mnt))
+		connected = 0;
 
+ok:
 	/* Handle two cases:
 	 * 1. A deleted dentry && profile is not allowing mediation of deleted
 	 * 2. On some filesystems, newly allocated dentries appear to the
@@ -97,10 +121,7 @@ static int d_namespace_path(struct path *path, char *buf, int buflen,
 			goto out;
 	}
 
-	/* Determine if the path is connected to the expected root */
-	connected = tmp.dentry == root.dentry && tmp.mnt == root.mnt;
-
-	/* If the path is not connected,
+	/* If the path is not connected to the expected root,
 	 * check if it is a sysctl and handle specially else remove any
 	 * leading / that __d_path may have returned.
 	 * Unless
@@ -112,17 +133,9 @@ static int d_namespace_path(struct path *path, char *buf, int buflen,
 	 *     namespace root.
 	 */
 	if (!connected) {
-		/* is the disconnect path a sysctl? */
-		if (tmp.dentry->d_sb->s_magic == PROC_SUPER_MAGIC &&
-		    strncmp(*name, "/sys/", 5) == 0) {
-			/* TODO: convert over to using a per namespace
-			 * control instead of hard coded /proc
-			 */
-			error = prepend(name, *name - buf, "/proc", 5);
-		} else if (!(flags & PATH_CONNECT_PATH) &&
+		if (!(flags & PATH_CONNECT_PATH) &&
 			   !(((flags & CHROOT_NSCONNECT) == CHROOT_NSCONNECT) &&
-			     (tmp.mnt == current->nsproxy->mnt_ns->root &&
-			      tmp.dentry == tmp.mnt->mnt_root))) {
+			     our_mnt(path->mnt))) {
 			/* disconnected path, don't return pathname starting
 			 * with '/'
 			 */
@@ -133,8 +146,6 @@ static int d_namespace_path(struct path *path, char *buf, int buflen,
 	}
 
 out:
-	path_put(&root);
-
 	return error;
 }
 
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index 738bbdf8d4c7..36fa7c9bedc4 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -101,9 +101,8 @@ static char *tomoyo_get_absolute_path(struct path *path, char * const buffer,
 {
 	char *pos = ERR_PTR(-ENOMEM);
 	if (buflen >= 256) {
-		struct path ns_root = { };
 		/* go to whatever namespace root we are under */
-		pos = __d_path(path, &ns_root, buffer, buflen - 1);
+		pos = d_absolute_path(path, buffer, buflen - 1);
 		if (!IS_ERR(pos) && *pos == '/' && pos[1]) {
 			struct inode *inode = path->dentry->d_inode;
 			if (inode && S_ISDIR(inode->i_mode)) {
-- 
cgit v1.2.3


From d57f341ba08c9f34ccd45a89729e73174d4a3325 Mon Sep 17 00:00:00 2001
From: Gabor Juhos <juhosg@openwrt.org>
Date: Mon, 20 Jun 2011 19:26:11 +0200
Subject: SERIAL: AR933X: Add driver for the built-in UART of the SoC

This patch adds the driver for the built-in UART of the
Atheros AR933X SoCs.

Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
Cc: linux-mips@linux-mips.org
Cc: Kathy Giori <kgiori@qca.qualcomm.com>
Cc: "Luis R.  Rodriguez" <rodrigue@qca.qualcomm.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: linux-serial@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/2526/
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 .../include/asm/mach-ath79/ar933x_uart_platform.h  |  18 +
 drivers/tty/serial/Kconfig                         |  23 +
 drivers/tty/serial/Makefile                        |   1 +
 drivers/tty/serial/ar933x_uart.c                   | 688 +++++++++++++++++++++
 include/linux/serial_core.h                        |   4 +
 5 files changed, 734 insertions(+)
 create mode 100644 arch/mips/include/asm/mach-ath79/ar933x_uart_platform.h
 create mode 100644 drivers/tty/serial/ar933x_uart.c

(limited to 'include/linux')

diff --git a/arch/mips/include/asm/mach-ath79/ar933x_uart_platform.h b/arch/mips/include/asm/mach-ath79/ar933x_uart_platform.h
new file mode 100644
index 000000000000..6cb30f2b7198
--- /dev/null
+++ b/arch/mips/include/asm/mach-ath79/ar933x_uart_platform.h
@@ -0,0 +1,18 @@
+/*
+ *  Platform data definition for Atheros AR933X UART
+ *
+ *  Copyright (C) 2011 Gabor Juhos <juhosg@openwrt.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ */
+
+#ifndef _AR933X_UART_PLATFORM_H
+#define _AR933X_UART_PLATFORM_H
+
+struct ar933x_uart_platform_data {
+	unsigned	uartclk;
+};
+
+#endif /* _AR933X_UART_PLATFORM_H */
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 925a1e547a83..95a0f5fe7d42 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1610,4 +1610,27 @@ config SERIAL_XILINX_PS_UART_CONSOLE
 	help
 	  Enable a Xilinx PS UART port to be the system console.
 
+config SERIAL_AR933X
+	bool "AR933X serial port support"
+	depends on SOC_AR933X
+	select SERIAL_CORE
+	help
+	  If you have an Atheros AR933X SOC based board and want to use the
+	  built-in UART of the SoC, say Y to this option.
+
+config SERIAL_AR933X_CONSOLE
+	bool "Console on AR933X serial port"
+	depends on SERIAL_AR933X=y
+	select SERIAL_CORE_CONSOLE
+	help
+	  Enable a built-in UART port of the AR933X to be the system console.
+
+config SERIAL_AR933X_NR_UARTS
+	int "Maximum number of AR933X serial ports"
+	depends on SERIAL_AR933X
+	default "2"
+	help
+	  Set this to the number of serial ports you want the driver
+	  to support.
+
 endmenu
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index e10cf5b54b6d..76811cc58591 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -94,3 +94,4 @@ obj-$(CONFIG_SERIAL_MSM_SMD)	+= msm_smd_tty.o
 obj-$(CONFIG_SERIAL_MXS_AUART) += mxs-auart.o
 obj-$(CONFIG_SERIAL_LANTIQ)	+= lantiq.o
 obj-$(CONFIG_SERIAL_XILINX_PS_UART) += xilinx_uartps.o
+obj-$(CONFIG_SERIAL_AR933X)   += ar933x_uart.o
diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c
new file mode 100644
index 000000000000..e4f60e2b87f3
--- /dev/null
+++ b/drivers/tty/serial/ar933x_uart.c
@@ -0,0 +1,688 @@
+/*
+ *  Atheros AR933X SoC built-in UART driver
+ *
+ *  Copyright (C) 2011 Gabor Juhos <juhosg@openwrt.org>
+ *
+ *  Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/sysrq.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial_core.h>
+#include <linux/serial.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+
+#include <asm/mach-ath79/ar933x_uart.h>
+#include <asm/mach-ath79/ar933x_uart_platform.h>
+
+#define DRIVER_NAME "ar933x-uart"
+
+#define AR933X_DUMMY_STATUS_RD	0x01
+
+static struct uart_driver ar933x_uart_driver;
+
+struct ar933x_uart_port {
+	struct uart_port	port;
+	unsigned int		ier;	/* shadow Interrupt Enable Register */
+};
+
+static inline unsigned int ar933x_uart_read(struct ar933x_uart_port *up,
+					    int offset)
+{
+	return readl(up->port.membase + offset);
+}
+
+static inline void ar933x_uart_write(struct ar933x_uart_port *up,
+				     int offset, unsigned int value)
+{
+	writel(value, up->port.membase + offset);
+}
+
+static inline void ar933x_uart_rmw(struct ar933x_uart_port *up,
+				  unsigned int offset,
+				  unsigned int mask,
+				  unsigned int val)
+{
+	unsigned int t;
+
+	t = ar933x_uart_read(up, offset);
+	t &= ~mask;
+	t |= val;
+	ar933x_uart_write(up, offset, t);
+}
+
+static inline void ar933x_uart_rmw_set(struct ar933x_uart_port *up,
+				       unsigned int offset,
+				       unsigned int val)
+{
+	ar933x_uart_rmw(up, offset, 0, val);
+}
+
+static inline void ar933x_uart_rmw_clear(struct ar933x_uart_port *up,
+					 unsigned int offset,
+					 unsigned int val)
+{
+	ar933x_uart_rmw(up, offset, val, 0);
+}
+
+static inline void ar933x_uart_start_tx_interrupt(struct ar933x_uart_port *up)
+{
+	up->ier |= AR933X_UART_INT_TX_EMPTY;
+	ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier);
+}
+
+static inline void ar933x_uart_stop_tx_interrupt(struct ar933x_uart_port *up)
+{
+	up->ier &= ~AR933X_UART_INT_TX_EMPTY;
+	ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier);
+}
+
+static inline void ar933x_uart_putc(struct ar933x_uart_port *up, int ch)
+{
+	unsigned int rdata;
+
+	rdata = ch & AR933X_UART_DATA_TX_RX_MASK;
+	rdata |= AR933X_UART_DATA_TX_CSR;
+	ar933x_uart_write(up, AR933X_UART_DATA_REG, rdata);
+}
+
+static unsigned int ar933x_uart_tx_empty(struct uart_port *port)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+	unsigned long flags;
+	unsigned int rdata;
+
+	spin_lock_irqsave(&up->port.lock, flags);
+	rdata = ar933x_uart_read(up, AR933X_UART_DATA_REG);
+	spin_unlock_irqrestore(&up->port.lock, flags);
+
+	return (rdata & AR933X_UART_DATA_TX_CSR) ? 0 : TIOCSER_TEMT;
+}
+
+static unsigned int ar933x_uart_get_mctrl(struct uart_port *port)
+{
+	return TIOCM_CAR;
+}
+
+static void ar933x_uart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+}
+
+static void ar933x_uart_start_tx(struct uart_port *port)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+
+	ar933x_uart_start_tx_interrupt(up);
+}
+
+static void ar933x_uart_stop_tx(struct uart_port *port)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+
+	ar933x_uart_stop_tx_interrupt(up);
+}
+
+static void ar933x_uart_stop_rx(struct uart_port *port)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+
+	up->ier &= ~AR933X_UART_INT_RX_VALID;
+	ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier);
+}
+
+static void ar933x_uart_break_ctl(struct uart_port *port, int break_state)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+	unsigned long flags;
+
+	spin_lock_irqsave(&up->port.lock, flags);
+	if (break_state == -1)
+		ar933x_uart_rmw_set(up, AR933X_UART_CS_REG,
+				    AR933X_UART_CS_TX_BREAK);
+	else
+		ar933x_uart_rmw_clear(up, AR933X_UART_CS_REG,
+				      AR933X_UART_CS_TX_BREAK);
+	spin_unlock_irqrestore(&up->port.lock, flags);
+}
+
+static void ar933x_uart_enable_ms(struct uart_port *port)
+{
+}
+
+static void ar933x_uart_set_termios(struct uart_port *port,
+				    struct ktermios *new,
+				    struct ktermios *old)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+	unsigned int cs;
+	unsigned long flags;
+	unsigned int baud, scale;
+
+	/* Only CS8 is supported */
+	new->c_cflag &= ~CSIZE;
+	new->c_cflag |= CS8;
+
+	/* Only one stop bit is supported */
+	new->c_cflag &= ~CSTOPB;
+
+	cs = 0;
+	if (new->c_cflag & PARENB) {
+		if (!(new->c_cflag & PARODD))
+			cs |= AR933X_UART_CS_PARITY_EVEN;
+		else
+			cs |= AR933X_UART_CS_PARITY_ODD;
+	} else {
+		cs |= AR933X_UART_CS_PARITY_NONE;
+	}
+
+	/* Mark/space parity is not supported */
+	new->c_cflag &= ~CMSPAR;
+
+	baud = uart_get_baud_rate(port, new, old, 0, port->uartclk / 16);
+	scale = (port->uartclk / (16 * baud)) - 1;
+
+	/*
+	 * Ok, we're now changing the port state. Do it with
+	 * interrupts disabled.
+	 */
+	spin_lock_irqsave(&up->port.lock, flags);
+
+	/* Update the per-port timeout. */
+	uart_update_timeout(port, new->c_cflag, baud);
+
+	up->port.ignore_status_mask = 0;
+
+	/* ignore all characters if CREAD is not set */
+	if ((new->c_cflag & CREAD) == 0)
+		up->port.ignore_status_mask |= AR933X_DUMMY_STATUS_RD;
+
+	ar933x_uart_write(up, AR933X_UART_CLOCK_REG,
+			  scale << AR933X_UART_CLOCK_SCALE_S | 8192);
+
+	/* setup configuration register */
+	ar933x_uart_rmw(up, AR933X_UART_CS_REG, AR933X_UART_CS_PARITY_M, cs);
+
+	/* enable host interrupt */
+	ar933x_uart_rmw_set(up, AR933X_UART_CS_REG,
+			    AR933X_UART_CS_HOST_INT_EN);
+
+	spin_unlock_irqrestore(&up->port.lock, flags);
+
+	if (tty_termios_baud_rate(new))
+		tty_termios_encode_baud_rate(new, baud, baud);
+}
+
+static void ar933x_uart_rx_chars(struct ar933x_uart_port *up)
+{
+	struct tty_struct *tty;
+	int max_count = 256;
+
+	tty = tty_port_tty_get(&up->port.state->port);
+	do {
+		unsigned int rdata;
+		unsigned char ch;
+
+		rdata = ar933x_uart_read(up, AR933X_UART_DATA_REG);
+		if ((rdata & AR933X_UART_DATA_RX_CSR) == 0)
+			break;
+
+		/* remove the character from the FIFO */
+		ar933x_uart_write(up, AR933X_UART_DATA_REG,
+				  AR933X_UART_DATA_RX_CSR);
+
+		if (!tty) {
+			/* discard the data if no tty available */
+			continue;
+		}
+
+		up->port.icount.rx++;
+		ch = rdata & AR933X_UART_DATA_TX_RX_MASK;
+
+		if (uart_handle_sysrq_char(&up->port, ch))
+			continue;
+
+		if ((up->port.ignore_status_mask & AR933X_DUMMY_STATUS_RD) == 0)
+			tty_insert_flip_char(tty, ch, TTY_NORMAL);
+	} while (max_count-- > 0);
+
+	if (tty) {
+		tty_flip_buffer_push(tty);
+		tty_kref_put(tty);
+	}
+}
+
+static void ar933x_uart_tx_chars(struct ar933x_uart_port *up)
+{
+	struct circ_buf *xmit = &up->port.state->xmit;
+	int count;
+
+	if (uart_tx_stopped(&up->port))
+		return;
+
+	count = up->port.fifosize;
+	do {
+		unsigned int rdata;
+
+		rdata = ar933x_uart_read(up, AR933X_UART_DATA_REG);
+		if ((rdata & AR933X_UART_DATA_TX_CSR) == 0)
+			break;
+
+		if (up->port.x_char) {
+			ar933x_uart_putc(up, up->port.x_char);
+			up->port.icount.tx++;
+			up->port.x_char = 0;
+			continue;
+		}
+
+		if (uart_circ_empty(xmit))
+			break;
+
+		ar933x_uart_putc(up, xmit->buf[xmit->tail]);
+
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		up->port.icount.tx++;
+	} while (--count > 0);
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(&up->port);
+
+	if (!uart_circ_empty(xmit))
+		ar933x_uart_start_tx_interrupt(up);
+}
+
+static irqreturn_t ar933x_uart_interrupt(int irq, void *dev_id)
+{
+	struct ar933x_uart_port *up = dev_id;
+	unsigned int status;
+
+	status = ar933x_uart_read(up, AR933X_UART_CS_REG);
+	if ((status & AR933X_UART_CS_HOST_INT) == 0)
+		return IRQ_NONE;
+
+	spin_lock(&up->port.lock);
+
+	status = ar933x_uart_read(up, AR933X_UART_INT_REG);
+	status &= ar933x_uart_read(up, AR933X_UART_INT_EN_REG);
+
+	if (status & AR933X_UART_INT_RX_VALID) {
+		ar933x_uart_write(up, AR933X_UART_INT_REG,
+				  AR933X_UART_INT_RX_VALID);
+		ar933x_uart_rx_chars(up);
+	}
+
+	if (status & AR933X_UART_INT_TX_EMPTY) {
+		ar933x_uart_write(up, AR933X_UART_INT_REG,
+				  AR933X_UART_INT_TX_EMPTY);
+		ar933x_uart_stop_tx_interrupt(up);
+		ar933x_uart_tx_chars(up);
+	}
+
+	spin_unlock(&up->port.lock);
+
+	return IRQ_HANDLED;
+}
+
+static int ar933x_uart_startup(struct uart_port *port)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+	unsigned long flags;
+	int ret;
+
+	ret = request_irq(up->port.irq, ar933x_uart_interrupt,
+			  up->port.irqflags, dev_name(up->port.dev), up);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&up->port.lock, flags);
+
+	/* Enable HOST interrupts */
+	ar933x_uart_rmw_set(up, AR933X_UART_CS_REG,
+			    AR933X_UART_CS_HOST_INT_EN);
+
+	/* Enable RX interrupts */
+	up->ier = AR933X_UART_INT_RX_VALID;
+	ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier);
+
+	spin_unlock_irqrestore(&up->port.lock, flags);
+
+	return 0;
+}
+
+static void ar933x_uart_shutdown(struct uart_port *port)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+
+	/* Disable all interrupts */
+	up->ier = 0;
+	ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier);
+
+	/* Disable break condition */
+	ar933x_uart_rmw_clear(up, AR933X_UART_CS_REG,
+			      AR933X_UART_CS_TX_BREAK);
+
+	free_irq(up->port.irq, up);
+}
+
+static const char *ar933x_uart_type(struct uart_port *port)
+{
+	return (port->type == PORT_AR933X) ? "AR933X UART" : NULL;
+}
+
+static void ar933x_uart_release_port(struct uart_port *port)
+{
+	/* Nothing to release ... */
+}
+
+static int ar933x_uart_request_port(struct uart_port *port)
+{
+	/* UARTs always present */
+	return 0;
+}
+
+static void ar933x_uart_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE)
+		port->type = PORT_AR933X;
+}
+
+static int ar933x_uart_verify_port(struct uart_port *port,
+				   struct serial_struct *ser)
+{
+	if (ser->type != PORT_UNKNOWN &&
+	    ser->type != PORT_AR933X)
+		return -EINVAL;
+
+	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+		return -EINVAL;
+
+	if (ser->baud_base < 28800)
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct uart_ops ar933x_uart_ops = {
+	.tx_empty	= ar933x_uart_tx_empty,
+	.set_mctrl	= ar933x_uart_set_mctrl,
+	.get_mctrl	= ar933x_uart_get_mctrl,
+	.stop_tx	= ar933x_uart_stop_tx,
+	.start_tx	= ar933x_uart_start_tx,
+	.stop_rx	= ar933x_uart_stop_rx,
+	.enable_ms	= ar933x_uart_enable_ms,
+	.break_ctl	= ar933x_uart_break_ctl,
+	.startup	= ar933x_uart_startup,
+	.shutdown	= ar933x_uart_shutdown,
+	.set_termios	= ar933x_uart_set_termios,
+	.type		= ar933x_uart_type,
+	.release_port	= ar933x_uart_release_port,
+	.request_port	= ar933x_uart_request_port,
+	.config_port	= ar933x_uart_config_port,
+	.verify_port	= ar933x_uart_verify_port,
+};
+
+#ifdef CONFIG_SERIAL_AR933X_CONSOLE
+
+static struct ar933x_uart_port *
+ar933x_console_ports[CONFIG_SERIAL_AR933X_NR_UARTS];
+
+static void ar933x_uart_wait_xmitr(struct ar933x_uart_port *up)
+{
+	unsigned int status;
+	unsigned int timeout = 60000;
+
+	/* Wait up to 60ms for the character(s) to be sent. */
+	do {
+		status = ar933x_uart_read(up, AR933X_UART_DATA_REG);
+		if (--timeout == 0)
+			break;
+		udelay(1);
+	} while ((status & AR933X_UART_DATA_TX_CSR) == 0);
+}
+
+static void ar933x_uart_console_putchar(struct uart_port *port, int ch)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+
+	ar933x_uart_wait_xmitr(up);
+	ar933x_uart_putc(up, ch);
+}
+
+static void ar933x_uart_console_write(struct console *co, const char *s,
+				      unsigned int count)
+{
+	struct ar933x_uart_port *up = ar933x_console_ports[co->index];
+	unsigned long flags;
+	unsigned int int_en;
+	int locked = 1;
+
+	local_irq_save(flags);
+
+	if (up->port.sysrq)
+		locked = 0;
+	else if (oops_in_progress)
+		locked = spin_trylock(&up->port.lock);
+	else
+		spin_lock(&up->port.lock);
+
+	/*
+	 * First save the IER then disable the interrupts
+	 */
+	int_en = ar933x_uart_read(up, AR933X_UART_INT_EN_REG);
+	ar933x_uart_write(up, AR933X_UART_INT_EN_REG, 0);
+
+	uart_console_write(&up->port, s, count, ar933x_uart_console_putchar);
+
+	/*
+	 * Finally, wait for transmitter to become empty
+	 * and restore the IER
+	 */
+	ar933x_uart_wait_xmitr(up);
+	ar933x_uart_write(up, AR933X_UART_INT_EN_REG, int_en);
+
+	ar933x_uart_write(up, AR933X_UART_INT_REG, AR933X_UART_INT_ALLINTS);
+
+	if (locked)
+		spin_unlock(&up->port.lock);
+
+	local_irq_restore(flags);
+}
+
+static int ar933x_uart_console_setup(struct console *co, char *options)
+{
+	struct ar933x_uart_port *up;
+	int baud = 115200;
+	int bits = 8;
+	int parity = 'n';
+	int flow = 'n';
+
+	if (co->index < 0 || co->index >= CONFIG_SERIAL_AR933X_NR_UARTS)
+		return -EINVAL;
+
+	up = ar933x_console_ports[co->index];
+	if (!up)
+		return -ENODEV;
+
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+
+	return uart_set_options(&up->port, co, baud, parity, bits, flow);
+}
+
+static struct console ar933x_uart_console = {
+	.name		= "ttyATH",
+	.write		= ar933x_uart_console_write,
+	.device		= uart_console_device,
+	.setup		= ar933x_uart_console_setup,
+	.flags		= CON_PRINTBUFFER,
+	.index		= -1,
+	.data		= &ar933x_uart_driver,
+};
+
+static void ar933x_uart_add_console_port(struct ar933x_uart_port *up)
+{
+	ar933x_console_ports[up->port.line] = up;
+}
+
+#define AR933X_SERIAL_CONSOLE	(&ar933x_uart_console)
+
+#else
+
+static inline void ar933x_uart_add_console_port(struct ar933x_uart_port *up) {}
+
+#define AR933X_SERIAL_CONSOLE	NULL
+
+#endif /* CONFIG_SERIAL_AR933X_CONSOLE */
+
+static struct uart_driver ar933x_uart_driver = {
+	.owner		= THIS_MODULE,
+	.driver_name	= DRIVER_NAME,
+	.dev_name	= "ttyATH",
+	.nr		= CONFIG_SERIAL_AR933X_NR_UARTS,
+	.cons		= AR933X_SERIAL_CONSOLE,
+};
+
+static int __devinit ar933x_uart_probe(struct platform_device *pdev)
+{
+	struct ar933x_uart_platform_data *pdata;
+	struct ar933x_uart_port *up;
+	struct uart_port *port;
+	struct resource *mem_res;
+	struct resource *irq_res;
+	int id;
+	int ret;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata)
+		return -EINVAL;
+
+	id = pdev->id;
+	if (id == -1)
+		id = 0;
+
+	if (id > CONFIG_SERIAL_AR933X_NR_UARTS)
+		return -EINVAL;
+
+	mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem_res) {
+		dev_err(&pdev->dev, "no MEM resource\n");
+		return -EINVAL;
+	}
+
+	irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!irq_res) {
+		dev_err(&pdev->dev, "no IRQ resource\n");
+		return -EINVAL;
+	}
+
+	up = kzalloc(sizeof(struct ar933x_uart_port), GFP_KERNEL);
+	if (!up)
+		return -ENOMEM;
+
+	port = &up->port;
+	port->mapbase = mem_res->start;
+
+	port->membase = ioremap(mem_res->start, AR933X_UART_REGS_SIZE);
+	if (!port->membase) {
+		ret = -ENOMEM;
+		goto err_free_up;
+	}
+
+	port->line = id;
+	port->irq = irq_res->start;
+	port->dev = &pdev->dev;
+	port->type = PORT_AR933X;
+	port->iotype = UPIO_MEM32;
+	port->uartclk = pdata->uartclk;
+
+	port->regshift = 2;
+	port->fifosize = AR933X_UART_FIFO_SIZE;
+	port->ops = &ar933x_uart_ops;
+
+	ar933x_uart_add_console_port(up);
+
+	ret = uart_add_one_port(&ar933x_uart_driver, &up->port);
+	if (ret)
+		goto err_unmap;
+
+	platform_set_drvdata(pdev, up);
+	return 0;
+
+err_unmap:
+	iounmap(up->port.membase);
+err_free_up:
+	kfree(up);
+	return ret;
+}
+
+static int __devexit ar933x_uart_remove(struct platform_device *pdev)
+{
+	struct ar933x_uart_port *up;
+
+	up = platform_get_drvdata(pdev);
+	platform_set_drvdata(pdev, NULL);
+
+	if (up) {
+		uart_remove_one_port(&ar933x_uart_driver, &up->port);
+		iounmap(up->port.membase);
+		kfree(up);
+	}
+
+	return 0;
+}
+
+static struct platform_driver ar933x_uart_platform_driver = {
+	.probe		= ar933x_uart_probe,
+	.remove		= __devexit_p(ar933x_uart_remove),
+	.driver		= {
+		.name		= DRIVER_NAME,
+		.owner		= THIS_MODULE,
+	},
+};
+
+static int __init ar933x_uart_init(void)
+{
+	int ret;
+
+	ar933x_uart_driver.nr = CONFIG_SERIAL_AR933X_NR_UARTS;
+	ret = uart_register_driver(&ar933x_uart_driver);
+	if (ret)
+		goto err_out;
+
+	ret = platform_driver_register(&ar933x_uart_platform_driver);
+	if (ret)
+		goto err_unregister_uart_driver;
+
+	return 0;
+
+err_unregister_uart_driver:
+	uart_unregister_driver(&ar933x_uart_driver);
+err_out:
+	return ret;
+}
+
+static void __exit ar933x_uart_exit(void)
+{
+	platform_driver_unregister(&ar933x_uart_platform_driver);
+	uart_unregister_driver(&ar933x_uart_driver);
+}
+
+module_init(ar933x_uart_init);
+module_exit(ar933x_uart_exit);
+
+MODULE_DESCRIPTION("Atheros AR933X UART driver");
+MODULE_AUTHOR("Gabor Juhos <juhosg@openwrt.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index eadf33d0abba..3c35fb2f688f 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -207,6 +207,10 @@
 /* Xilinx PSS UART */
 #define PORT_XUARTPS	98
 
+/* Atheros AR933X SoC */
+#define PORT_AR933X	99
+
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
-- 
cgit v1.2.3


From e179816ce60033ce560b28e01bc555ed5116cbe9 Mon Sep 17 00:00:00 2001
From: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
Date: Wed, 30 Nov 2011 02:46:55 +0000
Subject: powerpc/cpuidle: Enable cpuidle and directly call cpuidle_idle_call()
 for pSeries

This patch enables cpuidle for pSeries and pSeries_idle is
directly called from the idle loop. As a result of pSeries_idle, cpuidle
driver registered with cpuidle subsystem comes into action. On
failure of loading of the driver or cpuidle framework default idle
is executed as part of the function. This patch
also removes the routines pseries_shared_idle_sleep and
pseries_dedicated_idle_sleep as they are now implemented as part of
pseries_idle cpuidle driver.

Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
Signed-off-by: Trinabh Gupta <g.trinabh@gmail.com>
Signed-off-by: Arun R Bharadwaj <arun.r.bharadwaj@gmail.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/Kconfig         |   6 ++
 arch/powerpc/platforms/pseries/setup.c | 101 ++++++---------------------------
 include/linux/cpuidle.h                |   2 -
 3 files changed, 23 insertions(+), 86 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 3fe6d927ad70..31e1adeaa92a 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -211,6 +211,12 @@ config PPC_PASEMI_CPUFREQ
 
 endmenu
 
+menu "CPUIdle driver"
+
+source "drivers/cpuidle/Kconfig"
+
+endmenu
+
 config PPC601_SYNC_FIX
 	bool "Workarounds for PPC601 bugs"
 	depends on 6xx && (PPC_PREP || PPC_PMAC)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f2446da7f2d5..164839cb9fcd 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -39,6 +39,7 @@
 #include <linux/irq.h>
 #include <linux/seq_file.h>
 #include <linux/root_dev.h>
+#include <linux/cpuidle.h>
 
 #include <asm/mmu.h>
 #include <asm/processor.h>
@@ -74,9 +75,6 @@ EXPORT_SYMBOL(CMO_PageSize);
 
 int fwnmi_active;  /* TRUE if an FWNMI handler is present */
 
-static void pseries_shared_idle_sleep(void);
-static void pseries_dedicated_idle_sleep(void);
-
 static struct device_node *pSeries_mpic_node;
 
 static void pSeries_show_cpuinfo(struct seq_file *m)
@@ -351,6 +349,21 @@ static int alloc_dispatch_log_kmem_cache(void)
 }
 early_initcall(alloc_dispatch_log_kmem_cache);
 
+static void pSeries_idle(void)
+{
+	/* This would call on the cpuidle framework, and the back-end pseries
+	 * driver to  go to idle states
+	 */
+	if (cpuidle_idle_call()) {
+		/* On error, execute default handler
+		 * to go into low thread priority and possibly
+		 * low power mode.
+		 */
+		HMT_low();
+		HMT_very_low();
+	}
+}
+
 static void __init pSeries_setup_arch(void)
 {
 	/* Discover PIC type and setup ppc_md accordingly */
@@ -373,18 +386,9 @@ static void __init pSeries_setup_arch(void)
 
 	pSeries_nvram_init();
 
-	/* Choose an idle loop */
 	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
 		vpa_init(boot_cpuid);
-		if (get_lppaca()->shared_proc) {
-			printk(KERN_DEBUG "Using shared processor idle loop\n");
-			ppc_md.power_save = pseries_shared_idle_sleep;
-		} else {
-			printk(KERN_DEBUG "Using dedicated idle loop\n");
-			ppc_md.power_save = pseries_dedicated_idle_sleep;
-		}
-	} else {
-		printk(KERN_DEBUG "Using default idle loop\n");
+		ppc_md.power_save = pSeries_idle;
 	}
 
 	if (firmware_has_feature(FW_FEATURE_LPAR))
@@ -585,77 +589,6 @@ static int __init pSeries_probe(void)
 	return 1;
 }
 
-static void pseries_dedicated_idle_sleep(void)
-{ 
-	unsigned int cpu = smp_processor_id();
-	unsigned long start_snooze;
-	unsigned long in_purr, out_purr;
-	long snooze = __get_cpu_var(smt_snooze_delay);
-
-	/*
-	 * Indicate to the HV that we are idle. Now would be
-	 * a good time to find other work to dispatch.
-	 */
-	get_lppaca()->idle = 1;
-	get_lppaca()->donate_dedicated_cpu = 1;
-	in_purr = mfspr(SPRN_PURR);
-
-	/*
-	 * We come in with interrupts disabled, and need_resched()
-	 * has been checked recently.  If we should poll for a little
-	 * while, do so.
-	 */
-	if (snooze) {
-		start_snooze = get_tb() + snooze * tb_ticks_per_usec;
-		local_irq_enable();
-		set_thread_flag(TIF_POLLING_NRFLAG);
-
-		while ((snooze < 0) || (get_tb() < start_snooze)) {
-			if (need_resched() || cpu_is_offline(cpu))
-				goto out;
-			ppc64_runlatch_off();
-			HMT_low();
-			HMT_very_low();
-		}
-
-		HMT_medium();
-		clear_thread_flag(TIF_POLLING_NRFLAG);
-		smp_mb();
-		local_irq_disable();
-		if (need_resched() || cpu_is_offline(cpu))
-			goto out;
-	}
-
-	cede_processor();
-
-out:
-	HMT_medium();
-	out_purr = mfspr(SPRN_PURR);
-	get_lppaca()->wait_state_cycles += out_purr - in_purr;
-	get_lppaca()->donate_dedicated_cpu = 0;
-	get_lppaca()->idle = 0;
-}
-
-static void pseries_shared_idle_sleep(void)
-{
-	/*
-	 * Indicate to the HV that we are idle. Now would be
-	 * a good time to find other work to dispatch.
-	 */
-	get_lppaca()->idle = 1;
-
-	/*
-	 * Yield the processor to the hypervisor.  We return if
-	 * an external interrupt occurs (which are driven prior
-	 * to returning here) or if a prod occurs from another
-	 * processor. When returning here, external interrupts
-	 * are enabled.
-	 */
-	cede_processor();
-
-	get_lppaca()->idle = 0;
-}
-
 static int pSeries_pci_probe_mode(struct pci_bus *bus)
 {
 	if (firmware_has_feature(FW_FEATURE_LPAR))
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 7408af843b8a..23f81de51829 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -130,7 +130,6 @@ struct cpuidle_driver {
 #ifdef CONFIG_CPU_IDLE
 extern void disable_cpuidle(void);
 extern int cpuidle_idle_call(void);
-
 extern int cpuidle_register_driver(struct cpuidle_driver *drv);
 struct cpuidle_driver *cpuidle_get_driver(void);
 extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
@@ -145,7 +144,6 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev);
 #else
 static inline void disable_cpuidle(void) { }
 static inline int cpuidle_idle_call(void) { return -ENODEV; }
-
 static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
 {return -ENODEV; }
 static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; }
-- 
cgit v1.2.3


From 581adcbe121872429de76ff9884762de71a76200 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Dec 2011 10:22:06 -0800
Subject: memblock: Make memblock_{add|remove|free|reserve}() return int and
 update prototypes

memblock_{add|remove|free|reserve}() return either 0 or -errno but had
long as return type.  Chage it to int.  Also, drop 'extern' from all
prototypes in memblock.h - they are unnecessary and used
inconsistently (especially if mm.h is included in the picture).

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Yinghai Lu <yinghai@kernel.org>
---
 include/linux/memblock.h | 64 ++++++++++++++++++++++--------------------------
 mm/memblock.c            | 18 +++++++-------
 2 files changed, 38 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index ab89b417655c..2f8e28f859b3 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -52,15 +52,15 @@ phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 int memblock_free_reserved_regions(void);
 int memblock_reserve_reserved_regions(void);
 
-extern void memblock_init(void);
-extern void memblock_analyze(void);
-extern long memblock_add(phys_addr_t base, phys_addr_t size);
-extern long memblock_remove(phys_addr_t base, phys_addr_t size);
-extern long memblock_free(phys_addr_t base, phys_addr_t size);
-extern long memblock_reserve(phys_addr_t base, phys_addr_t size);
+void memblock_init(void);
+void memblock_analyze(void);
+int memblock_add(phys_addr_t base, phys_addr_t size);
+int memblock_remove(phys_addr_t base, phys_addr_t size);
+int memblock_free(phys_addr_t base, phys_addr_t size);
+int memblock_reserve(phys_addr_t base, phys_addr_t size);
 
-extern void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
-				  phys_addr_t *out_end, int *out_nid);
+void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
+			   phys_addr_t *out_end, int *out_nid);
 
 /**
  * for_each_free_mem_range - iterate through free memblock areas
@@ -80,7 +80,7 @@ extern void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
 	     __next_free_mem_range(&i, nid, p_start, p_end, p_nid))
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-extern int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid);
+int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid);
 
 static inline void memblock_set_region_node(struct memblock_region *r, int nid)
 {
@@ -105,37 +105,31 @@ static inline int memblock_get_region_node(const struct memblock_region *r)
 /* The numa aware allocator is only available if
  * CONFIG_ARCH_POPULATES_NODE_MAP is set
  */
-extern phys_addr_t memblock_find_in_range_node(phys_addr_t start,
-					       phys_addr_t end,
-					       phys_addr_t size,
-					       phys_addr_t align, int nid);
-extern phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align,
-					int nid);
-extern phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align,
-					    int nid);
+phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end,
+					phys_addr_t size, phys_addr_t align, int nid);
+phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid);
+phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
 
-extern phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
+phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
 
 /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */
 #define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)
 #define MEMBLOCK_ALLOC_ACCESSIBLE	0
 
-extern phys_addr_t memblock_alloc_base(phys_addr_t size,
-					 phys_addr_t align,
-					 phys_addr_t max_addr);
-extern phys_addr_t __memblock_alloc_base(phys_addr_t size,
-					   phys_addr_t align,
-					   phys_addr_t max_addr);
-extern phys_addr_t memblock_phys_mem_size(void);
-extern phys_addr_t memblock_start_of_DRAM(void);
-extern phys_addr_t memblock_end_of_DRAM(void);
-extern void memblock_enforce_memory_limit(phys_addr_t memory_limit);
-extern int memblock_is_memory(phys_addr_t addr);
-extern int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
-extern int memblock_is_reserved(phys_addr_t addr);
-extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
-
-extern void memblock_dump_all(void);
+phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
+				phys_addr_t max_addr);
+phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
+				  phys_addr_t max_addr);
+phys_addr_t memblock_phys_mem_size(void);
+phys_addr_t memblock_start_of_DRAM(void);
+phys_addr_t memblock_end_of_DRAM(void);
+void memblock_enforce_memory_limit(phys_addr_t memory_limit);
+int memblock_is_memory(phys_addr_t addr);
+int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
+int memblock_is_reserved(phys_addr_t addr);
+int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
+
+void memblock_dump_all(void);
 
 /**
  * memblock_set_current_limit - Set the current allocation limit to allow
@@ -143,7 +137,7 @@ extern void memblock_dump_all(void);
  *                         accessible during boot
  * @limit: New limit value (physical address)
  */
-extern void memblock_set_current_limit(phys_addr_t limit);
+void memblock_set_current_limit(phys_addr_t limit);
 
 
 /*
diff --git a/mm/memblock.c b/mm/memblock.c
index a57092f63a86..948036718245 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -176,7 +176,7 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
 }
 
 /* Defined below but needed now */
-static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size);
+static int memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size);
 
 static int __init_memblock memblock_double_array(struct memblock_type *type)
 {
@@ -316,8 +316,8 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
  * RETURNS:
  * 0 on success, -errno on failure.
  */
-static long __init_memblock memblock_add_region(struct memblock_type *type,
-						phys_addr_t base, phys_addr_t size)
+static int __init_memblock memblock_add_region(struct memblock_type *type,
+					       phys_addr_t base, phys_addr_t size)
 {
 	bool insert = false;
 	phys_addr_t obase = base, end = base + size;
@@ -387,13 +387,13 @@ repeat:
 	}
 }
 
-long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
+int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
 {
 	return memblock_add_region(&memblock.memory, base, size);
 }
 
-static long __init_memblock __memblock_remove(struct memblock_type *type,
-					      phys_addr_t base, phys_addr_t size)
+static int __init_memblock __memblock_remove(struct memblock_type *type,
+					     phys_addr_t base, phys_addr_t size)
 {
 	phys_addr_t end = base + size;
 	int i;
@@ -443,12 +443,12 @@ static long __init_memblock __memblock_remove(struct memblock_type *type,
 	return 0;
 }
 
-long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
+int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
 {
 	return __memblock_remove(&memblock.memory, base, size);
 }
 
-long __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
+int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
 {
 	memblock_dbg("   memblock_free: [%#016llx-%#016llx] %pF\n",
 		     (unsigned long long)base,
@@ -458,7 +458,7 @@ long __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
 	return __memblock_remove(&memblock.reserved, base, size);
 }
 
-long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
+int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
 {
 	struct memblock_type *_rgn = &memblock.reserved;
 
-- 
cgit v1.2.3


From 4ff7b82f1e5fc65a7c9512b231b4ea533f28541a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Dec 2011 10:22:06 -0800
Subject: memblock: Add __memblock_dump_all()

Add __memblock_dump_all() which dumps memblock configuration whether
memblock_debug is enabled or not.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Yinghai Lu <yinghai@kernel.org>
---
 include/linux/memblock.h | 8 +++++++-
 mm/memblock.c            | 5 +----
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 2f8e28f859b3..1a3bee78590f 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -129,7 +129,13 @@ int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
 int memblock_is_reserved(phys_addr_t addr);
 int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
 
-void memblock_dump_all(void);
+extern void __memblock_dump_all(void);
+
+static inline void memblock_dump_all(void)
+{
+	if (memblock_debug)
+		__memblock_dump_all();
+}
 
 /**
  * memblock_set_current_limit - Set the current allocation limit to allow
diff --git a/mm/memblock.c b/mm/memblock.c
index d0506183c5b1..4b80f6fae091 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -898,11 +898,8 @@ static void __init_memblock memblock_dump(struct memblock_type *type, char *name
 	}
 }
 
-void __init_memblock memblock_dump_all(void)
+void __init_memblock __memblock_dump_all(void)
 {
-	if (!memblock_debug)
-		return;
-
 	pr_info("MEMBLOCK configuration:\n");
 	pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size);
 
-- 
cgit v1.2.3


From c5a1cb284b791fcc3c70962331a682452afaf6cd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Dec 2011 10:22:07 -0800
Subject: memblock: Kill sentinel entries at the end of static region arrays

memblock no longer depends on having one more entry at the end during
addition making the sentinel entries at the end of region arrays not
too useful.  Remove the sentinels.  This eases further updates.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Yinghai Lu <yinghai@kernel.org>
---
 include/linux/poison.h |  6 ------
 mm/memblock.c          | 14 ++------------
 2 files changed, 2 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/poison.h b/include/linux/poison.h
index 79159de0e341..2110a81c5e2a 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -40,12 +40,6 @@
 #define	RED_INACTIVE	0x09F911029D74E35BULL	/* when obj is inactive */
 #define	RED_ACTIVE	0xD84156C5635688C0ULL	/* when obj is active */
 
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
-#define MEMBLOCK_INACTIVE	0x3a84fb0144c9e71bULL
-#else
-#define MEMBLOCK_INACTIVE	0x44c9e71bUL
-#endif
-
 #define SLUB_RED_INACTIVE	0xbb
 #define SLUB_RED_ACTIVE		0xcc
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 4b80f6fae091..e808df845bbb 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -24,8 +24,8 @@ struct memblock memblock __initdata_memblock;
 
 int memblock_debug __initdata_memblock;
 int memblock_can_resize __initdata_memblock;
-static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;
-static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;
+static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
+static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 
 /* inline so we don't get a warning when pr_debug is compiled out */
 static inline const char *memblock_type_name(struct memblock_type *type)
@@ -911,12 +911,6 @@ void __init memblock_analyze(void)
 {
 	int i;
 
-	/* Check marker in the unused last array entry */
-	WARN_ON(memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS].base
-		!= MEMBLOCK_INACTIVE);
-	WARN_ON(memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS].base
-		!= MEMBLOCK_INACTIVE);
-
 	memblock.memory_size = 0;
 
 	for (i = 0; i < memblock.memory.cnt; i++)
@@ -940,10 +934,6 @@ void __init memblock_init(void)
 	memblock.reserved.regions	= memblock_reserved_init_regions;
 	memblock.reserved.max	= INIT_MEMBLOCK_REGIONS;
 
-	/* Write a marker in the unused last array entry */
-	memblock.memory.regions[INIT_MEMBLOCK_REGIONS].base = MEMBLOCK_INACTIVE;
-	memblock.reserved.regions[INIT_MEMBLOCK_REGIONS].base = MEMBLOCK_INACTIVE;
-
 	/* Create a dummy zero size MEMBLOCK which will get coalesced away later.
 	 * This simplifies the memblock_add() code below...
 	 */
-- 
cgit v1.2.3


From fe091c208a40299fba40e62292a610fb91e44b4e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Dec 2011 10:22:07 -0800
Subject: memblock: Kill memblock_init()

memblock_init() initializes arrays for regions and memblock itself;
however, all these can be done with struct initializers and
memblock_init() can be removed.  This patch kills memblock_init() and
initializes memblock with struct initializer.

The only difference is that the first dummy entries don't have .nid
set to MAX_NUMNODES initially.  This doesn't cause any behavior
difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/arm/mm/init.c            |  1 -
 arch/microblaze/kernel/prom.c |  1 -
 arch/openrisc/kernel/prom.c   |  1 -
 arch/powerpc/kernel/prom.c    |  2 --
 arch/sh/mm/init.c             |  1 -
 arch/sparc/mm/init_64.c       |  2 --
 arch/unicore32/mm/init.c      |  1 -
 arch/x86/kernel/head32.c      |  2 --
 arch/x86/kernel/head64.c      |  2 --
 arch/x86/xen/enlighten.c      |  2 --
 include/linux/memblock.h      |  1 -
 mm/memblock.c                 | 48 +++++++++++++------------------------------
 12 files changed, 14 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 9863f03097d0..4140843399ca 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -333,7 +333,6 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
 
 	sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL);
 
-	memblock_init();
 	for (i = 0; i < mi->nr_banks; i++)
 		memblock_add(mi->bank[i].start, mi->bank[i].size);
 
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 977484add216..4d65e9721f60 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -122,7 +122,6 @@ void __init early_init_devtree(void *params)
 	of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line);
 
 	/* Scan memory nodes and rebuild MEMBLOCKs */
-	memblock_init();
 	of_scan_flat_dt(early_init_dt_scan_root, NULL);
 	of_scan_flat_dt(early_init_dt_scan_memory, NULL);
 
diff --git a/arch/openrisc/kernel/prom.c b/arch/openrisc/kernel/prom.c
index 1bb58ba89afa..7dbc6e090b81 100644
--- a/arch/openrisc/kernel/prom.c
+++ b/arch/openrisc/kernel/prom.c
@@ -76,7 +76,6 @@ void __init early_init_devtree(void *params)
 	of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line);
 
 	/* Scan memory nodes and rebuild MEMBLOCKs */
-	memblock_init();
 	of_scan_flat_dt(early_init_dt_scan_root, NULL);
 	of_scan_flat_dt(early_init_dt_scan_memory, NULL);
 
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index fa1235b0503b..a7ee83e6eb17 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -733,8 +733,6 @@ void __init early_init_devtree(void *params)
 	of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line);
 
 	/* Scan memory nodes and rebuild MEMBLOCKs */
-	memblock_init();
-
 	of_scan_flat_dt(early_init_dt_scan_root, NULL);
 	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
 
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 939ca0f356f6..2528962609f8 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -324,7 +324,6 @@ void __init paging_init(void)
 	unsigned long vaddr, end;
 	int nid;
 
-	memblock_init();
 	sh_mv.mv_mem_init();
 
 	early_reserve_mem();
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 8584a25a9f0d..f42cc878bf97 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1769,8 +1769,6 @@ void __init paging_init(void)
 		sun4v_ktsb_init();
 	}
 
-	memblock_init();
-
 	/* Find available physical memory...
 	 *
 	 * Read it twice in order to work around a bug in openfirmware.
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 5fb09e2e5d0e..01e235bd669d 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -246,7 +246,6 @@ void __init uc32_memblock_init(struct meminfo *mi)
 	sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]),
 		meminfo_cmp, NULL);
 
-	memblock_init();
 	for (i = 0; i < mi->nr_banks; i++)
 		memblock_add(mi->bank[i].start, mi->bank[i].size);
 
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index be9282bcda72..51ff18616d50 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -31,8 +31,6 @@ static void __init i386_default_early_setup(void)
 
 void __init i386_start_kernel(void)
 {
-	memblock_init();
-
 	memblock_reserve(__pa_symbol(&_text),
 			 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index fd25b11549b8..3a3b779f41d3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -98,8 +98,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
 {
 	copy_bootdata(__va(real_mode_data));
 
-	memblock_init();
-
 	memblock_reserve(__pa_symbol(&_text),
 			 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 1f928659c338..12eb07bfb267 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1215,8 +1215,6 @@ asmlinkage void __init xen_start_kernel(void)
 	local_irq_disable();
 	early_boot_irqs_disabled = true;
 
-	memblock_init();
-
 	xen_raw_console_write("mapping kernel into physical memory\n");
 	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
 	xen_ident_map_ISA();
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 1a3bee78590f..6ac91c5b2fd3 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -52,7 +52,6 @@ phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 int memblock_free_reserved_regions(void);
 int memblock_reserve_reserved_regions(void);
 
-void memblock_init(void);
 void memblock_analyze(void);
 int memblock_add(phys_addr_t base, phys_addr_t size);
 int memblock_remove(phys_addr_t base, phys_addr_t size);
diff --git a/mm/memblock.c b/mm/memblock.c
index e808df845bbb..5bbb87f59aee 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -20,12 +20,23 @@
 #include <linux/seq_file.h>
 #include <linux/memblock.h>
 
-struct memblock memblock __initdata_memblock;
+static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
+static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
+
+struct memblock memblock __initdata_memblock = {
+	.memory.regions		= memblock_memory_init_regions,
+	.memory.cnt		= 1,	/* empty dummy entry */
+	.memory.max		= INIT_MEMBLOCK_REGIONS,
+
+	.reserved.regions	= memblock_reserved_init_regions,
+	.reserved.cnt		= 1,	/* empty dummy entry */
+	.reserved.max		= INIT_MEMBLOCK_REGIONS,
+
+	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
+};
 
 int memblock_debug __initdata_memblock;
 int memblock_can_resize __initdata_memblock;
-static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
-static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 
 /* inline so we don't get a warning when pr_debug is compiled out */
 static inline const char *memblock_type_name(struct memblock_type *type)
@@ -920,37 +931,6 @@ void __init memblock_analyze(void)
 	memblock_can_resize = 1;
 }
 
-void __init memblock_init(void)
-{
-	static int init_done __initdata = 0;
-
-	if (init_done)
-		return;
-	init_done = 1;
-
-	/* Hookup the initial arrays */
-	memblock.memory.regions	= memblock_memory_init_regions;
-	memblock.memory.max		= INIT_MEMBLOCK_REGIONS;
-	memblock.reserved.regions	= memblock_reserved_init_regions;
-	memblock.reserved.max	= INIT_MEMBLOCK_REGIONS;
-
-	/* Create a dummy zero size MEMBLOCK which will get coalesced away later.
-	 * This simplifies the memblock_add() code below...
-	 */
-	memblock.memory.regions[0].base = 0;
-	memblock.memory.regions[0].size = 0;
-	memblock_set_region_node(&memblock.memory.regions[0], MAX_NUMNODES);
-	memblock.memory.cnt = 1;
-
-	/* Ditto. */
-	memblock.reserved.regions[0].base = 0;
-	memblock.reserved.regions[0].size = 0;
-	memblock_set_region_node(&memblock.reserved.regions[0], MAX_NUMNODES);
-	memblock.reserved.cnt = 1;
-
-	memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE;
-}
-
 static int __init early_memblock(char *p)
 {
 	if (p && strstr(p, "debug"))
-- 
cgit v1.2.3


From 1440c4e2c918532f39131c3330fe2226e16be7b6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Dec 2011 10:22:08 -0800
Subject: memblock: Track total size of regions automatically

Total size of memory regions was calculated by memblock_analyze()
requiring explicitly calling the function between operations which can
change memory regions and possible users of total size, which is
cumbersome and fragile.

This patch makes each memblock_type track total size automatically
with minor modifications to memblock manipulation functions and remove
requirements on calling memblock_analyze().  [__]memblock_dump_all()
now also dumps the total size of reserved regions.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Yinghai Lu <yinghai@kernel.org>
---
 include/linux/memblock.h |  2 +-
 mm/memblock.c            | 27 +++++++++++++--------------
 2 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 6ac91c5b2fd3..5bb15005f0f7 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -30,12 +30,12 @@ struct memblock_region {
 struct memblock_type {
 	unsigned long cnt;	/* number of regions */
 	unsigned long max;	/* size of the allocated array */
+	phys_addr_t total_size;	/* size of all regions */
 	struct memblock_region *regions;
 };
 
 struct memblock {
 	phys_addr_t current_limit;
-	phys_addr_t memory_size;	/* Updated by memblock_analyze() */
 	struct memblock_type memory;
 	struct memblock_type reserved;
 };
diff --git a/mm/memblock.c b/mm/memblock.c
index b44875f5a996..f39964184b4a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -179,12 +179,14 @@ int __init_memblock memblock_reserve_reserved_regions(void)
 
 static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
 {
+	type->total_size -= type->regions[r].size;
 	memmove(&type->regions[r], &type->regions[r + 1],
 		(type->cnt - (r + 1)) * sizeof(type->regions[r]));
 	type->cnt--;
 
 	/* Special case for empty arrays */
 	if (type->cnt == 0) {
+		WARN_ON(type->total_size != 0);
 		type->cnt = 1;
 		type->regions[0].base = 0;
 		type->regions[0].size = 0;
@@ -314,6 +316,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
 	rgn->size = size;
 	memblock_set_region_node(rgn, nid);
 	type->cnt++;
+	type->total_size += size;
 }
 
 /**
@@ -340,10 +343,11 @@ static int __init_memblock memblock_add_region(struct memblock_type *type,
 
 	/* special case for empty array */
 	if (type->regions[0].size == 0) {
-		WARN_ON(type->cnt != 1);
+		WARN_ON(type->cnt != 1 || type->total_size);
 		type->regions[0].base = base;
 		type->regions[0].size = size;
 		memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
+		type->total_size = size;
 		return 0;
 	}
 repeat:
@@ -453,7 +457,8 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
 			 * to process the next region - the new top half.
 			 */
 			rgn->base = base;
-			rgn->size = rend - rgn->base;
+			rgn->size -= base - rbase;
+			type->total_size -= base - rbase;
 			memblock_insert_region(type, i, rbase, base - rbase,
 					       memblock_get_region_node(rgn));
 		} else if (rend > end) {
@@ -462,7 +467,8 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
 			 * current region - the new bottom half.
 			 */
 			rgn->base = end;
-			rgn->size = rend - rgn->base;
+			rgn->size -= end - rbase;
+			type->total_size -= end - rbase;
 			memblock_insert_region(type, i--, rbase, end - rbase,
 					       memblock_get_region_node(rgn));
 		} else {
@@ -784,10 +790,9 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
  * Remaining API functions
  */
 
-/* You must call memblock_analyze() before this. */
 phys_addr_t __init memblock_phys_mem_size(void)
 {
-	return memblock.memory_size;
+	return memblock.memory.total_size;
 }
 
 /* lowest address */
@@ -803,7 +808,6 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
 	return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
 }
 
-/* You must call memblock_analyze() after this. */
 void __init memblock_enforce_memory_limit(phys_addr_t limit)
 {
 	unsigned long i;
@@ -906,7 +910,9 @@ static void __init_memblock memblock_dump(struct memblock_type *type, char *name
 void __init_memblock __memblock_dump_all(void)
 {
 	pr_info("MEMBLOCK configuration:\n");
-	pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size);
+	pr_info(" memory size = %#llx reserved size = %#llx\n",
+		(unsigned long long)memblock.memory.total_size,
+		(unsigned long long)memblock.reserved.total_size);
 
 	memblock_dump(&memblock.memory, "memory");
 	memblock_dump(&memblock.reserved, "reserved");
@@ -914,13 +920,6 @@ void __init_memblock __memblock_dump_all(void)
 
 void __init memblock_analyze(void)
 {
-	int i;
-
-	memblock.memory_size = 0;
-
-	for (i = 0; i < memblock.memory.cnt; i++)
-		memblock.memory_size += memblock.memory.regions[i].size;
-
 	/* We allow resizing from there */
 	memblock_can_resize = 1;
 }
-- 
cgit v1.2.3


From 1aadc0560f46530f8a0f11055285b876a8a31770 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Dec 2011 10:22:08 -0800
Subject: memblock: s/memblock_analyze()/memblock_allow_resize()/ and update
 users

The only function of memblock_analyze() is now allowing resize of
memblock region arrays.  Rename it to memblock_allow_resize() and
update its users.

* The following users remain the same other than renaming.

  arm/mm/init.c::arm_memblock_init()
  microblaze/kernel/prom.c::early_init_devtree()
  powerpc/kernel/prom.c::early_init_devtree()
  openrisc/kernel/prom.c::early_init_devtree()
  sh/mm/init.c::paging_init()
  sparc/mm/init_64.c::paging_init()
  unicore32/mm/init.c::uc32_memblock_init()

* In the following users, analyze was used to update total size which
  is no longer necessary.

  powerpc/kernel/machine_kexec.c::reserve_crashkernel()
  powerpc/kernel/prom.c::early_init_devtree()
  powerpc/mm/init_32.c::MMU_init()
  powerpc/mm/tlb_nohash.c::__early_init_mmu()
  powerpc/platforms/ps3/mm.c::ps3_mm_add_memory()
  powerpc/platforms/embedded6xx/wii.c::wii_memory_fixups()
  sh/kernel/machine_kexec.c::reserve_crashkernel()

* x86/kernel/e820.c::memblock_x86_fill() was directly setting
  memblock_can_resize before populating memblock and calling analyze
  afterwards.  Call memblock_allow_resize() before start populating.

memblock_can_resize is now static inside memblock.c.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/arm/mm/init.c                       | 2 +-
 arch/microblaze/kernel/prom.c            | 2 +-
 arch/openrisc/kernel/prom.c              | 2 +-
 arch/powerpc/kernel/machine_kexec.c      | 3 ---
 arch/powerpc/kernel/prom.c               | 3 +--
 arch/powerpc/mm/init_32.c                | 2 --
 arch/powerpc/mm/tlb_nohash.c             | 1 -
 arch/powerpc/platforms/embedded6xx/wii.c | 1 -
 arch/powerpc/platforms/ps3/mm.c          | 1 -
 arch/sh/kernel/machine_kexec.c           | 3 ---
 arch/sh/mm/init.c                        | 2 +-
 arch/sparc/mm/init_64.c                  | 2 +-
 arch/unicore32/mm/init.c                 | 2 +-
 arch/x86/kernel/e820.c                   | 3 +--
 include/linux/memblock.h                 | 3 +--
 mm/memblock.c                            | 5 ++---
 16 files changed, 11 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 4140843399ca..7c38474e533a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -371,7 +371,7 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
 	if (mdesc->reserve)
 		mdesc->reserve();
 
-	memblock_analyze();
+	memblock_allow_resize();
 	memblock_dump_all();
 }
 
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 4d65e9721f60..80d314e81901 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -129,7 +129,7 @@ void __init early_init_devtree(void *params)
 	strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
 	parse_early_param();
 
-	memblock_analyze();
+	memblock_allow_resize();
 
 	pr_debug("Phys. mem: %lx\n", (unsigned long) memblock_phys_mem_size());
 
diff --git a/arch/openrisc/kernel/prom.c b/arch/openrisc/kernel/prom.c
index 7dbc6e090b81..3d4478f6c942 100644
--- a/arch/openrisc/kernel/prom.c
+++ b/arch/openrisc/kernel/prom.c
@@ -82,7 +82,7 @@ void __init early_init_devtree(void *params)
 	/* Save command line for /proc/cmdline and then parse parameters */
 	strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
 
-	memblock_analyze();
+	memblock_allow_resize();
 
 	/* We must copy the flattend device tree from init memory to regular
 	 * memory because the device tree references the strings in it
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 9ce1672afb59..a2158a395d96 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -107,9 +107,6 @@ void __init reserve_crashkernel(void)
 	unsigned long long crash_size, crash_base;
 	int ret;
 
-	/* this is necessary because of memblock_phys_mem_size() */
-	memblock_analyze();
-
 	/* use common parsing */
 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 			&crash_size, &crash_base);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 28500d4f29d9..abe405dab34d 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -758,11 +758,10 @@ void __init early_init_devtree(void *params)
 	 * Ensure that total memory size is page-aligned, because otherwise
 	 * mark_bootmem() gets upset.
 	 */
-	memblock_analyze();
 	limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
 	memblock_enforce_memory_limit(limit);
 
-	memblock_analyze();
+	memblock_allow_resize();
 	memblock_dump_all();
 
 	DBG("Phys. mem: %llx\n", memblock_phys_mem_size());
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 12bb528e51c5..58861fa1220e 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -135,7 +135,6 @@ void __init MMU_init(void)
 	if (memblock.memory.cnt > 1) {
 #ifndef CONFIG_WII
 		memblock_enforce_memory_limit(memblock.memory.regions[0].size);
-		memblock_analyze();
 		printk(KERN_WARNING "Only using first contiguous memory region");
 #else
 		wii_memory_fixups();
@@ -158,7 +157,6 @@ void __init MMU_init(void)
 #ifndef CONFIG_HIGHMEM
 		total_memory = total_lowmem;
 		memblock_enforce_memory_limit(total_lowmem);
-		memblock_analyze();
 #endif /* CONFIG_HIGHMEM */
 	}
 
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 4e13d6f9023e..573ba3b69d1f 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -615,7 +615,6 @@ static void __early_init_mmu(int boot_cpu)
 
 		/* limit memory so we dont have linear faults */
 		memblock_enforce_memory_limit(linear_map_top);
-		memblock_analyze();
 
 		patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
 		patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e);
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index 1cbe9d3c7977..6d8dadf19f0b 100644
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -88,7 +88,6 @@ void __init wii_memory_fixups(void)
 	wii_hole_size = p[1].base - wii_hole_start;
 	memblock_add(wii_hole_start, wii_hole_size);
 	memblock_reserve(wii_hole_start, wii_hole_size);
-	memblock_analyze();
 
 	BUG_ON(memblock.memory.cnt != 1);
 	__memblock_dump_all();
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 72714ad27842..8bd6ba542691 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -319,7 +319,6 @@ static int __init ps3_mm_add_memory(void)
 	}
 
 	memblock_add(start_addr, map.r1.size);
-	memblock_analyze();
 
 	result = online_pages(start_pfn, nr_pages);
 
diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index c5a33f007f88..9fea49f6e667 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -157,9 +157,6 @@ void __init reserve_crashkernel(void)
 	unsigned long long crash_size, crash_base;
 	int ret;
 
-	/* this is necessary because of memblock_phys_mem_size() */
-	memblock_analyze();
-
 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 			&crash_size, &crash_base);
 	if (ret == 0 && crash_size > 0) {
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 2528962609f8..82cc576fab15 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -336,7 +336,7 @@ void __init paging_init(void)
 		sh_mv.mv_mem_reserve();
 
 	memblock_enforce_memory_limit(memory_limit);
-	memblock_analyze();
+	memblock_allow_resize();
 
 	memblock_dump_all();
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index f42cc878bf97..29723a2031fc 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1794,7 +1794,7 @@ void __init paging_init(void)
 
 	memblock_enforce_memory_limit(cmdline_memory_size);
 
-	memblock_analyze();
+	memblock_allow_resize();
 	memblock_dump_all();
 
 	set_bit(0, mmu_context_bmap);
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 01e235bd669d..de186bde8975 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -264,7 +264,7 @@ void __init uc32_memblock_init(struct meminfo *mi)
 
 	uc32_mm_memblock_reserve();
 
-	memblock_analyze();
+	memblock_allow_resize();
 	memblock_dump_all();
 }
 
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 056e65d5012b..8071e2f3d6eb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1072,7 +1072,7 @@ void __init memblock_x86_fill(void)
 	 * We are safe to enable resizing, beause memblock_x86_fill()
 	 * is rather later for x86
 	 */
-	memblock_can_resize = 1;
+	memblock_allow_resize();
 
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
@@ -1087,7 +1087,6 @@ void __init memblock_x86_fill(void)
 		memblock_add(ei->addr, ei->size);
 	}
 
-	memblock_analyze();
 	memblock_dump_all();
 }
 
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 5bb15005f0f7..c5b3bbc75897 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -42,7 +42,6 @@ struct memblock {
 
 extern struct memblock memblock;
 extern int memblock_debug;
-extern int memblock_can_resize;
 
 #define memblock_dbg(fmt, ...) \
 	if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
@@ -52,7 +51,7 @@ phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 int memblock_free_reserved_regions(void);
 int memblock_reserve_reserved_regions(void);
 
-void memblock_analyze(void);
+void memblock_allow_resize(void);
 int memblock_add(phys_addr_t base, phys_addr_t size);
 int memblock_remove(phys_addr_t base, phys_addr_t size);
 int memblock_free(phys_addr_t base, phys_addr_t size);
diff --git a/mm/memblock.c b/mm/memblock.c
index f39964184b4a..a3ca95f35e03 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -36,7 +36,7 @@ struct memblock memblock __initdata_memblock = {
 };
 
 int memblock_debug __initdata_memblock;
-int memblock_can_resize __initdata_memblock;
+static int memblock_can_resize __initdata_memblock;
 
 /* inline so we don't get a warning when pr_debug is compiled out */
 static inline const char *memblock_type_name(struct memblock_type *type)
@@ -918,9 +918,8 @@ void __init_memblock __memblock_dump_all(void)
 	memblock_dump(&memblock.reserved, "reserved");
 }
 
-void __init memblock_analyze(void)
+void __init memblock_allow_resize(void)
 {
-	/* We allow resizing from there */
 	memblock_can_resize = 1;
 }
 
-- 
cgit v1.2.3


From 7fb0bc3f06fdc3a35e41bcea7a15e53d2515362f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Dec 2011 10:22:08 -0800
Subject: memblock: Implement memblock_add_node()

Implement memblock_add_node() which can add a new memblock memory
region with specific node ID.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Yinghai Lu <yinghai@kernel.org>
---
 include/linux/memblock.h |  1 +
 mm/memblock.c            | 20 +++++++++++++-------
 2 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index c5b3bbc75897..c7b68f489d46 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -52,6 +52,7 @@ int memblock_free_reserved_regions(void);
 int memblock_reserve_reserved_regions(void);
 
 void memblock_allow_resize(void);
+int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
 int memblock_add(phys_addr_t base, phys_addr_t size);
 int memblock_remove(phys_addr_t base, phys_addr_t size);
 int memblock_free(phys_addr_t base, phys_addr_t size);
diff --git a/mm/memblock.c b/mm/memblock.c
index a3ca95f35e03..ef4987b03afd 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -324,6 +324,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
  * @type: memblock type to add new region into
  * @base: base address of the new region
  * @size: size of the new region
+ * @nid: nid of the new region
  *
  * Add new memblock region [@base,@base+@size) into @type.  The new region
  * is allowed to overlap with existing ones - overlaps don't affect already
@@ -334,7 +335,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
  * 0 on success, -errno on failure.
  */
 static int __init_memblock memblock_add_region(struct memblock_type *type,
-					       phys_addr_t base, phys_addr_t size)
+				phys_addr_t base, phys_addr_t size, int nid)
 {
 	bool insert = false;
 	phys_addr_t obase = base;
@@ -346,7 +347,7 @@ static int __init_memblock memblock_add_region(struct memblock_type *type,
 		WARN_ON(type->cnt != 1 || type->total_size);
 		type->regions[0].base = base;
 		type->regions[0].size = size;
-		memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
+		memblock_set_region_node(&type->regions[0], nid);
 		type->total_size = size;
 		return 0;
 	}
@@ -376,7 +377,7 @@ repeat:
 			nr_new++;
 			if (insert)
 				memblock_insert_region(type, i++, base,
-						rbase - base, MAX_NUMNODES);
+						       rbase - base, nid);
 		}
 		/* area below @rend is dealt with, forget about it */
 		base = min(rend, end);
@@ -386,8 +387,7 @@ repeat:
 	if (base < end) {
 		nr_new++;
 		if (insert)
-			memblock_insert_region(type, i, base, end - base,
-					       MAX_NUMNODES);
+			memblock_insert_region(type, i, base, end - base, nid);
 	}
 
 	/*
@@ -406,9 +406,15 @@ repeat:
 	}
 }
 
+int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
+				       int nid)
+{
+	return memblock_add_region(&memblock.memory, base, size, nid);
+}
+
 int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
 {
-	return memblock_add_region(&memblock.memory, base, size);
+	return memblock_add_region(&memblock.memory, base, size, MAX_NUMNODES);
 }
 
 /**
@@ -522,7 +528,7 @@ int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
 		     (void *)_RET_IP_);
 	BUG_ON(0 == size);
 
-	return memblock_add_region(_rgn, base, size);
+	return memblock_add_region(_rgn, base, size, MAX_NUMNODES);
 }
 
 /**
-- 
cgit v1.2.3


From 0ee332c1451869963626bf9cac88f165a90990e1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Dec 2011 10:22:09 -0800
Subject: memblock: Kill early_node_map[]

Now all ARCH_POPULATES_NODE_MAP archs select HAVE_MEBLOCK_NODE_MAP -
there's no user of early_node_map[] left.  Kill early_node_map[] and
replace ARCH_POPULATES_NODE_MAP with HAVE_MEMBLOCK_NODE_MAP.  Also,
relocate for_each_mem_pfn_range() and helper from mm.h to memblock.h
as page_alloc.c would no longer host an alternative implementation.

This change is ultimately one to one mapping and shouldn't cause any
observable difference; however, after the recent changes, there are
some functions which now would fit memblock.c better than page_alloc.c
and dependency on HAVE_MEMBLOCK_NODE_MAP instead of HAVE_MEMBLOCK
doesn't make much sense on some of them.  Further cleanups for
functions inside HAVE_MEMBLOCK_NODE_MAP in mm.h would be nice.

-v2: Fix compile bug introduced by mis-spelling
 CONFIG_HAVE_MEMBLOCK_NODE_MAP to CONFIG_MEMBLOCK_HAVE_NODE_MAP in
 mmzone.h.  Reported by Stephen Rothwell.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Chen Liqin <liqin.chen@sunplusct.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/ia64/Kconfig           |   3 -
 arch/mips/Kconfig           |   3 -
 arch/powerpc/Kconfig        |   3 -
 arch/s390/Kconfig           |   3 -
 arch/score/Kconfig          |   3 -
 arch/sh/mm/Kconfig          |   3 -
 arch/sparc/Kconfig          |   3 -
 arch/x86/Kconfig            |   3 -
 drivers/iommu/intel-iommu.c |   1 +
 include/linux/memblock.h    |  23 +++-
 include/linux/mm.h          |  50 ++-------
 include/linux/mmzone.h      |   8 +-
 mm/memblock.c               |   2 +-
 mm/page_alloc.c             | 259 +++-----------------------------------------
 14 files changed, 55 insertions(+), 312 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index e2c7de0d823d..3b7a7c483785 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -477,9 +477,6 @@ config NODES_SHIFT
 	  MAX_NUMNODES will be 2^(This value).
 	  If in doubt, use the default.
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 # VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
 # VIRTUAL_MEM_MAP has been retained for historical reasons.
 config VIRTUAL_MEM_MAP
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index b789847d93fd..9c652eb68aaa 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2067,9 +2067,6 @@ config ARCH_DISCONTIGMEM_ENABLE
 	  or have huge holes in the physical address space for other reasons.
 	  See <file:Documentation/vm/numa> for more.
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 config ARCH_SPARSEMEM_ENABLE
 	bool
 	select SPARSEMEM_STATIC
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8516477c4dc5..ead0bc68439d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -422,9 +422,6 @@ config ARCH_SPARSEMEM_DEFAULT
 	def_bool y
 	depends on (SMP && PPC_PSERIES) || PPC_PS3
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 config SYS_SUPPORTS_HUGETLBFS
 	bool
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index e383caf251a3..d48ede334434 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -348,9 +348,6 @@ config WARN_DYNAMIC_STACK
 
 	  Say N if you are unsure.
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 comment "Kernel preemption"
 
 source "kernel/Kconfig.preempt"
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index e5ae12f48781..8b0c9464aa9d 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -63,9 +63,6 @@ config 32BIT
 config ARCH_FLATMEM_ENABLE
 	def_bool y
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 source "mm/Kconfig"
 
 config MEMORY_START
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index c3e61b366493..cb8f9920f4dd 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -143,9 +143,6 @@ config MAX_ACTIVE_REGIONS
 		       CPU_SUBTYPE_SH7785)
 	default "1"
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 91a6d1e2bf35..70ae9d81870e 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -353,9 +353,6 @@ config NODES_SPAN_OTHER_NODES
 	def_bool y
 	depends on NEED_MULTIPLE_NODES
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y if SPARC64
-
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y if SPARC64
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5d1514c263f8..9bab4a90d7a1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -206,9 +206,6 @@ config ZONE_DMA32
 	bool
 	default X86_64
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 config AUDIT_ARCH
 	bool
 	default X86_64
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index bcbd693b351a..d1c17934d66f 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -41,6 +41,7 @@
 #include <linux/tboot.h>
 #include <linux/dmi.h>
 #include <linux/pci-ats.h>
+#include <linux/memblock.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
 
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index c7b68f489d46..cd7606b71e5a 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -58,6 +58,26 @@ int memblock_remove(phys_addr_t base, phys_addr_t size);
 int memblock_free(phys_addr_t base, phys_addr_t size);
 int memblock_reserve(phys_addr_t base, phys_addr_t size);
 
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
+			  unsigned long *out_end_pfn, int *out_nid);
+
+/**
+ * for_each_mem_pfn_range - early memory pfn range iterator
+ * @i: an integer used as loop variable
+ * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @p_start: ptr to ulong for start pfn of the range, can be %NULL
+ * @p_end: ptr to ulong for end pfn of the range, can be %NULL
+ * @p_nid: ptr to int for nid of the range, can be %NULL
+ *
+ * Walks over configured memory ranges.  Available after early_node_map is
+ * populated.
+ */
+#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid)		\
+	for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
+	     i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+
 void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
 			   phys_addr_t *out_end, int *out_nid);
 
@@ -101,9 +121,6 @@ static inline int memblock_get_region_node(const struct memblock_region *r)
 }
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
-/* The numa aware allocator is only available if
- * CONFIG_ARCH_POPULATES_NODE_MAP is set
- */
 phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end,
 					phys_addr_t size, phys_addr_t align, int nid);
 phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6b365aee8396..c6f49bea52a3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1252,43 +1252,34 @@ static inline void pgtable_page_dtor(struct page *page)
 extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, unsigned long * zones_size,
 		unsigned long zone_start_pfn, unsigned long *zholes_size);
-#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 /*
- * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its
+ * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its
  * zones, allocate the backing mem_map and account for memory holes in a more
  * architecture independent manner. This is a substitute for creating the
  * zone_sizes[] and zholes_size[] arrays and passing them to
  * free_area_init_node()
  *
  * An architecture is expected to register range of page frames backed by
- * physical memory with add_active_range() before calling
+ * physical memory with memblock_add[_node]() before calling
  * free_area_init_nodes() passing in the PFN each zone ends at. At a basic
  * usage, an architecture is expected to do something like
  *
  * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,
  * 							 max_highmem_pfn};
  * for_each_valid_physical_page_range()
- * 	add_active_range(node_id, start_pfn, end_pfn)
+ * 	memblock_add_node(base, size, nid)
  * free_area_init_nodes(max_zone_pfns);
  *
- * If the architecture guarantees that there are no holes in the ranges
- * registered with add_active_range(), free_bootmem_active_regions()
- * will call free_bootmem_node() for each registered physical page range.
- * Similarly sparse_memory_present_with_active_regions() calls
- * memory_present() for each range when SPARSEMEM is enabled.
+ * free_bootmem_with_active_regions() calls free_bootmem_node() for each
+ * registered physical page range.  Similarly
+ * sparse_memory_present_with_active_regions() calls memory_present() for
+ * each range when SPARSEMEM is enabled.
  *
  * See mm/page_alloc.c for more information on each function exposed by
- * CONFIG_ARCH_POPULATES_NODE_MAP
+ * CONFIG_HAVE_MEMBLOCK_NODE_MAP.
  */
 extern void free_area_init_nodes(unsigned long *max_zone_pfn);
-#ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-extern void add_active_range(unsigned int nid, unsigned long start_pfn,
-					unsigned long end_pfn);
-extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
-					unsigned long end_pfn);
-extern void remove_all_active_ranges(void);
-void sort_node_map(void);
-#endif
 unsigned long node_map_pfn_alignment(void);
 unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
 						unsigned long end_pfn);
@@ -1303,28 +1294,9 @@ int add_from_early_node_map(struct range *range, int az,
 				   int nr_range, int nid);
 extern void sparse_memory_present_with_active_regions(int nid);
 
-extern void __next_mem_pfn_range(int *idx, int nid,
-				 unsigned long *out_start_pfn,
-				 unsigned long *out_end_pfn, int *out_nid);
-
-/**
- * for_each_mem_pfn_range - early memory pfn range iterator
- * @i: an integer used as loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
- * @p_start: ptr to ulong for start pfn of the range, can be %NULL
- * @p_end: ptr to ulong for end pfn of the range, can be %NULL
- * @p_nid: ptr to int for nid of the range, can be %NULL
- *
- * Walks over configured memory ranges.  Available after early_node_map is
- * populated.
- */
-#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid)		\
-	for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
-	     i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
-
-#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
-#if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \
+#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
     !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
 static inline int __early_pfn_to_nid(unsigned long pfn)
 {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 188cb2ffe8db..3ac040f19369 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -598,13 +598,13 @@ struct zonelist {
 #endif
 };
 
-#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 struct node_active_region {
 	unsigned long start_pfn;
 	unsigned long end_pfn;
 	int nid;
 };
-#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 #ifndef CONFIG_DISCONTIGMEM
 /* The array of struct pages - for discontigmem use pgdat->lmem_map */
@@ -720,7 +720,7 @@ extern int movable_zone;
 
 static inline int zone_movable_is_highmem(void)
 {
-#if defined(CONFIG_HIGHMEM) && defined(CONFIG_ARCH_POPULATES_NODE_MAP)
+#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE)
 	return movable_zone == ZONE_HIGHMEM;
 #else
 	return 0;
@@ -938,7 +938,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
 #endif
 
 #if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
-	!defined(CONFIG_ARCH_POPULATES_NODE_MAP)
+	!defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
 static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 {
 	return 0;
diff --git a/mm/memblock.c b/mm/memblock.c
index ef4987b03afd..1adbef09b43a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -716,7 +716,7 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
 static phys_addr_t __init memblock_nid_range_rev(phys_addr_t start,
 						 phys_addr_t end, int *nid)
 {
-#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 	unsigned long start_pfn, end_pfn;
 	int i;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6ce27331834c..63ff8dab433a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -181,42 +181,17 @@ static unsigned long __meminitdata nr_kernel_pages;
 static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
 
-#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
-  #ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-    /*
-     * MAX_ACTIVE_REGIONS determines the maximum number of distinct ranges
-     * of memory (RAM) that may be registered with add_active_range().
-     * Ranges passed to add_active_range() will be merged if possible so
-     * the number of times add_active_range() can be called is related to
-     * the number of nodes and the number of holes
-     */
-    #ifdef CONFIG_MAX_ACTIVE_REGIONS
-      /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */
-      #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS
-    #else
-      #if MAX_NUMNODES >= 32
-        /* If there can be many nodes, allow up to 50 holes per node */
-        #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50)
-      #else
-        /* By default, allow up to 256 distinct regions */
-        #define MAX_ACTIVE_REGIONS 256
-      #endif
-    #endif
-
-    static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS];
-    static int __meminitdata nr_nodemap_entries;
-#endif /* !CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-
-  static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
-  static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-  static unsigned long __initdata required_kernelcore;
-  static unsigned long __initdata required_movablecore;
-  static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
-
-  /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
-  int movable_zone;
-  EXPORT_SYMBOL(movable_zone);
-#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
+static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
+static unsigned long __initdata required_kernelcore;
+static unsigned long __initdata required_movablecore;
+static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
+
+/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
+int movable_zone;
+EXPORT_SYMBOL(movable_zone);
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 #if MAX_NUMNODES > 1
 int nr_node_ids __read_mostly = MAX_NUMNODES;
@@ -3734,7 +3709,7 @@ __meminit int init_currently_empty_zone(struct zone *zone,
 	return 0;
 }
 
-#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
 /*
  * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
@@ -4002,7 +3977,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
 	return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
 }
 
-#else
+#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
 					unsigned long zone_type,
 					unsigned long *zones_size)
@@ -4020,7 +3995,7 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
 	return zholes_size[zone_type];
 }
 
-#endif
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
 		unsigned long *zones_size, unsigned long *zholes_size)
@@ -4243,10 +4218,10 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 	 */
 	if (pgdat == NODE_DATA(0)) {
 		mem_map = NODE_DATA(0)->node_mem_map;
-#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 		if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
 			mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
-#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 	}
 #endif
 #endif /* CONFIG_FLAT_NODE_MEM_MAP */
@@ -4271,7 +4246,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 	free_area_init_core(pgdat, zones_size, zholes_size);
 }
 
-#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 
 #if MAX_NUMNODES > 1
 /*
@@ -4292,201 +4267,6 @@ static inline void setup_nr_node_ids(void)
 }
 #endif
 
-#ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-/*
- * Common iterator interface used to define for_each_mem_pfn_range().
- */
-void __meminit __next_mem_pfn_range(int *idx, int nid,
-				    unsigned long *out_start_pfn,
-				    unsigned long *out_end_pfn, int *out_nid)
-{
-	struct node_active_region *r = NULL;
-
-	while (++*idx < nr_nodemap_entries) {
-		if (nid == MAX_NUMNODES || nid == early_node_map[*idx].nid) {
-			r = &early_node_map[*idx];
-			break;
-		}
-	}
-	if (!r) {
-		*idx = -1;
-		return;
-	}
-
-	if (out_start_pfn)
-		*out_start_pfn = r->start_pfn;
-	if (out_end_pfn)
-		*out_end_pfn = r->end_pfn;
-	if (out_nid)
-		*out_nid = r->nid;
-}
-
-/**
- * add_active_range - Register a range of PFNs backed by physical memory
- * @nid: The node ID the range resides on
- * @start_pfn: The start PFN of the available physical memory
- * @end_pfn: The end PFN of the available physical memory
- *
- * These ranges are stored in an early_node_map[] and later used by
- * free_area_init_nodes() to calculate zone sizes and holes. If the
- * range spans a memory hole, it is up to the architecture to ensure
- * the memory is not freed by the bootmem allocator. If possible
- * the range being registered will be merged with existing ranges.
- */
-void __init add_active_range(unsigned int nid, unsigned long start_pfn,
-						unsigned long end_pfn)
-{
-	int i;
-
-	mminit_dprintk(MMINIT_TRACE, "memory_register",
-			"Entering add_active_range(%d, %#lx, %#lx) "
-			"%d entries of %d used\n",
-			nid, start_pfn, end_pfn,
-			nr_nodemap_entries, MAX_ACTIVE_REGIONS);
-
-	mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
-
-	/* Merge with existing active regions if possible */
-	for (i = 0; i < nr_nodemap_entries; i++) {
-		if (early_node_map[i].nid != nid)
-			continue;
-
-		/* Skip if an existing region covers this new one */
-		if (start_pfn >= early_node_map[i].start_pfn &&
-				end_pfn <= early_node_map[i].end_pfn)
-			return;
-
-		/* Merge forward if suitable */
-		if (start_pfn <= early_node_map[i].end_pfn &&
-				end_pfn > early_node_map[i].end_pfn) {
-			early_node_map[i].end_pfn = end_pfn;
-			return;
-		}
-
-		/* Merge backward if suitable */
-		if (start_pfn < early_node_map[i].start_pfn &&
-				end_pfn >= early_node_map[i].start_pfn) {
-			early_node_map[i].start_pfn = start_pfn;
-			return;
-		}
-	}
-
-	/* Check that early_node_map is large enough */
-	if (i >= MAX_ACTIVE_REGIONS) {
-		printk(KERN_CRIT "More than %d memory regions, truncating\n",
-							MAX_ACTIVE_REGIONS);
-		return;
-	}
-
-	early_node_map[i].nid = nid;
-	early_node_map[i].start_pfn = start_pfn;
-	early_node_map[i].end_pfn = end_pfn;
-	nr_nodemap_entries = i + 1;
-}
-
-/**
- * remove_active_range - Shrink an existing registered range of PFNs
- * @nid: The node id the range is on that should be shrunk
- * @start_pfn: The new PFN of the range
- * @end_pfn: The new PFN of the range
- *
- * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
- * The map is kept near the end physical page range that has already been
- * registered. This function allows an arch to shrink an existing registered
- * range.
- */
-void __init remove_active_range(unsigned int nid, unsigned long start_pfn,
-				unsigned long end_pfn)
-{
-	unsigned long this_start_pfn, this_end_pfn;
-	int i, j;
-	int removed = 0;
-
-	printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n",
-			  nid, start_pfn, end_pfn);
-
-	/* Find the old active region end and shrink */
-	for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) {
-		if (this_start_pfn >= start_pfn && this_end_pfn <= end_pfn) {
-			/* clear it */
-			early_node_map[i].start_pfn = 0;
-			early_node_map[i].end_pfn = 0;
-			removed = 1;
-			continue;
-		}
-		if (this_start_pfn < start_pfn && this_end_pfn > start_pfn) {
-			early_node_map[i].end_pfn = start_pfn;
-			if (this_end_pfn > end_pfn)
-				add_active_range(nid, end_pfn, this_end_pfn);
-			continue;
-		}
-		if (this_start_pfn >= start_pfn && this_end_pfn > end_pfn &&
-		    this_start_pfn < end_pfn) {
-			early_node_map[i].start_pfn = end_pfn;
-			continue;
-		}
-	}
-
-	if (!removed)
-		return;
-
-	/* remove the blank ones */
-	for (i = nr_nodemap_entries - 1; i > 0; i--) {
-		if (early_node_map[i].nid != nid)
-			continue;
-		if (early_node_map[i].end_pfn)
-			continue;
-		/* we found it, get rid of it */
-		for (j = i; j < nr_nodemap_entries - 1; j++)
-			memcpy(&early_node_map[j], &early_node_map[j+1],
-				sizeof(early_node_map[j]));
-		j = nr_nodemap_entries - 1;
-		memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
-		nr_nodemap_entries--;
-	}
-}
-
-/**
- * remove_all_active_ranges - Remove all currently registered regions
- *
- * During discovery, it may be found that a table like SRAT is invalid
- * and an alternative discovery method must be used. This function removes
- * all currently registered regions.
- */
-void __init remove_all_active_ranges(void)
-{
-	memset(early_node_map, 0, sizeof(early_node_map));
-	nr_nodemap_entries = 0;
-}
-
-/* Compare two active node_active_regions */
-static int __init cmp_node_active_region(const void *a, const void *b)
-{
-	struct node_active_region *arange = (struct node_active_region *)a;
-	struct node_active_region *brange = (struct node_active_region *)b;
-
-	/* Done this way to avoid overflows */
-	if (arange->start_pfn > brange->start_pfn)
-		return 1;
-	if (arange->start_pfn < brange->start_pfn)
-		return -1;
-
-	return 0;
-}
-
-/* sort the node_map by start_pfn */
-void __init sort_node_map(void)
-{
-	sort(early_node_map, (size_t)nr_nodemap_entries,
-			sizeof(struct node_active_region),
-			cmp_node_active_region, NULL);
-}
-#else /* !CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-static inline void sort_node_map(void)
-{
-}
-#endif
-
 /**
  * node_map_pfn_alignment - determine the maximum internode alignment
  *
@@ -4764,9 +4544,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 	unsigned long start_pfn, end_pfn;
 	int i, nid;
 
-	/* Sort early_node_map as initialisation assumes it is sorted */
-	sort_node_map();
-
 	/* Record where the zone boundaries are */
 	memset(arch_zone_lowest_possible_pfn, 0,
 				sizeof(arch_zone_lowest_possible_pfn));
@@ -4867,7 +4644,7 @@ static int __init cmdline_parse_movablecore(char *p)
 early_param("kernelcore", cmdline_parse_kernelcore);
 early_param("movablecore", cmdline_parse_movablecore);
 
-#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 /**
  * set_dma_reserve - set the specified number of pages reserved in the first zone
-- 
cgit v1.2.3


From 7bd0b0f0da3b1ec11cbcc798eb0ef747a1184077 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 8 Dec 2011 10:22:09 -0800
Subject: memblock: Reimplement memblock allocation using reverse free area
 iterator

Now that all early memory information is in memblock when enabled, we
can implement reverse free area iterator and use it to implement NUMA
aware allocator which is then wrapped for simpler variants instead of
the confusing and inefficient mending of information in separate NUMA
aware allocator.

Implement for_each_free_mem_range_reverse(), use it to reimplement
memblock_find_in_range_node() which in turn is used by all allocators.

The visible allocator interface is inconsistent and can probably use
some cleanup too.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Yinghai Lu <yinghai@kernel.org>
---
 include/linux/memblock.h |  24 ++++-
 mm/memblock.c            | 273 ++++++++++++++++++++++-------------------------
 2 files changed, 149 insertions(+), 148 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index cd7606b71e5a..a6bb10235148 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -46,6 +46,8 @@ extern int memblock_debug;
 #define memblock_dbg(fmt, ...) \
 	if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 
+phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end,
+				phys_addr_t size, phys_addr_t align, int nid);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 				   phys_addr_t size, phys_addr_t align);
 int memblock_free_reserved_regions(void);
@@ -98,6 +100,26 @@ void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
 	     i != (u64)ULLONG_MAX;					\
 	     __next_free_mem_range(&i, nid, p_start, p_end, p_nid))
 
+void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
+			       phys_addr_t *out_end, int *out_nid);
+
+/**
+ * for_each_free_mem_range_reverse - rev-iterate through free memblock areas
+ * @i: u64 used as loop variable
+ * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @p_nid: ptr to int for nid of the range, can be %NULL
+ *
+ * Walks over free (memory && !reserved) areas of memblock in reverse
+ * order.  Available as soon as memblock is initialized.
+ */
+#define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid)	\
+	for (i = (u64)ULLONG_MAX,					\
+	     __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid);	\
+	     i != (u64)ULLONG_MAX;					\
+	     __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid))
+
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid);
 
@@ -121,8 +143,6 @@ static inline int memblock_get_region_node(const struct memblock_region *r)
 }
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
-phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end,
-					phys_addr_t size, phys_addr_t align, int nid);
 phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid);
 phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 1adbef09b43a..2f55f19b7c86 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -79,78 +79,66 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
 	return (i < type->cnt) ? i : -1;
 }
 
-/*
- * Find, allocate, deallocate or reserve unreserved regions. All allocations
- * are top-down.
+/**
+ * memblock_find_in_range_node - find free area in given range and node
+ * @start: start of candidate range
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @size: size of free area to find
+ * @align: alignment of free area to find
+ * @nid: nid of the free area to find, %MAX_NUMNODES for any node
+ *
+ * Find @size free area aligned to @align in the specified range and node.
+ *
+ * RETURNS:
+ * Found address on success, %0 on failure.
  */
-
-static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_addr_t end,
-					  phys_addr_t size, phys_addr_t align)
+phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
+					phys_addr_t end, phys_addr_t size,
+					phys_addr_t align, int nid)
 {
-	phys_addr_t base, res_base;
-	long j;
+	phys_addr_t this_start, this_end, cand;
+	u64 i;
 
-	/* In case, huge size is requested */
-	if (end < size)
-		return 0;
+	/* align @size to avoid excessive fragmentation on reserved array */
+	size = round_up(size, align);
+
+	/* pump up @end */
+	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
+		end = memblock.current_limit;
 
-	base = round_down(end - size, align);
+	/* adjust @start to avoid underflow and allocating the first page */
+	start = max3(start, size, (phys_addr_t)PAGE_SIZE);
+	end = max(start, end);
 
-	/* Prevent allocations returning 0 as it's also used to
-	 * indicate an allocation failure
-	 */
-	if (start == 0)
-		start = PAGE_SIZE;
-
-	while (start <= base) {
-		j = memblock_overlaps_region(&memblock.reserved, base, size);
-		if (j < 0)
-			return base;
-		res_base = memblock.reserved.regions[j].base;
-		if (res_base < size)
-			break;
-		base = round_down(res_base - size, align);
-	}
+	for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
+		this_start = clamp(this_start, start, end);
+		this_end = clamp(this_end, start, end);
 
+		cand = round_down(this_end - size, align);
+		if (cand >= this_start)
+			return cand;
+	}
 	return 0;
 }
 
-/*
- * Find a free area with specified alignment in a specific range.
+/**
+ * memblock_find_in_range - find free area in given range
+ * @start: start of candidate range
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @size: size of free area to find
+ * @align: alignment of free area to find
+ *
+ * Find @size free area aligned to @align in the specified range.
+ *
+ * RETURNS:
+ * Found address on success, %0 on failure.
  */
-phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, phys_addr_t end,
-					phys_addr_t size, phys_addr_t align)
+phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
+					phys_addr_t end, phys_addr_t size,
+					phys_addr_t align)
 {
-	long i;
-
-	BUG_ON(0 == size);
-
-	/* Pump up max_addr */
-	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
-		end = memblock.current_limit;
-
-	/* We do a top-down search, this tends to limit memory
-	 * fragmentation by keeping early boot allocs near the
-	 * top of memory
-	 */
-	for (i = memblock.memory.cnt - 1; i >= 0; i--) {
-		phys_addr_t memblockbase = memblock.memory.regions[i].base;
-		phys_addr_t memblocksize = memblock.memory.regions[i].size;
-		phys_addr_t bottom, top, found;
-
-		if (memblocksize < size)
-			continue;
-		if ((memblockbase + memblocksize) <= start)
-			break;
-		bottom = max(memblockbase, start);
-		top = min(memblockbase + memblocksize, end);
-		if (bottom >= top)
-			continue;
-		found = memblock_find_region(bottom, top, size, align);
-		if (found)
-			return found;
-	}
-	return 0;
+	return memblock_find_in_range_node(start, end, size, align,
+					   MAX_NUMNODES);
 }
 
 /*
@@ -607,6 +595,70 @@ void __init_memblock __next_free_mem_range(u64 *idx, int nid,
 	*idx = ULLONG_MAX;
 }
 
+/**
+ * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
+ * @idx: pointer to u64 loop variable
+ * @nid: nid: node selector, %MAX_NUMNODES for all nodes
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @p_nid: ptr to int for nid of the range, can be %NULL
+ *
+ * Reverse of __next_free_mem_range().
+ */
+void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid,
+					   phys_addr_t *out_start,
+					   phys_addr_t *out_end, int *out_nid)
+{
+	struct memblock_type *mem = &memblock.memory;
+	struct memblock_type *rsv = &memblock.reserved;
+	int mi = *idx & 0xffffffff;
+	int ri = *idx >> 32;
+
+	if (*idx == (u64)ULLONG_MAX) {
+		mi = mem->cnt - 1;
+		ri = rsv->cnt;
+	}
+
+	for ( ; mi >= 0; mi--) {
+		struct memblock_region *m = &mem->regions[mi];
+		phys_addr_t m_start = m->base;
+		phys_addr_t m_end = m->base + m->size;
+
+		/* only memory regions are associated with nodes, check it */
+		if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m))
+			continue;
+
+		/* scan areas before each reservation for intersection */
+		for ( ; ri >= 0; ri--) {
+			struct memblock_region *r = &rsv->regions[ri];
+			phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0;
+			phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX;
+
+			/* if ri advanced past mi, break out to advance mi */
+			if (r_end <= m_start)
+				break;
+			/* if the two regions intersect, we're done */
+			if (m_end > r_start) {
+				if (out_start)
+					*out_start = max(m_start, r_start);
+				if (out_end)
+					*out_end = min(m_end, r_end);
+				if (out_nid)
+					*out_nid = memblock_get_region_node(m);
+
+				if (m_start >= r_start)
+					mi--;
+				else
+					ri--;
+				*idx = (u32)mi | (u64)ri << 32;
+				return;
+			}
+		}
+	}
+
+	*idx = ULLONG_MAX;
+}
+
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 /*
  * Common iterator interface used to define for_each_mem_range().
@@ -670,22 +722,29 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
 }
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
-phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
+static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
+					phys_addr_t align, phys_addr_t max_addr,
+					int nid)
 {
 	phys_addr_t found;
 
-	/* We align the size to limit fragmentation. Without this, a lot of
-	 * small allocs quickly eat up the whole reserve array on sparc
-	 */
-	size = round_up(size, align);
-
-	found = memblock_find_in_range(0, max_addr, size, align);
+	found = memblock_find_in_range_node(0, max_addr, size, align, nid);
 	if (found && !memblock_reserve(found, size))
 		return found;
 
 	return 0;
 }
 
+phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
+{
+	return memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+}
+
+phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
+{
+	return memblock_alloc_base_nid(size, align, max_addr, MAX_NUMNODES);
+}
+
 phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
 {
 	phys_addr_t alloc;
@@ -704,84 +763,6 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
 	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
 }
 
-
-/*
- * Additional node-local top-down allocators.
- *
- * WARNING: Only available after early_node_map[] has been populated,
- * on some architectures, that is after all the calls to add_active_range()
- * have been done to populate it.
- */
-
-static phys_addr_t __init memblock_nid_range_rev(phys_addr_t start,
-						 phys_addr_t end, int *nid)
-{
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-	unsigned long start_pfn, end_pfn;
-	int i;
-
-	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, nid)
-		if (end > PFN_PHYS(start_pfn) && end <= PFN_PHYS(end_pfn))
-			return max(start, PFN_PHYS(start_pfn));
-#endif
-	*nid = 0;
-	return start;
-}
-
-phys_addr_t __init memblock_find_in_range_node(phys_addr_t start,
-					       phys_addr_t end,
-					       phys_addr_t size,
-					       phys_addr_t align, int nid)
-{
-	struct memblock_type *mem = &memblock.memory;
-	int i;
-
-	BUG_ON(0 == size);
-
-	/* Pump up max_addr */
-	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
-		end = memblock.current_limit;
-
-	for (i = mem->cnt - 1; i >= 0; i--) {
-		struct memblock_region *r = &mem->regions[i];
-		phys_addr_t base = max(start, r->base);
-		phys_addr_t top = min(end, r->base + r->size);
-
-		while (base < top) {
-			phys_addr_t tbase, ret;
-			int tnid;
-
-			tbase = memblock_nid_range_rev(base, top, &tnid);
-			if (nid == MAX_NUMNODES || tnid == nid) {
-				ret = memblock_find_region(tbase, top, size, align);
-				if (ret)
-					return ret;
-			}
-			top = tbase;
-		}
-	}
-
-	return 0;
-}
-
-phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
-{
-	phys_addr_t found;
-
-	/*
-	 * We align the size to limit fragmentation. Without this, a lot of
-	 * small allocs quickly eat up the whole reserve array on sparc
-	 */
-	size = round_up(size, align);
-
-	found = memblock_find_in_range_node(0, MEMBLOCK_ALLOC_ACCESSIBLE,
-					    size, align, nid);
-	if (found && !memblock_reserve(found, size))
-		return found;
-
-	return 0;
-}
-
 phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
 {
 	phys_addr_t res = memblock_alloc_nid(size, align, nid);
-- 
cgit v1.2.3


From 467de1fc67d1bd2954eaac7019c564f28fa2b6a5 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Tue, 6 Dec 2011 23:17:51 +0100
Subject: PM / Freezer: Remove the "userspace only" constraint from
 freezer[_do_not]_count()

At present, the functions freezer_count() and freezer_do_not_count()
impose the restriction that they are effective only for userspace processes.
However, now, these functions have found more utility than originally
intended by the commit which introduced it: ba96a0c8 (freezer:
fix vfork problem). And moreover, even the vfork issue actually does not
need the above restriction in these functions.

So, modify these functions to make them work even for kernel threads, so
that they can be used at other places in the kernel, where the userspace
restriction doesn't apply.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Suggested-by: Tejun Heo <tj@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/freezer.h | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 30f06c220467..7bcfe73d999b 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -67,33 +67,27 @@ static inline bool cgroup_freezing(struct task_struct *task)
  * appropriately in case the child has exited before the freezing of tasks is
  * complete.  However, we don't want kernel threads to be frozen in unexpected
  * places, so we allow them to block freeze_processes() instead or to set
- * PF_NOFREEZE if needed and PF_FREEZER_SKIP is only set for userland vfork
- * parents.  Fortunately, in the ____call_usermodehelper() case the parent won't
- * really block freeze_processes(), since ____call_usermodehelper() (the child)
- * does a little before exec/exit and it can't be frozen before waking up the
- * parent.
+ * PF_NOFREEZE if needed. Fortunately, in the ____call_usermodehelper() case the
+ * parent won't really block freeze_processes(), since ____call_usermodehelper()
+ * (the child) does a little before exec/exit and it can't be frozen before
+ * waking up the parent.
  */
 
-/*
- * If the current task is a user space one, tell the freezer not to count it as
- * freezable.
- */
+
+/* Tell the freezer not to count the current task as freezable. */
 static inline void freezer_do_not_count(void)
 {
-	if (current->mm)
-		current->flags |= PF_FREEZER_SKIP;
+	current->flags |= PF_FREEZER_SKIP;
 }
 
 /*
- * If the current task is a user space one, tell the freezer to count it as
- * freezable again and try to freeze it.
+ * Tell the freezer to count the current task as freezable again and try to
+ * freeze it.
  */
 static inline void freezer_count(void)
 {
-	if (current->mm) {
-		current->flags &= ~PF_FREEZER_SKIP;
-		try_to_freeze();
-	}
+	current->flags &= ~PF_FREEZER_SKIP;
+	try_to_freeze();
 }
 
 /*
-- 
cgit v1.2.3


From 33e638b9070ba5e8812836e20390da6a6af13900 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Tue, 6 Dec 2011 23:18:12 +0100
Subject: PM / Sleep: Use the freezer_count() functions in
 [un]lock_system_sleep() APIs

Now that freezer_count() and freezer_do_not_count() don't have the restriction
that they are effective only when called by userspace processes, use
them in lock_system_sleep() and unlock_system_sleep() instead of open-coding
their parts.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/suspend.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 1f7fff47cfac..906d62cfc15c 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -6,6 +6,7 @@
 #include <linux/init.h>
 #include <linux/pm.h>
 #include <linux/mm.h>
+#include <linux/freezer.h>
 #include <asm/errno.h>
 
 #ifdef CONFIG_VT
@@ -380,16 +381,14 @@ static inline void unlock_system_sleep(void) {}
 
 static inline void lock_system_sleep(void)
 {
-	/* simplified freezer_do_not_count() */
-	current->flags |= PF_FREEZER_SKIP;
+	freezer_do_not_count();
 	mutex_lock(&pm_mutex);
 }
 
 static inline void unlock_system_sleep(void)
 {
 	mutex_unlock(&pm_mutex);
-	/* simplified freezer_count() */
-	current->flags &= ~PF_FREEZER_SKIP;
+	freezer_count();
 }
 #endif
 
-- 
cgit v1.2.3


From 9b6fc5dc879bc90f765db0e95eefcf123d0d06dd Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Tue, 6 Dec 2011 23:24:38 +0100
Subject: PM / Sleep: Make [un]lock_system_sleep() generic

The [un]lock_system_sleep() APIs were originally introduced to mutually
exclude memory hotplug and hibernation.

Directly using mutex_lock(&pm_mutex) to achieve mutual exclusion with
suspend or hibernation code can lead to freezing failures. However, the
APIs [un]lock_system_sleep() can be safely used to achieve the same,
without causing freezing failures.

So, since it would be beneficial to modify all the existing users of
mutex_lock(&pm_mutex) (in all parts of the kernel), so that they use these
safe APIs intead, make these APIs generic by removing the restriction that
they work only when CONFIG_HIBERNATE_CALLBACKS is set. Moreover, that
restriction didn't buy us anything anyway.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/suspend.h | 36 ++++++++++++++++--------------------
 1 file changed, 16 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 906d62cfc15c..95040cc33107 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -332,6 +332,8 @@ static inline bool system_entering_hibernation(void) { return false; }
 #define PM_RESTORE_PREPARE	0x0005 /* Going to restore a saved image */
 #define PM_POST_RESTORE		0x0006 /* Restore failed */
 
+extern struct mutex pm_mutex;
+
 #ifdef CONFIG_PM_SLEEP
 void save_processor_state(void);
 void restore_processor_state(void);
@@ -352,6 +354,19 @@ extern bool events_check_enabled;
 extern bool pm_wakeup_pending(void);
 extern bool pm_get_wakeup_count(unsigned int *count);
 extern bool pm_save_wakeup_count(unsigned int count);
+
+static inline void lock_system_sleep(void)
+{
+	freezer_do_not_count();
+	mutex_lock(&pm_mutex);
+}
+
+static inline void unlock_system_sleep(void)
+{
+	mutex_unlock(&pm_mutex);
+	freezer_count();
+}
+
 #else /* !CONFIG_PM_SLEEP */
 
 static inline int register_pm_notifier(struct notifier_block *nb)
@@ -367,30 +382,11 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
 #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
 
 static inline bool pm_wakeup_pending(void) { return false; }
-#endif /* !CONFIG_PM_SLEEP */
-
-extern struct mutex pm_mutex;
 
-#ifndef CONFIG_HIBERNATE_CALLBACKS
 static inline void lock_system_sleep(void) {}
 static inline void unlock_system_sleep(void) {}
 
-#else
-
-/* Let some subsystems like memory hotadd exclude hibernation */
-
-static inline void lock_system_sleep(void)
-{
-	freezer_do_not_count();
-	mutex_lock(&pm_mutex);
-}
-
-static inline void unlock_system_sleep(void)
-{
-	mutex_unlock(&pm_mutex);
-	freezer_count();
-}
-#endif
+#endif /* !CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_ARCH_SAVE_PAGE_KEYS
 /*
-- 
cgit v1.2.3


From 7da82c06ded105bf601bfa0eafc92e84eb0ceeed Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 8 Dec 2011 04:11:15 +0000
Subject: vlan: rename vlan_dev_info to vlan_dev_priv

As this structure is priv, name it approprietely. Also for pointer to it
use name "vlan".

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h  |  2 +-
 net/8021q/vlan.c         | 24 ++++++++--------
 net/8021q/vlan.h         |  8 +++---
 net/8021q/vlan_core.c    |  8 +++---
 net/8021q/vlan_dev.c     | 72 ++++++++++++++++++++++++------------------------
 net/8021q/vlan_gvrp.c    |  4 +--
 net/8021q/vlan_netlink.c | 10 +++----
 net/8021q/vlanproc.c     | 42 ++++++++++++++--------------
 8 files changed, 85 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 070ac50c1d2d..31d7c976f063 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -386,7 +386,7 @@ struct vlan_ioctl_args {
 		unsigned int skb_priority;
 		unsigned int name_type;
 		unsigned int bind_type;
-		unsigned int flag; /* Matches vlan_dev_info flags */
+		unsigned int flag; /* Matches vlan_dev_priv flags */
         } u;
 
 	short vlan_qos;   
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 5471628d3ffe..e075625efeeb 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -99,7 +99,7 @@ static void vlan_rcu_free(struct rcu_head *rcu)
 
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	struct vlan_group *grp;
@@ -167,7 +167,7 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
 
 int register_vlan_dev(struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	u16 vlan_id = vlan->vlan_id;
@@ -192,7 +192,7 @@ int register_vlan_dev(struct net_device *dev)
 	if (err < 0)
 		goto out_uninit_applicant;
 
-	/* Account for reference in struct vlan_dev_info */
+	/* Account for reference in struct vlan_dev_priv */
 	dev_hold(real_dev);
 
 	netif_stacked_transfer_operstate(real_dev, dev);
@@ -267,7 +267,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 		snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
 	}
 
-	new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup);
+	new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name, vlan_setup);
 
 	if (new_dev == NULL)
 		return -ENOBUFS;
@@ -278,10 +278,10 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	 */
 	new_dev->mtu = real_dev->mtu;
 
-	vlan_dev_info(new_dev)->vlan_id = vlan_id;
-	vlan_dev_info(new_dev)->real_dev = real_dev;
-	vlan_dev_info(new_dev)->dent = NULL;
-	vlan_dev_info(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
+	vlan_dev_priv(new_dev)->vlan_id = vlan_id;
+	vlan_dev_priv(new_dev)->real_dev = real_dev;
+	vlan_dev_priv(new_dev)->dent = NULL;
+	vlan_dev_priv(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
 
 	new_dev->rtnl_link_ops = &vlan_link_ops;
 	err = register_vlan_dev(new_dev);
@@ -298,7 +298,7 @@ out_free_newdev:
 static void vlan_sync_address(struct net_device *dev,
 			      struct net_device *vlandev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(vlandev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
 
 	/* May be called without an actual change */
 	if (!compare_ether_addr(vlan->real_dev_addr, dev->dev_addr))
@@ -362,7 +362,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	struct vlan_group *grp;
 	int i, flgs;
 	struct net_device *vlandev;
-	struct vlan_dev_info *vlan;
+	struct vlan_dev_priv *vlan;
 	LIST_HEAD(list);
 
 	if (is_vlan_dev(dev))
@@ -447,7 +447,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			if (!(flgs & IFF_UP))
 				continue;
 
-			vlan = vlan_dev_info(vlandev);
+			vlan = vlan_dev_priv(vlandev);
 			if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
 				dev_change_flags(vlandev, flgs & ~IFF_UP);
 			netif_stacked_transfer_operstate(dev, vlandev);
@@ -465,7 +465,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			if (flgs & IFF_UP)
 				continue;
 
-			vlan = vlan_dev_info(vlandev);
+			vlan = vlan_dev_priv(vlandev);
 			if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
 				dev_change_flags(vlandev, flgs | IFF_UP);
 			netif_stacked_transfer_operstate(dev, vlandev);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 9fd45f3571f9..d3c4ea4a3836 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -41,7 +41,7 @@ struct vlan_pcpu_stats {
 };
 
 /**
- *	struct vlan_dev_info - VLAN private device data
+ *	struct vlan_dev_priv - VLAN private device data
  *	@nr_ingress_mappings: number of ingress priority mappings
  *	@ingress_priority_map: ingress priority mappings
  *	@nr_egress_mappings: number of egress priority mappings
@@ -53,7 +53,7 @@ struct vlan_pcpu_stats {
  *	@dent: proc dir entry
  *	@vlan_pcpu_stats: ptr to percpu rx stats
  */
-struct vlan_dev_info {
+struct vlan_dev_priv {
 	unsigned int				nr_ingress_mappings;
 	u32					ingress_priority_map[8];
 	unsigned int				nr_egress_mappings;
@@ -69,7 +69,7 @@ struct vlan_dev_info {
 	struct vlan_pcpu_stats __percpu		*vlan_pcpu_stats;
 };
 
-static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
+static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
 {
 	return netdev_priv(dev);
 }
@@ -121,7 +121,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
 static inline u32 vlan_get_ingress_priority(struct net_device *dev,
 					    u16 vlan_tci)
 {
-	struct vlan_dev_info *vip = vlan_dev_info(dev);
+	struct vlan_dev_priv *vip = vlan_dev_priv(dev);
 
 	return vip->ingress_priority_map[(vlan_tci >> VLAN_PRIO_SHIFT) & 0x7];
 }
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 9c95e8e054f9..85241f044294 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -36,7 +36,7 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
 			skb->pkt_type = PACKET_HOST;
 	}
 
-	if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+	if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
 		unsigned int offset = skb->data - skb_mac_header(skb);
 
 		/*
@@ -55,7 +55,7 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
 	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
 	skb->vlan_tci = 0;
 
-	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats);
+	rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats);
 
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->rx_packets++;
@@ -90,13 +90,13 @@ EXPORT_SYMBOL(__vlan_find_dev_deep);
 
 struct net_device *vlan_dev_real_dev(const struct net_device *dev)
 {
-	return vlan_dev_info(dev)->real_dev;
+	return vlan_dev_priv(dev)->real_dev;
 }
 EXPORT_SYMBOL(vlan_dev_real_dev);
 
 u16 vlan_dev_vlan_id(const struct net_device *dev)
 {
-	return vlan_dev_info(dev)->vlan_id;
+	return vlan_dev_priv(dev)->vlan_id;
 }
 EXPORT_SYMBOL(vlan_dev_vlan_id);
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 2b5fcde1f629..3b4db82b016f 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -72,7 +72,7 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
 {
 	struct vlan_priority_tci_mapping *mp;
 
-	mp = vlan_dev_info(dev)->egress_priority_map[(skb->priority & 0xF)];
+	mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)];
 	while (mp) {
 		if (mp->priority == skb->priority) {
 			return mp->vlan_qos; /* This should already be shifted
@@ -103,10 +103,10 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	u16 vlan_tci = 0;
 	int rc;
 
-	if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+	if (!(vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
 		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
 
-		vlan_tci = vlan_dev_info(dev)->vlan_id;
+		vlan_tci = vlan_dev_priv(dev)->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
 		vhdr->h_vlan_TCI = htons(vlan_tci);
 
@@ -129,7 +129,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		saddr = dev->dev_addr;
 
 	/* Now make the underlying real hard header */
-	dev = vlan_dev_info(dev)->real_dev;
+	dev = vlan_dev_priv(dev)->real_dev;
 	rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen);
 	if (rc > 0)
 		rc += vhdrlen;
@@ -149,27 +149,27 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
 	 */
 	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
-	    vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
+	    vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR) {
 		u16 vlan_tci;
-		vlan_tci = vlan_dev_info(dev)->vlan_id;
+		vlan_tci = vlan_dev_priv(dev)->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
 		skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
 	}
 
-	skb_set_dev(skb, vlan_dev_info(dev)->real_dev);
+	skb_set_dev(skb, vlan_dev_priv(dev)->real_dev);
 	len = skb->len;
 	ret = dev_queue_xmit(skb);
 
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
 		struct vlan_pcpu_stats *stats;
 
-		stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats);
+		stats = this_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats);
 		u64_stats_update_begin(&stats->syncp);
 		stats->tx_packets++;
 		stats->tx_bytes += len;
 		u64_stats_update_end(&stats->syncp);
 	} else {
-		this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped);
+		this_cpu_inc(vlan_dev_priv(dev)->vlan_pcpu_stats->tx_dropped);
 	}
 
 	return ret;
@@ -180,7 +180,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 	/* TODO: gotta make sure the underlying layer can handle it,
 	 * maybe an IFF_VLAN_CAPABLE flag for devices?
 	 */
-	if (vlan_dev_info(dev)->real_dev->mtu < new_mtu)
+	if (vlan_dev_priv(dev)->real_dev->mtu < new_mtu)
 		return -ERANGE;
 
 	dev->mtu = new_mtu;
@@ -191,7 +191,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, u16 vlan_prio)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 
 	if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio)
 		vlan->nr_ingress_mappings--;
@@ -204,7 +204,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
 int vlan_dev_set_egress_priority(const struct net_device *dev,
 				 u32 skb_prio, u16 vlan_prio)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct vlan_priority_tci_mapping *mp = NULL;
 	struct vlan_priority_tci_mapping *np;
 	u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
@@ -241,7 +241,7 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 /* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */
 int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	u32 old_flags = vlan->flags;
 
 	if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
@@ -261,12 +261,12 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
 {
-	strncpy(result, vlan_dev_info(dev)->real_dev->name, 23);
+	strncpy(result, vlan_dev_priv(dev)->real_dev->name, 23);
 }
 
 static int vlan_dev_open(struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	int err;
 
@@ -313,7 +313,7 @@ out:
 
 static int vlan_dev_stop(struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
 
 	dev_mc_unsync(real_dev, dev);
@@ -332,7 +332,7 @@ static int vlan_dev_stop(struct net_device *dev)
 
 static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	struct sockaddr *addr = p;
 	int err;
 
@@ -358,7 +358,7 @@ out:
 
 static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	struct ifreq ifrr;
 	int err = -EOPNOTSUPP;
@@ -383,7 +383,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int err = 0;
 
@@ -397,7 +397,7 @@ static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa)
 static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid,
 				   struct scatterlist *sgl, unsigned int sgc)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = 0;
 
@@ -409,7 +409,7 @@ static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid,
 
 static int vlan_dev_fcoe_ddp_done(struct net_device *dev, u16 xid)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int len = 0;
 
@@ -421,7 +421,7 @@ static int vlan_dev_fcoe_ddp_done(struct net_device *dev, u16 xid)
 
 static int vlan_dev_fcoe_enable(struct net_device *dev)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = -EINVAL;
 
@@ -432,7 +432,7 @@ static int vlan_dev_fcoe_enable(struct net_device *dev)
 
 static int vlan_dev_fcoe_disable(struct net_device *dev)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = -EINVAL;
 
@@ -443,7 +443,7 @@ static int vlan_dev_fcoe_disable(struct net_device *dev)
 
 static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = -EINVAL;
 
@@ -455,7 +455,7 @@ static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
 static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
 				    struct scatterlist *sgl, unsigned int sgc)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = 0;
 
@@ -468,7 +468,7 @@ static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
 
 static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 
 	if (dev->flags & IFF_UP) {
 		if (change & IFF_ALLMULTI)
@@ -480,8 +480,8 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 
 static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
 {
-	dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
-	dev_uc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
+	dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
+	dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
 }
 
 /*
@@ -519,7 +519,7 @@ static const struct net_device_ops vlan_netdev_ops;
 
 static int vlan_dev_init(struct net_device *dev)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	int subclass = 0;
 
 	netif_carrier_off(dev);
@@ -568,8 +568,8 @@ static int vlan_dev_init(struct net_device *dev)
 
 	vlan_dev_set_lockdep_class(dev, subclass);
 
-	vlan_dev_info(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats);
-	if (!vlan_dev_info(dev)->vlan_pcpu_stats)
+	vlan_dev_priv(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats);
+	if (!vlan_dev_priv(dev)->vlan_pcpu_stats)
 		return -ENOMEM;
 
 	return 0;
@@ -578,7 +578,7 @@ static int vlan_dev_init(struct net_device *dev)
 static void vlan_dev_uninit(struct net_device *dev)
 {
 	struct vlan_priority_tci_mapping *pm;
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	int i;
 
 	free_percpu(vlan->vlan_pcpu_stats);
@@ -594,7 +594,7 @@ static void vlan_dev_uninit(struct net_device *dev)
 static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
 	netdev_features_t features)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	u32 old_features = features;
 
 	features &= real_dev->vlan_features;
@@ -610,7 +610,7 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
 static int vlan_ethtool_get_settings(struct net_device *dev,
 				     struct ethtool_cmd *cmd)
 {
-	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 
 	return __ethtool_get_settings(vlan->real_dev, cmd);
 }
@@ -626,7 +626,7 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev,
 static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
 
-	if (vlan_dev_info(dev)->vlan_pcpu_stats) {
+	if (vlan_dev_priv(dev)->vlan_pcpu_stats) {
 		struct vlan_pcpu_stats *p;
 		u32 rx_errors = 0, tx_dropped = 0;
 		int i;
@@ -635,7 +635,7 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st
 			u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
 			unsigned int start;
 
-			p = per_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats, i);
+			p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
 			do {
 				start = u64_stats_fetch_begin_bh(&p->syncp);
 				rxpackets	= p->rx_packets;
diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c
index 061ceceeef12..6f9755352760 100644
--- a/net/8021q/vlan_gvrp.c
+++ b/net/8021q/vlan_gvrp.c
@@ -29,7 +29,7 @@ static struct garp_application vlan_gvrp_app __read_mostly = {
 
 int vlan_gvrp_request_join(const struct net_device *dev)
 {
-	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
 	return garp_request_join(vlan->real_dev, &vlan_gvrp_app,
@@ -38,7 +38,7 @@ int vlan_gvrp_request_join(const struct net_device *dev)
 
 void vlan_gvrp_request_leave(const struct net_device *dev)
 {
-	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
 	garp_request_leave(vlan->real_dev, &vlan_gvrp_app,
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 235c2197dbb6..50711368ad6a 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -105,7 +105,7 @@ static int vlan_changelink(struct net_device *dev,
 static int vlan_newlink(struct net *src_net, struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[])
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev;
 	int err;
 
@@ -149,7 +149,7 @@ static inline size_t vlan_qos_map_size(unsigned int n)
 
 static size_t vlan_get_size(const struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 
 	return nla_total_size(2) +	/* IFLA_VLAN_ID */
 	       sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
@@ -159,14 +159,14 @@ static size_t vlan_get_size(const struct net_device *dev)
 
 static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct vlan_priority_tci_mapping *pm;
 	struct ifla_vlan_flags f;
 	struct ifla_vlan_qos_mapping m;
 	struct nlattr *nest;
 	unsigned int i;
 
-	NLA_PUT_U16(skb, IFLA_VLAN_ID, vlan_dev_info(dev)->vlan_id);
+	NLA_PUT_U16(skb, IFLA_VLAN_ID, vlan_dev_priv(dev)->vlan_id);
 	if (vlan->flags) {
 		f.flags = vlan->flags;
 		f.mask  = ~0;
@@ -218,7 +218,7 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = {
 	.kind		= "vlan",
 	.maxtype	= IFLA_VLAN_MAX,
 	.policy		= vlan_policy,
-	.priv_size	= sizeof(struct vlan_dev_info),
+	.priv_size	= sizeof(struct vlan_dev_priv),
 	.setup		= vlan_setup,
 	.validate	= vlan_validate,
 	.newlink	= vlan_newlink,
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index d34b6daf8930..c718fd3664b6 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -168,13 +168,13 @@ err:
 
 int vlan_proc_add_dev(struct net_device *vlandev)
 {
-	struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
 	struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id);
 
-	dev_info->dent =
+	vlan->dent =
 		proc_create_data(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR,
 				 vn->proc_vlan_dir, &vlandev_fops, vlandev);
-	if (!dev_info->dent)
+	if (!vlan->dent)
 		return -ENOBUFS;
 	return 0;
 }
@@ -187,10 +187,10 @@ int vlan_proc_rem_dev(struct net_device *vlandev)
 	struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id);
 
 	/** NOTE:  This will consume the memory pointed to by dent, it seems. */
-	if (vlan_dev_info(vlandev)->dent) {
-		remove_proc_entry(vlan_dev_info(vlandev)->dent->name,
+	if (vlan_dev_priv(vlandev)->dent) {
+		remove_proc_entry(vlan_dev_priv(vlandev)->dent->name,
 				  vn->proc_vlan_dir);
-		vlan_dev_info(vlandev)->dent = NULL;
+		vlan_dev_priv(vlandev)->dent = NULL;
 	}
 	return 0;
 }
@@ -268,10 +268,10 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
 			   nmtype ? nmtype :  "UNKNOWN");
 	} else {
 		const struct net_device *vlandev = v;
-		const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
+		const struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
 
 		seq_printf(seq, "%-15s| %d  | %s\n",  vlandev->name,
-			   dev_info->vlan_id,    dev_info->real_dev->name);
+			   vlan->vlan_id,    vlan->real_dev->name);
 	}
 	return 0;
 }
@@ -279,7 +279,7 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
 static int vlandev_seq_show(struct seq_file *seq, void *offset)
 {
 	struct net_device *vlandev = (struct net_device *) seq->private;
-	const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
+	const struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
 	struct rtnl_link_stats64 temp;
 	const struct rtnl_link_stats64 *stats;
 	static const char fmt64[] = "%30s %12llu\n";
@@ -291,8 +291,8 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	stats = dev_get_stats(vlandev, &temp);
 	seq_printf(seq,
 		   "%s  VID: %d	 REORDER_HDR: %i  dev->priv_flags: %hx\n",
-		   vlandev->name, dev_info->vlan_id,
-		   (int)(dev_info->flags & 1), vlandev->priv_flags);
+		   vlandev->name, vlan->vlan_id,
+		   (int)(vlan->flags & 1), vlandev->priv_flags);
 
 	seq_printf(seq, fmt64, "total frames received", stats->rx_packets);
 	seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes);
@@ -300,23 +300,23 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	seq_puts(seq, "\n");
 	seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets);
 	seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes);
-	seq_printf(seq, "Device: %s", dev_info->real_dev->name);
+	seq_printf(seq, "Device: %s", vlan->real_dev->name);
 	/* now show all PRIORITY mappings relating to this VLAN */
 	seq_printf(seq, "\nINGRESS priority mappings: "
 			"0:%u  1:%u  2:%u  3:%u  4:%u  5:%u  6:%u 7:%u\n",
-		   dev_info->ingress_priority_map[0],
-		   dev_info->ingress_priority_map[1],
-		   dev_info->ingress_priority_map[2],
-		   dev_info->ingress_priority_map[3],
-		   dev_info->ingress_priority_map[4],
-		   dev_info->ingress_priority_map[5],
-		   dev_info->ingress_priority_map[6],
-		   dev_info->ingress_priority_map[7]);
+		   vlan->ingress_priority_map[0],
+		   vlan->ingress_priority_map[1],
+		   vlan->ingress_priority_map[2],
+		   vlan->ingress_priority_map[3],
+		   vlan->ingress_priority_map[4],
+		   vlan->ingress_priority_map[5],
+		   vlan->ingress_priority_map[6],
+		   vlan->ingress_priority_map[7]);
 
 	seq_printf(seq, " EGRESS priority mappings: ");
 	for (i = 0; i < 16; i++) {
 		const struct vlan_priority_tci_mapping *mp
-			= dev_info->egress_priority_map[i];
+			= vlan->egress_priority_map[i];
 		while (mp) {
 			seq_printf(seq, "%u:%hu ",
 				   mp->priority, ((mp->vlan_qos >> 13) & 0x7));
-- 
cgit v1.2.3


From 8e586137e6b63af1e881b328466ab5ffbe562510 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 8 Dec 2011 19:52:37 -0500
Subject: net: make vlan ndo_vlan_rx_[add/kill]_vid return error value

Let caller know the result of adding/removing vlan id to/from vlan
filter.

In some drivers I make those functions to just return 0. But in those
where there is able to see if hw setup went correctly, return value is
set appropriately.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c                   | 10 ++++--
 drivers/net/ethernet/adaptec/starfire.c           |  8 +++--
 drivers/net/ethernet/brocade/bna/bnad.c           | 12 ++++---
 drivers/net/ethernet/cisco/enic/enic_dev.c        | 14 ++++++---
 drivers/net/ethernet/cisco/enic/enic_dev.h        |  4 +--
 drivers/net/ethernet/emulex/benet/be_main.c       | 12 ++++---
 drivers/net/ethernet/ibm/ehea/ehea_main.c         | 21 ++++++++++---
 drivers/net/ethernet/intel/e1000/e1000_main.c     | 14 ++++++---
 drivers/net/ethernet/intel/e1000e/netdev.c        | 12 ++++---
 drivers/net/ethernet/intel/igb/igb_main.c         | 12 ++++---
 drivers/net/ethernet/intel/igbvf/netdev.c         | 20 +++++++-----
 drivers/net/ethernet/intel/ixgb/ixgb_main.c       | 12 ++++---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     |  8 +++--
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |  8 +++--
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c    |  7 +++--
 drivers/net/ethernet/neterion/vxge/vxge-main.c    |  6 ++--
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c  | 10 +++---
 drivers/net/ethernet/qlogic/qlge/qlge_main.c      | 38 ++++++++++++++---------
 drivers/net/ethernet/tehuti/tehuti.c              |  6 ++--
 drivers/net/ethernet/via/via-rhine.c              | 10 +++---
 drivers/net/ethernet/via/via-velocity.c           |  6 ++--
 drivers/net/macvlan.c                             | 10 +++---
 drivers/net/team/team.c                           |  8 +++--
 drivers/net/virtio_net.c                          |  6 ++--
 drivers/net/vmxnet3/vmxnet3_drv.c                 |  8 +++--
 drivers/s390/net/qeth_l2_main.c                   | 18 ++++++-----
 drivers/s390/net/qeth_l3_main.c                   |  9 +++---
 include/linux/netdevice.h                         |  8 ++---
 28 files changed, 210 insertions(+), 107 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3216c514fdc8..d72c37f03e50 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -428,7 +428,7 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
  * @bond_dev: bonding net device that got called
  * @vid: vlan id being added
  */
-static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
+static int bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct slave *slave;
@@ -448,7 +448,10 @@ static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
 	if (res) {
 		pr_err("%s: Error: Failed to add vlan id %d\n",
 		       bond_dev->name, vid);
+		return res;
 	}
+
+	return 0;
 }
 
 /**
@@ -456,7 +459,7 @@ static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
  * @bond_dev: bonding net device that got called
  * @vid: vlan id being removed
  */
-static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
+static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct slave *slave;
@@ -476,7 +479,10 @@ static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
 	if (res) {
 		pr_err("%s: Error: Failed to remove vlan id %d\n",
 		       bond_dev->name, vid);
+		return res;
 	}
+
+	return 0;
 }
 
 static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev)
diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index a446e251908b..cb4f38a17f20 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c
@@ -607,7 +607,7 @@ static const struct ethtool_ops ethtool_ops;
 
 
 #ifdef VLAN_SUPPORT
-static void netdev_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+static int netdev_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct netdev_private *np = netdev_priv(dev);
 
@@ -617,9 +617,11 @@ static void netdev_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 	set_bit(vid, np->active_vlans);
 	set_rx_mode(dev);
 	spin_unlock(&np->lock);
+
+	return 0;
 }
 
-static void netdev_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+static int netdev_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct netdev_private *np = netdev_priv(dev);
 
@@ -629,6 +631,8 @@ static void netdev_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 	clear_bit(vid, np->active_vlans);
 	set_rx_mode(dev);
 	spin_unlock(&np->lock);
+
+	return 0;
 }
 #endif /* VLAN_SUPPORT */
 
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index 7f3091e7eb42..aac3a3b710a0 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -2968,7 +2968,7 @@ bnad_change_mtu(struct net_device *netdev, int new_mtu)
 	return err;
 }
 
-static void
+static int
 bnad_vlan_rx_add_vid(struct net_device *netdev,
 				 unsigned short vid)
 {
@@ -2976,7 +2976,7 @@ bnad_vlan_rx_add_vid(struct net_device *netdev,
 	unsigned long flags;
 
 	if (!bnad->rx_info[0].rx)
-		return;
+		return 0;
 
 	mutex_lock(&bnad->conf_mutex);
 
@@ -2986,9 +2986,11 @@ bnad_vlan_rx_add_vid(struct net_device *netdev,
 	spin_unlock_irqrestore(&bnad->bna_lock, flags);
 
 	mutex_unlock(&bnad->conf_mutex);
+
+	return 0;
 }
 
-static void
+static int
 bnad_vlan_rx_kill_vid(struct net_device *netdev,
 				  unsigned short vid)
 {
@@ -2996,7 +2998,7 @@ bnad_vlan_rx_kill_vid(struct net_device *netdev,
 	unsigned long flags;
 
 	if (!bnad->rx_info[0].rx)
-		return;
+		return 0;
 
 	mutex_lock(&bnad->conf_mutex);
 
@@ -3006,6 +3008,8 @@ bnad_vlan_rx_kill_vid(struct net_device *netdev,
 	spin_unlock_irqrestore(&bnad->bna_lock, flags);
 
 	mutex_unlock(&bnad->conf_mutex);
+
+	return 0;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/cisco/enic/enic_dev.c b/drivers/net/ethernet/cisco/enic/enic_dev.c
index fd6247b3c0ee..bf0fc56dba19 100644
--- a/drivers/net/ethernet/cisco/enic/enic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/enic_dev.c
@@ -212,23 +212,29 @@ int enic_dev_deinit_done(struct enic *enic, int *status)
 }
 
 /* rtnl lock is held */
-void enic_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+int enic_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct enic *enic = netdev_priv(netdev);
+	int err;
 
 	spin_lock(&enic->devcmd_lock);
-	enic_add_vlan(enic, vid);
+	err = enic_add_vlan(enic, vid);
 	spin_unlock(&enic->devcmd_lock);
+
+	return err;
 }
 
 /* rtnl lock is held */
-void enic_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+int enic_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct enic *enic = netdev_priv(netdev);
+	int err;
 
 	spin_lock(&enic->devcmd_lock);
-	enic_del_vlan(enic, vid);
+	err = enic_del_vlan(enic, vid);
 	spin_unlock(&enic->devcmd_lock);
+
+	return err;
 }
 
 int enic_dev_enable2(struct enic *enic, int active)
diff --git a/drivers/net/ethernet/cisco/enic/enic_dev.h b/drivers/net/ethernet/cisco/enic/enic_dev.h
index 1f83a4747ba0..da1cba3c410e 100644
--- a/drivers/net/ethernet/cisco/enic/enic_dev.h
+++ b/drivers/net/ethernet/cisco/enic/enic_dev.h
@@ -46,8 +46,8 @@ int enic_dev_packet_filter(struct enic *enic, int directed, int multicast,
 	int broadcast, int promisc, int allmulti);
 int enic_dev_add_addr(struct enic *enic, u8 *addr);
 int enic_dev_del_addr(struct enic *enic, u8 *addr);
-void enic_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
-void enic_vlan_rx_kill_vid(struct net_device *netdev, u16 vid);
+int enic_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
+int enic_vlan_rx_kill_vid(struct net_device *netdev, u16 vid);
 int enic_dev_notify_unset(struct enic *enic);
 int enic_dev_hang_notify(struct enic *enic);
 int enic_dev_set_ig_vlan_rewrite_mode(struct enic *enic);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 3854fb0610ba..b8a526f9efc8 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -780,31 +780,35 @@ static int be_vid_config(struct be_adapter *adapter, bool vf, u32 vf_num)
 	return status;
 }
 
-static void be_vlan_add_vid(struct net_device *netdev, u16 vid)
+static int be_vlan_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 
 	adapter->vlans_added++;
 	if (!be_physfn(adapter))
-		return;
+		return 0;
 
 	adapter->vlan_tag[vid] = 1;
 	if (adapter->vlans_added <= (adapter->max_vlans + 1))
 		be_vid_config(adapter, false, 0);
+
+	return 0;
 }
 
-static void be_vlan_rem_vid(struct net_device *netdev, u16 vid)
+static int be_vlan_rem_vid(struct net_device *netdev, u16 vid)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 
 	adapter->vlans_added--;
 
 	if (!be_physfn(adapter))
-		return;
+		return 0;
 
 	adapter->vlan_tag[vid] = 0;
 	if (adapter->vlans_added <= adapter->max_vlans)
 		be_vid_config(adapter, false, 0);
+
+	return 0;
 }
 
 static void be_set_rx_mode(struct net_device *netdev)
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index bfeccbfde236..3554414eb5e2 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -2114,17 +2114,19 @@ static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-static void ehea_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+static int ehea_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct ehea_port *port = netdev_priv(dev);
 	struct ehea_adapter *adapter = port->adapter;
 	struct hcp_ehea_port_cb1 *cb1;
 	int index;
 	u64 hret;
+	int err = 0;
 
 	cb1 = (void *)get_zeroed_page(GFP_KERNEL);
 	if (!cb1) {
 		pr_err("no mem for cb1\n");
+		err = -ENOMEM;
 		goto out;
 	}
 
@@ -2132,6 +2134,7 @@ static void ehea_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 				      H_PORT_CB1, H_PORT_CB1_ALL, cb1);
 	if (hret != H_SUCCESS) {
 		pr_err("query_ehea_port failed\n");
+		err = -EINVAL;
 		goto out;
 	}
 
@@ -2140,24 +2143,28 @@ static void ehea_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 
 	hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id,
 				       H_PORT_CB1, H_PORT_CB1_ALL, cb1);
-	if (hret != H_SUCCESS)
+	if (hret != H_SUCCESS) {
 		pr_err("modify_ehea_port failed\n");
+		err = -EINVAL;
+	}
 out:
 	free_page((unsigned long)cb1);
-	return;
+	return err;
 }
 
-static void ehea_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+static int ehea_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct ehea_port *port = netdev_priv(dev);
 	struct ehea_adapter *adapter = port->adapter;
 	struct hcp_ehea_port_cb1 *cb1;
 	int index;
 	u64 hret;
+	int err = 0;
 
 	cb1 = (void *)get_zeroed_page(GFP_KERNEL);
 	if (!cb1) {
 		pr_err("no mem for cb1\n");
+		err = -ENOMEM;
 		goto out;
 	}
 
@@ -2165,6 +2172,7 @@ static void ehea_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 				      H_PORT_CB1, H_PORT_CB1_ALL, cb1);
 	if (hret != H_SUCCESS) {
 		pr_err("query_ehea_port failed\n");
+		err = -EINVAL;
 		goto out;
 	}
 
@@ -2173,10 +2181,13 @@ static void ehea_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 
 	hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id,
 				       H_PORT_CB1, H_PORT_CB1_ALL, cb1);
-	if (hret != H_SUCCESS)
+	if (hret != H_SUCCESS) {
 		pr_err("modify_ehea_port failed\n");
+		err = -EINVAL;
+	}
 out:
 	free_page((unsigned long)cb1);
+	return err;
 }
 
 int ehea_activate_qp(struct ehea_adapter *adapter, struct ehea_qp *qp)
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 82f4ef142259..053f01289eff 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -169,8 +169,8 @@ static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter,
 static bool e1000_vlan_used(struct e1000_adapter *adapter);
 static void e1000_vlan_mode(struct net_device *netdev,
 			    netdev_features_t features);
-static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
-static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid);
+static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
+static int e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid);
 static void e1000_restore_vlan(struct e1000_adapter *adapter);
 
 #ifdef CONFIG_PM
@@ -4604,7 +4604,7 @@ static void e1000_vlan_mode(struct net_device *netdev,
 		e1000_irq_enable(adapter);
 }
 
-static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -4613,7 +4613,7 @@ static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	if ((hw->mng_cookie.status &
 	     E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) &&
 	    (vid == adapter->mng_vlan_id))
-		return;
+		return 0;
 
 	if (!e1000_vlan_used(adapter))
 		e1000_vlan_filter_on_off(adapter, true);
@@ -4625,9 +4625,11 @@ static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	e1000_write_vfta(hw, index, vfta);
 
 	set_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
-static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+static int e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -4648,6 +4650,8 @@ static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 
 	if (!e1000_vlan_used(adapter))
 		e1000_vlan_filter_on_off(adapter, false);
+
+	return 0;
 }
 
 static void e1000_restore_vlan(struct e1000_adapter *adapter)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 93ae0c26d434..90953b4d6bfa 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -2522,7 +2522,7 @@ clean_rx:
 	return work_done;
 }
 
-static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -2532,7 +2532,7 @@ static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	if ((adapter->hw.mng_cookie.status &
 	     E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
 	    (vid == adapter->mng_vlan_id))
-		return;
+		return 0;
 
 	/* add VID to filter table */
 	if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) {
@@ -2543,9 +2543,11 @@ static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	}
 
 	set_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
-static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+static int e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -2556,7 +2558,7 @@ static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	    (vid == adapter->mng_vlan_id)) {
 		/* release control to f/w */
 		e1000e_release_hw_control(adapter);
-		return;
+		return 0;
 	}
 
 	/* remove VID from filter table */
@@ -2568,6 +2570,8 @@ static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	}
 
 	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 143cfebe3182..89d576ce5776 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -148,8 +148,8 @@ static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
 static void igb_tx_timeout(struct net_device *);
 static void igb_reset_task(struct work_struct *);
 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
-static void igb_vlan_rx_add_vid(struct net_device *, u16);
-static void igb_vlan_rx_kill_vid(struct net_device *, u16);
+static int igb_vlan_rx_add_vid(struct net_device *, u16);
+static int igb_vlan_rx_kill_vid(struct net_device *, u16);
 static void igb_restore_vlan(struct igb_adapter *);
 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
 static void igb_ping_all_vfs(struct igb_adapter *);
@@ -6491,7 +6491,7 @@ static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
 	igb_rlpml_set(adapter);
 }
 
-static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -6504,9 +6504,11 @@ static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	igb_vfta_set(hw, vid, true);
 
 	set_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
-static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -6521,6 +6523,8 @@ static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 		igb_vfta_set(hw, vid, false);
 
 	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
 static void igb_restore_vlan(struct igb_adapter *adapter)
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index c358973ce414..fd3da3076c2f 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -1176,18 +1176,20 @@ static void igbvf_set_rlpml(struct igbvf_adapter *adapter)
 	e1000_rlpml_set_vf(hw, max_frame_size);
 }
 
-static void igbvf_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+static int igbvf_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct igbvf_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
 
-	if (hw->mac.ops.set_vfta(hw, vid, true))
+	if (hw->mac.ops.set_vfta(hw, vid, true)) {
 		dev_err(&adapter->pdev->dev, "Failed to add vlan id %d\n", vid);
-	else
-		set_bit(vid, adapter->active_vlans);
+		return -EINVAL;
+	}
+	set_bit(vid, adapter->active_vlans);
+	return 0;
 }
 
-static void igbvf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+static int igbvf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct igbvf_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -1197,11 +1199,13 @@ static void igbvf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	if (!test_bit(__IGBVF_DOWN, &adapter->state))
 		igbvf_irq_enable(adapter);
 
-	if (hw->mac.ops.set_vfta(hw, vid, false))
+	if (hw->mac.ops.set_vfta(hw, vid, false)) {
 		dev_err(&adapter->pdev->dev,
 		        "Failed to remove vlan id %d\n", vid);
-	else
-		clear_bit(vid, adapter->active_vlans);
+		return -EINVAL;
+	}
+	clear_bit(vid, adapter->active_vlans);
+	return 0;
 }
 
 static void igbvf_restore_vlan(struct igbvf_adapter *adapter)
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index 247cf9219e03..c573655f3307 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -101,8 +101,8 @@ static void ixgb_tx_timeout_task(struct work_struct *work);
 
 static void ixgb_vlan_strip_enable(struct ixgb_adapter *adapter);
 static void ixgb_vlan_strip_disable(struct ixgb_adapter *adapter);
-static void ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
-static void ixgb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid);
+static int ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
+static int ixgb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid);
 static void ixgb_restore_vlan(struct ixgb_adapter *adapter);
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2217,7 +2217,7 @@ ixgb_vlan_strip_disable(struct ixgb_adapter *adapter)
 	IXGB_WRITE_REG(&adapter->hw, CTRL0, ctrl);
 }
 
-static void
+static int
 ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgb_adapter *adapter = netdev_priv(netdev);
@@ -2230,9 +2230,11 @@ ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	vfta |= (1 << (vid & 0x1F));
 	ixgb_write_vfta(&adapter->hw, index, vfta);
 	set_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
-static void
+static int
 ixgb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgb_adapter *adapter = netdev_priv(netdev);
@@ -2245,6 +2247,8 @@ ixgb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	vfta &= ~(1 << (vid & 0x1F));
 	ixgb_write_vfta(&adapter->hw, index, vfta);
 	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
 static void
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 1b28ed9d8cc1..5d94ce1c0fc3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -3044,7 +3044,7 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
 	hw->mac.ops.enable_rx_dma(hw, rxctrl);
 }
 
-static void ixgbe_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+static int ixgbe_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -3053,9 +3053,11 @@ static void ixgbe_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	/* add VID to filter table */
 	hw->mac.ops.set_vfta(&adapter->hw, vid, pool_ndx, true);
 	set_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
-static void ixgbe_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -3064,6 +3066,8 @@ static void ixgbe_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	/* remove VID from filter table */
 	hw->mac.ops.set_vfta(&adapter->hw, vid, pool_ndx, false);
 	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 5d1a64398169..891162d1610c 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1403,7 +1403,7 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
 	}
 }
 
-static void ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -1412,9 +1412,11 @@ static void ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	if (hw->mac.ops.set_vfta)
 		hw->mac.ops.set_vfta(hw, vid, 0, true);
 	set_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
-static void ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -1423,6 +1425,8 @@ static void ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	if (hw->mac.ops.set_vfta)
 		hw->mac.ops.set_vfta(hw, vid, 0, false);
 	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
 static void ixgbevf_restore_vlan(struct ixgbevf_adapter *adapter)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 4c5bbb3aad31..2083f3b5d689 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -45,7 +45,7 @@
 #include "mlx4_en.h"
 #include "en_port.h"
 
-static void mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
@@ -67,9 +67,10 @@ static void mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 		en_err(priv, "failed adding vlan %d\n", vid);
 	mutex_unlock(&mdev->state_lock);
 
+	return 0;
 }
 
-static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
@@ -93,6 +94,8 @@ static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 			en_err(priv, "Failed configuring VLAN filter\n");
 	}
 	mutex_unlock(&mdev->state_lock);
+
+	return 0;
 }
 
 u64 mlx4_en_mac_to_u64(u8 *addr)
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 16d4d8e913c3..ef76725454d2 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -3305,7 +3305,7 @@ static void vxge_tx_watchdog(struct net_device *dev)
  *
  * Add the vlan id to the devices vlan id table
  */
-static void
+static int
 vxge_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct vxgedev *vdev = netdev_priv(dev);
@@ -3320,6 +3320,7 @@ vxge_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 		vxge_hw_vpath_vid_add(vpath->handle, vid);
 	}
 	set_bit(vid, vdev->active_vlans);
+	return 0;
 }
 
 /**
@@ -3329,7 +3330,7 @@ vxge_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
  *
  * Remove the vlan id from the device's vlan id table
  */
-static void
+static int
 vxge_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct vxgedev *vdev = netdev_priv(dev);
@@ -3348,6 +3349,7 @@ vxge_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 	vxge_debug_entryexit(VXGE_TRACE,
 		"%s:%d  Exiting...", __func__, __LINE__);
 	clear_bit(vid, vdev->active_vlans);
+	return 0;
 }
 
 static const struct net_device_ops vxge_netdev_ops = {
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 823f845ddc04..69b8e4ef14d9 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -97,8 +97,8 @@ static int qlcnicvf_config_bridged_mode(struct qlcnic_adapter *, u32);
 static int qlcnicvf_start_firmware(struct qlcnic_adapter *);
 static void qlcnic_set_netdev_features(struct qlcnic_adapter *,
 				struct qlcnic_esw_func_cfg *);
-static void qlcnic_vlan_rx_add(struct net_device *, u16);
-static void qlcnic_vlan_rx_del(struct net_device *, u16);
+static int qlcnic_vlan_rx_add(struct net_device *, u16);
+static int qlcnic_vlan_rx_del(struct net_device *, u16);
 
 /*  PCI Device ID Table  */
 #define ENTRY(device) \
@@ -735,20 +735,22 @@ qlcnic_set_vlan_config(struct qlcnic_adapter *adapter,
 		adapter->pvid = 0;
 }
 
-static void
+static int
 qlcnic_vlan_rx_add(struct net_device *netdev, u16 vid)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	set_bit(vid, adapter->vlans);
+	return 0;
 }
 
-static void
+static int
 qlcnic_vlan_rx_del(struct net_device *netdev, u16 vid)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 
 	qlcnic_restore_indev_addr(netdev, NETDEV_DOWN);
 	clear_bit(vid, adapter->vlans);
+	return 0;
 }
 
 static void
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 1ce4e08037b8..b54898737284 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -2349,56 +2349,66 @@ static int qlge_set_features(struct net_device *ndev,
 	return 0;
 }
 
-static void __qlge_vlan_rx_add_vid(struct ql_adapter *qdev, u16 vid)
+static int __qlge_vlan_rx_add_vid(struct ql_adapter *qdev, u16 vid)
 {
 	u32 enable_bit = MAC_ADDR_E;
+	int err;
 
-	if (ql_set_mac_addr_reg
-	    (qdev, (u8 *) &enable_bit, MAC_ADDR_TYPE_VLAN, vid)) {
+	err = ql_set_mac_addr_reg(qdev, (u8 *) &enable_bit,
+				  MAC_ADDR_TYPE_VLAN, vid);
+	if (err)
 		netif_err(qdev, ifup, qdev->ndev,
 			  "Failed to init vlan address.\n");
-	}
+	return err;
 }
 
-static void qlge_vlan_rx_add_vid(struct net_device *ndev, u16 vid)
+static int qlge_vlan_rx_add_vid(struct net_device *ndev, u16 vid)
 {
 	struct ql_adapter *qdev = netdev_priv(ndev);
 	int status;
+	int err;
 
 	status = ql_sem_spinlock(qdev, SEM_MAC_ADDR_MASK);
 	if (status)
-		return;
+		return status;
 
-	__qlge_vlan_rx_add_vid(qdev, vid);
+	err = __qlge_vlan_rx_add_vid(qdev, vid);
 	set_bit(vid, qdev->active_vlans);
 
 	ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
+
+	return err;
 }
 
-static void __qlge_vlan_rx_kill_vid(struct ql_adapter *qdev, u16 vid)
+static int __qlge_vlan_rx_kill_vid(struct ql_adapter *qdev, u16 vid)
 {
 	u32 enable_bit = 0;
+	int err;
 
-	if (ql_set_mac_addr_reg
-	    (qdev, (u8 *) &enable_bit, MAC_ADDR_TYPE_VLAN, vid)) {
+	err = ql_set_mac_addr_reg(qdev, (u8 *) &enable_bit,
+				  MAC_ADDR_TYPE_VLAN, vid);
+	if (err)
 		netif_err(qdev, ifup, qdev->ndev,
 			  "Failed to clear vlan address.\n");
-	}
+	return err;
 }
 
-static void qlge_vlan_rx_kill_vid(struct net_device *ndev, u16 vid)
+static int qlge_vlan_rx_kill_vid(struct net_device *ndev, u16 vid)
 {
 	struct ql_adapter *qdev = netdev_priv(ndev);
 	int status;
+	int err;
 
 	status = ql_sem_spinlock(qdev, SEM_MAC_ADDR_MASK);
 	if (status)
-		return;
+		return status;
 
-	__qlge_vlan_rx_kill_vid(qdev, vid);
+	err = __qlge_vlan_rx_kill_vid(qdev, vid);
 	clear_bit(vid, qdev->active_vlans);
 
 	ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
+
+	return err;
 }
 
 static void qlge_restore_vlan(struct ql_adapter *qdev)
diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
index 3a90af6d111c..4b19e9b0606b 100644
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -727,9 +727,10 @@ static void __bdx_vlan_rx_vid(struct net_device *ndev, uint16_t vid, int enable)
  * @ndev network device
  * @vid  VLAN vid to add
  */
-static void bdx_vlan_rx_add_vid(struct net_device *ndev, uint16_t vid)
+static int bdx_vlan_rx_add_vid(struct net_device *ndev, uint16_t vid)
 {
 	__bdx_vlan_rx_vid(ndev, vid, 1);
+	return 0;
 }
 
 /*
@@ -737,9 +738,10 @@ static void bdx_vlan_rx_add_vid(struct net_device *ndev, uint16_t vid)
  * @ndev network device
  * @vid  VLAN vid to kill
  */
-static void bdx_vlan_rx_kill_vid(struct net_device *ndev, unsigned short vid)
+static int bdx_vlan_rx_kill_vid(struct net_device *ndev, unsigned short vid)
 {
 	__bdx_vlan_rx_vid(ndev, vid, 0);
+	return 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 5587ecdf32e3..bcdbdc72b558 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -488,8 +488,8 @@ static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static const struct ethtool_ops netdev_ethtool_ops;
 static int  rhine_close(struct net_device *dev);
 static void rhine_shutdown (struct pci_dev *pdev);
-static void rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid);
-static void rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid);
+static int rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid);
+static int rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid);
 static void rhine_set_cam(void __iomem *ioaddr, int idx, u8 *addr);
 static void rhine_set_vlan_cam(void __iomem *ioaddr, int idx, u8 *addr);
 static void rhine_set_cam_mask(void __iomem *ioaddr, u32 mask);
@@ -1261,7 +1261,7 @@ static void rhine_update_vcam(struct net_device *dev)
 	rhine_set_vlan_cam_mask(ioaddr, vCAMmask);
 }
 
-static void rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+static int rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct rhine_private *rp = netdev_priv(dev);
 
@@ -1269,9 +1269,10 @@ static void rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 	set_bit(vid, rp->active_vlans);
 	rhine_update_vcam(dev);
 	spin_unlock_irq(&rp->lock);
+	return 0;
 }
 
-static void rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+static int rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct rhine_private *rp = netdev_priv(dev);
 
@@ -1279,6 +1280,7 @@ static void rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 	clear_bit(vid, rp->active_vlans);
 	rhine_update_vcam(dev);
 	spin_unlock_irq(&rp->lock);
+	return 0;
 }
 
 static void init_registers(struct net_device *dev)
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index 59bb5fd56afe..4128d6b8cc28 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -522,7 +522,7 @@ static void velocity_init_cam_filter(struct velocity_info *vptr)
 	mac_set_vlan_cam_mask(regs, vptr->vCAMmask);
 }
 
-static void velocity_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+static int velocity_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct velocity_info *vptr = netdev_priv(dev);
 
@@ -530,9 +530,10 @@ static void velocity_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 	set_bit(vid, vptr->active_vlans);
 	velocity_init_cam_filter(vptr);
 	spin_unlock_irq(&vptr->lock);
+	return 0;
 }
 
-static void velocity_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+static int velocity_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct velocity_info *vptr = netdev_priv(dev);
 
@@ -540,6 +541,7 @@ static void velocity_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid
 	clear_bit(vid, vptr->active_vlans);
 	velocity_init_cam_filter(vptr);
 	spin_unlock_irq(&vptr->lock);
+	return 0;
 }
 
 static void velocity_init_rx_ring_indexes(struct velocity_info *vptr)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 74134970b709..2511bc5c34f3 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -520,7 +520,7 @@ static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
 	return stats;
 }
 
-static void macvlan_vlan_rx_add_vid(struct net_device *dev,
+static int macvlan_vlan_rx_add_vid(struct net_device *dev,
 				    unsigned short vid)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
@@ -528,10 +528,11 @@ static void macvlan_vlan_rx_add_vid(struct net_device *dev,
 	const struct net_device_ops *ops = lowerdev->netdev_ops;
 
 	if (ops->ndo_vlan_rx_add_vid)
-		ops->ndo_vlan_rx_add_vid(lowerdev, vid);
+		return ops->ndo_vlan_rx_add_vid(lowerdev, vid);
+	return 0;
 }
 
-static void macvlan_vlan_rx_kill_vid(struct net_device *dev,
+static int macvlan_vlan_rx_kill_vid(struct net_device *dev,
 				     unsigned short vid)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
@@ -539,7 +540,8 @@ static void macvlan_vlan_rx_kill_vid(struct net_device *dev,
 	const struct net_device_ops *ops = lowerdev->netdev_ops;
 
 	if (ops->ndo_vlan_rx_kill_vid)
-		ops->ndo_vlan_rx_kill_vid(lowerdev, vid);
+		return ops->ndo_vlan_rx_kill_vid(lowerdev, vid);
+	return 0;
 }
 
 static void macvlan_ethtool_get_drvinfo(struct net_device *dev,
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 064155d56bce..8e8bf958539e 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -902,7 +902,7 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	return stats;
 }
 
-static void team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid)
+static int team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid)
 {
 	struct team *team = netdev_priv(dev);
 	struct team_port *port;
@@ -915,9 +915,11 @@ static void team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid)
 			ops->ndo_vlan_rx_add_vid(port->dev, vid);
 	}
 	rcu_read_unlock();
+
+	return 0;
 }
 
-static void team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
+static int team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
 {
 	struct team *team = netdev_priv(dev);
 	struct team_port *port;
@@ -930,6 +932,8 @@ static void team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
 			ops->ndo_vlan_rx_kill_vid(port->dev, vid);
 	}
 	rcu_read_unlock();
+
+	return 0;
 }
 
 static int team_add_slave(struct net_device *dev, struct net_device *port_dev)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 5a961720f64c..609c51f90e6c 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -855,7 +855,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	kfree(buf);
 }
 
-static void virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid)
+static int virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct scatterlist sg;
@@ -865,9 +865,10 @@ static void virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid)
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
 				  VIRTIO_NET_CTRL_VLAN_ADD, &sg, 1, 0))
 		dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
+	return 0;
 }
 
-static void virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
+static int virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct scatterlist sg;
@@ -877,6 +878,7 @@ static void virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
 				  VIRTIO_NET_CTRL_VLAN_DEL, &sg, 1, 0))
 		dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
+	return 0;
 }
 
 static void virtnet_get_ringparam(struct net_device *dev,
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index d96bfb1ac20b..1c2ae11a9e35 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1926,7 +1926,7 @@ vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
 }
 
 
-static void
+static int
 vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
@@ -1943,10 +1943,12 @@ vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	}
 
 	set_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
 
-static void
+static int
 vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
@@ -1963,6 +1965,8 @@ vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	}
 
 	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
 
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index a21ae3d549db..c4e2004bd0e8 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -301,21 +301,21 @@ static void qeth_l2_process_vlans(struct qeth_card *card)
 	spin_unlock_bh(&card->vlanlock);
 }
 
-static void qeth_l2_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct qeth_card *card = dev->ml_priv;
 	struct qeth_vlan_vid *id;
 
 	QETH_CARD_TEXT_(card, 4, "aid:%d", vid);
 	if (!vid)
-		return;
+		return 0;
 	if (card->info.type == QETH_CARD_TYPE_OSM) {
 		QETH_CARD_TEXT(card, 3, "aidOSM");
-		return;
+		return 0;
 	}
 	if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) {
 		QETH_CARD_TEXT(card, 3, "aidREC");
-		return;
+		return 0;
 	}
 	id = kmalloc(sizeof(struct qeth_vlan_vid), GFP_ATOMIC);
 	if (id) {
@@ -324,10 +324,13 @@ static void qeth_l2_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 		spin_lock_bh(&card->vlanlock);
 		list_add_tail(&id->list, &card->vid_list);
 		spin_unlock_bh(&card->vlanlock);
+	} else {
+		return -ENOMEM;
 	}
+	return 0;
 }
 
-static void qeth_l2_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct qeth_vlan_vid *id, *tmpid = NULL;
 	struct qeth_card *card = dev->ml_priv;
@@ -335,11 +338,11 @@ static void qeth_l2_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 	QETH_CARD_TEXT_(card, 4, "kid:%d", vid);
 	if (card->info.type == QETH_CARD_TYPE_OSM) {
 		QETH_CARD_TEXT(card, 3, "kidOSM");
-		return;
+		return 0;
 	}
 	if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) {
 		QETH_CARD_TEXT(card, 3, "kidREC");
-		return;
+		return 0;
 	}
 	spin_lock_bh(&card->vlanlock);
 	list_for_each_entry(id, &card->vid_list, list) {
@@ -355,6 +358,7 @@ static void qeth_l2_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 		kfree(tmpid);
 	}
 	qeth_l2_set_multicast_list(card->dev);
+	return 0;
 }
 
 static int qeth_l2_stop_card(struct qeth_card *card, int recovery_mode)
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index b2a55e3fde0b..b3b045c21e2c 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1869,15 +1869,15 @@ static void qeth_l3_free_vlan_addresses(struct qeth_card *card,
 	qeth_l3_free_vlan_addresses6(card, vid);
 }
 
-static void qeth_l3_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+static int qeth_l3_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct qeth_card *card = dev->ml_priv;
 
 	set_bit(vid, card->active_vlans);
-	return;
+	return 0;
 }
 
-static void qeth_l3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct qeth_card *card = dev->ml_priv;
 	unsigned long flags;
@@ -1885,7 +1885,7 @@ static void qeth_l3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 	QETH_CARD_TEXT_(card, 4, "kid:%d", vid);
 	if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) {
 		QETH_CARD_TEXT(card, 3, "kidREC");
-		return;
+		return 0;
 	}
 	spin_lock_irqsave(&card->vlanlock, flags);
 	/* unregister IP addresses of vlan device */
@@ -1893,6 +1893,7 @@ static void qeth_l3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 	clear_bit(vid, card->active_vlans);
 	spin_unlock_irqrestore(&card->vlanlock, flags);
 	qeth_l3_set_multicast_list(card->dev);
+	return 0;
 }
 
 static inline int qeth_l3_rebuild_skb(struct qeth_card *card,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index eef257c76a40..f7bff9615728 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -792,11 +792,11 @@ struct netdev_tc_txq {
  *	3. Update dev->stats asynchronously and atomically, and define
  *	   neither operation.
  *
- * void (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid);
+ * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid);
  *	If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER)
  *	this function is called when a VLAN id is registered.
  *
- * void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid);
+ * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid);
  *	If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER)
  *	this function is called when a VLAN id is unregistered.
  *
@@ -911,9 +911,9 @@ struct net_device_ops {
 						     struct rtnl_link_stats64 *storage);
 	struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
 
-	void			(*ndo_vlan_rx_add_vid)(struct net_device *dev,
+	int			(*ndo_vlan_rx_add_vid)(struct net_device *dev,
 						       unsigned short vid);
-	void			(*ndo_vlan_rx_kill_vid)(struct net_device *dev,
+	int			(*ndo_vlan_rx_kill_vid)(struct net_device *dev,
 						        unsigned short vid);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*ndo_poll_controller)(struct net_device *dev);
-- 
cgit v1.2.3


From 87002b03baabd2b8f6281ab6411ed88d24958de1 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 8 Dec 2011 04:11:17 +0000
Subject: net: introduce vlan_vid_[add/del] and use them instead of direct
 [add/kill]_vid ndo calls

This patch adds wrapper for ndo_vlan_rx_add_vid/ndo_vlan_rx_kill_vid
functions. Check for NETIF_F_HW_VLAN_FILTER feature is done in this
wrapper.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 53 ++++++++++++++++++-----------------------
 drivers/net/macvlan.c           | 10 +++-----
 drivers/net/team/team.c         | 34 ++++++++++++++++----------
 include/linux/if_vlan.h         | 12 ++++++++++
 net/8021q/vlan.c                | 14 ++++-------
 net/8021q/vlan_core.c           | 23 ++++++++++++++++++
 6 files changed, 87 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d72c37f03e50..0c0dacba1f51 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -431,17 +431,13 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
 static int bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
-	struct slave *slave;
+	struct slave *slave, *stop_at;
 	int i, res;
 
 	bond_for_each_slave(bond, slave, i) {
-		struct net_device *slave_dev = slave->dev;
-		const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
-
-		if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
-		    slave_ops->ndo_vlan_rx_add_vid) {
-			slave_ops->ndo_vlan_rx_add_vid(slave_dev, vid);
-		}
+		res = vlan_vid_add(slave->dev, vid);
+		if (res)
+			goto unwind;
 	}
 
 	res = bond_add_vlan(bond, vid);
@@ -452,6 +448,14 @@ static int bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
 	}
 
 	return 0;
+
+unwind:
+	/* unwind from head to the slave that failed */
+	stop_at = slave;
+	bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at)
+		vlan_vid_del(slave->dev, vid);
+
+	return res;
 }
 
 /**
@@ -465,15 +469,8 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
 	struct slave *slave;
 	int i, res;
 
-	bond_for_each_slave(bond, slave, i) {
-		struct net_device *slave_dev = slave->dev;
-		const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
-
-		if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
-		    slave_ops->ndo_vlan_rx_kill_vid) {
-			slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vid);
-		}
-	}
+	bond_for_each_slave(bond, slave, i)
+		vlan_vid_del(slave->dev, vid);
 
 	res = bond_del_vlan(bond, vid);
 	if (res) {
@@ -488,30 +485,26 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
 static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev)
 {
 	struct vlan_entry *vlan;
-	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
-
-	if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
-	    !(slave_ops->ndo_vlan_rx_add_vid))
-		return;
+	int res;
 
-	list_for_each_entry(vlan, &bond->vlan_list, vlan_list)
-		slave_ops->ndo_vlan_rx_add_vid(slave_dev, vlan->vlan_id);
+	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+		res = vlan_vid_add(slave_dev, vlan->vlan_id);
+		if (res)
+			pr_warning("%s: Failed to add vlan id %d to device %s\n",
+				   bond->dev->name, vlan->vlan_id,
+				   slave_dev->name);
+	}
 }
 
 static void bond_del_vlans_from_slave(struct bonding *bond,
 				      struct net_device *slave_dev)
 {
-	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
 	struct vlan_entry *vlan;
 
-	if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
-	    !(slave_ops->ndo_vlan_rx_kill_vid))
-		return;
-
 	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
 		if (!vlan->vlan_id)
 			continue;
-		slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vlan->vlan_id);
+		vlan_vid_del(slave_dev, vlan->vlan_id);
 	}
 }
 
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 2511bc5c34f3..f2f820c4b40a 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -26,6 +26,7 @@
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/if_arp.h>
+#include <linux/if_vlan.h>
 #include <linux/if_link.h>
 #include <linux/if_macvlan.h>
 #include <net/rtnetlink.h>
@@ -525,11 +526,8 @@ static int macvlan_vlan_rx_add_vid(struct net_device *dev,
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct net_device *lowerdev = vlan->lowerdev;
-	const struct net_device_ops *ops = lowerdev->netdev_ops;
 
-	if (ops->ndo_vlan_rx_add_vid)
-		return ops->ndo_vlan_rx_add_vid(lowerdev, vid);
-	return 0;
+	return vlan_vid_add(lowerdev, vid);
 }
 
 static int macvlan_vlan_rx_kill_vid(struct net_device *dev,
@@ -537,10 +535,8 @@ static int macvlan_vlan_rx_kill_vid(struct net_device *dev,
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct net_device *lowerdev = vlan->lowerdev;
-	const struct net_device_ops *ops = lowerdev->netdev_ops;
 
-	if (ops->ndo_vlan_rx_kill_vid)
-		return ops->ndo_vlan_rx_kill_vid(lowerdev, vid);
+	vlan_vid_del(lowerdev, vid);
 	return 0;
 }
 
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 8e8bf958539e..79c2d1b52eb6 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -18,6 +18,7 @@
 #include <linux/ctype.h>
 #include <linux/notifier.h>
 #include <linux/netdevice.h>
+#include <linux/if_vlan.h>
 #include <linux/if_arp.h>
 #include <linux/socket.h>
 #include <linux/etherdevice.h>
@@ -906,17 +907,28 @@ static int team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid)
 {
 	struct team *team = netdev_priv(dev);
 	struct team_port *port;
+	int err;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(port, &team->port_list, list) {
-		const struct net_device_ops *ops = port->dev->netdev_ops;
-
-		if (ops->ndo_vlan_rx_add_vid)
-			ops->ndo_vlan_rx_add_vid(port->dev, vid);
+	/*
+	 * Alhough this is reader, it's guarded by team lock. It's not possible
+	 * to traverse list in reverse under rcu_read_lock
+	 */
+	mutex_lock(&team->lock);
+	list_for_each_entry(port, &team->port_list, list) {
+		err = vlan_vid_add(port->dev, vid);
+		if (err)
+			goto unwind;
 	}
-	rcu_read_unlock();
+	mutex_unlock(&team->lock);
 
 	return 0;
+
+unwind:
+	list_for_each_entry_continue_reverse(port, &team->port_list, list)
+		vlan_vid_del(port->dev, vid);
+	mutex_unlock(&team->lock);
+
+	return err;
 }
 
 static int team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
@@ -925,12 +937,8 @@ static int team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
 	struct team_port *port;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(port, &team->port_list, list) {
-		const struct net_device_ops *ops = port->dev->netdev_ops;
-
-		if (ops->ndo_vlan_rx_kill_vid)
-			ops->ndo_vlan_rx_kill_vid(port->dev, vid);
-	}
+	list_for_each_entry_rcu(port, &team->port_list, list)
+		vlan_vid_del(port->dev, vid);
 	rcu_read_unlock();
 
 	return 0;
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 31d7c976f063..71168a6f3347 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -109,6 +109,9 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev);
 extern bool vlan_do_receive(struct sk_buff **skb, bool last_handler);
 extern struct sk_buff *vlan_untag(struct sk_buff *skb);
 
+extern int vlan_vid_add(struct net_device *dev, unsigned short vid);
+extern void vlan_vid_del(struct net_device *dev, unsigned short vid);
+
 #else
 static inline struct net_device *
 __vlan_find_dev_deep(struct net_device *real_dev, u16 vlan_id)
@@ -139,6 +142,15 @@ static inline struct sk_buff *vlan_untag(struct sk_buff *skb)
 {
 	return skb;
 }
+
+static inline int vlan_vid_add(struct net_device *dev, unsigned short vid)
+{
+	return 0;
+}
+
+static inline void vlan_vid_del(struct net_device *dev, unsigned short vid)
+{
+}
 #endif
 
 /**
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index e075625efeeb..dd9aa400888b 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -101,7 +101,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
-	const struct net_device_ops *ops = real_dev->netdev_ops;
 	struct vlan_group *grp;
 	u16 vlan_id = vlan->vlan_id;
 
@@ -114,8 +113,8 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	 * HW accelerating devices or SW vlan input packet processing if
 	 * VLAN is not 0 (leave it there for 802.1p).
 	 */
-	if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER))
-		ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id);
+	if (vlan_id)
+		vlan_vid_del(real_dev, vlan_id);
 
 	grp->nr_vlans--;
 
@@ -169,7 +168,6 @@ int register_vlan_dev(struct net_device *dev)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
-	const struct net_device_ops *ops = real_dev->netdev_ops;
 	u16 vlan_id = vlan->vlan_id;
 	struct vlan_group *grp, *ngrp = NULL;
 	int err;
@@ -207,8 +205,7 @@ int register_vlan_dev(struct net_device *dev)
 	if (ngrp) {
 		rcu_assign_pointer(real_dev->vlgrp, ngrp);
 	}
-	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
-		ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
+	vlan_vid_add(real_dev, vlan_id);
 
 	return 0;
 
@@ -369,11 +366,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		__vlan_device_event(dev, event);
 
 	if ((event == NETDEV_UP) &&
-	    (dev->features & NETIF_F_HW_VLAN_FILTER) &&
-	    dev->netdev_ops->ndo_vlan_rx_add_vid) {
+	    (dev->features & NETIF_F_HW_VLAN_FILTER)) {
 		pr_info("adding VLAN 0 to HW filter on device %s\n",
 			dev->name);
-		dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
+		vlan_vid_add(dev, 0);
 	}
 
 	grp = rtnl_dereference(dev->vlgrp);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 85241f044294..544f9cb9678c 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -146,3 +146,26 @@ err_free:
 	kfree_skb(skb);
 	return NULL;
 }
+
+int vlan_vid_add(struct net_device *dev, unsigned short vid)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
+	     ops->ndo_vlan_rx_add_vid) {
+		return ops->ndo_vlan_rx_add_vid(dev, vid);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(vlan_vid_add);
+
+void vlan_vid_del(struct net_device *dev, unsigned short vid)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
+	     ops->ndo_vlan_rx_kill_vid) {
+		ops->ndo_vlan_rx_kill_vid(dev, vid);
+	}
+}
+EXPORT_SYMBOL(vlan_vid_del);
-- 
cgit v1.2.3


From 5b9ea6e022e9ba0fe39cb349ac40361f78d5da5b Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 8 Dec 2011 04:11:18 +0000
Subject: vlan: introduce vid list with reference counting

This allows to keep track of vids needed to be in rx vlan filters of
devices even if they are used in bond/team etc.

vlan_info as well as vlan_group previously was, is allocated when first
vid is added and dealocated whan last vid is deleted.

vlan_group definition is moved to private header.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h   |  17 +----
 include/linux/netdevice.h |   3 +-
 net/8021q/vlan.c          |  90 +++++++++----------------
 net/8021q/vlan.h          |  30 ++++++++-
 net/8021q/vlan_core.c     | 168 +++++++++++++++++++++++++++++++++++++++++++---
 5 files changed, 219 insertions(+), 89 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 71168a6f3347..0c9691305298 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -74,22 +74,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
 /* found in socket.c */
 extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
 
-/* if this changes, algorithm will have to be reworked because this
- * depends on completely exhausting the VLAN identifier space.  Thus
- * it gives constant time look-up, but in many cases it wastes memory.
- */
-#define VLAN_GROUP_ARRAY_SPLIT_PARTS  8
-#define VLAN_GROUP_ARRAY_PART_LEN     (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS)
-
-struct vlan_group {
-	struct net_device	*real_dev; /* The ethernet(like) device
-					    * the vlan is attached to.
-					    */
-	unsigned int		nr_vlans;
-	struct hlist_node	hlist;	/* linked list */
-	struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
-	struct rcu_head		rcu;
-};
+struct vlan_info;
 
 static inline int is_vlan_dev(struct net_device *dev)
 {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f7bff9615728..603730804da5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -55,7 +55,6 @@
 
 #include <linux/netdev_features.h>
 
-struct vlan_group;
 struct netpoll_info;
 struct phy_device;
 /* 802.11 specific */
@@ -1096,7 +1095,7 @@ struct net_device {
 	/* Protocol specific pointers */
 
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
-	struct vlan_group __rcu	*vlgrp;		/* VLAN group */
+	struct vlan_info __rcu	*vlan_info;	/* VLAN info */
 #endif
 #if IS_ENABLED(CONFIG_NET_DSA)
 	struct dsa_switch_tree	*dsa_ptr;	/* dsa specific data */
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index dd9aa400888b..efea35b02e7f 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -51,27 +51,6 @@ const char vlan_version[] = DRV_VERSION;
 
 /* End of global variables definitions. */
 
-static void vlan_group_free(struct vlan_group *grp)
-{
-	int i;
-
-	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
-		kfree(grp->vlan_devices_arrays[i]);
-	kfree(grp);
-}
-
-static struct vlan_group *vlan_group_alloc(struct net_device *real_dev)
-{
-	struct vlan_group *grp;
-
-	grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL);
-	if (!grp)
-		return NULL;
-
-	grp->real_dev = real_dev;
-	return grp;
-}
-
 static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
 {
 	struct net_device **array;
@@ -92,22 +71,20 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
 	return 0;
 }
 
-static void vlan_rcu_free(struct rcu_head *rcu)
-{
-	vlan_group_free(container_of(rcu, struct vlan_group, rcu));
-}
-
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
+	struct vlan_info *vlan_info;
 	struct vlan_group *grp;
 	u16 vlan_id = vlan->vlan_id;
 
 	ASSERT_RTNL();
 
-	grp = rtnl_dereference(real_dev->vlgrp);
-	BUG_ON(!grp);
+	vlan_info = rtnl_dereference(real_dev->vlan_info);
+	BUG_ON(!vlan_info);
+
+	grp = &vlan_info->grp;
 
 	/* Take it out of our own structures, but be sure to interlock with
 	 * HW accelerating devices or SW vlan input packet processing if
@@ -116,7 +93,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	if (vlan_id)
 		vlan_vid_del(real_dev, vlan_id);
 
-	grp->nr_vlans--;
+	grp->nr_vlan_devs--;
 
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_leave(dev);
@@ -128,16 +105,9 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	 */
 	unregister_netdevice_queue(dev, head);
 
-	/* If the group is now empty, kill off the group. */
-	if (grp->nr_vlans == 0) {
+	if (grp->nr_vlan_devs == 0)
 		vlan_gvrp_uninit_applicant(real_dev);
 
-		RCU_INIT_POINTER(real_dev->vlgrp, NULL);
-
-		/* Free the group, after all cpu's are done. */
-		call_rcu(&grp->rcu, vlan_rcu_free);
-	}
-
 	/* Get rid of the vlan's reference to real_dev */
 	dev_put(real_dev);
 }
@@ -169,17 +139,23 @@ int register_vlan_dev(struct net_device *dev)
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	u16 vlan_id = vlan->vlan_id;
-	struct vlan_group *grp, *ngrp = NULL;
+	struct vlan_info *vlan_info;
+	struct vlan_group *grp;
 	int err;
 
-	grp = rtnl_dereference(real_dev->vlgrp);
-	if (!grp) {
-		ngrp = grp = vlan_group_alloc(real_dev);
-		if (!grp)
-			return -ENOBUFS;
+	err = vlan_vid_add(real_dev, vlan_id);
+	if (err)
+		return err;
+
+	vlan_info = rtnl_dereference(real_dev->vlan_info);
+	/* vlan_info should be there now. vlan_vid_add took care of it */
+	BUG_ON(!vlan_info);
+
+	grp = &vlan_info->grp;
+	if (grp->nr_vlan_devs == 0) {
 		err = vlan_gvrp_init_applicant(real_dev);
 		if (err < 0)
-			goto out_free_group;
+			goto out_vid_del;
 	}
 
 	err = vlan_group_prealloc_vid(grp, vlan_id);
@@ -200,23 +176,15 @@ int register_vlan_dev(struct net_device *dev)
 	 * it into our local structure.
 	 */
 	vlan_group_set_device(grp, vlan_id, dev);
-	grp->nr_vlans++;
-
-	if (ngrp) {
-		rcu_assign_pointer(real_dev->vlgrp, ngrp);
-	}
-	vlan_vid_add(real_dev, vlan_id);
+	grp->nr_vlan_devs++;
 
 	return 0;
 
 out_uninit_applicant:
-	if (ngrp)
+	if (grp->nr_vlan_devs == 0)
 		vlan_gvrp_uninit_applicant(real_dev);
-out_free_group:
-	if (ngrp) {
-		/* Free the group, after all cpu's are done. */
-		call_rcu(&ngrp->rcu, vlan_rcu_free);
-	}
+out_vid_del:
+	vlan_vid_del(real_dev, vlan_id);
 	return err;
 }
 
@@ -357,6 +325,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 {
 	struct net_device *dev = ptr;
 	struct vlan_group *grp;
+	struct vlan_info *vlan_info;
 	int i, flgs;
 	struct net_device *vlandev;
 	struct vlan_dev_priv *vlan;
@@ -372,9 +341,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		vlan_vid_add(dev, 0);
 	}
 
-	grp = rtnl_dereference(dev->vlgrp);
-	if (!grp)
+	vlan_info = rtnl_dereference(dev->vlan_info);
+	if (!vlan_info)
 		goto out;
+	grp = &vlan_info->grp;
 
 	/* It is OK that we do not hold the group lock right now,
 	 * as we run under the RTNL lock.
@@ -478,9 +448,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			if (!vlandev)
 				continue;
 
-			/* unregistration of last vlan destroys group, abort
+			/* removal of last vid destroys vlan_info, abort
 			 * afterwards */
-			if (grp->nr_vlans == 1)
+			if (vlan_info->nr_vids == 1)
 				i = VLAN_N_VID;
 
 			unregister_vlan_dev(vlandev, &list);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index d3c4ea4a3836..28d8dc20cb6d 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -3,6 +3,7 @@
 
 #include <linux/if_vlan.h>
 #include <linux/u64_stats_sync.h>
+#include <linux/list.h>
 
 
 /**
@@ -74,6 +75,29 @@ static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
 	return netdev_priv(dev);
 }
 
+/* if this changes, algorithm will have to be reworked because this
+ * depends on completely exhausting the VLAN identifier space.  Thus
+ * it gives constant time look-up, but in many cases it wastes memory.
+ */
+#define VLAN_GROUP_ARRAY_SPLIT_PARTS  8
+#define VLAN_GROUP_ARRAY_PART_LEN     (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS)
+
+struct vlan_group {
+	unsigned int		nr_vlan_devs;
+	struct hlist_node	hlist;	/* linked list */
+	struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
+};
+
+struct vlan_info {
+	struct net_device	*real_dev; /* The ethernet(like) device
+					    * the vlan is attached to.
+					    */
+	struct vlan_group	grp;
+	struct list_head	vid_list;
+	unsigned int		nr_vids;
+	struct rcu_head		rcu;
+};
+
 static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
 						       u16 vlan_id)
 {
@@ -97,10 +121,10 @@ static inline void vlan_group_set_device(struct vlan_group *vg,
 static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
 					       u16 vlan_id)
 {
-	struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp);
+	struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info);
 
-	if (grp)
-		return vlan_group_get_device(grp, vlan_id);
+	if (vlan_info)
+		return vlan_group_get_device(&vlan_info->grp, vlan_id);
 
 	return NULL;
 }
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 544f9cb9678c..329e0313e01f 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -71,10 +71,10 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
 struct net_device *__vlan_find_dev_deep(struct net_device *real_dev,
 					u16 vlan_id)
 {
-	struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp);
+	struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info);
 
-	if (grp) {
-		return vlan_group_get_device(grp, vlan_id);
+	if (vlan_info) {
+		return vlan_group_get_device(&vlan_info->grp, vlan_id);
 	} else {
 		/*
 		 * Bonding slaves do not have grp assigned to themselves.
@@ -147,25 +147,177 @@ err_free:
 	return NULL;
 }
 
-int vlan_vid_add(struct net_device *dev, unsigned short vid)
+
+/*
+ * vlan info and vid list
+ */
+
+static void vlan_group_free(struct vlan_group *grp)
+{
+	int i;
+
+	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
+		kfree(grp->vlan_devices_arrays[i]);
+}
+
+static void vlan_info_free(struct vlan_info *vlan_info)
+{
+	vlan_group_free(&vlan_info->grp);
+	kfree(vlan_info);
+}
+
+static void vlan_info_rcu_free(struct rcu_head *rcu)
+{
+	vlan_info_free(container_of(rcu, struct vlan_info, rcu));
+}
+
+static struct vlan_info *vlan_info_alloc(struct net_device *dev)
+{
+	struct vlan_info *vlan_info;
+
+	vlan_info = kzalloc(sizeof(struct vlan_info), GFP_KERNEL);
+	if (!vlan_info)
+		return NULL;
+
+	vlan_info->real_dev = dev;
+	INIT_LIST_HEAD(&vlan_info->vid_list);
+	return vlan_info;
+}
+
+struct vlan_vid_info {
+	struct list_head list;
+	unsigned short vid;
+	int refcount;
+};
+
+static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info,
+					       unsigned short vid)
+{
+	struct vlan_vid_info *vid_info;
+
+	list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
+		if (vid_info->vid == vid)
+			return vid_info;
+	}
+	return NULL;
+}
+
+static struct vlan_vid_info *vlan_vid_info_alloc(unsigned short vid)
+{
+	struct vlan_vid_info *vid_info;
+
+	vid_info = kzalloc(sizeof(struct vlan_vid_info), GFP_KERNEL);
+	if (!vid_info)
+		return NULL;
+	vid_info->vid = vid;
+
+	return vid_info;
+}
+
+static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
+			  struct vlan_vid_info **pvid_info)
 {
+	struct net_device *dev = vlan_info->real_dev;
 	const struct net_device_ops *ops = dev->netdev_ops;
+	struct vlan_vid_info *vid_info;
+	int err;
+
+	vid_info = vlan_vid_info_alloc(vid);
+	if (!vid_info)
+		return -ENOMEM;
 
 	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
-	     ops->ndo_vlan_rx_add_vid) {
-		return ops->ndo_vlan_rx_add_vid(dev, vid);
+	    ops->ndo_vlan_rx_add_vid) {
+		err =  ops->ndo_vlan_rx_add_vid(dev, vid);
+		if (err) {
+			kfree(vid_info);
+			return err;
+		}
 	}
+	list_add(&vid_info->list, &vlan_info->vid_list);
+	vlan_info->nr_vids++;
+	*pvid_info = vid_info;
 	return 0;
 }
+
+int vlan_vid_add(struct net_device *dev, unsigned short vid)
+{
+	struct vlan_info *vlan_info;
+	struct vlan_vid_info *vid_info;
+	bool vlan_info_created = false;
+	int err;
+
+	ASSERT_RTNL();
+
+	vlan_info = rtnl_dereference(dev->vlan_info);
+	if (!vlan_info) {
+		vlan_info = vlan_info_alloc(dev);
+		if (!vlan_info)
+			return -ENOMEM;
+		vlan_info_created = true;
+	}
+	vid_info = vlan_vid_info_get(vlan_info, vid);
+	if (!vid_info) {
+		err = __vlan_vid_add(vlan_info, vid, &vid_info);
+		if (err)
+			goto out_free_vlan_info;
+	}
+	vid_info->refcount++;
+
+	if (vlan_info_created)
+		rcu_assign_pointer(dev->vlan_info, vlan_info);
+
+	return 0;
+
+out_free_vlan_info:
+	if (vlan_info_created)
+		kfree(vlan_info);
+	return err;
+}
 EXPORT_SYMBOL(vlan_vid_add);
 
-void vlan_vid_del(struct net_device *dev, unsigned short vid)
+static void __vlan_vid_del(struct vlan_info *vlan_info,
+			   struct vlan_vid_info *vid_info)
 {
+	struct net_device *dev = vlan_info->real_dev;
 	const struct net_device_ops *ops = dev->netdev_ops;
+	unsigned short vid = vid_info->vid;
+	int err;
 
 	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
 	     ops->ndo_vlan_rx_kill_vid) {
-		ops->ndo_vlan_rx_kill_vid(dev, vid);
+		err = ops->ndo_vlan_rx_kill_vid(dev, vid);
+		if (err) {
+			pr_warn("failed to kill vid %d for device %s\n",
+				vid, dev->name);
+		}
+	}
+	list_del(&vid_info->list);
+	kfree(vid_info);
+	vlan_info->nr_vids--;
+}
+
+void vlan_vid_del(struct net_device *dev, unsigned short vid)
+{
+	struct vlan_info *vlan_info;
+	struct vlan_vid_info *vid_info;
+
+	ASSERT_RTNL();
+
+	vlan_info = rtnl_dereference(dev->vlan_info);
+	if (!vlan_info)
+		return;
+
+	vid_info = vlan_vid_info_get(vlan_info, vid);
+	if (!vid_info)
+		return;
+	vid_info->refcount--;
+	if (vid_info->refcount == 0) {
+		__vlan_vid_del(vlan_info, vid_info);
+		if (vlan_info->nr_vids == 0) {
+			RCU_INIT_POINTER(dev->vlan_info, NULL);
+			call_rcu(&vlan_info->rcu, vlan_info_rcu_free);
+		}
 	}
 }
 EXPORT_SYMBOL(vlan_vid_del);
-- 
cgit v1.2.3


From 348a1443cc4303c72cf1ee3b26e476fec8e7b5fa Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 8 Dec 2011 04:11:19 +0000
Subject: vlan: introduce functions to do mass addition/deletion of vids by
 another device

Introduce functions handy to copy vlan ids from one driver's list to
another.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 15 +++++++++++++++
 net/8021q/vlan_core.c   | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 0c9691305298..13aff1e2183b 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -97,6 +97,10 @@ extern struct sk_buff *vlan_untag(struct sk_buff *skb);
 extern int vlan_vid_add(struct net_device *dev, unsigned short vid);
 extern void vlan_vid_del(struct net_device *dev, unsigned short vid);
 
+extern int vlan_vids_add_by_dev(struct net_device *dev,
+				const struct net_device *by_dev);
+extern void vlan_vids_del_by_dev(struct net_device *dev,
+				 const struct net_device *by_dev);
 #else
 static inline struct net_device *
 __vlan_find_dev_deep(struct net_device *real_dev, u16 vlan_id)
@@ -136,6 +140,17 @@ static inline int vlan_vid_add(struct net_device *dev, unsigned short vid)
 static inline void vlan_vid_del(struct net_device *dev, unsigned short vid)
 {
 }
+
+static inline int vlan_vids_add_by_dev(struct net_device *dev,
+				       const struct net_device *by_dev)
+{
+	return 0;
+}
+
+static inline void vlan_vids_del_by_dev(struct net_device *dev,
+					const struct net_device *by_dev)
+{
+}
 #endif
 
 /**
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 329e0313e01f..1414c931bd3f 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -321,3 +321,47 @@ void vlan_vid_del(struct net_device *dev, unsigned short vid)
 	}
 }
 EXPORT_SYMBOL(vlan_vid_del);
+
+int vlan_vids_add_by_dev(struct net_device *dev,
+			 const struct net_device *by_dev)
+{
+	struct vlan_vid_info *vid_info;
+	int err;
+
+	ASSERT_RTNL();
+
+	if (!by_dev->vlan_info)
+		return 0;
+
+	list_for_each_entry(vid_info, &by_dev->vlan_info->vid_list, list) {
+		err = vlan_vid_add(dev, vid_info->vid);
+		if (err)
+			goto unwind;
+	}
+	return 0;
+
+unwind:
+	list_for_each_entry_continue_reverse(vid_info,
+					     &by_dev->vlan_info->vid_list,
+					     list) {
+		vlan_vid_del(dev, vid_info->vid);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(vlan_vids_add_by_dev);
+
+void vlan_vids_del_by_dev(struct net_device *dev,
+			  const struct net_device *by_dev)
+{
+	struct vlan_vid_info *vid_info;
+
+	ASSERT_RTNL();
+
+	if (!by_dev->vlan_info)
+		return;
+
+	list_for_each_entry(vid_info, &by_dev->vlan_info->vid_list, list)
+		vlan_vid_del(dev, vid_info->vid);
+}
+EXPORT_SYMBOL(vlan_vids_del_by_dev);
-- 
cgit v1.2.3


From 8af2a218de38f51ea4b4fa48cac1273319ae260c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 8 Dec 2011 06:06:03 +0000
Subject: sch_red: Adaptative RED AQM

Adaptative RED AQM for linux, based on paper from Sally FLoyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :

http://icir.org/floyd/papers/adaptiveRed.pdf

Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (max_th - min_th) / 2

Every 500 ms:
 if (avg > target and max_p <= 0.5)
  increase max_p : max_p += alpha;
 else if (avg < target and max_p >= 0.01)
  decrease max_p : max_p *= beta;

target :[min_th + 0.4*(min_th - max_th),
          min_th + 0.6*(min_th - max_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)

Changes against our RED implementation are :

max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow full range described in Adatative paper.

To deliver a random number, we now use a reciprocal divide (thats really
a multiply), but this operation is done once per marked/droped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.

dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.

Example on a 10Mbit link :

tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
   limit 400000 min 30000 max 90000 avpkt 1000 \
   burst 55 ecn adaptative bandwidth 10Mbit

# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
 Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
 rate 9749Kbit 831pps backlog 72056b 16p requeues 0
  marked 1357 early 35 pdrop 0 other 0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h |   6 ++-
 include/net/red.h         | 101 ++++++++++++++++++++++++++++++++++++++--------
 lib/reciprocal_div.c      |   2 +
 net/sched/sch_red.c       |  21 ++++++++++
 4 files changed, 111 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index fb556dc594d3..e41e0d4de24b 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -181,6 +181,7 @@ enum {
 	TCA_RED_UNSPEC,
 	TCA_RED_PARMS,
 	TCA_RED_STAB,
+	TCA_RED_MAX_P,
 	__TCA_RED_MAX,
 };
 
@@ -194,8 +195,9 @@ struct tc_red_qopt {
 	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
 	unsigned char   Scell_log;	/* cell size for idle damping */
 	unsigned char	flags;
-#define TC_RED_ECN	1
-#define TC_RED_HARDDROP	2
+#define TC_RED_ECN		1
+#define TC_RED_HARDDROP		2
+#define TC_RED_ADAPTATIVE	4
 };
 
 struct tc_red_xstats {
diff --git a/include/net/red.h b/include/net/red.h
index b72a3b833936..24606b22d01e 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -5,6 +5,7 @@
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
 #include <net/dsfield.h>
+#include <linux/reciprocal_div.h>
 
 /*	Random Early Detection (RED) algorithm.
 	=======================================
@@ -87,6 +88,29 @@
 	etc.
  */
 
+/*
+ * Adaptative RED : An Algorithm for Increasing the Robustness of RED's AQM
+ * (Sally FLoyd, Ramakrishna Gummadi, and Scott Shenker) August 2001
+ *
+ * Every 500 ms:
+ *  if (avg > target and max_p <= 0.5)
+ *   increase max_p : max_p += alpha;
+ *  else if (avg < target and max_p >= 0.01)
+ *   decrease max_p : max_p *= beta;
+ *
+ * target :[qth_min + 0.4*(qth_min - qth_max),
+ *          qth_min + 0.6*(qth_min - qth_max)].
+ * alpha : min(0.01, max_p / 4)
+ * beta : 0.9
+ * max_P is a Q0.32 fixed point number (with 32 bits mantissa)
+ * max_P between 0.01 and 0.5 (1% - 50%) [ Its no longer a negative power of two ]
+ */
+#define RED_ONE_PERCENT ((u32)DIV_ROUND_CLOSEST(1ULL<<32, 100))
+
+#define MAX_P_MIN (1 * RED_ONE_PERCENT)
+#define MAX_P_MAX (50 * RED_ONE_PERCENT)
+#define MAX_P_ALPHA(val) min(MAX_P_MIN, val / 4)
+
 #define RED_STAB_SIZE	256
 #define RED_STAB_MASK	(RED_STAB_SIZE - 1)
 
@@ -101,10 +125,14 @@ struct red_stats {
 
 struct red_parms {
 	/* Parameters */
-	u32		qth_min;	/* Min avg length threshold: A scaled */
-	u32		qth_max;	/* Max avg length threshold: A scaled */
+	u32		qth_min;	/* Min avg length threshold: Wlog scaled */
+	u32		qth_max;	/* Max avg length threshold: Wlog scaled */
 	u32		Scell_max;
-	u32		Rmask;		/* Cached random mask, see red_rmask */
+	u32		max_P;		/* probability, [0 .. 1.0] 32 scaled */
+	u32		max_P_reciprocal; /* reciprocal_value(max_P / qth_delta) */
+	u32		qth_delta;	/* max_th - min_th */
+	u32		target_min;	/* min_th + 0.4*(max_th - min_th) */
+	u32		target_max;	/* min_th + 0.6*(max_th - min_th) */
 	u8		Scell_log;
 	u8		Wlog;		/* log(W)		*/
 	u8		Plog;		/* random number bits	*/
@@ -115,19 +143,22 @@ struct red_parms {
 					   number generation */
 	u32		qR;		/* Cached random number */
 
-	unsigned long	qavg;		/* Average queue length: A scaled */
+	unsigned long	qavg;		/* Average queue length: Wlog scaled */
 	ktime_t		qidlestart;	/* Start of current idle period */
 };
 
-static inline u32 red_rmask(u8 Plog)
+static inline u32 red_maxp(u8 Plog)
 {
-	return Plog < 32 ? ((1 << Plog) - 1) : ~0UL;
+	return Plog < 32 ? (~0U >> Plog) : ~0U;
 }
 
+
 static inline void red_set_parms(struct red_parms *p,
 				 u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
 				 u8 Scell_log, u8 *stab)
 {
+	int delta = qth_max - qth_min;
+
 	/* Reset average queue length, the value is strictly bound
 	 * to the parameters below, reseting hurts a bit but leaving
 	 * it might result in an unreasonable qavg for a while. --TGR
@@ -139,14 +170,29 @@ static inline void red_set_parms(struct red_parms *p,
 	p->qth_max	= qth_max << Wlog;
 	p->Wlog		= Wlog;
 	p->Plog		= Plog;
-	p->Rmask	= red_rmask(Plog);
+	if (delta < 0)
+		delta = 1;
+	p->qth_delta	= delta;
+	p->max_P	= red_maxp(Plog);
+	p->max_P	*= delta; /* max_P = (qth_max-qth_min)/2^Plog */
+
+	p->max_P_reciprocal  = reciprocal_value(p->max_P / delta);
+
+	/* RED Adaptative target :
+	 * [min_th + 0.4*(min_th - max_th),
+	 *  min_th + 0.6*(min_th - max_th)].
+	 */
+	delta /= 5;
+	p->target_min = qth_min + 2*delta;
+	p->target_max = qth_min + 3*delta;
+
 	p->Scell_log	= Scell_log;
 	p->Scell_max	= (255 << Scell_log);
 
 	memcpy(p->Stab, stab, sizeof(p->Stab));
 }
 
-static inline int red_is_idling(struct red_parms *p)
+static inline int red_is_idling(const struct red_parms *p)
 {
 	return p->qidlestart.tv64 != 0;
 }
@@ -168,7 +214,7 @@ static inline void red_restart(struct red_parms *p)
 	p->qcount = -1;
 }
 
-static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
+static inline unsigned long red_calc_qavg_from_idle_time(const struct red_parms *p)
 {
 	s64 delta = ktime_us_delta(ktime_get(), p->qidlestart);
 	long us_idle = min_t(s64, delta, p->Scell_max);
@@ -215,7 +261,7 @@ static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
 	}
 }
 
-static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p,
+static inline unsigned long red_calc_qavg_no_idle_time(const struct red_parms *p,
 						       unsigned int backlog)
 {
 	/*
@@ -230,7 +276,7 @@ static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p,
 	return p->qavg + (backlog - (p->qavg >> p->Wlog));
 }
 
-static inline unsigned long red_calc_qavg(struct red_parms *p,
+static inline unsigned long red_calc_qavg(const struct red_parms *p,
 					  unsigned int backlog)
 {
 	if (!red_is_idling(p))
@@ -239,23 +285,24 @@ static inline unsigned long red_calc_qavg(struct red_parms *p,
 		return red_calc_qavg_from_idle_time(p);
 }
 
-static inline u32 red_random(struct red_parms *p)
+
+static inline u32 red_random(const struct red_parms *p)
 {
-	return net_random() & p->Rmask;
+	return reciprocal_divide(net_random(), p->max_P_reciprocal);
 }
 
-static inline int red_mark_probability(struct red_parms *p, unsigned long qavg)
+static inline int red_mark_probability(const struct red_parms *p, unsigned long qavg)
 {
 	/* The formula used below causes questions.
 
-	   OK. qR is random number in the interval 0..Rmask
+	   OK. qR is random number in the interval
+		(0..1/max_P)*(qth_max-qth_min)
 	   i.e. 0..(2^Plog). If we used floating point
 	   arithmetics, it would be: (2^Plog)*rnd_num,
 	   where rnd_num is less 1.
 
 	   Taking into account, that qavg have fixed
-	   point at Wlog, and Plog is related to max_P by
-	   max_P = (qth_max-qth_min)/2^Plog; two lines
+	   point at Wlog, two lines
 	   below have the following floating point equivalent:
 
 	   max_P*(qavg - qth_min)/(qth_max-qth_min) < rnd/qcount
@@ -315,4 +362,24 @@ static inline int red_action(struct red_parms *p, unsigned long qavg)
 	return RED_DONT_MARK;
 }
 
+static inline void red_adaptative_algo(struct red_parms *p)
+{
+	unsigned long qavg;
+	u32 max_p_delta;
+
+	qavg = p->qavg;
+	if (red_is_idling(p))
+		qavg = red_calc_qavg_from_idle_time(p);
+
+	/* p->qavg is fixed point number with point at Wlog */
+	qavg >>= p->Wlog;
+
+	if (qavg > p->target_max && p->max_P <= MAX_P_MAX)
+		p->max_P += MAX_P_ALPHA(p->max_P); /* maxp = maxp + alpha */
+	else if (qavg < p->target_min && p->max_P >= MAX_P_MIN)
+		p->max_P = (p->max_P/10)*9; /* maxp = maxp * Beta */
+
+	max_p_delta = DIV_ROUND_CLOSEST(p->max_P, p->qth_delta);
+	p->max_P_reciprocal = reciprocal_value(max_p_delta);
+}
 #endif
diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c
index 6a3bd48fa2a0..75510e94f7d0 100644
--- a/lib/reciprocal_div.c
+++ b/lib/reciprocal_div.c
@@ -1,5 +1,6 @@
 #include <asm/div64.h>
 #include <linux/reciprocal_div.h>
+#include <linux/export.h>
 
 u32 reciprocal_value(u32 k)
 {
@@ -7,3 +8,4 @@ u32 reciprocal_value(u32 k)
 	do_div(val, k);
 	return (u32)val;
 }
+EXPORT_SYMBOL(reciprocal_value);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index d617161f8dd3..8f5a85bf9d10 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -39,6 +39,7 @@
 struct red_sched_data {
 	u32			limit;		/* HARD maximal queue length */
 	unsigned char		flags;
+	struct timer_list	adapt_timer;
 	struct red_parms	parms;
 	struct red_stats	stats;
 	struct Qdisc		*qdisc;
@@ -161,6 +162,8 @@ static void red_reset(struct Qdisc *sch)
 static void red_destroy(struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
+
+	del_timer_sync(&q->adapt_timer);
 	qdisc_destroy(q->qdisc);
 }
 
@@ -209,6 +212,10 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 				 ctl->Plog, ctl->Scell_log,
 				 nla_data(tb[TCA_RED_STAB]));
 
+	del_timer(&q->adapt_timer);
+	if (ctl->flags & TC_RED_ADAPTATIVE)
+		mod_timer(&q->adapt_timer, jiffies + HZ/2);
+
 	if (!q->qdisc->q.qlen)
 		red_start_of_idle_period(&q->parms);
 
@@ -216,11 +223,24 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	return 0;
 }
 
+static inline void red_adaptative_timer(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc *)arg;
+	struct red_sched_data *q = qdisc_priv(sch);
+	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+
+	spin_lock(root_lock);
+	red_adaptative_algo(&q->parms);
+	mod_timer(&q->adapt_timer, jiffies + HZ/2);
+	spin_unlock(root_lock);
+}
+
 static int red_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 
 	q->qdisc = &noop_qdisc;
+	setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
 	return red_change(sch, opt);
 }
 
@@ -243,6 +263,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (opts == NULL)
 		goto nla_put_failure;
 	NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
+	NLA_PUT_U32(skb, TCA_RED_MAX_P, q->parms.max_P);
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
-- 
cgit v1.2.3


From eda2030a5b60bb818f062adacbcfb6fd2d366fb9 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Thu, 8 Dec 2011 22:58:54 +0900
Subject: sh: extend clock struct with mapped_reg member

Add a "mapped_reg" member to struct clk and use that
to keep the ioremapped register based on enable_reg.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/clk/core.c  | 9 +++++++--
 include/linux/sh_clk.h | 1 +
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/clk/core.c b/drivers/sh/clk/core.c
index db257a35e71a..7715de2629c1 100644
--- a/drivers/sh/clk/core.c
+++ b/drivers/sh/clk/core.c
@@ -355,7 +355,7 @@ static int clk_establish_mapping(struct clk *clk)
 		 */
 		if (!clk->parent) {
 			clk->mapping = &dummy_mapping;
-			return 0;
+			goto out;
 		}
 
 		/*
@@ -384,6 +384,9 @@ static int clk_establish_mapping(struct clk *clk)
 	}
 
 	clk->mapping = mapping;
+out:
+	clk->mapped_reg = clk->mapping->base;
+	clk->mapped_reg += (phys_addr_t)clk->enable_reg - clk->mapping->phys;
 	return 0;
 }
 
@@ -402,10 +405,12 @@ static void clk_teardown_mapping(struct clk *clk)
 
 	/* Nothing to do */
 	if (mapping == &dummy_mapping)
-		return;
+		goto out;
 
 	kref_put(&mapping->ref, clk_destroy_mapping);
 	clk->mapping = NULL;
+out:
+	clk->mapped_reg = NULL;
 }
 
 int clk_register(struct clk *clk)
diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h
index e834304c0b6a..54341d811685 100644
--- a/include/linux/sh_clk.h
+++ b/include/linux/sh_clk.h
@@ -49,6 +49,7 @@ struct clk {
 
 	void __iomem		*enable_reg;
 	unsigned int		enable_bit;
+	void __iomem		*mapped_reg;
 
 	unsigned long		arch_flags;
 	void			*priv;
-- 
cgit v1.2.3


From b0e10211cba1629e2e534ca9cb3d87cfc7e389ea Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Fri, 9 Dec 2011 12:14:27 +0900
Subject: sh: pfc: ioremap() support

Add support for non-entity mapped PFC registers through
the use of struct resource and ioremap()/iounmap().

The PFC main data structure gets updated with a pointer
to a struct resources array that point out all register
windows used by the PFC instance. The register definitions
are kept as physical addresses but the PFC code will do
transparent conversion into virtual addresses whenever
register windows are specified using with struct resource.

To introduce as little performance penalty as possible the
virtual address of each data register is cached in memory.
The virtual address of each configuration register is however
calculated during run time. This because the configuration
is considered slow path so focus is instead put on keeping
memory foot print as small as possible.

The PFC register access  code is in this patch updated from
__raw_readN() / __raw_writeN() into ioreadN() / iowriteN().

This patch is needed to support the PFC block in r8a7779.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/pfc.c       | 137 ++++++++++++++++++++++++++++++++++++++++---------
 include/linux/sh_pfc.h |  11 ++++
 2 files changed, 124 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/pfc.c b/drivers/sh/pfc.c
index e67fe170d8d5..e7d127a9c1c5 100644
--- a/drivers/sh/pfc.c
+++ b/drivers/sh/pfc.c
@@ -19,6 +19,75 @@
 #include <linux/irq.h>
 #include <linux/bitops.h>
 #include <linux/gpio.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+
+static void pfc_iounmap(struct pinmux_info *pip)
+{
+	int k;
+
+	for (k = 0; k < pip->num_resources; k++)
+		if (pip->window[k].virt)
+			iounmap(pip->window[k].virt);
+
+	kfree(pip->window);
+	pip->window = NULL;
+}
+
+static int pfc_ioremap(struct pinmux_info *pip)
+{
+	struct resource *res;
+	int k;
+
+	if (!pip->num_resources)
+		return 0;
+
+	pip->window = kzalloc(pip->num_resources * sizeof(*pip->window),
+			      GFP_NOWAIT);
+	if (!pip->window)
+		goto err1;
+
+	for (k = 0; k < pip->num_resources; k++) {
+		res = pip->resource + k;
+		WARN_ON(resource_type(res) != IORESOURCE_MEM);
+		pip->window[k].phys = res->start;
+		pip->window[k].size = resource_size(res);
+		pip->window[k].virt = ioremap_nocache(res->start,
+							 resource_size(res));
+		if (!pip->window[k].virt)
+			goto err2;
+	}
+
+	return 0;
+
+err2:
+	pfc_iounmap(pip);
+err1:
+	return -1;
+}
+
+static void __iomem *pfc_phys_to_virt(struct pinmux_info *pip,
+				      unsigned long address)
+{
+	struct pfc_window *window;
+	int k;
+
+	/* scan through physical windows and convert address */
+	for (k = 0; k < pip->num_resources; k++) {
+		window = pip->window + k;
+
+		if (address < window->phys)
+			continue;
+
+		if (address >= (window->phys + window->size))
+			continue;
+
+		return window->virt + (address - window->phys);
+	}
+
+	/* no windows defined, register must be 1:1 mapped virt:phys */
+	return (void __iomem *)address;
+}
 
 static int enum_in_range(pinmux_enum_t enum_id, struct pinmux_range *r)
 {
@@ -31,35 +100,35 @@ static int enum_in_range(pinmux_enum_t enum_id, struct pinmux_range *r)
 	return 1;
 }
 
-static unsigned long gpio_read_raw_reg(unsigned long reg,
+static unsigned long gpio_read_raw_reg(void __iomem *mapped_reg,
 				       unsigned long reg_width)
 {
 	switch (reg_width) {
 	case 8:
-		return __raw_readb(reg);
+		return ioread8(mapped_reg);
 	case 16:
-		return __raw_readw(reg);
+		return ioread16(mapped_reg);
 	case 32:
-		return __raw_readl(reg);
+		return ioread32(mapped_reg);
 	}
 
 	BUG();
 	return 0;
 }
 
-static void gpio_write_raw_reg(unsigned long reg,
+static void gpio_write_raw_reg(void __iomem *mapped_reg,
 			       unsigned long reg_width,
 			       unsigned long data)
 {
 	switch (reg_width) {
 	case 8:
-		__raw_writeb(data, reg);
+		iowrite8(data, mapped_reg);
 		return;
 	case 16:
-		__raw_writew(data, reg);
+		iowrite16(data, mapped_reg);
 		return;
 	case 32:
-		__raw_writel(data, reg);
+		iowrite32(data, mapped_reg);
 		return;
 	}
 
@@ -82,11 +151,12 @@ static void gpio_write_bit(struct pinmux_data_reg *dr,
 	else
 		clear_bit(pos, &dr->reg_shadow);
 
-	gpio_write_raw_reg(dr->reg, dr->reg_width, dr->reg_shadow);
+	gpio_write_raw_reg(dr->mapped_reg, dr->reg_width, dr->reg_shadow);
 }
 
-static int gpio_read_reg(unsigned long reg, unsigned long reg_width,
-			 unsigned long field_width, unsigned long in_pos)
+static int gpio_read_reg(void __iomem *mapped_reg, unsigned long reg_width,
+			 unsigned long field_width, unsigned long in_pos,
+			 unsigned long reg)
 {
 	unsigned long data, mask, pos;
 
@@ -98,13 +168,13 @@ static int gpio_read_reg(unsigned long reg, unsigned long reg_width,
 		 "r_width = %ld, f_width = %ld\n",
 		 reg, pos, reg_width, field_width);
 
-	data = gpio_read_raw_reg(reg, reg_width);
+	data = gpio_read_raw_reg(mapped_reg, reg_width);
 	return (data >> pos) & mask;
 }
 
-static void gpio_write_reg(unsigned long reg, unsigned long reg_width,
+static void gpio_write_reg(void __iomem *mapped_reg, unsigned long reg_width,
 			   unsigned long field_width, unsigned long in_pos,
-			   unsigned long value)
+			   unsigned long value, unsigned long reg)
 {
 	unsigned long mask, pos;
 
@@ -120,13 +190,13 @@ static void gpio_write_reg(unsigned long reg, unsigned long reg_width,
 
 	switch (reg_width) {
 	case 8:
-		__raw_writeb((__raw_readb(reg) & mask) | value, reg);
+		iowrite8((ioread8(mapped_reg) & mask) | value, mapped_reg);
 		break;
 	case 16:
-		__raw_writew((__raw_readw(reg) & mask) | value, reg);
+		iowrite16((ioread16(mapped_reg) & mask) | value, mapped_reg);
 		break;
 	case 32:
-		__raw_writel((__raw_readl(reg) & mask) | value, reg);
+		iowrite32((ioread32(mapped_reg) & mask) | value, mapped_reg);
 		break;
 	}
 }
@@ -147,6 +217,8 @@ static int setup_data_reg(struct pinmux_info *gpioc, unsigned gpio)
 		if (!data_reg->reg_width)
 			break;
 
+		data_reg->mapped_reg = pfc_phys_to_virt(gpioc, data_reg->reg);
+
 		for (n = 0; n < data_reg->reg_width; n++) {
 			if (data_reg->enum_ids[n] == gpiop->enum_id) {
 				gpiop->flags &= ~PINMUX_FLAG_DREG;
@@ -179,7 +251,8 @@ static void setup_data_regs(struct pinmux_info *gpioc)
 		if (!drp->reg_width)
 			break;
 
-		drp->reg_shadow = gpio_read_raw_reg(drp->reg, drp->reg_width);
+		drp->reg_shadow = gpio_read_raw_reg(drp->mapped_reg,
+						    drp->reg_width);
 		k++;
 	}
 }
@@ -266,12 +339,16 @@ static void write_config_reg(struct pinmux_info *gpioc,
 			     int index)
 {
 	unsigned long ncomb, pos, value;
+	void __iomem *mapped_reg;
 
 	ncomb = 1 << crp->field_width;
 	pos = index / ncomb;
 	value = index % ncomb;
 
-	gpio_write_reg(crp->reg, crp->reg_width, crp->field_width, pos, value);
+	mapped_reg = pfc_phys_to_virt(gpioc, crp->reg);
+
+	gpio_write_reg(mapped_reg, crp->reg_width, crp->field_width,
+		       pos, value, crp->reg);
 }
 
 static int check_config_reg(struct pinmux_info *gpioc,
@@ -279,13 +356,16 @@ static int check_config_reg(struct pinmux_info *gpioc,
 			    int index)
 {
 	unsigned long ncomb, pos, value;
+	void __iomem *mapped_reg;
 
 	ncomb = 1 << crp->field_width;
 	pos = index / ncomb;
 	value = index % ncomb;
 
-	if (gpio_read_reg(crp->reg, crp->reg_width,
-			  crp->field_width, pos) == value)
+	mapped_reg = pfc_phys_to_virt(gpioc, crp->reg);
+
+	if (gpio_read_reg(mapped_reg, crp->reg_width,
+			  crp->field_width, pos, crp->reg) == value)
 		return 0;
 
 	return -1;
@@ -564,7 +644,7 @@ static int sh_gpio_get_value(struct pinmux_info *gpioc, unsigned gpio)
 	if (!gpioc || get_data_reg(gpioc, gpio, &dr, &bit) != 0)
 		return -EINVAL;
 
-	return gpio_read_reg(dr->reg, dr->reg_width, 1, bit);
+	return gpio_read_reg(dr->mapped_reg, dr->reg_width, 1, bit, dr->reg);
 }
 
 static int sh_gpio_get(struct gpio_chip *chip, unsigned offset)
@@ -606,10 +686,15 @@ static int sh_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
 int register_pinmux(struct pinmux_info *pip)
 {
 	struct gpio_chip *chip = &pip->chip;
+	int ret;
 
 	pr_info("%s handling gpio %d -> %d\n",
 		pip->name, pip->first_gpio, pip->last_gpio);
 
+	ret = pfc_ioremap(pip);
+	if (ret < 0)
+		return ret;
+
 	setup_data_regs(pip);
 
 	chip->request = sh_gpio_request;
@@ -627,12 +712,16 @@ int register_pinmux(struct pinmux_info *pip)
 	chip->base = pip->first_gpio;
 	chip->ngpio = (pip->last_gpio - pip->first_gpio) + 1;
 
-	return gpiochip_add(chip);
+	ret = gpiochip_add(chip);
+	if (ret < 0)
+		pfc_iounmap(pip);
+
+	return ret;
 }
 
 int unregister_pinmux(struct pinmux_info *pip)
 {
 	pr_info("%s deregistering\n", pip->name);
-
+	pfc_iounmap(pip);
 	return gpiochip_remove(&pip->chip);
 }
diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
index 8446789216e5..91666a58529d 100644
--- a/include/linux/sh_pfc.h
+++ b/include/linux/sh_pfc.h
@@ -55,6 +55,7 @@ struct pinmux_cfg_reg {
 struct pinmux_data_reg {
 	unsigned long reg, reg_width, reg_shadow;
 	pinmux_enum_t *enum_ids;
+	void __iomem *mapped_reg;
 };
 
 #define PINMUX_DATA_REG(name, r, r_width) \
@@ -75,6 +76,12 @@ struct pinmux_range {
 	pinmux_enum_t force;
 };
 
+struct pfc_window {
+	phys_addr_t phys;
+	void __iomem *virt;
+	unsigned long size;
+};
+
 struct pinmux_info {
 	char *name;
 	pinmux_enum_t reserved_id;
@@ -98,6 +105,10 @@ struct pinmux_info {
 	struct pinmux_irq *gpio_irq;
 	unsigned int gpio_irq_size;
 
+	struct resource *resource;
+	unsigned int num_resources;
+	struct pfc_window *window;
+
 	struct gpio_chip chip;
 };
 
-- 
cgit v1.2.3


From 83aeeada7c69f35e5100b27ec354335597a7a488 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Thu, 8 Dec 2011 14:33:54 -0800
Subject: vmscan: use atomic-long for shrinker batching

Use atomic-long operations instead of looping around cmpxchg().

[akpm@linux-foundation.org: massage atomic.h inclusions]
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Dave Chinner <david@fromorbit.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h       |  2 +-
 include/linux/mm.h       |  1 +
 include/linux/shrinker.h |  2 +-
 mm/vmscan.c              | 17 +++++++----------
 4 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 019dc558df1a..e0bc4ffb8e7f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -393,8 +393,8 @@ struct inodes_stat_t {
 #include <linux/semaphore.h>
 #include <linux/fiemap.h>
 #include <linux/rculist_bl.h>
-#include <linux/shrinker.h>
 #include <linux/atomic.h>
+#include <linux/shrinker.h>
 
 #include <asm/byteorder.h>
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3dc3a8c2c485..4baadd18f4ad 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -10,6 +10,7 @@
 #include <linux/mmzone.h>
 #include <linux/rbtree.h>
 #include <linux/prio_tree.h>
+#include <linux/atomic.h>
 #include <linux/debug_locks.h>
 #include <linux/mm_types.h>
 #include <linux/range.h>
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index a83833a1f7a2..07ceb97d53fa 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -35,7 +35,7 @@ struct shrinker {
 
 	/* These are for internal use */
 	struct list_head list;
-	long nr;	/* objs pending delete */
+	atomic_long_t nr_in_batch; /* objs pending delete */
 };
 #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
 extern void register_shrinker(struct shrinker *);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f5255442ae2b..f54a05b7a61d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -183,7 +183,7 @@ static unsigned long zone_nr_lru_pages(struct zone *zone,
  */
 void register_shrinker(struct shrinker *shrinker)
 {
-	shrinker->nr = 0;
+	atomic_long_set(&shrinker->nr_in_batch, 0);
 	down_write(&shrinker_rwsem);
 	list_add_tail(&shrinker->list, &shrinker_list);
 	up_write(&shrinker_rwsem);
@@ -264,9 +264,7 @@ unsigned long shrink_slab(struct shrink_control *shrink,
 		 * and zero it so that other concurrent shrinker invocations
 		 * don't also do this scanning work.
 		 */
-		do {
-			nr = shrinker->nr;
-		} while (cmpxchg(&shrinker->nr, nr, 0) != nr);
+		nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
 
 		total_scan = nr;
 		delta = (4 * nr_pages_scanned) / shrinker->seeks;
@@ -328,12 +326,11 @@ unsigned long shrink_slab(struct shrink_control *shrink,
 		 * manner that handles concurrent updates. If we exhausted the
 		 * scan, there is no need to do an update.
 		 */
-		do {
-			nr = shrinker->nr;
-			new_nr = total_scan + nr;
-			if (total_scan <= 0)
-				break;
-		} while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
+		if (total_scan > 0)
+			new_nr = atomic_long_add_return(total_scan,
+					&shrinker->nr_in_batch);
+		else
+			new_nr = atomic_long_read(&shrinker->nr_in_batch);
 
 		trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
 	}
-- 
cgit v1.2.3


From 25a0bc2dfc2ea732f40af2dae52426ead66ae76e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 7 Dec 2011 11:24:20 -0800
Subject: power_supply: add SCOPE attribute to power supplies

This adds a "scope" attribute to a power_supply, which indicates how
much of the system it powers.  It appears in sysfs as "scope" or in
the uevent file as POWER_SUPPLY_SCOPE=.  There are presently three
possible values:
	Unknown - unknown power topology
	System - the power supply powers the whole system
	Device - it powers a specific device, or tree of devices

A power supply which doesn't have a "scope" attribute should be assumed to
have "System" scope.

In general, usermode should assume that loss of all System-scoped power
supplies will power off the whole system, but any single one is sufficient
to power the system.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Richard Hughes <richard@hughsie.com>
---
 drivers/power/power_supply_sysfs.c | 6 ++++++
 include/linux/power_supply.h       | 7 +++++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index e15d4c9d3988..21178ebfe51a 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -63,6 +63,9 @@ static ssize_t power_supply_show_property(struct device *dev,
 	static char *capacity_level_text[] = {
 		"Unknown", "Critical", "Low", "Normal", "High", "Full"
 	};
+	static char *scope_text[] = {
+		"Unknown", "System", "Device"
+	};
 	ssize_t ret = 0;
 	struct power_supply *psy = dev_get_drvdata(dev);
 	const ptrdiff_t off = attr - power_supply_attrs;
@@ -95,6 +98,8 @@ static ssize_t power_supply_show_property(struct device *dev,
 		return sprintf(buf, "%s\n", capacity_level_text[value.intval]);
 	else if (off == POWER_SUPPLY_PROP_TYPE)
 		return sprintf(buf, "%s\n", type_text[value.intval]);
+	else if (off == POWER_SUPPLY_PROP_SCOPE)
+		return sprintf(buf, "%s\n", scope_text[value.intval]);
 	else if (off >= POWER_SUPPLY_PROP_MODEL_NAME)
 		return sprintf(buf, "%s\n", value.strval);
 
@@ -167,6 +172,7 @@ static struct device_attribute power_supply_attrs[] = {
 	POWER_SUPPLY_ATTR(time_to_full_now),
 	POWER_SUPPLY_ATTR(time_to_full_avg),
 	POWER_SUPPLY_ATTR(type),
+	POWER_SUPPLY_ATTR(scope),
 	/* Properties of type `const char *' */
 	POWER_SUPPLY_ATTR(model_name),
 	POWER_SUPPLY_ATTR(manufacturer),
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 204c18dfdc9e..040a7b08e7c7 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -74,6 +74,12 @@ enum {
 	POWER_SUPPLY_CAPACITY_LEVEL_FULL,
 };
 
+enum {
+	POWER_SUPPLY_SCOPE_UNKNOWN = 0,
+	POWER_SUPPLY_SCOPE_SYSTEM,
+	POWER_SUPPLY_SCOPE_DEVICE,
+};
+
 enum power_supply_property {
 	/* Properties of type `int' */
 	POWER_SUPPLY_PROP_STATUS = 0,
@@ -116,6 +122,7 @@ enum power_supply_property {
 	POWER_SUPPLY_PROP_TIME_TO_FULL_NOW,
 	POWER_SUPPLY_PROP_TIME_TO_FULL_AVG,
 	POWER_SUPPLY_PROP_TYPE, /* use power_supply.type instead */
+	POWER_SUPPLY_PROP_SCOPE,
 	/* Properties of type `const char *' */
 	POWER_SUPPLY_PROP_MODEL_NAME,
 	POWER_SUPPLY_PROP_MANUFACTURER,
-- 
cgit v1.2.3


From 8351665195cec6d2b73cce8b66f02d6dde246a8e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 7 Dec 2011 09:15:45 -0800
Subject: power_supply: allow a power supply to explicitly point to powered
 device

If a power supply has a scope of "Device", then allow the power supply
to indicate what device it actually powers. This is represented in the
power supply's sysfs directory as a symlink named "powers", which points to
the sysfs directory of the powered device.

If the device has children, then the sub-devices are also powered by
the same power supply.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Richard Hughes <richard@hughsie.com>
---
 drivers/power/power_supply_core.c | 7 +++++++
 include/linux/power_supply.h      | 1 +
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 329b46b2327d..b10c121244e5 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -147,6 +147,12 @@ struct power_supply *power_supply_get_by_name(char *name)
 }
 EXPORT_SYMBOL_GPL(power_supply_get_by_name);
 
+int power_supply_powers(struct power_supply *psy, struct device *dev)
+{
+	return sysfs_create_link_nowarn(&psy->dev->kobj, &dev->kobj, "powers");
+}
+EXPORT_SYMBOL_GPL(power_supply_powers);
+
 static void power_supply_dev_release(struct device *dev)
 {
 	pr_debug("device: '%s': %s\n", dev_name(dev), __func__);
@@ -202,6 +208,7 @@ EXPORT_SYMBOL_GPL(power_supply_register);
 void power_supply_unregister(struct power_supply *psy)
 {
 	cancel_work_sync(&psy->changed_work);
+	sysfs_remove_link(&psy->dev->kobj, "powers");
 	power_supply_remove_triggers(psy);
 	device_unregister(psy->dev);
 }
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 040a7b08e7c7..2e3c8279b3b0 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -218,6 +218,7 @@ static inline int power_supply_is_system_supplied(void) { return -ENOSYS; }
 extern int power_supply_register(struct device *parent,
 				 struct power_supply *psy);
 extern void power_supply_unregister(struct power_supply *psy);
+extern int power_supply_powers(struct power_supply *psy, struct device *dev);
 
 /* For APM emulation, think legacy userspace. */
 extern struct class *power_supply_class;
-- 
cgit v1.2.3


From a73ed26bbae7327370c5bd298f07de78df9e3466 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 9 Dec 2011 02:46:45 +0000
Subject: sch_red: generalize accurate MAX_P support to RED/GRED/CHOKE

Now RED uses a Q0.32 number to store max_p (max probability), allow
RED/GRED/CHOKE to use/report full resolution at config/dump time.

Old tc binaries are non aware of new attributes, and still set/get Plog.

New tc binary set/get both Plog and max_p for backward compatibility,
they display "probability value" if they get max_p from new kernels.

# tc -d  qdisc show dev ...
...
qdisc red 10: parent 1:1 limit 360Kb min 30Kb max 90Kb ecn ewma 5
probability 0.09 Scell_log 15

Make sure we avoid potential divides by 0 in reciprocal_value(), if
(max_th - min_th) is big.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h |  2 ++
 include/net/red.h         | 16 +++++++++++-----
 net/sched/sch_choke.c     |  8 +++++++-
 net/sched/sch_gred.c      | 22 ++++++++++++++++++----
 net/sched/sch_red.c       |  9 +++++++--
 5 files changed, 45 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index e41e0d4de24b..8786ea741f52 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -216,6 +216,7 @@ enum {
        TCA_GRED_PARMS,
        TCA_GRED_STAB,
        TCA_GRED_DPS,
+       TCA_GRED_MAX_P,
 	   __TCA_GRED_MAX,
 };
 
@@ -255,6 +256,7 @@ enum {
 	TCA_CHOKE_UNSPEC,
 	TCA_CHOKE_PARMS,
 	TCA_CHOKE_STAB,
+	TCA_CHOKE_MAX_P,
 	__TCA_CHOKE_MAX,
 };
 
diff --git a/include/net/red.h b/include/net/red.h
index 24606b22d01e..ef715a16cce4 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -155,9 +155,10 @@ static inline u32 red_maxp(u8 Plog)
 
 static inline void red_set_parms(struct red_parms *p,
 				 u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
-				 u8 Scell_log, u8 *stab)
+				 u8 Scell_log, u8 *stab, u32 max_P)
 {
 	int delta = qth_max - qth_min;
+	u32 max_p_delta;
 
 	/* Reset average queue length, the value is strictly bound
 	 * to the parameters below, reseting hurts a bit but leaving
@@ -173,10 +174,14 @@ static inline void red_set_parms(struct red_parms *p,
 	if (delta < 0)
 		delta = 1;
 	p->qth_delta	= delta;
-	p->max_P	= red_maxp(Plog);
-	p->max_P	*= delta; /* max_P = (qth_max-qth_min)/2^Plog */
-
-	p->max_P_reciprocal  = reciprocal_value(p->max_P / delta);
+	if (!max_P) {
+		max_P = red_maxp(Plog);
+		max_P *= delta; /* max_P = (qth_max - qth_min)/2^Plog */
+	}
+	p->max_P = max_P;
+	max_p_delta = max_P / delta;
+	max_p_delta = max(max_p_delta, 1U);
+	p->max_P_reciprocal  = reciprocal_value(max_p_delta);
 
 	/* RED Adaptative target :
 	 * [min_th + 0.4*(min_th - max_th),
@@ -380,6 +385,7 @@ static inline void red_adaptative_algo(struct red_parms *p)
 		p->max_P = (p->max_P/10)*9; /* maxp = maxp * Beta */
 
 	max_p_delta = DIV_ROUND_CLOSEST(p->max_P, p->qth_delta);
+	max_p_delta = max(max_p_delta, 1U);
 	p->max_P_reciprocal = reciprocal_value(max_p_delta);
 }
 #endif
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 205d369a217c..bef00acb8bd2 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -394,6 +394,7 @@ static void choke_reset(struct Qdisc *sch)
 static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
 	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
 	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
+	[TCA_CHOKE_MAX_P]	= { .type = NLA_U32 },
 };
 
 
@@ -415,6 +416,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 	int err;
 	struct sk_buff **old = NULL;
 	unsigned int mask;
+	u32 max_P;
 
 	if (opt == NULL)
 		return -EINVAL;
@@ -427,6 +429,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 	    tb[TCA_CHOKE_STAB] == NULL)
 		return -EINVAL;
 
+	max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
+
 	ctl = nla_data(tb[TCA_CHOKE_PARMS]);
 
 	if (ctl->limit > CHOKE_MAX_QUEUE)
@@ -476,7 +480,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 
 	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
 		      ctl->Plog, ctl->Scell_log,
-		      nla_data(tb[TCA_CHOKE_STAB]));
+		      nla_data(tb[TCA_CHOKE_STAB]),
+		      max_P);
 
 	if (q->head == q->tail)
 		red_end_of_idle_period(&q->parms);
@@ -510,6 +515,7 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
 		goto nla_put_failure;
 
 	NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
+	NLA_PUT_U32(skb, TCA_CHOKE_MAX_P, q->parms.max_P);
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index b9493a09a870..a1b7407ac2a4 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -34,7 +34,7 @@ struct gred_sched;
 
 struct gred_sched_data {
 	u32		limit;		/* HARD maximal queue length	*/
-	u32      	DP;		/* the drop pramaters */
+	u32		DP;		/* the drop parameters */
 	u32		bytesin;	/* bytes seen on virtualQ so far*/
 	u32		packetsin;	/* packets seen on virtualQ so far*/
 	u32		backlog;	/* bytes on the virtualQ */
@@ -379,7 +379,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
 }
 
 static inline int gred_change_vq(struct Qdisc *sch, int dp,
-				 struct tc_gred_qopt *ctl, int prio, u8 *stab)
+				 struct tc_gred_qopt *ctl, int prio,
+				 u8 *stab, u32 max_P)
 {
 	struct gred_sched *table = qdisc_priv(sch);
 	struct gred_sched_data *q;
@@ -400,7 +401,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
 
 	red_set_parms(&q->parms,
 		      ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
-		      ctl->Scell_log, stab);
+		      ctl->Scell_log, stab, max_P);
 
 	return 0;
 }
@@ -409,6 +410,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
 	[TCA_GRED_PARMS]	= { .len = sizeof(struct tc_gred_qopt) },
 	[TCA_GRED_STAB]		= { .len = 256 },
 	[TCA_GRED_DPS]		= { .len = sizeof(struct tc_gred_sopt) },
+	[TCA_GRED_MAX_P]	= { .type = NLA_U32 },
 };
 
 static int gred_change(struct Qdisc *sch, struct nlattr *opt)
@@ -418,6 +420,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 	struct nlattr *tb[TCA_GRED_MAX + 1];
 	int err, prio = GRED_DEF_PRIO;
 	u8 *stab;
+	u32 max_P;
 
 	if (opt == NULL)
 		return -EINVAL;
@@ -433,6 +436,8 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 	    tb[TCA_GRED_STAB] == NULL)
 		return -EINVAL;
 
+	max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
+
 	err = -EINVAL;
 	ctl = nla_data(tb[TCA_GRED_PARMS]);
 	stab = nla_data(tb[TCA_GRED_STAB]);
@@ -457,7 +462,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 
 	sch_tree_lock(sch);
 
-	err = gred_change_vq(sch, ctl->DP, ctl, prio, stab);
+	err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P);
 	if (err < 0)
 		goto errout_locked;
 
@@ -498,6 +503,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct gred_sched *table = qdisc_priv(sch);
 	struct nlattr *parms, *opts = NULL;
 	int i;
+	u32 max_p[MAX_DPs];
 	struct tc_gred_sopt sopt = {
 		.DPs	= table->DPs,
 		.def_DP	= table->def,
@@ -509,6 +515,14 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (opts == NULL)
 		goto nla_put_failure;
 	NLA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
+
+	for (i = 0; i < MAX_DPs; i++) {
+		struct gred_sched_data *q = table->tab[i];
+
+		max_p[i] = q ? q->parms.max_P : 0;
+	}
+	NLA_PUT(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p);
+
 	parms = nla_nest_start(skb, TCA_GRED_PARMS);
 	if (parms == NULL)
 		goto nla_put_failure;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 8f5a85bf9d10..ce2256a17d7e 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -170,6 +170,7 @@ static void red_destroy(struct Qdisc *sch)
 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
+	[TCA_RED_MAX_P] = { .type = NLA_U32 },
 };
 
 static int red_change(struct Qdisc *sch, struct nlattr *opt)
@@ -179,6 +180,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	struct tc_red_qopt *ctl;
 	struct Qdisc *child = NULL;
 	int err;
+	u32 max_P;
 
 	if (opt == NULL)
 		return -EINVAL;
@@ -191,6 +193,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	    tb[TCA_RED_STAB] == NULL)
 		return -EINVAL;
 
+	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
+
 	ctl = nla_data(tb[TCA_RED_PARMS]);
 
 	if (ctl->limit > 0) {
@@ -209,8 +213,9 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	}
 
 	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
-				 ctl->Plog, ctl->Scell_log,
-				 nla_data(tb[TCA_RED_STAB]));
+		      ctl->Plog, ctl->Scell_log,
+		      nla_data(tb[TCA_RED_STAB]),
+		      max_P);
 
 	del_timer(&q->adapt_timer);
 	if (ctl->flags & TC_RED_ADAPTATIVE)
-- 
cgit v1.2.3


From 7b35eadd7eee2e0b42421ce3efbc30f1c3c745e5 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 9 Dec 2011 06:21:16 +0000
Subject: inet_diag: Remove indirect sizeof from inet diag handlers

There's an info_size value stored on inet_diag_handler, but for existing
code this value is effectively constant, so just use sizeof(struct tcp_info)
where required.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 1 -
 net/dccp/diag.c           | 1 -
 net/ipv4/inet_diag.c      | 5 ++---
 net/ipv4/tcp_diag.c       | 1 -
 4 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index defe8ff36df8..851feff0747f 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -141,7 +141,6 @@ struct inet_diag_handler {
 	void			(*idiag_get_info)(struct sock *sk,
 						  struct inet_diag_msg *r,
 						  void *info);
-	__u16                   idiag_info_size;
 	__u16                   idiag_type;
 };
 
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 424dcd8415d7..9343f52db284 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -52,7 +52,6 @@ static const struct inet_diag_handler dccp_diag_handler = {
 	.idiag_hashinfo	 = &dccp_hashinfo,
 	.idiag_get_info	 = dccp_diag_get_info,
 	.idiag_type	 = IPPROTO_DCCP,
-	.idiag_info_size = sizeof(struct tcp_info),
 };
 
 static int __init dccp_diag_init(void)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index b56b7ba8beeb..a247f85571c4 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -98,8 +98,7 @@ static int inet_csk_diag_fill(struct sock *sk,
 		minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo));
 
 	if (ext & (1 << (INET_DIAG_INFO - 1)))
-		info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
-				     handler->idiag_info_size);
+		info = INET_DIAG_PUT(skb, INET_DIAG_INFO, sizeof(struct tcp_info));
 
 	if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
 		const size_t len = strlen(icsk->icsk_ca_ops->name);
@@ -299,7 +298,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 	err = -ENOMEM;
 	rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) +
 				     sizeof(struct inet_diag_meminfo) +
-				     handler->idiag_info_size + 64)),
+				     sizeof(struct tcp_info) + 64)),
 			GFP_KERNEL);
 	if (!rep)
 		goto out;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 981497795d49..42e6bec7bd3e 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -38,7 +38,6 @@ static const struct inet_diag_handler tcp_diag_handler = {
 	.idiag_hashinfo	 = &tcp_hashinfo,
 	.idiag_get_info	 = tcp_diag_get_info,
 	.idiag_type	 = IPPROTO_TCP,
-	.idiag_info_size = sizeof(struct tcp_info),
 };
 
 static int __init tcp_diag_init(void)
-- 
cgit v1.2.3


From b005ab4ef8805dc4604848c9d2ccca9d71f8fc46 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 9 Dec 2011 06:21:53 +0000
Subject: inet_diag: Export inet diag cookie checking routine

The netlink diag susbsys stores sk address bits in the nl message
as a "cookie" and uses one when dumps details about particular
socket.

The same will be required for udp diag module, so introduce a heler
in inet_diag module

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h |  2 ++
 net/ipv4/inet_diag.c      | 19 ++++++++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 851feff0747f..503674738368 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -144,6 +144,8 @@ struct inet_diag_handler {
 	__u16                   idiag_type;
 };
 
+int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req);
+
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
 extern void inet_diag_unregister(const struct inet_diag_handler *handler);
 #endif /* __KERNEL__ */
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index bd3f661803a7..ba3ae1f73abf 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -246,6 +246,18 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 	return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh);
 }
 
+int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req)
+{
+	if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE ||
+	     req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) &&
+	    ((u32)(unsigned long)sk != req->id.idiag_cookie[0] ||
+	     (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1]))
+		return -ESTALE;
+	else
+		return 0;
+}
+EXPORT_SYMBOL_GPL(inet_diag_check_cookie);
+
 static int inet_diag_get_exact(struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh,
 			       struct inet_diag_req *req)
@@ -288,11 +300,8 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 	if (sk == NULL)
 		goto unlock;
 
-	err = -ESTALE;
-	if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE ||
-	     req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) &&
-	    ((u32)(unsigned long)sk != req->id.idiag_cookie[0] ||
-	     (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1]))
+	err = inet_diag_check_cookie(sk, req);
+	if (err)
 		goto out;
 
 	err = -ENOMEM;
-- 
cgit v1.2.3


From 8d07d1518a074a08b90be02eee5ee15e60ac9779 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 9 Dec 2011 06:22:44 +0000
Subject: inet_diag: Introduce the byte-code run on an inet socket

The upcoming UDP module will require exactly this ability, so just
move the existing code to provide one.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h |  2 ++
 net/ipv4/inet_diag.c      | 55 ++++++++++++++++++++++++++---------------------
 2 files changed, 33 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 503674738368..907c899bd41b 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -135,6 +135,7 @@ struct tcpvegas_info {
 #ifdef __KERNEL__
 struct sock;
 struct inet_hashinfo;
+struct nlattr;
 
 struct inet_diag_handler {
 	struct inet_hashinfo    *idiag_hashinfo;
@@ -144,6 +145,7 @@ struct inet_diag_handler {
 	__u16                   idiag_type;
 };
 
+int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f50df2ed9af5..08e54989b041 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -449,6 +449,35 @@ static int inet_diag_bc_run(const struct nlattr *_bc,
 	return len == 0;
 }
 
+int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
+{
+	struct inet_diag_entry entry;
+	struct inet_sock *inet = inet_sk(sk);
+
+	if (bc == NULL)
+		return 1;
+
+	entry.family = sk->sk_family;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	if (entry.family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		entry.saddr = np->rcv_saddr.s6_addr32;
+		entry.daddr = np->daddr.s6_addr32;
+	} else
+#endif
+	{
+		entry.saddr = &inet->inet_rcv_saddr;
+		entry.daddr = &inet->inet_daddr;
+	}
+	entry.sport = inet->inet_num;
+	entry.dport = ntohs(inet->inet_dport);
+	entry.userlocks = sk->sk_userlocks;
+
+	return inet_diag_bc_run(bc, &entry);
+}
+EXPORT_SYMBOL_GPL(inet_diag_bc_sk);
+
 static int valid_cc(const void *bc, int len, int cc)
 {
 	while (len >= 0) {
@@ -509,30 +538,8 @@ static int inet_csk_diag_dump(struct sock *sk,
 			      struct inet_diag_req *r,
 			      const struct nlattr *bc)
 {
-	if (bc != NULL) {
-		struct inet_diag_entry entry;
-		struct inet_sock *inet = inet_sk(sk);
-
-		entry.family = sk->sk_family;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-		if (entry.family == AF_INET6) {
-			struct ipv6_pinfo *np = inet6_sk(sk);
-
-			entry.saddr = np->rcv_saddr.s6_addr32;
-			entry.daddr = np->daddr.s6_addr32;
-		} else
-#endif
-		{
-			entry.saddr = &inet->inet_rcv_saddr;
-			entry.daddr = &inet->inet_daddr;
-		}
-		entry.sport = inet->inet_num;
-		entry.dport = ntohs(inet->inet_dport);
-		entry.userlocks = sk->sk_userlocks;
-
-		if (!inet_diag_bc_run(bc, &entry))
-			return 0;
-	}
+	if (!inet_diag_bc_sk(bc, sk))
+		return 0;
 
 	return inet_csk_diag_fill(sk, skb, r,
 				  NETLINK_CB(cb->skb).pid,
-- 
cgit v1.2.3


From 3c4d05c8056724aff3abc20650807dd828fded54 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 9 Dec 2011 06:23:00 +0000
Subject: inet_diag: Introduce the inet socket dumping routine

The existing inet_csk_diag_fill dumps the inet connection sock info
into the netlink inet_diag_message. Prepare this routine to be able
to dump only the inet_sock part of a socket if the icsk part is missing.

This will be used by UDP diag module when dumping UDP sockets.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h |  7 +++++++
 net/ipv4/inet_diag.c      | 53 ++++++++++++++++++++++++++++++-----------------
 2 files changed, 41 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 907c899bd41b..eaf5865c9e8a 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -136,6 +136,8 @@ struct tcpvegas_info {
 struct sock;
 struct inet_hashinfo;
 struct nlattr;
+struct nlmsghdr;
+struct sk_buff;
 
 struct inet_diag_handler {
 	struct inet_hashinfo    *idiag_hashinfo;
@@ -145,6 +147,11 @@ struct inet_diag_handler {
 	__u16                   idiag_type;
 };
 
+struct inet_connection_sock;
+int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
+			      struct sk_buff *skb, struct inet_diag_req *req,
+			      u32 pid, u32 seq, u16 nlmsg_flags,
+			      const struct nlmsghdr *unlh);
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req);
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 08e54989b041..dc8611e3e66f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -70,13 +70,12 @@ static inline void inet_diag_unlock_handler(
 	mutex_unlock(&inet_diag_table_mutex);
 }
 
-static int inet_csk_diag_fill(struct sock *sk,
+int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 			      struct sk_buff *skb, struct inet_diag_req *req,
 			      u32 pid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh)
 {
 	const struct inet_sock *inet = inet_sk(sk);
-	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_diag_msg *r;
 	struct nlmsghdr  *nlh;
 	void *info = NULL;
@@ -97,16 +96,6 @@ static int inet_csk_diag_fill(struct sock *sk,
 	if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
 		minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo));
 
-	if (ext & (1 << (INET_DIAG_INFO - 1)))
-		info = INET_DIAG_PUT(skb, INET_DIAG_INFO, sizeof(struct tcp_info));
-
-	if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
-		const size_t len = strlen(icsk->icsk_ca_ops->name);
-
-		strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
-		       icsk->icsk_ca_ops->name);
-	}
-
 	r->idiag_family = sk->sk_family;
 	r->idiag_state = sk->sk_state;
 	r->idiag_timer = 0;
@@ -138,6 +127,21 @@ static int inet_csk_diag_fill(struct sock *sk,
 	}
 #endif
 
+	r->idiag_uid = sock_i_uid(sk);
+	r->idiag_inode = sock_i_ino(sk);
+
+	if (minfo) {
+		minfo->idiag_rmem = sk_rmem_alloc_get(sk);
+		minfo->idiag_wmem = sk->sk_wmem_queued;
+		minfo->idiag_fmem = sk->sk_forward_alloc;
+		minfo->idiag_tmem = sk_wmem_alloc_get(sk);
+	}
+
+	if (icsk == NULL) {
+		r->idiag_rqueue = r->idiag_wqueue = 0;
+		goto out;
+	}
+
 #define EXPIRES_IN_MS(tmo)  DIV_ROUND_UP((tmo - jiffies) * 1000, HZ)
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
@@ -158,14 +162,14 @@ static int inet_csk_diag_fill(struct sock *sk,
 	}
 #undef EXPIRES_IN_MS
 
-	r->idiag_uid = sock_i_uid(sk);
-	r->idiag_inode = sock_i_ino(sk);
+	if (ext & (1 << (INET_DIAG_INFO - 1)))
+		info = INET_DIAG_PUT(skb, INET_DIAG_INFO, sizeof(struct tcp_info));
 
-	if (minfo) {
-		minfo->idiag_rmem = sk_rmem_alloc_get(sk);
-		minfo->idiag_wmem = sk->sk_wmem_queued;
-		minfo->idiag_fmem = sk->sk_forward_alloc;
-		minfo->idiag_tmem = sk_wmem_alloc_get(sk);
+	if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
+		const size_t len = strlen(icsk->icsk_ca_ops->name);
+
+		strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
+		       icsk->icsk_ca_ops->name);
 	}
 
 	handler->idiag_get_info(sk, r, info);
@@ -174,6 +178,7 @@ static int inet_csk_diag_fill(struct sock *sk,
 	    icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
 		icsk->icsk_ca_ops->get_info(sk, ext, skb);
 
+out:
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
@@ -182,6 +187,16 @@ nlmsg_failure:
 	nlmsg_trim(skb, b);
 	return -EMSGSIZE;
 }
+EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
+
+static int inet_csk_diag_fill(struct sock *sk,
+			      struct sk_buff *skb, struct inet_diag_req *req,
+			      u32 pid, u32 seq, u16 nlmsg_flags,
+			      const struct nlmsghdr *unlh)
+{
+	return inet_sk_diag_fill(sk, inet_csk(sk),
+			skb, req, pid, seq, nlmsg_flags, unlh);
+}
 
 static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 			       struct sk_buff *skb, struct inet_diag_req *req,
-- 
cgit v1.2.3


From 1942c518ca017f376b267a7c5e78c15d37202442 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 9 Dec 2011 06:23:18 +0000
Subject: inet_diag: Generalize inet_diag dump and get_exact calls

Introduce two callbacks in inet_diag_handler -- one for dumping all
sockets (with filters) and the other one for dumping a single sk.

Replace direct calls to icsk handlers with indirect calls to callbacks
provided by handlers.

Make existing TCP and DCCP handlers use provided helpers for icsk-s.

The UDP diag module will provide its own.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 18 +++++++++++++++++-
 net/dccp/diag.c           | 15 ++++++++++++++-
 net/ipv4/inet_diag.c      | 11 ++++++-----
 net/ipv4/tcp_diag.c       | 15 ++++++++++++++-
 4 files changed, 51 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index eaf5865c9e8a..78972a149dff 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -138,9 +138,18 @@ struct inet_hashinfo;
 struct nlattr;
 struct nlmsghdr;
 struct sk_buff;
+struct netlink_callback;
 
 struct inet_diag_handler {
-	struct inet_hashinfo    *idiag_hashinfo;
+	void			(*dump)(struct sk_buff *skb,
+					struct netlink_callback *cb,
+					struct inet_diag_req *r,
+					struct nlattr *bc);
+
+	int			(*dump_one)(struct sk_buff *in_skb,
+					const struct nlmsghdr *nlh,
+					struct inet_diag_req *req);
+
 	void			(*idiag_get_info)(struct sock *sk,
 						  struct inet_diag_msg *r,
 						  void *info);
@@ -152,6 +161,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 			      struct sk_buff *skb, struct inet_diag_req *req,
 			      u32 pid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh);
+void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
+		struct netlink_callback *cb, struct inet_diag_req *r,
+		struct nlattr *bc);
+int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
+		struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+		struct inet_diag_req *req);
+
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req);
 
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 9343f52db284..e29214d193d6 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -48,8 +48,21 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 		dccp_get_info(sk, _info);
 }
 
+static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+		struct inet_diag_req *r, struct nlattr *bc)
+{
+	inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc);
+}
+
+static int dccp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+		struct inet_diag_req *req)
+{
+	return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req);
+}
+
 static const struct inet_diag_handler dccp_diag_handler = {
-	.idiag_hashinfo	 = &dccp_hashinfo,
+	.dump		 = dccp_diag_dump,
+	.dump_one	 = dccp_diag_dump_one,
 	.idiag_get_info	 = dccp_diag_get_info,
 	.idiag_type	 = IPPROTO_DCCP,
 };
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index dc8611e3e66f..9b3e0b179cd2 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -273,7 +273,7 @@ int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req)
 }
 EXPORT_SYMBOL_GPL(inet_diag_check_cookie);
 
-static int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
+int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
 		const struct nlmsghdr *nlh, struct inet_diag_req *req)
 {
 	int err;
@@ -339,6 +339,7 @@ out:
 out_nosk:
 	return err;
 }
+EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
 
 static int inet_diag_get_exact(struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh,
@@ -351,8 +352,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 	if (IS_ERR(handler))
 		err = PTR_ERR(handler);
 	else
-		err = inet_diag_dump_one_icsk(handler->idiag_hashinfo,
-				in_skb, nlh, req);
+		err = handler->dump_one(in_skb, nlh, req);
 	inet_diag_unlock_handler(handler);
 
 	return err;
@@ -731,7 +731,7 @@ out:
 	return err;
 }
 
-static void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
+void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
 		struct netlink_callback *cb, struct inet_diag_req *r, struct nlattr *bc)
 {
 	int i, num;
@@ -880,6 +880,7 @@ done:
 out:
 	;
 }
+EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
 
 static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		struct inet_diag_req *r, struct nlattr *bc)
@@ -888,7 +889,7 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 
 	handler = inet_diag_lock_handler(r->sdiag_protocol);
 	if (!IS_ERR(handler))
-		inet_diag_dump_icsk(handler->idiag_hashinfo, skb, cb, r, bc);
+		handler->dump(skb, cb, r, bc);
 	inet_diag_unlock_handler(handler);
 
 	return skb->len;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 42e6bec7bd3e..6334b1f71f2d 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -34,8 +34,21 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 		tcp_get_info(sk, info);
 }
 
+static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+		struct inet_diag_req *r, struct nlattr *bc)
+{
+	inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc);
+}
+
+static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+		struct inet_diag_req *req)
+{
+	return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
+}
+
 static const struct inet_diag_handler tcp_diag_handler = {
-	.idiag_hashinfo	 = &tcp_hashinfo,
+	.dump		 = tcp_diag_dump,
+	.dump_one	 = tcp_diag_dump_one,
 	.idiag_get_info	 = tcp_diag_get_info,
 	.idiag_type	 = IPPROTO_TCP,
 };
-- 
cgit v1.2.3


From 925b44a273aa8c4c23c006c1228aacd538eead09 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 8 Dec 2011 23:27:28 +0100
Subject: PM / Domains: Provide an always on power domain governor

Since systems are likely to have power domains that can't be turned off
for various reasons at least temporarily while implementing power domain
support provide a default governor which will always refuse to power off
the domain, saving platforms having to implement their own.

Since the code is so tiny don't bother with a Kconfig symbol for it.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/domain_governor.c | 13 +++++++++++++
 include/linux/pm_domain.h            |  2 ++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index da78540e9b40..51527ee92d10 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -141,3 +141,16 @@ struct dev_power_governor simple_qos_governor = {
 	.stop_ok = default_stop_ok,
 	.power_down_ok = default_power_down_ok,
 };
+
+static bool always_on_power_down_ok(struct dev_pm_domain *domain)
+{
+	return false;
+}
+
+/**
+ * pm_genpd_gov_always_on - A governor implementing an always-on policy
+ */
+struct dev_power_governor pm_domain_always_on_gov = {
+	.power_down_ok = always_on_power_down_ok,
+	.stop_ok = default_stop_ok,
+};
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index fb809b904891..a03a0ad998b8 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -140,6 +140,7 @@ extern int pm_genpd_poweron(struct generic_pm_domain *genpd);
 
 extern bool default_stop_ok(struct device *dev);
 
+extern struct dev_power_governor pm_domain_always_on_gov;
 #else
 
 static inline struct generic_pm_domain *dev_to_genpd(struct device *dev)
@@ -193,6 +194,7 @@ static inline bool default_stop_ok(struct device *dev)
 {
 	return false;
 }
+#define pm_domain_always_on_gov NULL
 #endif
 
 static inline int pm_genpd_remove_callbacks(struct device *dev)
-- 
cgit v1.2.3


From b298d289c79211508f11cb50749b0d1d54eb244a Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Fri, 9 Dec 2011 23:36:36 +0100
Subject: PM / Sleep: Fix freezer failures due to racy
 usermodehelper_is_disabled()

Commit a144c6a (PM: Print a warning if firmware is requested when tasks
are frozen) introduced usermodehelper_is_disabled() to warn and exit
immediately if firmware is requested when usermodehelpers are disabled.

However, it is racy. Consider the following scenario, currently used in
drivers/base/firmware_class.c:

...
if (usermodehelper_is_disabled())
        goto out;

/* Do actual work */
...

out:
        return err;

Nothing prevents someone from disabling usermodehelpers just after the check
in the 'if' condition, which means that it is quite possible to try doing the
"actual work" with usermodehelpers disabled, leading to undesirable
consequences.

In particular, this race condition in _request_firmware() causes task freezing
failures whenever suspend/hibernation is in progress because, it wrongly waits
to get the firmware/microcode image from userspace when actually the
usermodehelpers are disabled or userspace has been frozen.
Some of the example scenarios that cause freezing failures due to this race
are those that depend on userspace via request_firmware(), such as x86
microcode module initialization and microcode image reload.

Previous discussions about this issue can be found at:
http://thread.gmane.org/gmane.linux.kernel/1198291/focus=1200591

This patch adds proper synchronization to fix this issue.

It is to be noted that this patchset fixes the freezing failures but doesn't
remove the warnings. IOW, it does not attempt to add explicit synchronization
to x86 microcode driver to avoid requesting microcode image at inopportune
moments. Because, the warnings were introduced to highlight such cases, in the
first place. And we need not silence the warnings, since we take care of the
*real* problem (freezing failure) and hence, after that, the warnings are
pretty harmless anyway.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/firmware_class.c |  4 ++++
 include/linux/kmod.h          |  2 ++
 kernel/kmod.c                 | 23 ++++++++++++++++++++++-
 3 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 06ed6b4e7df5..d5585da14c8a 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -534,6 +534,8 @@ static int _request_firmware(const struct firmware **firmware_p,
 		return 0;
 	}
 
+	read_lock_usermodehelper();
+
 	if (WARN_ON(usermodehelper_is_disabled())) {
 		dev_err(device, "firmware: %s will not be loaded\n", name);
 		retval = -EBUSY;
@@ -572,6 +574,8 @@ static int _request_firmware(const struct firmware **firmware_p,
 	fw_destroy_instance(fw_priv);
 
 out:
+	read_unlock_usermodehelper();
+
 	if (retval) {
 		release_firmware(firmware);
 		*firmware_p = NULL;
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index b16f65390734..722f477c4ef7 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -117,5 +117,7 @@ extern void usermodehelper_init(void);
 extern int usermodehelper_disable(void);
 extern void usermodehelper_enable(void);
 extern bool usermodehelper_is_disabled(void);
+extern void read_lock_usermodehelper(void);
+extern void read_unlock_usermodehelper(void);
 
 #endif /* __LINUX_KMOD_H__ */
diff --git a/kernel/kmod.c b/kernel/kmod.c
index a4bea97c75b6..81b4a27261b2 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -36,6 +36,7 @@
 #include <linux/resource.h>
 #include <linux/notifier.h>
 #include <linux/suspend.h>
+#include <linux/rwsem.h>
 #include <asm/uaccess.h>
 
 #include <trace/events/module.h>
@@ -50,6 +51,7 @@ static struct workqueue_struct *khelper_wq;
 static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
 static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
 static DEFINE_SPINLOCK(umh_sysctl_lock);
+static DECLARE_RWSEM(umhelper_sem);
 
 #ifdef CONFIG_MODULES
 
@@ -275,6 +277,7 @@ static void __call_usermodehelper(struct work_struct *work)
  * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY
  * (used for preventing user land processes from being created after the user
  * land has been frozen during a system-wide hibernation or suspend operation).
+ * Should always be manipulated under umhelper_sem acquired for write.
  */
 static int usermodehelper_disabled = 1;
 
@@ -293,6 +296,18 @@ static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq);
  */
 #define RUNNING_HELPERS_TIMEOUT	(5 * HZ)
 
+void read_lock_usermodehelper(void)
+{
+	down_read(&umhelper_sem);
+}
+EXPORT_SYMBOL_GPL(read_lock_usermodehelper);
+
+void read_unlock_usermodehelper(void)
+{
+	up_read(&umhelper_sem);
+}
+EXPORT_SYMBOL_GPL(read_unlock_usermodehelper);
+
 /**
  * usermodehelper_disable - prevent new helpers from being started
  */
@@ -300,8 +315,10 @@ int usermodehelper_disable(void)
 {
 	long retval;
 
+	down_write(&umhelper_sem);
 	usermodehelper_disabled = 1;
-	smp_mb();
+	up_write(&umhelper_sem);
+
 	/*
 	 * From now on call_usermodehelper_exec() won't start any new
 	 * helpers, so it is sufficient if running_helpers turns out to
@@ -314,7 +331,9 @@ int usermodehelper_disable(void)
 	if (retval)
 		return 0;
 
+	down_write(&umhelper_sem);
 	usermodehelper_disabled = 0;
+	up_write(&umhelper_sem);
 	return -EAGAIN;
 }
 
@@ -323,7 +342,9 @@ int usermodehelper_disable(void)
  */
 void usermodehelper_enable(void)
 {
+	down_write(&umhelper_sem);
 	usermodehelper_disabled = 0;
+	up_write(&umhelper_sem);
 }
 
 /**
-- 
cgit v1.2.3


From bc677d5b64644c399cd3db6a905453e611f402ab Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Sat, 3 Dec 2011 23:41:31 +0100
Subject: usb: fix number of mapped SG DMA entries

Add a new field num_mapped_sgs to struct urb so that we have a place to
store the number of mapped entries and can also retain the original
value of entries in num_sgs.  Previously, usb_hcd_map_urb_for_dma()
would overwrite this with the number of mapped entries, which would
break dma_unmap_sg() because it requires the original number of entries.

This fixes warnings like the following when using USB storage devices:
 ------------[ cut here ]------------
 WARNING: at lib/dma-debug.c:902 check_unmap+0x4e4/0x695()
 ehci_hcd 0000:00:12.2: DMA-API: device driver frees DMA sg list with different entry count [map count=4] [unmap count=1]
 Modules linked in: ohci_hcd ehci_hcd
 Pid: 0, comm: kworker/0:1 Not tainted 3.2.0-rc2+ #319
 Call Trace:
  <IRQ>  [<ffffffff81036d3b>] warn_slowpath_common+0x80/0x98
  [<ffffffff81036de7>] warn_slowpath_fmt+0x41/0x43
  [<ffffffff811fa5ae>] check_unmap+0x4e4/0x695
  [<ffffffff8105e92c>] ? trace_hardirqs_off+0xd/0xf
  [<ffffffff8147208b>] ? _raw_spin_unlock_irqrestore+0x33/0x50
  [<ffffffff811fa84a>] debug_dma_unmap_sg+0xeb/0x117
  [<ffffffff8137b02f>] usb_hcd_unmap_urb_for_dma+0x71/0x188
  [<ffffffff8137b166>] unmap_urb_for_dma+0x20/0x22
  [<ffffffff8137b1c5>] usb_hcd_giveback_urb+0x5d/0xc0
  [<ffffffffa0000d02>] ehci_urb_done+0xf7/0x10c [ehci_hcd]
  [<ffffffffa0001140>] qh_completions+0x429/0x4bd [ehci_hcd]
  [<ffffffffa000340a>] ehci_work+0x95/0x9c0 [ehci_hcd]
  ...
 ---[ end trace f29ac88a5a48c580 ]---
 Mapped at:
  [<ffffffff811faac4>] debug_dma_map_sg+0x45/0x139
  [<ffffffff8137bc0b>] usb_hcd_map_urb_for_dma+0x22e/0x478
  [<ffffffff8137c494>] usb_hcd_submit_urb+0x63f/0x6fa
  [<ffffffff8137d01c>] usb_submit_urb+0x2c7/0x2de
  [<ffffffff8137dcd4>] usb_sg_wait+0x55/0x161

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c       | 5 ++---
 drivers/usb/host/ehci-q.c    | 2 +-
 drivers/usb/host/uhci-q.c    | 2 +-
 drivers/usb/host/whci/qset.c | 4 ++--
 drivers/usb/host/xhci-ring.c | 4 ++--
 include/linux/usb.h          | 1 +
 6 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 43a89e4ba928..2cec49d1773d 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1398,11 +1398,10 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 					ret = -EAGAIN;
 				else
 					urb->transfer_flags |= URB_DMA_MAP_SG;
-				if (n != urb->num_sgs) {
-					urb->num_sgs = n;
+				urb->num_mapped_sgs = n;
+				if (n != urb->num_sgs)
 					urb->transfer_flags |=
 							URB_DMA_SG_COMBINED;
-				}
 			} else if (urb->sg) {
 				struct scatterlist *sg = urb->sg;
 				urb->transfer_dma = dma_map_page(
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index f136f7f1c4f4..36ca5077cdf7 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -658,7 +658,7 @@ qh_urb_transaction (
 	/*
 	 * data transfer stage:  buffer setup
 	 */
-	i = urb->num_sgs;
+	i = urb->num_mapped_sgs;
 	if (len > 0 && i > 0) {
 		sg = urb->sg;
 		buf = sg_dma_address(sg);
diff --git a/drivers/usb/host/uhci-q.c b/drivers/usb/host/uhci-q.c
index f6ca80ee4cec..d2c6f5ac4626 100644
--- a/drivers/usb/host/uhci-q.c
+++ b/drivers/usb/host/uhci-q.c
@@ -943,7 +943,7 @@ static int uhci_submit_common(struct uhci_hcd *uhci, struct urb *urb,
 	if (usb_pipein(urb->pipe))
 		status |= TD_CTRL_SPD;
 
-	i = urb->num_sgs;
+	i = urb->num_mapped_sgs;
 	if (len > 0 && i > 0) {
 		sg = urb->sg;
 		data = sg_dma_address(sg);
diff --git a/drivers/usb/host/whci/qset.c b/drivers/usb/host/whci/qset.c
index d6e175428618..a91712c8bff5 100644
--- a/drivers/usb/host/whci/qset.c
+++ b/drivers/usb/host/whci/qset.c
@@ -443,7 +443,7 @@ static int qset_add_urb_sg(struct whc *whc, struct whc_qset *qset, struct urb *u
 
 	remaining = urb->transfer_buffer_length;
 
-	for_each_sg(urb->sg, sg, urb->num_sgs, i) {
+	for_each_sg(urb->sg, sg, urb->num_mapped_sgs, i) {
 		dma_addr_t dma_addr;
 		size_t dma_remaining;
 		dma_addr_t sp, ep;
@@ -561,7 +561,7 @@ static int qset_add_urb_sg_linearize(struct whc *whc, struct whc_qset *qset,
 
 	remaining = urb->transfer_buffer_length;
 
-	for_each_sg(urb->sg, sg, urb->num_sgs, i) {
+	for_each_sg(urb->sg, sg, urb->num_mapped_sgs, i) {
 		size_t len;
 		size_t sg_remaining;
 		void *orig;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 133ce302c860..d030f0b2bfa2 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2551,7 +2551,7 @@ static unsigned int count_sg_trbs_needed(struct xhci_hcd *xhci, struct urb *urb)
 	struct scatterlist *sg;
 
 	sg = NULL;
-	num_sgs = urb->num_sgs;
+	num_sgs = urb->num_mapped_sgs;
 	temp = urb->transfer_buffer_length;
 
 	xhci_dbg(xhci, "count sg list trbs: \n");
@@ -2735,7 +2735,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 		return -EINVAL;
 
 	num_trbs = count_sg_trbs_needed(xhci, urb);
-	num_sgs = urb->num_sgs;
+	num_sgs = urb->num_mapped_sgs;
 	total_packet_count = roundup(urb->transfer_buffer_length,
 			usb_endpoint_maxp(&urb->ep->desc));
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 2f05a7fa1ecf..4269c3f88148 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1221,6 +1221,7 @@ struct urb {
 	void *transfer_buffer;		/* (in) associated data buffer */
 	dma_addr_t transfer_dma;	/* (in) dma addr for transfer_buffer */
 	struct scatterlist *sg;		/* (in) scatter gather buffer list */
+	int num_mapped_sgs;		/* (internal) mapped sg entries */
 	int num_sgs;			/* (in) number of entries in the sg list */
 	u32 transfer_buffer_length;	/* (in) data buffer length */
 	u32 actual_length;		/* (return) actual transfer length */
-- 
cgit v1.2.3


From 7bf01185c5e9ec19f739f7208646dc2e2cf1904b Mon Sep 17 00:00:00 2001
From: Aman Deep <amandeep3986@gmail.com>
Date: Thu, 8 Dec 2011 12:05:22 +0530
Subject: USB: Adding #define in hub_configure() and hcd.c file

This patch is in succession of previous patch
commit c8421147926fcacf53081a36438a0bed394da9f5
        xHCI: Adding #define values used for hub descriptor

Hub descriptors characteristics #defines values are added in
hub_configure() in place of magic numbers as asked by Alan Stern.
And the indentation for switch and case is changed to be same.

Some #defines values are added in ch11.h for defining hub class
protocols and used in hub.c and hcd.c in which magic values were
used for hub class protocols.

Signed-off-by: Aman Deep <amandeep3986@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c   |  2 +-
 drivers/usb/core/hub.c   | 88 ++++++++++++++++++++++++------------------------
 include/linux/usb/ch11.h | 11 ++++++
 3 files changed, 56 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 2cec49d1773d..eb19cba34ac9 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -658,7 +658,7 @@ error:
 				len > offsetof(struct usb_device_descriptor,
 						bDeviceProtocol))
 			((struct usb_device_descriptor *) ubuf)->
-					bDeviceProtocol = 1;
+				bDeviceProtocol = USB_HUB_PR_HS_SINGLE_TT;
 	}
 
 	/* any errors get returned through the urb completion */
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 29d0669227ec..79d339e2e700 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -84,7 +84,7 @@ struct usb_hub {
 
 static inline int hub_is_superspeed(struct usb_device *hdev)
 {
-	return (hdev->descriptor.bDeviceProtocol == 3);
+	return (hdev->descriptor.bDeviceProtocol == USB_HUB_PR_SS);
 }
 
 /* Protect struct usb_device->state and ->children members
@@ -1041,58 +1041,58 @@ static int hub_configure(struct usb_hub *hub,
 		dev_dbg(hub_dev, "standalone hub\n");
 
 	switch (wHubCharacteristics & HUB_CHAR_LPSM) {
-		case 0x00:
-			dev_dbg(hub_dev, "ganged power switching\n");
-			break;
-		case 0x01:
-			dev_dbg(hub_dev, "individual port power switching\n");
-			break;
-		case 0x02:
-		case 0x03:
-			dev_dbg(hub_dev, "no power switching (usb 1.0)\n");
-			break;
+	case HUB_CHAR_COMMON_LPSM:
+		dev_dbg(hub_dev, "ganged power switching\n");
+		break;
+	case HUB_CHAR_INDV_PORT_LPSM:
+		dev_dbg(hub_dev, "individual port power switching\n");
+		break;
+	case HUB_CHAR_NO_LPSM:
+	case HUB_CHAR_LPSM:
+		dev_dbg(hub_dev, "no power switching (usb 1.0)\n");
+		break;
 	}
 
 	switch (wHubCharacteristics & HUB_CHAR_OCPM) {
-		case 0x00:
-			dev_dbg(hub_dev, "global over-current protection\n");
-			break;
-		case 0x08:
-			dev_dbg(hub_dev, "individual port over-current protection\n");
-			break;
-		case 0x10:
-		case 0x18:
-			dev_dbg(hub_dev, "no over-current protection\n");
-                        break;
+	case HUB_CHAR_COMMON_OCPM:
+		dev_dbg(hub_dev, "global over-current protection\n");
+		break;
+	case HUB_CHAR_INDV_PORT_OCPM:
+		dev_dbg(hub_dev, "individual port over-current protection\n");
+		break;
+	case HUB_CHAR_NO_OCPM:
+	case HUB_CHAR_OCPM:
+		dev_dbg(hub_dev, "no over-current protection\n");
+		break;
 	}
 
 	spin_lock_init (&hub->tt.lock);
 	INIT_LIST_HEAD (&hub->tt.clear_list);
 	INIT_WORK(&hub->tt.clear_work, hub_tt_work);
 	switch (hdev->descriptor.bDeviceProtocol) {
-		case 0:
-			break;
-		case 1:
-			dev_dbg(hub_dev, "Single TT\n");
-			hub->tt.hub = hdev;
-			break;
-		case 2:
-			ret = usb_set_interface(hdev, 0, 1);
-			if (ret == 0) {
-				dev_dbg(hub_dev, "TT per port\n");
-				hub->tt.multi = 1;
-			} else
-				dev_err(hub_dev, "Using single TT (err %d)\n",
-					ret);
-			hub->tt.hub = hdev;
-			break;
-		case 3:
-			/* USB 3.0 hubs don't have a TT */
-			break;
-		default:
-			dev_dbg(hub_dev, "Unrecognized hub protocol %d\n",
-				hdev->descriptor.bDeviceProtocol);
-			break;
+	case USB_HUB_PR_FS:
+		break;
+	case USB_HUB_PR_HS_SINGLE_TT:
+		dev_dbg(hub_dev, "Single TT\n");
+		hub->tt.hub = hdev;
+		break;
+	case USB_HUB_PR_HS_MULTI_TT:
+		ret = usb_set_interface(hdev, 0, 1);
+		if (ret == 0) {
+			dev_dbg(hub_dev, "TT per port\n");
+			hub->tt.multi = 1;
+		} else
+			dev_err(hub_dev, "Using single TT (err %d)\n",
+				ret);
+		hub->tt.hub = hdev;
+		break;
+	case USB_HUB_PR_SS:
+		/* USB 3.0 hubs don't have a TT */
+		break;
+	default:
+		dev_dbg(hub_dev, "Unrecognized hub protocol %d\n",
+			hdev->descriptor.bDeviceProtocol);
+		break;
 	}
 
 	/* Note 8 FS bit times == (8 bits / 12000000 bps) ~= 666ns */
diff --git a/include/linux/usb/ch11.h b/include/linux/usb/ch11.h
index 55e7325926c1..0832eb841a30 100644
--- a/include/linux/usb/ch11.h
+++ b/include/linux/usb/ch11.h
@@ -207,6 +207,17 @@ struct usb_hub_status {
 #define USB_DT_HUB_NONVAR_SIZE		7
 #define USB_DT_SS_HUB_SIZE              12
 
+/*
+ * Hub Device descriptor
+ * USB Hub class device protocols
+ */
+
+#define USB_HUB_PR_FS		0 /* Full speed hub */
+#define USB_HUB_PR_HS_NO_TT	0 /* Hi-speed hub without TT */
+#define USB_HUB_PR_HS_SINGLE_TT	1 /* Hi-speed hub with single TT */
+#define USB_HUB_PR_HS_MULTI_TT	2 /* Hi-speed hub with multiple TT */
+#define USB_HUB_PR_SS		3 /* Super speed hub */
+
 struct usb_hub_descriptor {
 	__u8  bDescLength;
 	__u8  bDescriptorType;
-- 
cgit v1.2.3


From 5a3072be6ce00b10565c78da05ad78df41310045 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Thu, 8 Dec 2011 22:53:29 +0100
Subject: drivers_base: make argument to platform_device_register_full const
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

platform_device_register_full doesn't modify *pdevinfo so it can be
marked as const without further adaptions.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/platform.c         | 2 +-
 include/linux/platform_device.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 7a24895543e7..a7c06374062e 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -383,7 +383,7 @@ EXPORT_SYMBOL_GPL(platform_device_unregister);
  * Returns &struct platform_device pointer on success, or ERR_PTR() on error.
  */
 struct platform_device *platform_device_register_full(
-		struct platform_device_info *pdevinfo)
+		const struct platform_device_info *pdevinfo)
 {
 	int ret = -ENOMEM;
 	struct platform_device *pdev;
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 165a8d175370..5622fa24e97b 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -63,7 +63,7 @@ struct platform_device_info {
 		u64 dma_mask;
 };
 extern struct platform_device *platform_device_register_full(
-		struct platform_device_info *pdevinfo);
+		const struct platform_device_info *pdevinfo);
 
 /**
  * platform_device_register_resndata - add a platform-level device with
-- 
cgit v1.2.3


From f30ca6ba0bb2b7d050f24682bb8639c939c79859 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Tue, 15 Nov 2011 12:56:32 +0000
Subject: efi.h: Add struct definition for boot time services

With the forthcoming efi stub code we're gonna need to access boot
time services so let's define a struct so we can access the functions.

Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Link: http://lkml.kernel.org/r/1318848017-12301-1-git-send-email-matt@console-pimps.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/efi.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 2362a0bc7f0d..9547597ad6be 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -138,6 +138,57 @@ typedef struct {
 	u8 sets_to_zero;
 } efi_time_cap_t;
 
+/*
+ * EFI Boot Services table
+ */
+typedef struct {
+	efi_table_hdr_t hdr;
+	void *raise_tpl;
+	void *restore_tpl;
+	void *allocate_pages;
+	void *free_pages;
+	void *get_memory_map;
+	void *allocate_pool;
+	void *free_pool;
+	void *create_event;
+	void *set_timer;
+	void *wait_for_event;
+	void *signal_event;
+	void *close_event;
+	void *check_event;
+	void *install_protocol_interface;
+	void *reinstall_protocol_interface;
+	void *uninstall_protocol_interface;
+	void *handle_protocol;
+	void *__reserved;
+	void *register_protocol_notify;
+	void *locate_handle;
+	void *locate_device_path;
+	void *install_configuration_table;
+	void *load_image;
+	void *start_image;
+	void *exit;
+	void *unload_image;
+	void *exit_boot_services;
+	void *get_next_monotonic_count;
+	void *stall;
+	void *set_watchdog_timer;
+	void *connect_controller;
+	void *disconnect_controller;
+	void *open_protocol;
+	void *close_protocol;
+	void *open_protocol_information;
+	void *protocols_per_handle;
+	void *locate_handle_buffer;
+	void *locate_protocol;
+	void *install_multiple_protocol_interfaces;
+	void *uninstall_multiple_protocol_interfaces;
+	void *calculate_crc32;
+	void *copy_mem;
+	void *set_mem;
+	void *create_event_ex;
+} efi_boot_services_t;
+
 /*
  * Types and defines for EFI ResetSystem
  */
@@ -261,7 +312,7 @@ typedef struct {
 	unsigned long stderr_handle;
 	unsigned long stderr;
 	efi_runtime_services_t *runtime;
-	unsigned long boottime;
+	efi_boot_services_t *boottime;
 	unsigned long nr_tables;
 	unsigned long tables;
 } efi_system_table_t;
-- 
cgit v1.2.3


From 8e84f345e2f2189a37492c77c566c7494b7b6b23 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Tue, 15 Nov 2011 12:56:50 +0000
Subject: efi.h: Add efi_image_loaded_t

Add the EFI loaded image structure and protocol guid which are
required by the x86 EFI boot stub. The EFI boot stub uses the
structure to figure out where it was loaded in memory and to pass
command line arguments to the kernel.

Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Link: http://lkml.kernel.org/r/1318848017-12301-1-git-send-email-matt@console-pimps.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/efi.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 9547597ad6be..e35005f451db 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -287,6 +287,9 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules,
 #define LINUX_EFI_CRASH_GUID \
     EFI_GUID(  0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0 )
 
+#define LOADED_IMAGE_PROTOCOL_GUID \
+    EFI_GUID(  0x5b1b31a1, 0x9562, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b )
+
 typedef struct {
 	efi_guid_t guid;
 	unsigned long table;
@@ -326,6 +329,22 @@ struct efi_memory_map {
 	unsigned long desc_size;
 };
 
+typedef struct {
+	u32 revision;
+	void *parent_handle;
+	efi_system_table_t *system_table;
+	void *device_handle;
+	void *file_path;
+	void *reserved;
+	u32 load_options_size;
+	void *load_options;
+	void *image_base;
+	__aligned_u64 image_size;
+	unsigned int image_code_type;
+	unsigned int image_data_type;
+	unsigned long unload;
+} efi_loaded_image_t;
+
 #define EFI_INVALID_TABLE_ADDR		(~0UL)
 
 /*
-- 
cgit v1.2.3


From bb05e4ba452ada7966fbced4e829aa029f546445 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Tue, 15 Nov 2011 12:57:03 +0000
Subject: efi.h: Add allocation types for boottime->allocate_pages()

Add the allocation types detailed in section 6.2 - "AllocatePages()"
of the UEFI 2.3 specification. These definitions will be used by the
x86 EFI boot stub which needs to allocate memory during boot.

Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Link: http://lkml.kernel.org/r/1318848017-12301-1-git-send-email-matt@console-pimps.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/efi.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index e35005f451db..378f2cd1f7c3 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -109,6 +109,14 @@ typedef struct {
 	u32 imagesize;
 } efi_capsule_header_t;
 
+/*
+ * Allocation types for calls to boottime->allocate_pages.
+ */
+#define EFI_ALLOCATE_ANY_PAGES		0
+#define EFI_ALLOCATE_MAX_ADDRESS	1
+#define EFI_ALLOCATE_ADDRESS		2
+#define EFI_MAX_ALLOCATE_TYPE		3
+
 typedef int (*efi_freemem_callback_t) (u64 start, u64 end, void *arg);
 
 /*
-- 
cgit v1.2.3


From 0f7c5d477f2ce552997831d80e2c872cca1b9054 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Tue, 15 Nov 2011 12:57:16 +0000
Subject: efi.h: Add graphics protocol guids

The x86 EFI boot stub uses the Graphics Output Protocol and Universal
Graphics Adapter (UGA) protocol guids when initialising graphics
during boot.

Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Link: http://lkml.kernel.org/r/1318848017-12301-1-git-send-email-matt@console-pimps.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/efi.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 378f2cd1f7c3..e46d771f87e5 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -298,6 +298,15 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules,
 #define LOADED_IMAGE_PROTOCOL_GUID \
     EFI_GUID(  0x5b1b31a1, 0x9562, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b )
 
+#define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID \
+    EFI_GUID(  0x9042a9de, 0x23dc, 0x4a38, 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a )
+
+#define EFI_UGA_PROTOCOL_GUID \
+    EFI_GUID(  0x982c298b, 0xf4fa, 0x41cb, 0xb8, 0x38, 0x77, 0xaa, 0x68, 0x8f, 0xb8, 0x39 )
+
+#define EFI_PCI_IO_PROTOCOL_GUID \
+    EFI_GUID(  0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x2, 0x9a )
+
 typedef struct {
 	efi_guid_t guid;
 	unsigned long table;
-- 
cgit v1.2.3


From e2527a7cbec073b69a251193f200a88efbced7ad Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Tue, 15 Nov 2011 12:57:26 +0000
Subject: efi.h: Add boottime->locate_handle search types

The x86 EFI boot stub needs to locate handles for various protocols.

Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Link: http://lkml.kernel.org/r/1318848017-12301-1-git-send-email-matt@console-pimps.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/efi.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index e46d771f87e5..d407c88f955f 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -471,6 +471,13 @@ extern int __init efi_setup_pcdp_console(char *);
 #define EFI_VARIABLE_BOOTSERVICE_ACCESS 0x0000000000000002
 #define EFI_VARIABLE_RUNTIME_ACCESS     0x0000000000000004
 
+/*
+ * The type of search to perform when calling boottime->locate_handle
+ */
+#define EFI_LOCATE_ALL_HANDLES			0
+#define EFI_LOCATE_BY_REGISTER_NOTIFY		1
+#define EFI_LOCATE_BY_PROTOCOL			2
+
 /*
  * EFI Device Path information
  */
-- 
cgit v1.2.3


From 55839d515495e766605d7aaabd9c2758370a8d27 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Thu, 11 Aug 2011 10:28:06 +0100
Subject: efi: Add EFI file I/O data types

The x86 EFI stub needs to access files, for example when loading
initrd's. Add the required data types.

Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Link: http://lkml.kernel.org/r/1318848017-12301-1-git-send-email-matt@console-pimps.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/efi.h | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index d407c88f955f..37c300712e02 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -307,6 +307,12 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules,
 #define EFI_PCI_IO_PROTOCOL_GUID \
     EFI_GUID(  0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x2, 0x9a )
 
+#define EFI_FILE_INFO_ID \
+    EFI_GUID(  0x9576e92, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b )
+
+#define EFI_FILE_SYSTEM_GUID \
+    EFI_GUID(  0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b )
+
 typedef struct {
 	efi_guid_t guid;
 	unsigned long table;
@@ -362,6 +368,40 @@ typedef struct {
 	unsigned long unload;
 } efi_loaded_image_t;
 
+typedef struct {
+	u64 revision;
+	void *open_volume;
+} efi_file_io_interface_t;
+
+typedef struct {
+	u64 size;
+	u64 file_size;
+	u64 phys_size;
+	efi_time_t create_time;
+	efi_time_t last_access_time;
+	efi_time_t modification_time;
+	__aligned_u64 attribute;
+	efi_char16_t filename[1];
+} efi_file_info_t;
+
+typedef struct {
+	u64 revision;
+	void *open;
+	void *close;
+	void *delete;
+	void *read;
+	void *write;
+	void *get_position;
+	void *set_position;
+	void *get_info;
+	void *set_info;
+	void *flush;
+} efi_file_handle_t;
+
+#define EFI_FILE_MODE_READ	0x0000000000000001
+#define EFI_FILE_MODE_WRITE	0x0000000000000002
+#define EFI_FILE_MODE_CREATE	0x8000000000000000
+
 #define EFI_INVALID_TABLE_ADDR		(~0UL)
 
 /*
-- 
cgit v1.2.3


From 3986fb2ba67bb30cac18b0cff48c88d69ad37681 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Sun, 4 Dec 2011 18:42:20 -0500
Subject: serial: export the key functions for an 8250 IRQ handler

For drivers that need to construct their own IRQ handler, the
three components are seen in the current handle_port -- i.e.
Rx, Tx and modem_status.

Make these exported symbols so that "almost" 8250 UARTs can
construct their own IRQ handler with these shared components,
while working around their own unique errata issues.

The function names are given a serial8250 prefix, since they
are now entering the global namespace.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/8250.c   | 29 +++++++++++++++--------------
 include/linux/serial_8250.h |  4 ++++
 2 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250.c b/drivers/tty/serial/8250.c
index 5274228fa03c..9c0396fa3915 100644
--- a/drivers/tty/serial/8250.c
+++ b/drivers/tty/serial/8250.c
@@ -1300,8 +1300,6 @@ static void serial8250_stop_tx(struct uart_port *port)
 	}
 }
 
-static void transmit_chars(struct uart_8250_port *up);
-
 static void serial8250_start_tx(struct uart_port *port)
 {
 	struct uart_8250_port *up =
@@ -1318,7 +1316,7 @@ static void serial8250_start_tx(struct uart_port *port)
 			if ((up->port.type == PORT_RM9000) ?
 				(lsr & UART_LSR_THRE) :
 				(lsr & UART_LSR_TEMT))
-				transmit_chars(up);
+				serial8250_tx_chars(up);
 		}
 	}
 
@@ -1376,12 +1374,12 @@ static void clear_rx_fifo(struct uart_8250_port *up)
 }
 
 /*
- * receive_chars: processes according to the passed in LSR
+ * serial8250_rx_chars: processes according to the passed in LSR
  * value, and returns the remaining LSR bits not handled
  * by this Rx routine.
  */
-static unsigned char
-receive_chars(struct uart_8250_port *up, unsigned char lsr)
+unsigned char
+serial8250_rx_chars(struct uart_8250_port *up, unsigned char lsr)
 {
 	struct tty_struct *tty = up->port.state->port.tty;
 	unsigned char ch;
@@ -1462,8 +1460,9 @@ ignore_char:
 	spin_lock(&up->port.lock);
 	return lsr;
 }
+EXPORT_SYMBOL_GPL(serial8250_rx_chars);
 
-static void transmit_chars(struct uart_8250_port *up)
+void serial8250_tx_chars(struct uart_8250_port *up)
 {
 	struct circ_buf *xmit = &up->port.state->xmit;
 	int count;
@@ -1500,8 +1499,9 @@ static void transmit_chars(struct uart_8250_port *up)
 	if (uart_circ_empty(xmit))
 		__stop_tx(up);
 }
+EXPORT_SYMBOL_GPL(serial8250_tx_chars);
 
-static unsigned int check_modem_status(struct uart_8250_port *up)
+unsigned int serial8250_modem_status(struct uart_8250_port *up)
 {
 	unsigned int status = serial_in(up, UART_MSR);
 
@@ -1523,6 +1523,7 @@ static unsigned int check_modem_status(struct uart_8250_port *up)
 
 	return status;
 }
+EXPORT_SYMBOL_GPL(serial8250_modem_status);
 
 /*
  * This handles the interrupt from one port.
@@ -1539,10 +1540,10 @@ static void serial8250_handle_port(struct uart_8250_port *up)
 	DEBUG_INTR("status = %x...", status);
 
 	if (status & (UART_LSR_DR | UART_LSR_BI))
-		status = receive_chars(up, status);
-	check_modem_status(up);
+		status = serial8250_rx_chars(up, status);
+	serial8250_modem_status(up);
 	if (status & UART_LSR_THRE)
-		transmit_chars(up);
+		serial8250_tx_chars(up);
 
 	spin_unlock_irqrestore(&up->port.lock, flags);
 }
@@ -1782,7 +1783,7 @@ static void serial8250_backup_timeout(unsigned long data)
 	}
 
 	if (!(iir & UART_IIR_NO_INT))
-		transmit_chars(up);
+		serial8250_tx_chars(up);
 
 	if (is_real_interrupt(up->port.irq))
 		serial_out(up, UART_IER, ier);
@@ -1816,7 +1817,7 @@ static unsigned int serial8250_get_mctrl(struct uart_port *port)
 	unsigned int status;
 	unsigned int ret;
 
-	status = check_modem_status(up);
+	status = serial8250_modem_status(up);
 
 	ret = 0;
 	if (status & UART_MSR_DCD)
@@ -2863,7 +2864,7 @@ serial8250_console_write(struct console *co, const char *s, unsigned int count)
 	 *	while processing with interrupts off.
 	 */
 	if (up->msr_saved_flags)
-		check_modem_status(up);
+		serial8250_modem_status(up);
 
 	if (locked)
 		spin_unlock(&up->port.lock);
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 1f05bbeac01e..b44034eca123 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -66,6 +66,7 @@ enum {
  * dependent on the 8250 driver.
  */
 struct uart_port;
+struct uart_8250_port;
 
 int serial8250_register_port(struct uart_port *);
 void serial8250_unregister_port(int line);
@@ -82,6 +83,9 @@ extern void serial8250_do_set_termios(struct uart_port *port,
 extern void serial8250_do_pm(struct uart_port *port, unsigned int state,
 			     unsigned int oldstate);
 int serial8250_handle_irq(struct uart_port *port, unsigned int iir);
+unsigned char serial8250_rx_chars(struct uart_8250_port *up, unsigned char lsr);
+void serial8250_tx_chars(struct uart_8250_port *up);
+unsigned int serial8250_modem_status(struct uart_8250_port *up);
 
 extern void serial8250_set_isa_configurator(void (*v)
 					(int port, struct uart_port *up,
-- 
cgit v1.2.3


From 9deaa53ac7fa373623123aa4f18828dd62292b1a Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Sun, 4 Dec 2011 18:42:23 -0500
Subject: serial: add irq handler for Freescale 16550 errata.

Sending a break on the SOC UARTs found in some MPC83xx/85xx/86xx
chips seems to cause a short lived IRQ storm (/proc/interrupts
typically shows somewhere between 300 and 1500 events).  Unfortunately
this renders SysRQ over the serial console completely inoperable.

The suggested workaround in the errata is to read the Rx register,
wait one character period, and then read the Rx register again.
We achieve this by tracking the old LSR value, and on the subsequent
interrupt event after a break, we don't read LSR, instead we just
read the RBR again and return immediately.

The "fsl,ns16550" is used in the compatible field of the serial
device to mark UARTs known to have this issue.

Thanks to Scott Wood for providing the errata data which led to
a much cleaner fix.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/powerpc/kernel/legacy_serial.c |  3 ++
 drivers/tty/serial/8250_fsl.c       | 63 +++++++++++++++++++++++++++++++++++++
 drivers/tty/serial/Kconfig          |  5 +++
 drivers/tty/serial/Makefile         |  1 +
 include/linux/serial_8250.h         |  1 +
 5 files changed, 73 insertions(+)
 create mode 100644 drivers/tty/serial/8250_fsl.c

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index c7b5afeecaf2..3fea3689527e 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -441,6 +441,9 @@ static void __init fixup_port_irq(int index,
 		return;
 
 	port->irq = virq;
+
+	if (of_device_is_compatible(np, "fsl,ns16550"))
+		port->handle_irq = fsl8250_handle_irq;
 }
 
 static void __init fixup_port_pio(int index,
diff --git a/drivers/tty/serial/8250_fsl.c b/drivers/tty/serial/8250_fsl.c
new file mode 100644
index 000000000000..f4d3c47b88e8
--- /dev/null
+++ b/drivers/tty/serial/8250_fsl.c
@@ -0,0 +1,63 @@
+#include <linux/serial_reg.h>
+#include <linux/serial_8250.h>
+
+#include "8250.h"
+
+/*
+ * Freescale 16550 UART "driver", Copyright (C) 2011 Paul Gortmaker.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This isn't a full driver; it just provides an alternate IRQ
+ * handler to deal with an errata.  Everything else is just
+ * using the bog standard 8250 support.
+ *
+ * We follow code flow of serial8250_default_handle_irq() but add
+ * a check for a break and insert a dummy read on the Rx for the
+ * immediately following IRQ event.
+ *
+ * We re-use the already existing "bug handling" lsr_saved_flags
+ * field to carry the "what we just did" information from the one
+ * IRQ event to the next one.
+ */
+
+int fsl8250_handle_irq(struct uart_port *port)
+{
+	unsigned char lsr, orig_lsr;
+	unsigned long flags;
+	unsigned int iir;
+	struct uart_8250_port *up =
+		container_of(port, struct uart_8250_port, port);
+
+	spin_lock_irqsave(&up->port.lock, flags);
+
+	iir = port->serial_in(port, UART_IIR);
+	if (iir & UART_IIR_NO_INT) {
+		spin_unlock_irqrestore(&up->port.lock, flags);
+		return 0;
+	}
+
+	/* This is the WAR; if last event was BRK, then read and return */
+	if (unlikely(up->lsr_saved_flags & UART_LSR_BI)) {
+		up->lsr_saved_flags &= ~UART_LSR_BI;
+		port->serial_in(port, UART_RX);
+		spin_unlock_irqrestore(&up->port.lock, flags);
+		return 1;
+	}
+
+	lsr = orig_lsr = up->port.serial_in(&up->port, UART_LSR);
+
+	if (lsr & (UART_LSR_DR | UART_LSR_BI))
+		lsr = serial8250_rx_chars(up, lsr);
+
+	serial8250_modem_status(up);
+
+	if (lsr & UART_LSR_THRE)
+		serial8250_tx_chars(up);
+
+	up->lsr_saved_flags = orig_lsr;
+	spin_unlock_irqrestore(&up->port.lock, flags);
+	return 1;
+}
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 705d2dc39c9a..fee9e04f42e7 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -97,6 +97,11 @@ config SERIAL_8250_PNP
 	  This builds standard PNP serial support. You may be able to
 	  disable this feature if you only need legacy serial support.
 
+config SERIAL_8250_FSL
+	bool
+	depends on SERIAL_8250 && PPC
+	default PPC
+
 config SERIAL_8250_HP300
 	tristate
 	depends on SERIAL_8250 && HP300
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index af57089ddb67..75eadb8d7178 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_SERIAL_8250_BOCA) += 8250_boca.o
 obj-$(CONFIG_SERIAL_8250_EXAR_ST16C554) += 8250_exar_st16c554.o
 obj-$(CONFIG_SERIAL_8250_HUB6) += 8250_hub6.o
 obj-$(CONFIG_SERIAL_8250_MCA) += 8250_mca.o
+obj-$(CONFIG_SERIAL_8250_FSL) += 8250_fsl.o
 obj-$(CONFIG_SERIAL_8250_DW) += 8250_dw.o
 obj-$(CONFIG_SERIAL_AMBA_PL010) += amba-pl010.o
 obj-$(CONFIG_SERIAL_AMBA_PL011) += amba-pl011.o
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index b44034eca123..8f012f8ac8e9 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -82,6 +82,7 @@ extern void serial8250_do_set_termios(struct uart_port *port,
 		struct ktermios *termios, struct ktermios *old);
 extern void serial8250_do_pm(struct uart_port *port, unsigned int state,
 			     unsigned int oldstate);
+extern int fsl8250_handle_irq(struct uart_port *port);
 int serial8250_handle_irq(struct uart_port *port, unsigned int iir);
 unsigned char serial8250_rx_chars(struct uart_8250_port *up, unsigned char lsr);
 void serial8250_tx_chars(struct uart_8250_port *up);
-- 
cgit v1.2.3


From 6de5fc9cf7de334912de4cfd2d06eb2d744d2afe Mon Sep 17 00:00:00 2001
From: Stefan Nilsson XK <stefan.xk.nilsson@stericsson.com>
Date: Thu, 3 Nov 2011 09:44:12 +0100
Subject: mmc: core: Add quirk for long data read time

Adds a quirk that sets the data read timeout to a fixed value instead
of relying on the information in the CSD. The timeout value chosen
is 300ms since that has proven enough for the problematic cards found,
but could be increased if other cards require this.

This patch also enables this quirk for certain Micron cards known to
have this problem.

Signed-off-by: Stefan Nilsson XK <stefan.xk.nilsson@stericsson.com>
Signed-off-by: Ulf Hansson <ulf.hansson@stericsson.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: <stable@kernel.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/card/block.c |  8 ++++++++
 drivers/mmc/core/core.c  | 12 ++++++++++++
 include/linux/mmc/card.h |  6 ++++++
 3 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index a1cb21f95302..1e0e27cbe987 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -1606,6 +1606,14 @@ static const struct mmc_fixup blk_fixups[] =
 		  MMC_QUIRK_BLK_NO_CMD23),
 	MMC_FIXUP("MMC32G", 0x11, CID_OEMID_ANY, add_quirk_mmc,
 		  MMC_QUIRK_BLK_NO_CMD23),
+
+	/*
+	 * Some Micron MMC cards needs longer data read timeout than
+	 * indicated in CSD.
+	 */
+	MMC_FIXUP(CID_NAME_ANY, 0x13, 0x200, add_quirk_mmc,
+		  MMC_QUIRK_LONG_READ_TIME),
+
 	END_FIXUP
 };
 
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 5278ffb20e74..74a012ad2bab 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -529,6 +529,18 @@ void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card)
 			data->timeout_clks = 0;
 		}
 	}
+
+	/*
+	 * Some cards require longer data read timeout than indicated in CSD.
+	 * Address this by setting the read timeout to a "reasonably high"
+	 * value. For the cards tested, 300ms has proven enough. If necessary,
+	 * this value can be increased if other problematic cards require this.
+	 */
+	if (mmc_card_long_read_time(card) && data->flags & MMC_DATA_READ) {
+		data->timeout_ns = 300000000;
+		data->timeout_clks = 0;
+	}
+
 	/*
 	 * Some cards need very high timeouts if driven in SPI mode.
 	 * The worst observed timeout was 900ms after writing a
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 415f2db414e1..c8ef9bc54d50 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -218,6 +218,7 @@ struct mmc_card {
 #define MMC_QUIRK_INAND_CMD38	(1<<6)		/* iNAND devices have broken CMD38 */
 #define MMC_QUIRK_BLK_NO_CMD23	(1<<7)		/* Avoid CMD23 for regular multiblock */
 #define MMC_QUIRK_BROKEN_BYTE_MODE_512 (1<<8)	/* Avoid sending 512 bytes in */
+#define MMC_QUIRK_LONG_READ_TIME (1<<9)		/* Data read time > CSD says */
 						/* byte mode */
 	unsigned int    poweroff_notify_state;	/* eMMC4.5 notify feature */
 #define MMC_NO_POWER_NOTIFICATION	0
@@ -433,6 +434,11 @@ static inline int mmc_card_broken_byte_mode_512(const struct mmc_card *c)
 	return c->quirks & MMC_QUIRK_BROKEN_BYTE_MODE_512;
 }
 
+static inline int mmc_card_long_read_time(const struct mmc_card *c)
+{
+	return c->quirks & MMC_QUIRK_LONG_READ_TIME;
+}
+
 #define mmc_card_name(c)	((c)->cid.prod_name)
 #define mmc_card_id(c)		(dev_name(&(c)->dev))
 
-- 
cgit v1.2.3


From 9b2e4f1880b789be1f24f9684f7a54b90310b5c0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Fri, 30 Sep 2011 12:10:22 -0700
Subject: rcu: Track idleness independent of idle tasks

Earlier versions of RCU used the scheduling-clock tick to detect idleness
by checking for the idle task, but handled idleness differently for
CONFIG_NO_HZ=y.  But there are now a number of uses of RCU read-side
critical sections in the idle task, for example, for tracing.  A more
fine-grained detection of idleness is therefore required.

This commit presses the old dyntick-idle code into full-time service,
so that rcu_idle_enter(), previously known as rcu_enter_nohz(), is
always invoked at the beginning of an idle loop iteration.  Similarly,
rcu_idle_exit(), previously known as rcu_exit_nohz(), is always invoked
at the end of an idle-loop iteration.  This allows the idle task to
use RCU everywhere except between consecutive rcu_idle_enter() and
rcu_idle_exit() calls, in turn allowing architecture maintainers to
specify exactly where in the idle loop that RCU may be used.

Because some of the userspace upcall uses can result in what looks
to RCU like half of an interrupt, it is not possible to expect that
the irq_enter() and irq_exit() hooks will give exact counts.  This
patch therefore expands the ->dynticks_nesting counter to 64 bits
and uses two separate bitfields to count process/idle transitions
and interrupt entry/exit transitions.  It is presumed that userspace
upcalls do not happen in the idle loop or from usermode execution
(though usermode might do a system call that results in an upcall).
The counter is hard-reset on each process/idle transition, which
avoids the interrupt entry/exit error from accumulating.  Overflow
is avoided by the 64-bitness of the ->dyntick_nesting counter.

This commit also adds warnings if a non-idle task asks RCU to enter
idle state (and these checks will need some adjustment before applying
Frederic's OS-jitter patches (http://lkml.org/lkml/2011/10/7/246).
In addition, validation of ->dynticks and ->dynticks_nesting is added.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 Documentation/RCU/trace.txt |   4 -
 include/linux/hardirq.h     |  21 ----
 include/linux/rcupdate.h    |  21 +---
 include/linux/tick.h        |  11 ++-
 include/trace/events/rcu.h  |  10 +-
 kernel/rcutiny.c            | 124 ++++++++++++++++++++----
 kernel/rcutree.c            | 229 +++++++++++++++++++++++++++++++-------------
 kernel/rcutree.h            |  15 +--
 kernel/rcutree_trace.c      |  10 +-
 kernel/time/tick-sched.c    |   6 +-
 10 files changed, 297 insertions(+), 154 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index aaf65f6c6cd7..49587abfc2f7 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -105,14 +105,10 @@ o	"dt" is the current value of the dyntick counter that is incremented
 	or one greater than the interrupt-nesting depth otherwise.
 	The number after the second "/" is the NMI nesting depth.
 
-	This field is displayed only for CONFIG_NO_HZ kernels.
-
 o	"df" is the number of times that some other CPU has forced a
 	quiescent state on behalf of this CPU due to this CPU being in
 	dynticks-idle state.
 
-	This field is displayed only for CONFIG_NO_HZ kernels.
-
 o	"of" is the number of times that some other CPU has forced a
 	quiescent state on behalf of this CPU due to this CPU being
 	offline.  In a perfect world, this might never happen, but it
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index f743883f769e..bb7f30971858 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -139,20 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk)
 extern void account_system_vtime(struct task_struct *tsk);
 #endif
 
-#if defined(CONFIG_NO_HZ)
 #if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
-extern void rcu_enter_nohz(void);
-extern void rcu_exit_nohz(void);
-
-static inline void rcu_irq_enter(void)
-{
-	rcu_exit_nohz();
-}
-
-static inline void rcu_irq_exit(void)
-{
-	rcu_enter_nohz();
-}
 
 static inline void rcu_nmi_enter(void)
 {
@@ -163,17 +150,9 @@ static inline void rcu_nmi_exit(void)
 }
 
 #else
-extern void rcu_irq_enter(void);
-extern void rcu_irq_exit(void);
 extern void rcu_nmi_enter(void);
 extern void rcu_nmi_exit(void);
 #endif
-#else
-# define rcu_irq_enter() do { } while (0)
-# define rcu_irq_exit() do { } while (0)
-# define rcu_nmi_enter() do { } while (0)
-# define rcu_nmi_exit() do { } while (0)
-#endif /* #if defined(CONFIG_NO_HZ) */
 
 /*
  * It is safe to do non-atomic ops on ->hardirq_context,
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 2cf4226ade7e..cd1ad4b04c6d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -177,23 +177,10 @@ extern void rcu_sched_qs(int cpu);
 extern void rcu_bh_qs(int cpu);
 extern void rcu_check_callbacks(int cpu, int user);
 struct notifier_block;
-
-#ifdef CONFIG_NO_HZ
-
-extern void rcu_enter_nohz(void);
-extern void rcu_exit_nohz(void);
-
-#else /* #ifdef CONFIG_NO_HZ */
-
-static inline void rcu_enter_nohz(void)
-{
-}
-
-static inline void rcu_exit_nohz(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_NO_HZ */
+extern void rcu_idle_enter(void);
+extern void rcu_idle_exit(void);
+extern void rcu_irq_enter(void);
+extern void rcu_irq_exit(void);
 
 /*
  * Infrastructure to implement the synchronize_() primitives in
diff --git a/include/linux/tick.h b/include/linux/tick.h
index b232ccc0ee29..ca40838fdfb7 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -127,8 +127,15 @@ extern ktime_t tick_nohz_get_sleep_length(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 # else
-static inline void tick_nohz_stop_sched_tick(int inidle) { }
-static inline void tick_nohz_restart_sched_tick(void) { }
+static inline void tick_nohz_stop_sched_tick(int inidle)
+{
+	if (inidle)
+		rcu_idle_enter();
+}
+static inline void tick_nohz_restart_sched_tick(void)
+{
+	rcu_idle_exit();
+}
 static inline ktime_t tick_nohz_get_sleep_length(void)
 {
 	ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 669fbd62ec25..e5771804c507 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -246,19 +246,21 @@ TRACE_EVENT(rcu_fqs,
  */
 TRACE_EVENT(rcu_dyntick,
 
-	TP_PROTO(char *polarity),
+	TP_PROTO(char *polarity, int nesting),
 
-	TP_ARGS(polarity),
+	TP_ARGS(polarity, nesting),
 
 	TP_STRUCT__entry(
 		__field(char *, polarity)
+		__field(int, nesting)
 	),
 
 	TP_fast_assign(
 		__entry->polarity = polarity;
+		__entry->nesting = nesting;
 	),
 
-	TP_printk("%s", __entry->polarity)
+	TP_printk("%s %d", __entry->polarity, __entry->nesting)
 );
 
 /*
@@ -443,7 +445,7 @@ TRACE_EVENT(rcu_batch_end,
 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
 #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0)
 #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
-#define trace_rcu_dyntick(polarity) do { } while (0)
+#define trace_rcu_dyntick(polarity, nesting) do { } while (0)
 #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0)
 #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0)
 #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0)
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 636af6d9c6e5..3ab77bdc90c4 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -53,31 +53,122 @@ static void __call_rcu(struct rcu_head *head,
 
 #include "rcutiny_plugin.h"
 
-#ifdef CONFIG_NO_HZ
+static long long rcu_dynticks_nesting = LLONG_MAX / 2;
 
-static long rcu_dynticks_nesting = 1;
+/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
+static void rcu_idle_enter_common(void)
+{
+	if (rcu_dynticks_nesting) {
+		RCU_TRACE(trace_rcu_dyntick("--=", rcu_dynticks_nesting));
+		return;
+	}
+	RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting));
+	if (!idle_cpu(smp_processor_id())) {
+		WARN_ON_ONCE(1);	/* must be idle task! */
+		RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task",
+					    rcu_dynticks_nesting));
+		ftrace_dump(DUMP_ALL);
+	}
+	rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
+}
 
 /*
- * Enter dynticks-idle mode, which is an extended quiescent state
- * if we have fully entered that mode (i.e., if the new value of
- * dynticks_nesting is zero).
+ * Enter idle, which is an extended quiescent state if we have fully
+ * entered that mode (i.e., if the new value of dynticks_nesting is zero).
  */
-void rcu_enter_nohz(void)
+void rcu_idle_enter(void)
 {
-	if (--rcu_dynticks_nesting == 0)
-		rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
+	unsigned long flags;
+
+	local_irq_save(flags);
+	rcu_dynticks_nesting = 0;
+	rcu_idle_enter_common();
+	local_irq_restore(flags);
 }
 
 /*
- * Exit dynticks-idle mode, so that we are no longer in an extended
- * quiescent state.
+ * Exit an interrupt handler towards idle.
+ */
+void rcu_irq_exit(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	rcu_dynticks_nesting--;
+	WARN_ON_ONCE(rcu_dynticks_nesting < 0);
+	rcu_idle_enter_common();
+	local_irq_restore(flags);
+}
+
+/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */
+static void rcu_idle_exit_common(long long oldval)
+{
+	if (oldval) {
+		RCU_TRACE(trace_rcu_dyntick("++=", rcu_dynticks_nesting));
+		return;
+	}
+	RCU_TRACE(trace_rcu_dyntick("End", oldval));
+	if (!idle_cpu(smp_processor_id())) {
+		WARN_ON_ONCE(1);	/* must be idle task! */
+		RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task",
+			  oldval));
+		ftrace_dump(DUMP_ALL);
+	}
+}
+
+/*
+ * Exit idle, so that we are no longer in an extended quiescent state.
  */
-void rcu_exit_nohz(void)
+void rcu_idle_exit(void)
 {
+	unsigned long flags;
+	long long oldval;
+
+	local_irq_save(flags);
+	oldval = rcu_dynticks_nesting;
+	WARN_ON_ONCE(oldval != 0);
+	rcu_dynticks_nesting = LLONG_MAX / 2;
+	rcu_idle_exit_common(oldval);
+	local_irq_restore(flags);
+}
+
+/*
+ * Enter an interrupt handler, moving away from idle.
+ */
+void rcu_irq_enter(void)
+{
+	unsigned long flags;
+	long long oldval;
+
+	local_irq_save(flags);
+	oldval = rcu_dynticks_nesting;
 	rcu_dynticks_nesting++;
+	WARN_ON_ONCE(rcu_dynticks_nesting == 0);
+	rcu_idle_exit_common(oldval);
+	local_irq_restore(flags);
+}
+
+#ifdef CONFIG_PROVE_RCU
+
+/*
+ * Test whether RCU thinks that the current CPU is idle.
+ */
+int rcu_is_cpu_idle(void)
+{
+	return !rcu_dynticks_nesting;
 }
 
-#endif /* #ifdef CONFIG_NO_HZ */
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
+/*
+ * Test whether the current CPU was interrupted from idle.  Nested
+ * interrupts don't count, we must be running at the first interrupt
+ * level.
+ */
+int rcu_is_cpu_rrupt_from_idle(void)
+{
+	return rcu_dynticks_nesting <= 0;
+}
 
 /*
  * Helper function for rcu_sched_qs() and rcu_bh_qs().
@@ -126,14 +217,13 @@ void rcu_bh_qs(int cpu)
 
 /*
  * Check to see if the scheduling-clock interrupt came from an extended
- * quiescent state, and, if so, tell RCU about it.
+ * quiescent state, and, if so, tell RCU about it.  This function must
+ * be called from hardirq context.  It is normally called from the
+ * scheduling-clock interrupt.
  */
 void rcu_check_callbacks(int cpu, int user)
 {
-	if (user ||
-	    (idle_cpu(cpu) &&
-	     !in_softirq() &&
-	     hardirq_count() <= (1 << HARDIRQ_SHIFT)))
+	if (user || rcu_is_cpu_rrupt_from_idle())
 		rcu_sched_qs(cpu);
 	else if (!in_softirq())
 		rcu_bh_qs(cpu);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 5d0b55a3a8c0..1c40326724f6 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -195,12 +195,10 @@ void rcu_note_context_switch(int cpu)
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
-#ifdef CONFIG_NO_HZ
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
-	.dynticks_nesting = 1,
+	.dynticks_nesting = LLONG_MAX / 2,
 	.dynticks = ATOMIC_INIT(1),
 };
-#endif /* #ifdef CONFIG_NO_HZ */
 
 static int blimit = 10;		/* Maximum callbacks per rcu_do_batch. */
 static int qhimark = 10000;	/* If this many pending, ignore blimit. */
@@ -328,11 +326,11 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
 		return 1;
 	}
 
-	/* If preemptible RCU, no point in sending reschedule IPI. */
-	if (rdp->preemptible)
-		return 0;
-
-	/* The CPU is online, so send it a reschedule IPI. */
+	/*
+	 * The CPU is online, so send it a reschedule IPI.  This forces
+	 * it through the scheduler, and (inefficiently) also handles cases
+	 * where idle loops fail to inform RCU about the CPU being idle.
+	 */
 	if (rdp->cpu != smp_processor_id())
 		smp_send_reschedule(rdp->cpu);
 	else
@@ -343,51 +341,97 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
 
 #endif /* #ifdef CONFIG_SMP */
 
-#ifdef CONFIG_NO_HZ
+/*
+ * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
+ *
+ * If the new value of the ->dynticks_nesting counter now is zero,
+ * we really have entered idle, and must do the appropriate accounting.
+ * The caller must have disabled interrupts.
+ */
+static void rcu_idle_enter_common(struct rcu_dynticks *rdtp)
+{
+	if (rdtp->dynticks_nesting) {
+		trace_rcu_dyntick("--=", rdtp->dynticks_nesting);
+		return;
+	}
+	trace_rcu_dyntick("Start", rdtp->dynticks_nesting);
+	if (!idle_cpu(smp_processor_id())) {
+		WARN_ON_ONCE(1);	/* must be idle task! */
+		trace_rcu_dyntick("Error on entry: not idle task",
+				   rdtp->dynticks_nesting);
+		ftrace_dump(DUMP_ALL);
+	}
+	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+	smp_mb__before_atomic_inc();  /* See above. */
+	atomic_inc(&rdtp->dynticks);
+	smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
+	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+}
 
 /**
- * rcu_enter_nohz - inform RCU that current CPU is entering nohz
+ * rcu_idle_enter - inform RCU that current CPU is entering idle
  *
- * Enter nohz mode, in other words, -leave- the mode in which RCU
+ * Enter idle mode, in other words, -leave- the mode in which RCU
  * read-side critical sections can occur.  (Though RCU read-side
- * critical sections can occur in irq handlers in nohz mode, a possibility
- * handled by rcu_irq_enter() and rcu_irq_exit()).
+ * critical sections can occur in irq handlers in idle, a possibility
+ * handled by irq_enter() and irq_exit().)
+ *
+ * We crowbar the ->dynticks_nesting field to zero to allow for
+ * the possibility of usermode upcalls having messed up our count
+ * of interrupt nesting level during the prior busy period.
  */
-void rcu_enter_nohz(void)
+void rcu_idle_enter(void)
 {
 	unsigned long flags;
 	struct rcu_dynticks *rdtp;
 
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
-	if (--rdtp->dynticks_nesting) {
-		local_irq_restore(flags);
-		return;
-	}
-	trace_rcu_dyntick("Start");
-	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
-	smp_mb__before_atomic_inc();  /* See above. */
-	atomic_inc(&rdtp->dynticks);
-	smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
-	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+	rdtp->dynticks_nesting = 0;
+	rcu_idle_enter_common(rdtp);
 	local_irq_restore(flags);
 }
 
-/*
- * rcu_exit_nohz - inform RCU that current CPU is leaving nohz
+/**
+ * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
+ *
+ * Exit from an interrupt handler, which might possibly result in entering
+ * idle mode, in other words, leaving the mode in which read-side critical
+ * sections can occur.
  *
- * Exit nohz mode, in other words, -enter- the mode in which RCU
- * read-side critical sections normally occur.
+ * This code assumes that the idle loop never does anything that might
+ * result in unbalanced calls to irq_enter() and irq_exit().  If your
+ * architecture violates this assumption, RCU will give you what you
+ * deserve, good and hard.  But very infrequently and irreproducibly.
+ *
+ * Use things like work queues to work around this limitation.
+ *
+ * You have been warned.
  */
-void rcu_exit_nohz(void)
+void rcu_irq_exit(void)
 {
 	unsigned long flags;
 	struct rcu_dynticks *rdtp;
 
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
-	if (rdtp->dynticks_nesting++) {
-		local_irq_restore(flags);
+	rdtp->dynticks_nesting--;
+	WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
+	rcu_idle_enter_common(rdtp);
+	local_irq_restore(flags);
+}
+
+/*
+ * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
+ *
+ * If the new value of the ->dynticks_nesting counter was previously zero,
+ * we really have exited idle, and must do the appropriate accounting.
+ * The caller must have disabled interrupts.
+ */
+static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
+{
+	if (oldval) {
+		trace_rcu_dyntick("++=", rdtp->dynticks_nesting);
 		return;
 	}
 	smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
@@ -395,7 +439,71 @@ void rcu_exit_nohz(void)
 	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
 	smp_mb__after_atomic_inc();  /* See above. */
 	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
-	trace_rcu_dyntick("End");
+	trace_rcu_dyntick("End", oldval);
+	if (!idle_cpu(smp_processor_id())) {
+		WARN_ON_ONCE(1);	/* must be idle task! */
+		trace_rcu_dyntick("Error on exit: not idle task", oldval);
+		ftrace_dump(DUMP_ALL);
+	}
+}
+
+/**
+ * rcu_idle_exit - inform RCU that current CPU is leaving idle
+ *
+ * Exit idle mode, in other words, -enter- the mode in which RCU
+ * read-side critical sections can occur.
+ *
+ * We crowbar the ->dynticks_nesting field to LLONG_MAX/2 to allow for
+ * the possibility of usermode upcalls messing up our count
+ * of interrupt nesting level during the busy period that is just
+ * now starting.
+ */
+void rcu_idle_exit(void)
+{
+	unsigned long flags;
+	struct rcu_dynticks *rdtp;
+	long long oldval;
+
+	local_irq_save(flags);
+	rdtp = &__get_cpu_var(rcu_dynticks);
+	oldval = rdtp->dynticks_nesting;
+	WARN_ON_ONCE(oldval != 0);
+	rdtp->dynticks_nesting = LLONG_MAX / 2;
+	rcu_idle_exit_common(rdtp, oldval);
+	local_irq_restore(flags);
+}
+
+/**
+ * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
+ *
+ * Enter an interrupt handler, which might possibly result in exiting
+ * idle mode, in other words, entering the mode in which read-side critical
+ * sections can occur.
+ *
+ * Note that the Linux kernel is fully capable of entering an interrupt
+ * handler that it never exits, for example when doing upcalls to
+ * user mode!  This code assumes that the idle loop never does upcalls to
+ * user mode.  If your architecture does do upcalls from the idle loop (or
+ * does anything else that results in unbalanced calls to the irq_enter()
+ * and irq_exit() functions), RCU will give you what you deserve, good
+ * and hard.  But very infrequently and irreproducibly.
+ *
+ * Use things like work queues to work around this limitation.
+ *
+ * You have been warned.
+ */
+void rcu_irq_enter(void)
+{
+	unsigned long flags;
+	struct rcu_dynticks *rdtp;
+	long long oldval;
+
+	local_irq_save(flags);
+	rdtp = &__get_cpu_var(rcu_dynticks);
+	oldval = rdtp->dynticks_nesting;
+	rdtp->dynticks_nesting++;
+	WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
+	rcu_idle_exit_common(rdtp, oldval);
 	local_irq_restore(flags);
 }
 
@@ -442,27 +550,32 @@ void rcu_nmi_exit(void)
 	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 }
 
+#ifdef CONFIG_PROVE_RCU
+
 /**
- * rcu_irq_enter - inform RCU of entry to hard irq context
+ * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
  *
- * If the CPU was idle with dynamic ticks active, this updates the
- * rdtp->dynticks to let the RCU handling know that the CPU is active.
+ * If the current CPU is in its idle loop and is neither in an interrupt
+ * or NMI handler, return true.  The caller must have at least disabled
+ * preemption.
  */
-void rcu_irq_enter(void)
+int rcu_is_cpu_idle(void)
 {
-	rcu_exit_nohz();
+	return (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
 }
 
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
 /**
- * rcu_irq_exit - inform RCU of exit from hard irq context
+ * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
  *
- * If the CPU was idle with dynamic ticks active, update the rdp->dynticks
- * to put let the RCU handling be aware that the CPU is going back to idle
- * with no ticks.
+ * If the current CPU is idle or running at a first-level (not nested)
+ * interrupt from idle, return true.  The caller must have at least
+ * disabled preemption.
  */
-void rcu_irq_exit(void)
+int rcu_is_cpu_rrupt_from_idle(void)
 {
-	rcu_enter_nohz();
+	return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
 }
 
 #ifdef CONFIG_SMP
@@ -512,24 +625,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 
 #endif /* #ifdef CONFIG_SMP */
 
-#else /* #ifdef CONFIG_NO_HZ */
-
-#ifdef CONFIG_SMP
-
-static int dyntick_save_progress_counter(struct rcu_data *rdp)
-{
-	return 0;
-}
-
-static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
-{
-	return rcu_implicit_offline_qs(rdp);
-}
-
-#endif /* #ifdef CONFIG_SMP */
-
-#endif /* #else #ifdef CONFIG_NO_HZ */
-
 int rcu_cpu_stall_suppress __read_mostly;
 
 static void record_gp_stall_check_time(struct rcu_state *rsp)
@@ -1334,16 +1429,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
  * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
  * Also schedule RCU core processing.
  *
- * This function must be called with hardirqs disabled.  It is normally
+ * This function must be called from hardirq context.  It is normally
  * invoked from the scheduling-clock interrupt.  If rcu_pending returns
  * false, there is no point in invoking rcu_check_callbacks().
  */
 void rcu_check_callbacks(int cpu, int user)
 {
 	trace_rcu_utilization("Start scheduler-tick");
-	if (user ||
-	    (idle_cpu(cpu) && rcu_scheduler_active &&
-	     !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+	if (user || rcu_is_cpu_rrupt_from_idle()) {
 
 		/*
 		 * Get here if this CPU took its interrupt from user
@@ -1913,9 +2006,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	for (i = 0; i < RCU_NEXT_SIZE; i++)
 		rdp->nxttail[i] = &rdp->nxtlist;
 	rdp->qlen = 0;
-#ifdef CONFIG_NO_HZ
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
-#endif /* #ifdef CONFIG_NO_HZ */
+	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != LLONG_MAX / 2);
+	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
 	rdp->cpu = cpu;
 	rdp->rsp = rsp;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1942,6 +2035,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 	rdp->qlen_last_fqs_check = 0;
 	rdp->n_force_qs_snap = rsp->n_force_qs;
 	rdp->blimit = blimit;
+	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != LLONG_MAX / 2);
+	WARN_ON_ONCE((atomic_read(&rdp->dynticks->dynticks) & 0x1) != 1);
 	raw_spin_unlock(&rnp->lock);		/* irqs remain disabled. */
 
 	/*
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 517f2f89a293..0963fa1541ac 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -84,9 +84,10 @@
  * Dynticks per-CPU state.
  */
 struct rcu_dynticks {
-	int dynticks_nesting;	/* Track irq/process nesting level. */
-	int dynticks_nmi_nesting; /* Track NMI nesting level. */
-	atomic_t dynticks;	/* Even value for dynticks-idle, else odd. */
+	long long dynticks_nesting; /* Track irq/process nesting level. */
+				    /* Process level is worth LLONG_MAX/2. */
+	int dynticks_nmi_nesting;   /* Track NMI nesting level. */
+	atomic_t dynticks;	    /* Even value for idle, else odd. */
 };
 
 /* RCU's kthread states for tracing. */
@@ -274,16 +275,12 @@ struct rcu_data {
 					/* did other CPU force QS recently? */
 	long		blimit;		/* Upper limit on a processed batch */
 
-#ifdef CONFIG_NO_HZ
 	/* 3) dynticks interface. */
 	struct rcu_dynticks *dynticks;	/* Shared per-CPU dynticks state. */
 	int dynticks_snap;		/* Per-GP tracking for dynticks. */
-#endif /* #ifdef CONFIG_NO_HZ */
 
 	/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
-#ifdef CONFIG_NO_HZ
 	unsigned long dynticks_fqs;	/* Kicked due to dynticks idle. */
-#endif /* #ifdef CONFIG_NO_HZ */
 	unsigned long offline_fqs;	/* Kicked due to being offline. */
 	unsigned long resched_ipi;	/* Sent a resched IPI. */
 
@@ -307,11 +304,7 @@ struct rcu_data {
 #define RCU_GP_INIT		1	/* Grace period being initialized. */
 #define RCU_SAVE_DYNTICK	2	/* Need to scan dyntick state. */
 #define RCU_FORCE_QS		3	/* Need to force quiescent state. */
-#ifdef CONFIG_NO_HZ
 #define RCU_SIGNAL_INIT		RCU_SAVE_DYNTICK
-#else /* #ifdef CONFIG_NO_HZ */
-#define RCU_SIGNAL_INIT		RCU_FORCE_QS
-#endif /* #else #ifdef CONFIG_NO_HZ */
 
 #define RCU_JIFFIES_TILL_FORCE_QS	 3	/* for rsp->jiffies_force_qs */
 
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 59c7bee4ce0f..654cfe67f0d1 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -67,13 +67,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->completed, rdp->gpnum,
 		   rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
 		   rdp->qs_pending);
-#ifdef CONFIG_NO_HZ
-	seq_printf(m, " dt=%d/%d/%d df=%lu",
+	seq_printf(m, " dt=%d/%llx/%d df=%lu",
 		   atomic_read(&rdp->dynticks->dynticks),
 		   rdp->dynticks->dynticks_nesting,
 		   rdp->dynticks->dynticks_nmi_nesting,
 		   rdp->dynticks_fqs);
-#endif /* #ifdef CONFIG_NO_HZ */
 	seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
 	seq_printf(m, " ql=%ld qs=%c%c%c%c",
 		   rdp->qlen,
@@ -141,13 +139,11 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->completed, rdp->gpnum,
 		   rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
 		   rdp->qs_pending);
-#ifdef CONFIG_NO_HZ
-	seq_printf(m, ",%d,%d,%d,%lu",
+	seq_printf(m, ",%d,%llx,%d,%lu",
 		   atomic_read(&rdp->dynticks->dynticks),
 		   rdp->dynticks->dynticks_nesting,
 		   rdp->dynticks->dynticks_nmi_nesting,
 		   rdp->dynticks_fqs);
-#endif /* #ifdef CONFIG_NO_HZ */
 	seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
 	seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen,
 		   ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
@@ -171,9 +167,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
 static int show_rcudata_csv(struct seq_file *m, void *unused)
 {
 	seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
-#ifdef CONFIG_NO_HZ
 	seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
-#endif /* #ifdef CONFIG_NO_HZ */
 	seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\"");
 #ifdef CONFIG_RCU_BOOST
 	seq_puts(m, "\"kt\",\"ktl\"");
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 40420644d0ba..5d9d23665f12 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -434,7 +434,6 @@ void tick_nohz_stop_sched_tick(int inidle)
 			ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
 			ts->tick_stopped = 1;
 			ts->idle_jiffies = last_jiffies;
-			rcu_enter_nohz();
 		}
 
 		ts->idle_sleeps++;
@@ -473,6 +472,8 @@ out:
 	ts->last_jiffies = last_jiffies;
 	ts->sleep_length = ktime_sub(dev->next_event, now);
 end:
+	if (inidle)
+		rcu_idle_enter();
 	local_irq_restore(flags);
 }
 
@@ -529,6 +530,7 @@ void tick_nohz_restart_sched_tick(void)
 	ktime_t now;
 
 	local_irq_disable();
+	rcu_idle_exit();
 	if (ts->idle_active || (ts->inidle && ts->tick_stopped))
 		now = ktime_get();
 
@@ -543,8 +545,6 @@ void tick_nohz_restart_sched_tick(void)
 
 	ts->inidle = 0;
 
-	rcu_exit_nohz();
-
 	/* Update jiffies first */
 	select_nohz_load_balancer(0);
 	tick_do_update_jiffies64(now);
-- 
cgit v1.2.3


From 91afaf300269aa99a4d646969b3258b74294ac4d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Sun, 2 Oct 2011 07:44:32 -0700
Subject: rcu: Add failure tracing to rcutorture

Trace the rcutorture RCU accesses and dump the trace buffer when the
first failure is detected.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h   |  8 ++++++++
 include/trace/events/rcu.h | 26 ++++++++++++++++++++++++++
 kernel/rcupdate.c          | 10 ++++++++++
 kernel/rcutorture.c        | 18 ++++++++++++++++++
 4 files changed, 62 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index cd1ad4b04c6d..8d315b013e37 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -51,6 +51,8 @@ extern int rcutorture_runnable; /* for sysctl */
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
 extern void rcutorture_record_test_transition(void);
 extern void rcutorture_record_progress(unsigned long vernum);
+extern void do_trace_rcu_torture_read(char *rcutorturename,
+				      struct rcu_head *rhp);
 #else
 static inline void rcutorture_record_test_transition(void)
 {
@@ -58,6 +60,12 @@ static inline void rcutorture_record_test_transition(void)
 static inline void rcutorture_record_progress(unsigned long vernum)
 {
 }
+#ifdef CONFIG_RCU_TRACE
+extern void do_trace_rcu_torture_read(char *rcutorturename,
+				      struct rcu_head *rhp);
+#else
+#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#endif
 #endif
 
 #define UINT_CMP_GE(a, b)	(UINT_MAX / 2 >= (a) - (b))
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index e5771804c507..172620a92b1a 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -437,6 +437,31 @@ TRACE_EVENT(rcu_batch_end,
 		  __entry->rcuname, __entry->callbacks_invoked)
 );
 
+/*
+ * Tracepoint for rcutorture readers.  The first argument is the name
+ * of the RCU flavor from rcutorture's viewpoint and the second argument
+ * is the callback address.
+ */
+TRACE_EVENT(rcu_torture_read,
+
+	TP_PROTO(char *rcutorturename, struct rcu_head *rhp),
+
+	TP_ARGS(rcutorturename, rhp),
+
+	TP_STRUCT__entry(
+		__field(char *, rcutorturename)
+		__field(struct rcu_head *, rhp)
+	),
+
+	TP_fast_assign(
+		__entry->rcutorturename = rcutorturename;
+		__entry->rhp = rhp;
+	),
+
+	TP_printk("%s torture read %p",
+		  __entry->rcutorturename, __entry->rhp)
+);
+
 #else /* #ifdef CONFIG_RCU_TRACE */
 
 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
@@ -452,6 +477,7 @@ TRACE_EVENT(rcu_batch_end,
 #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0)
 #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
 #define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0)
+#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
 
 #endif /* #else #ifdef CONFIG_RCU_TRACE */
 
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c5b98e565aee..92e771d7b44b 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -316,3 +316,13 @@ struct debug_obj_descr rcuhead_debug_descr = {
 };
 EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
 #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
+
+#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
+void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp)
+{
+	trace_rcu_torture_read(rcutorturename, rhp);
+}
+EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
+#else
+#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#endif
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 764825c2685c..df35228e743b 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -913,6 +913,18 @@ rcu_torture_fakewriter(void *arg)
 	return 0;
 }
 
+void rcutorture_trace_dump(void)
+{
+	static atomic_t beenhere = ATOMIC_INIT(0);
+
+	if (atomic_read(&beenhere))
+		return;
+	if (atomic_xchg(&beenhere, 1) != 0)
+		return;
+	do_trace_rcu_torture_read(cur_ops->name, (struct rcu_head *)~0UL);
+	ftrace_dump(DUMP_ALL);
+}
+
 /*
  * RCU torture reader from timer handler.  Dereferences rcu_torture_current,
  * incrementing the corresponding element of the pipeline array.  The
@@ -934,6 +946,7 @@ static void rcu_torture_timer(unsigned long unused)
 				  rcu_read_lock_bh_held() ||
 				  rcu_read_lock_sched_held() ||
 				  srcu_read_lock_held(&srcu_ctl));
+	do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
 	if (p == NULL) {
 		/* Leave because rcu_torture_writer is not yet underway */
 		cur_ops->readunlock(idx);
@@ -951,6 +964,8 @@ static void rcu_torture_timer(unsigned long unused)
 		/* Should not happen, but... */
 		pipe_count = RCU_TORTURE_PIPE_LEN;
 	}
+	if (pipe_count > 1)
+		rcutorture_trace_dump();
 	__this_cpu_inc(rcu_torture_count[pipe_count]);
 	completed = cur_ops->completed() - completed;
 	if (completed > RCU_TORTURE_PIPE_LEN) {
@@ -994,6 +1009,7 @@ rcu_torture_reader(void *arg)
 					  rcu_read_lock_bh_held() ||
 					  rcu_read_lock_sched_held() ||
 					  srcu_read_lock_held(&srcu_ctl));
+		do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
 		if (p == NULL) {
 			/* Wait for rcu_torture_writer to get underway */
 			cur_ops->readunlock(idx);
@@ -1009,6 +1025,8 @@ rcu_torture_reader(void *arg)
 			/* Should not happen, but... */
 			pipe_count = RCU_TORTURE_PIPE_LEN;
 		}
+		if (pipe_count > 1)
+			rcutorture_trace_dump();
 		__this_cpu_inc(rcu_torture_count[pipe_count]);
 		completed = cur_ops->completed() - completed;
 		if (completed > RCU_TORTURE_PIPE_LEN) {
-- 
cgit v1.2.3


From e6b80a3b0994ea6c3d876d72464f2debbfcfeb05 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 7 Oct 2011 16:25:18 -0700
Subject: rcu: Detect illegal rcu dereference in extended quiescent state

Report that none of the rcu read lock maps are held while in an RCU
extended quiescent state (the section between rcu_idle_enter()
and rcu_idle_exit()). This helps detect any use of rcu_dereference()
and friends from within the section in idle where RCU is not allowed.

This way we can guarantee an extended quiescent window where the CPU
can be put in dyntick idle mode or can simply aoid to be part of any
global grace period completion while in the idle loop.

Uses of RCU from such mode are totally ignored by RCU, hence the
importance of these checks.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h | 26 ++++++++++++++++++++++++++
 kernel/rcupdate.c        |  2 ++
 kernel/rcutiny.c         |  1 +
 kernel/rcutree.c         |  1 +
 4 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8d315b013e37..bf91fcfe181c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -228,6 +228,15 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
+#ifdef CONFIG_PROVE_RCU
+extern int rcu_is_cpu_idle(void);
+#else /* !CONFIG_PROVE_RCU */
+static inline int rcu_is_cpu_idle(void)
+{
+	return 0;
+}
+#endif /* else !CONFIG_PROVE_RCU */
+
 extern struct lockdep_map rcu_lock_map;
 # define rcu_read_acquire() \
 		lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
@@ -262,6 +271,8 @@ static inline int rcu_read_lock_held(void)
 {
 	if (!debug_lockdep_rcu_enabled())
 		return 1;
+	if (rcu_is_cpu_idle())
+		return 0;
 	return lock_is_held(&rcu_lock_map);
 }
 
@@ -285,6 +296,19 @@ extern int rcu_read_lock_bh_held(void);
  *
  * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
  * and while lockdep is disabled.
+ *
+ * Note that if the CPU is in the idle loop from an RCU point of
+ * view (ie: that we are in the section between rcu_idle_enter() and
+ * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU
+ * did an rcu_read_lock().  The reason for this is that RCU ignores CPUs
+ * that are in such a section, considering these as in extended quiescent
+ * state, so such a CPU is effectively never in an RCU read-side critical
+ * section regardless of what RCU primitives it invokes.  This state of
+ * affairs is required --- we need to keep an RCU-free window in idle
+ * where the CPU may possibly enter into low power mode. This way we can
+ * notice an extended quiescent state to other CPUs that started a grace
+ * period. Otherwise we would delay any grace period as long as we run in
+ * the idle task.
  */
 #ifdef CONFIG_PREEMPT_COUNT
 static inline int rcu_read_lock_sched_held(void)
@@ -293,6 +317,8 @@ static inline int rcu_read_lock_sched_held(void)
 
 	if (!debug_lockdep_rcu_enabled())
 		return 1;
+	if (rcu_is_cpu_idle())
+		return 0;
 	if (debug_locks)
 		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
 	return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 92e771d7b44b..2bc4e135ff23 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -93,6 +93,8 @@ int rcu_read_lock_bh_held(void)
 {
 	if (!debug_lockdep_rcu_enabled())
 		return 1;
+	if (rcu_is_cpu_idle())
+		return 0;
 	return in_softirq() || irqs_disabled();
 }
 EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 3ab77bdc90c4..b4e0b4981768 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -157,6 +157,7 @@ int rcu_is_cpu_idle(void)
 {
 	return !rcu_dynticks_nesting;
 }
+EXPORT_SYMBOL(rcu_is_cpu_idle);
 
 #endif /* #ifdef CONFIG_PROVE_RCU */
 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 8afb2e89745b..489b62a67d35 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -567,6 +567,7 @@ int rcu_is_cpu_idle(void)
 	preempt_enable();
 	return ret;
 }
+EXPORT_SYMBOL(rcu_is_cpu_idle);
 
 #endif /* #ifdef CONFIG_PROVE_RCU */
 
-- 
cgit v1.2.3


From 00f49e5729af602deb559b0cf293a00b625e8636 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 7 Oct 2011 18:22:02 +0200
Subject: rcu: Warn when rcu_read_lock() is used in extended quiescent state

We are currently able to detect uses of rcu_dereference_check() inside
extended quiescent states (such as the RCU-free window in idle).
But rcu_read_lock() and friends can be used without rcu_dereference(),
so that the earlier commit checking for use of rcu_dereference() and
friends while in RCU idle mode miss some error conditions.  This commit
therefore adds extended quiescent state checking to rcu_read_lock() and
friends.

Uses of RCU from within RCU-idle mode are totally ignored by
RCU, hence the importance of these checks.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h | 52 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index bf91fcfe181c..d201c155f70c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -237,21 +237,53 @@ static inline int rcu_is_cpu_idle(void)
 }
 #endif /* else !CONFIG_PROVE_RCU */
 
+static inline void rcu_lock_acquire(struct lockdep_map *map)
+{
+	WARN_ON_ONCE(rcu_is_cpu_idle());
+	lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
+}
+
+static inline void rcu_lock_release(struct lockdep_map *map)
+{
+	WARN_ON_ONCE(rcu_is_cpu_idle());
+	lock_release(map, 1, _THIS_IP_);
+}
+
 extern struct lockdep_map rcu_lock_map;
-# define rcu_read_acquire() \
-		lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
-# define rcu_read_release()	lock_release(&rcu_lock_map, 1, _THIS_IP_)
+
+static inline void rcu_read_acquire(void)
+{
+	rcu_lock_acquire(&rcu_lock_map);
+}
+
+static inline void rcu_read_release(void)
+{
+	rcu_lock_release(&rcu_lock_map);
+}
 
 extern struct lockdep_map rcu_bh_lock_map;
-# define rcu_read_acquire_bh() \
-		lock_acquire(&rcu_bh_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
-# define rcu_read_release_bh()	lock_release(&rcu_bh_lock_map, 1, _THIS_IP_)
+
+static inline void rcu_read_acquire_bh(void)
+{
+	rcu_lock_acquire(&rcu_bh_lock_map);
+}
+
+static inline void rcu_read_release_bh(void)
+{
+	rcu_lock_release(&rcu_bh_lock_map);
+}
 
 extern struct lockdep_map rcu_sched_lock_map;
-# define rcu_read_acquire_sched() \
-		lock_acquire(&rcu_sched_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
-# define rcu_read_release_sched() \
-		lock_release(&rcu_sched_lock_map, 1, _THIS_IP_)
+
+static inline void rcu_read_acquire_sched(void)
+{
+	rcu_lock_acquire(&rcu_sched_lock_map);
+}
+
+static inline void rcu_read_release_sched(void)
+{
+	rcu_lock_release(&rcu_sched_lock_map);
+}
 
 extern int debug_lockdep_rcu_enabled(void);
 
-- 
cgit v1.2.3


From d8ab29f8be918b34a1ccd174569a53f0eb04b0a5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Fri, 7 Oct 2011 18:22:03 +0200
Subject: rcu: Remove one layer of abstraction from PROVE_RCU checking

Simplify things a bit by substituting the definitions of the single-line
rcu_read_acquire(), rcu_read_release(), rcu_read_acquire_bh(),
rcu_read_release_bh(), rcu_read_acquire_sched(), and
rcu_read_release_sched() functions at their call points.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h | 53 ++++++++----------------------------------------
 1 file changed, 8 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index d201c155f70c..5dd6fd8b3203 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -250,41 +250,8 @@ static inline void rcu_lock_release(struct lockdep_map *map)
 }
 
 extern struct lockdep_map rcu_lock_map;
-
-static inline void rcu_read_acquire(void)
-{
-	rcu_lock_acquire(&rcu_lock_map);
-}
-
-static inline void rcu_read_release(void)
-{
-	rcu_lock_release(&rcu_lock_map);
-}
-
 extern struct lockdep_map rcu_bh_lock_map;
-
-static inline void rcu_read_acquire_bh(void)
-{
-	rcu_lock_acquire(&rcu_bh_lock_map);
-}
-
-static inline void rcu_read_release_bh(void)
-{
-	rcu_lock_release(&rcu_bh_lock_map);
-}
-
 extern struct lockdep_map rcu_sched_lock_map;
-
-static inline void rcu_read_acquire_sched(void)
-{
-	rcu_lock_acquire(&rcu_sched_lock_map);
-}
-
-static inline void rcu_read_release_sched(void)
-{
-	rcu_lock_release(&rcu_sched_lock_map);
-}
-
 extern int debug_lockdep_rcu_enabled(void);
 
 /**
@@ -364,12 +331,8 @@ static inline int rcu_read_lock_sched_held(void)
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
-# define rcu_read_acquire()		do { } while (0)
-# define rcu_read_release()		do { } while (0)
-# define rcu_read_acquire_bh()		do { } while (0)
-# define rcu_read_release_bh()		do { } while (0)
-# define rcu_read_acquire_sched()	do { } while (0)
-# define rcu_read_release_sched()	do { } while (0)
+# define rcu_lock_acquire(a)		do { } while (0)
+# define rcu_lock_release(a)		do { } while (0)
 
 static inline int rcu_read_lock_held(void)
 {
@@ -690,7 +653,7 @@ static inline void rcu_read_lock(void)
 {
 	__rcu_read_lock();
 	__acquire(RCU);
-	rcu_read_acquire();
+	rcu_lock_acquire(&rcu_lock_map);
 }
 
 /*
@@ -710,7 +673,7 @@ static inline void rcu_read_lock(void)
  */
 static inline void rcu_read_unlock(void)
 {
-	rcu_read_release();
+	rcu_lock_release(&rcu_lock_map);
 	__release(RCU);
 	__rcu_read_unlock();
 }
@@ -731,7 +694,7 @@ static inline void rcu_read_lock_bh(void)
 {
 	local_bh_disable();
 	__acquire(RCU_BH);
-	rcu_read_acquire_bh();
+	rcu_lock_acquire(&rcu_bh_lock_map);
 }
 
 /*
@@ -741,7 +704,7 @@ static inline void rcu_read_lock_bh(void)
  */
 static inline void rcu_read_unlock_bh(void)
 {
-	rcu_read_release_bh();
+	rcu_lock_release(&rcu_bh_lock_map);
 	__release(RCU_BH);
 	local_bh_enable();
 }
@@ -758,7 +721,7 @@ static inline void rcu_read_lock_sched(void)
 {
 	preempt_disable();
 	__acquire(RCU_SCHED);
-	rcu_read_acquire_sched();
+	rcu_lock_acquire(&rcu_sched_lock_map);
 }
 
 /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
@@ -775,7 +738,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void)
  */
 static inline void rcu_read_unlock_sched(void)
 {
-	rcu_read_release_sched();
+	rcu_lock_release(&rcu_sched_lock_map);
 	__release(RCU_SCHED);
 	preempt_enable();
 }
-- 
cgit v1.2.3


From ff195cb69ba8d2af9b891be3a26db95fe1999d43 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Fri, 7 Oct 2011 18:22:04 +0200
Subject: rcu: Warn when srcu_read_lock() is used in an extended quiescent
 state

Catch SRCU up to the other variants of RCU by making PROVE_RCU
complain if either srcu_read_lock() or srcu_read_lock_held() are
used from within RCU-idle mode.

Frederic reworked this to allow for the new versions of his patches
that check for extended quiescent states.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/srcu.h | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 58971e891f48..4e0a3d41dae3 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -28,6 +28,7 @@
 #define _LINUX_SRCU_H
 
 #include <linux/mutex.h>
+#include <linux/rcupdate.h>
 
 struct srcu_struct_array {
 	int c[2];
@@ -60,18 +61,10 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name,
 	__init_srcu_struct((sp), #sp, &__srcu_key); \
 })
 
-# define srcu_read_acquire(sp) \
-		lock_acquire(&(sp)->dep_map, 0, 0, 2, 1, NULL, _THIS_IP_)
-# define srcu_read_release(sp) \
-		lock_release(&(sp)->dep_map, 1, _THIS_IP_)
-
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 int init_srcu_struct(struct srcu_struct *sp);
 
-# define srcu_read_acquire(sp)  do { } while (0)
-# define srcu_read_release(sp)  do { } while (0)
-
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 void cleanup_srcu_struct(struct srcu_struct *sp);
@@ -90,12 +83,29 @@ long srcu_batches_completed(struct srcu_struct *sp);
  * read-side critical section.  In absence of CONFIG_DEBUG_LOCK_ALLOC,
  * this assumes we are in an SRCU read-side critical section unless it can
  * prove otherwise.
+ *
+ * Note that if the CPU is in the idle loop from an RCU point of view
+ * (ie: that we are in the section between rcu_idle_enter() and
+ * rcu_idle_exit()) then srcu_read_lock_held() returns false even if
+ * the CPU did an srcu_read_lock().  The reason for this is that RCU
+ * ignores CPUs that are in such a section, considering these as in
+ * extended quiescent state, so such a CPU is effectively never in an
+ * RCU read-side critical section regardless of what RCU primitives it
+ * invokes.  This state of affairs is required --- we need to keep an
+ * RCU-free window in idle where the CPU may possibly enter into low
+ * power mode. This way we can notice an extended quiescent state to
+ * other CPUs that started a grace period. Otherwise we would delay any
+ * grace period as long as we run in the idle task.
  */
 static inline int srcu_read_lock_held(struct srcu_struct *sp)
 {
-	if (debug_locks)
-		return lock_is_held(&sp->dep_map);
-	return 1;
+	if (rcu_is_cpu_idle())
+		return 0;
+
+	if (!debug_locks)
+		return 1;
+
+	return lock_is_held(&sp->dep_map);
 }
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -150,7 +160,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 {
 	int retval = __srcu_read_lock(sp);
 
-	srcu_read_acquire(sp);
+	rcu_lock_acquire(&(sp)->dep_map);
 	return retval;
 }
 
@@ -164,7 +174,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	__releases(sp)
 {
-	srcu_read_release(sp);
+	rcu_lock_release(&(sp)->dep_map);
 	__srcu_read_unlock(sp, idx);
 }
 
-- 
cgit v1.2.3


From 867f236bd12f5091df6dc7cc75f94d7fd982d78a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Fri, 7 Oct 2011 18:22:05 +0200
Subject: rcu: Make srcu_read_lock_held() call common lockdep-enabled function

A common debug_lockdep_rcu_enabled() function is used to check whether
RCU lockdep splats should be reported, but srcu_read_lock() does not
use it.  This commit therefore brings srcu_read_lock_held() up to date.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/srcu.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 4e0a3d41dae3..d4b12443b2ef 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -84,6 +84,9 @@ long srcu_batches_completed(struct srcu_struct *sp);
  * this assumes we are in an SRCU read-side critical section unless it can
  * prove otherwise.
  *
+ * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
+ * and while lockdep is disabled.
+ *
  * Note that if the CPU is in the idle loop from an RCU point of view
  * (ie: that we are in the section between rcu_idle_enter() and
  * rcu_idle_exit()) then srcu_read_lock_held() returns false even if
@@ -102,7 +105,7 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp)
 	if (rcu_is_cpu_idle())
 		return 0;
 
-	if (!debug_locks)
+	if (!debug_lockdep_rcu_enabled())
 		return 1;
 
 	return lock_is_held(&sp->dep_map);
-- 
cgit v1.2.3


From 280f06774afedf849f0b34248ed6aff57d0f6908 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 7 Oct 2011 18:22:06 +0200
Subject: nohz: Separate out irq exit and idle loop dyntick logic

The tick_nohz_stop_sched_tick() function, which tries to delay
the next timer tick as long as possible, can be called from two
places:

- From the idle loop to start the dytick idle mode
- From interrupt exit if we have interrupted the dyntick
idle mode, so that we reprogram the next tick event in
case the irq changed some internal state that requires this
action.

There are only few minor differences between both that
are handled by that function, driven by the ts->inidle
cpu variable and the inidle parameter. The whole guarantees
that we only update the dyntick mode on irq exit if we actually
interrupted the dyntick idle mode, and that we enter in RCU extended
quiescent state from idle loop entry only.

Split this function into:

- tick_nohz_idle_enter(), which sets ts->inidle to 1, enters
dynticks idle mode unconditionally if it can, and enters into RCU
extended quiescent state.

- tick_nohz_irq_exit() which only updates the dynticks idle mode
when ts->inidle is set (ie: if tick_nohz_idle_enter() has been called).

To maintain symmetry, tick_nohz_restart_sched_tick() has been renamed
into tick_nohz_idle_exit().

This simplifies the code and micro-optimize the irq exit path (no need
for local_irq_save there). This also prepares for the split between
dynticks and rcu extended quiescent state logics. We'll need this split to
further fix illegal uses of RCU in extended quiescent states in the idle
loop.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: David Miller <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 arch/arm/kernel/process.c              |  4 +-
 arch/avr32/kernel/process.c            |  4 +-
 arch/blackfin/kernel/process.c         |  4 +-
 arch/microblaze/kernel/process.c       |  4 +-
 arch/mips/kernel/process.c             |  4 +-
 arch/openrisc/kernel/idle.c            |  4 +-
 arch/powerpc/kernel/idle.c             |  4 +-
 arch/powerpc/platforms/iseries/setup.c |  8 +--
 arch/s390/kernel/process.c             |  4 +-
 arch/sh/kernel/idle.c                  |  4 +-
 arch/sparc/kernel/process_64.c         |  4 +-
 arch/tile/kernel/process.c             |  4 +-
 arch/um/kernel/process.c               |  4 +-
 arch/unicore32/kernel/process.c        |  4 +-
 arch/x86/kernel/process_32.c           |  4 +-
 arch/x86/kernel/process_64.c           |  4 +-
 include/linux/tick.h                   | 13 ++---
 kernel/softirq.c                       |  2 +-
 kernel/time/tick-sched.c               | 93 +++++++++++++++++++++-------------
 19 files changed, 99 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 3d0c6fb74ae4..3f1f8daf703c 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -183,7 +183,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		leds_event(led_idle_start);
 		while (!need_resched()) {
 #ifdef CONFIG_HOTPLUG_CPU
@@ -213,7 +213,7 @@ void cpu_idle(void)
 			}
 		}
 		leds_event(led_idle_end);
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index ef5a2a08fcca..6ee7952248db 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -34,10 +34,10 @@ void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		while (!need_resched())
 			cpu_idle_sleep();
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 6a80a9e9fc4a..7b141b5c9e8d 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -88,10 +88,10 @@ void cpu_idle(void)
 #endif
 		if (!idle)
 			idle = default_idle;
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		while (!need_resched())
 			idle();
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 95cc295976a7..5407f09b4be4 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -103,10 +103,10 @@ void cpu_idle(void)
 		if (!idle)
 			idle = default_idle;
 
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		while (!need_resched())
 			idle();
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 
 		preempt_enable_no_resched();
 		schedule();
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index c47f96e453c0..c11e5ca2a434 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -56,7 +56,7 @@ void __noreturn cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		while (!need_resched() && cpu_online(cpu)) {
 #ifdef CONFIG_MIPS_MT_SMTC
 			extern void smtc_idle_loop_hook(void);
@@ -77,7 +77,7 @@ void __noreturn cpu_idle(void)
 		     system_state == SYSTEM_BOOTING))
 			play_dead();
 #endif
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/openrisc/kernel/idle.c b/arch/openrisc/kernel/idle.c
index d5bc5f813e89..fb6a9bf40006 100644
--- a/arch/openrisc/kernel/idle.c
+++ b/arch/openrisc/kernel/idle.c
@@ -51,7 +51,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 
 		while (!need_resched()) {
 			check_pgt_cache();
@@ -69,7 +69,7 @@ void cpu_idle(void)
 			set_thread_flag(TIF_POLLING_NRFLAG);
 		}
 
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 39a2baa6ad58..878572f70ac5 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -56,7 +56,7 @@ void cpu_idle(void)
 
 	set_thread_flag(TIF_POLLING_NRFLAG);
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		while (!need_resched() && !cpu_should_die()) {
 			ppc64_runlatch_off();
 
@@ -93,7 +93,7 @@ void cpu_idle(void)
 
 		HMT_medium();
 		ppc64_runlatch_on();
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		if (cpu_should_die())
 			cpu_die();
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index ea0acbd8966d..e83dfaf89f69 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -563,7 +563,7 @@ static void yield_shared_processor(void)
 static void iseries_shared_idle(void)
 {
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		while (!need_resched() && !hvlpevent_is_pending()) {
 			local_irq_disable();
 			ppc64_runlatch_off();
@@ -577,7 +577,7 @@ static void iseries_shared_idle(void)
 		}
 
 		ppc64_runlatch_on();
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 
 		if (hvlpevent_is_pending())
 			process_iSeries_events();
@@ -593,7 +593,7 @@ static void iseries_dedicated_idle(void)
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		if (!need_resched()) {
 			while (!need_resched()) {
 				ppc64_runlatch_off();
@@ -610,7 +610,7 @@ static void iseries_dedicated_idle(void)
 		}
 
 		ppc64_runlatch_on();
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 9451b210a1b4..6224f9dbbc1f 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -91,10 +91,10 @@ static void default_idle(void)
 void cpu_idle(void)
 {
 	for (;;) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		while (!need_resched())
 			default_idle();
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index db4ecd731a00..6015743020a0 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -89,7 +89,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 
 		while (!need_resched()) {
 			check_pgt_cache();
@@ -111,7 +111,7 @@ void cpu_idle(void)
 			start_critical_timings();
 		}
 
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 3739a06a76cb..9c2795ba2cfe 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -95,12 +95,12 @@ void cpu_idle(void)
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while(1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 
 		while (!need_resched() && !cpu_is_offline(cpu))
 			sparc64_yield(cpu);
 
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 
 		preempt_enable_no_resched();
 
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 9c45d8bbdf57..920e674aedb9 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -85,7 +85,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		while (!need_resched()) {
 			if (cpu_is_offline(cpu))
 				BUG();  /* no HOTPLUG_CPU */
@@ -105,7 +105,7 @@ void cpu_idle(void)
 				local_irq_enable();
 			current_thread_info()->status |= TS_POLLING;
 		}
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index c5338351aecd..cfb657e92849 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -246,10 +246,10 @@ void default_idle(void)
 		if (need_resched())
 			schedule();
 
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		nsecs = disable_timer();
 		idle_sleep(nsecs);
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 	}
 }
 
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
index ba401df971ed..9999b9a84d46 100644
--- a/arch/unicore32/kernel/process.c
+++ b/arch/unicore32/kernel/process.c
@@ -55,7 +55,7 @@ void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		while (!need_resched()) {
 			local_irq_disable();
 			stop_critical_timings();
@@ -63,7 +63,7 @@ void cpu_idle(void)
 			local_irq_enable();
 			start_critical_timings();
 		}
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 795b79f984c2..6d9d4d52cac5 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -99,7 +99,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		while (!need_resched()) {
 
 			check_pgt_cache();
@@ -116,7 +116,7 @@ void cpu_idle(void)
 				pm_idle();
 			start_critical_timings();
 		}
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3bd7e6eebf31..b069e9d7875f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -122,7 +122,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		while (!need_resched()) {
 
 			rmb();
@@ -149,7 +149,7 @@ void cpu_idle(void)
 			__exit_idle();
 		}
 
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/include/linux/tick.h b/include/linux/tick.h
index ca40838fdfb7..0df1d50a408a 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -121,21 +121,22 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
-extern void tick_nohz_stop_sched_tick(int inidle);
-extern void tick_nohz_restart_sched_tick(void);
+extern void tick_nohz_idle_enter(void);
+extern void tick_nohz_idle_exit(void);
+extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 # else
-static inline void tick_nohz_stop_sched_tick(int inidle)
+static inline void tick_nohz_idle_enter(void)
 {
-	if (inidle)
-		rcu_idle_enter();
+	rcu_idle_enter();
 }
-static inline void tick_nohz_restart_sched_tick(void)
+static inline void tick_nohz_idle_exit(void)
 {
 	rcu_idle_exit();
 }
+
 static inline ktime_t tick_nohz_get_sleep_length(void)
 {
 	ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 2c71d91efff0..f9f2aa81ce53 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -351,7 +351,7 @@ void irq_exit(void)
 #ifdef CONFIG_NO_HZ
 	/* Make sure that timer wheel updates are propagated */
 	if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
-		tick_nohz_stop_sched_tick(0);
+		tick_nohz_irq_exit();
 #endif
 	preempt_enable_no_resched();
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 5d9d23665f12..266c242dc354 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -275,42 +275,17 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
 }
 EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
 
-/**
- * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
- *
- * When the next event is more than a tick into the future, stop the idle tick
- * Called either from the idle loop or from irq_exit() when an idle period was
- * just interrupted by an interrupt which did not cause a reschedule.
- */
-void tick_nohz_stop_sched_tick(int inidle)
+static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
 {
-	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
-	struct tick_sched *ts;
+	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
 	ktime_t last_update, expires, now;
 	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
 	u64 time_delta;
 	int cpu;
 
-	local_irq_save(flags);
-
 	cpu = smp_processor_id();
 	ts = &per_cpu(tick_cpu_sched, cpu);
 
-	/*
-	 * Call to tick_nohz_start_idle stops the last_update_time from being
-	 * updated. Thus, it must not be called in the event we are called from
-	 * irq_exit() with the prior state different than idle.
-	 */
-	if (!inidle && !ts->inidle)
-		goto end;
-
-	/*
-	 * Set ts->inidle unconditionally. Even if the system did not
-	 * switch to NOHZ mode the cpu frequency governers rely on the
-	 * update of the idle time accounting in tick_nohz_start_idle().
-	 */
-	ts->inidle = 1;
-
 	now = tick_nohz_start_idle(cpu, ts);
 
 	/*
@@ -326,10 +301,10 @@ void tick_nohz_stop_sched_tick(int inidle)
 	}
 
 	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
-		goto end;
+		return;
 
 	if (need_resched())
-		goto end;
+		return;
 
 	if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
 		static int ratelimit;
@@ -339,7 +314,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 			       (unsigned int) local_softirq_pending());
 			ratelimit++;
 		}
-		goto end;
+		return;
 	}
 
 	ts->idle_calls++;
@@ -471,10 +446,54 @@ out:
 	ts->next_jiffies = next_jiffies;
 	ts->last_jiffies = last_jiffies;
 	ts->sleep_length = ktime_sub(dev->next_event, now);
-end:
-	if (inidle)
-		rcu_idle_enter();
-	local_irq_restore(flags);
+}
+
+/**
+ * tick_nohz_idle_enter - stop the idle tick from the idle task
+ *
+ * When the next event is more than a tick into the future, stop the idle tick
+ * Called when we start the idle loop.
+ * This also enters into RCU extended quiescent state so that this CPU doesn't
+ * need anymore to be part of any global grace period completion. This way
+ * the tick can be stopped safely as we don't need to report quiescent states.
+ */
+void tick_nohz_idle_enter(void)
+{
+	struct tick_sched *ts;
+
+	WARN_ON_ONCE(irqs_disabled());
+
+	local_irq_disable();
+
+	ts = &__get_cpu_var(tick_cpu_sched);
+	/*
+	 * set ts->inidle unconditionally. even if the system did not
+	 * switch to nohz mode the cpu frequency governers rely on the
+	 * update of the idle time accounting in tick_nohz_start_idle().
+	 */
+	ts->inidle = 1;
+	tick_nohz_stop_sched_tick(ts);
+	rcu_idle_enter();
+
+	local_irq_enable();
+}
+
+/**
+ * tick_nohz_irq_exit - update next tick event from interrupt exit
+ *
+ * When an interrupt fires while we are idle and it doesn't cause
+ * a reschedule, it may still add, modify or delete a timer, enqueue
+ * an RCU callback, etc...
+ * So we need to re-calculate and reprogram the next tick event.
+ */
+void tick_nohz_irq_exit(void)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+
+	if (!ts->inidle)
+		return;
+
+	tick_nohz_stop_sched_tick(ts);
 }
 
 /**
@@ -516,11 +535,13 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 }
 
 /**
- * tick_nohz_restart_sched_tick - restart the idle tick from the idle task
+ * tick_nohz_idle_exit - restart the idle tick from the idle task
  *
  * Restart the idle tick when the CPU is woken up from idle
+ * This also exit the RCU extended quiescent state. The CPU
+ * can use RCU again after this function is called.
  */
-void tick_nohz_restart_sched_tick(void)
+void tick_nohz_idle_exit(void)
 {
 	int cpu = smp_processor_id();
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
-- 
cgit v1.2.3


From 2bbb6817c0ac1b5f2a68d720f364f98eeb1ac4fd Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 8 Oct 2011 16:01:00 +0200
Subject: nohz: Allow rcu extended quiescent state handling seperately from
 tick stop

It is assumed that rcu won't be used once we switch to tickless
mode and until we restart the tick. However this is not always
true, as in x86-64 where we dereference the idle notifiers after
the tick is stopped.

To prepare for fixing this, add two new APIs:
tick_nohz_idle_enter_norcu() and tick_nohz_idle_exit_norcu().

If no use of RCU is made in the idle loop between
tick_nohz_enter_idle() and tick_nohz_exit_idle() calls, the arch
must instead call the new *_norcu() version such that the arch doesn't
need to call rcu_idle_enter() and rcu_idle_exit().

Otherwise the arch must call tick_nohz_enter_idle() and
tick_nohz_exit_idle() and also call explicitly:

- rcu_idle_enter() after its last use of RCU before the CPU is put
to sleep.
- rcu_idle_exit() before the first use of RCU after the CPU is woken
up.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: David Miller <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 arch/arm/kernel/process.c              |  4 +--
 arch/avr32/kernel/process.c            |  4 +--
 arch/blackfin/kernel/process.c         |  4 +--
 arch/microblaze/kernel/process.c       |  4 +--
 arch/mips/kernel/process.c             |  4 +--
 arch/openrisc/kernel/idle.c            |  4 +--
 arch/powerpc/kernel/idle.c             |  4 +--
 arch/powerpc/platforms/iseries/setup.c |  8 +++---
 arch/s390/kernel/process.c             |  4 +--
 arch/sh/kernel/idle.c                  |  4 +--
 arch/sparc/kernel/process_64.c         |  4 +--
 arch/tile/kernel/process.c             |  4 +--
 arch/um/kernel/process.c               |  4 +--
 arch/unicore32/kernel/process.c        |  4 +--
 arch/x86/kernel/process_32.c           |  4 +--
 arch/x86/kernel/process_64.c           |  4 +--
 include/linux/tick.h                   | 46 +++++++++++++++++++++++++++++++---
 kernel/time/tick-sched.c               | 25 +++++++++---------
 18 files changed, 90 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 3f1f8daf703c..47e34c091276 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -183,7 +183,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 		leds_event(led_idle_start);
 		while (!need_resched()) {
 #ifdef CONFIG_HOTPLUG_CPU
@@ -213,7 +213,7 @@ void cpu_idle(void)
 			}
 		}
 		leds_event(led_idle_end);
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index 6ee7952248db..34c8c703bb16 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -34,10 +34,10 @@ void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 		while (!need_resched())
 			cpu_idle_sleep();
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 7b141b5c9e8d..57e07498a0e7 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -88,10 +88,10 @@ void cpu_idle(void)
 #endif
 		if (!idle)
 			idle = default_idle;
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 		while (!need_resched())
 			idle();
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 5407f09b4be4..13d59f34b94e 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -103,10 +103,10 @@ void cpu_idle(void)
 		if (!idle)
 			idle = default_idle;
 
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 		while (!need_resched())
 			idle();
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 
 		preempt_enable_no_resched();
 		schedule();
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index c11e5ca2a434..17fb3a270160 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -56,7 +56,7 @@ void __noreturn cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 		while (!need_resched() && cpu_online(cpu)) {
 #ifdef CONFIG_MIPS_MT_SMTC
 			extern void smtc_idle_loop_hook(void);
@@ -77,7 +77,7 @@ void __noreturn cpu_idle(void)
 		     system_state == SYSTEM_BOOTING))
 			play_dead();
 #endif
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/openrisc/kernel/idle.c b/arch/openrisc/kernel/idle.c
index fb6a9bf40006..2e82cd0fa5e1 100644
--- a/arch/openrisc/kernel/idle.c
+++ b/arch/openrisc/kernel/idle.c
@@ -51,7 +51,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 
 		while (!need_resched()) {
 			check_pgt_cache();
@@ -69,7 +69,7 @@ void cpu_idle(void)
 			set_thread_flag(TIF_POLLING_NRFLAG);
 		}
 
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 878572f70ac5..2e782a36d8f2 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -56,7 +56,7 @@ void cpu_idle(void)
 
 	set_thread_flag(TIF_POLLING_NRFLAG);
 	while (1) {
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 		while (!need_resched() && !cpu_should_die()) {
 			ppc64_runlatch_off();
 
@@ -93,7 +93,7 @@ void cpu_idle(void)
 
 		HMT_medium();
 		ppc64_runlatch_on();
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 		preempt_enable_no_resched();
 		if (cpu_should_die())
 			cpu_die();
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index e83dfaf89f69..d69d3d185e89 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -563,7 +563,7 @@ static void yield_shared_processor(void)
 static void iseries_shared_idle(void)
 {
 	while (1) {
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 		while (!need_resched() && !hvlpevent_is_pending()) {
 			local_irq_disable();
 			ppc64_runlatch_off();
@@ -577,7 +577,7 @@ static void iseries_shared_idle(void)
 		}
 
 		ppc64_runlatch_on();
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 
 		if (hvlpevent_is_pending())
 			process_iSeries_events();
@@ -593,7 +593,7 @@ static void iseries_dedicated_idle(void)
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while (1) {
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 		if (!need_resched()) {
 			while (!need_resched()) {
 				ppc64_runlatch_off();
@@ -610,7 +610,7 @@ static void iseries_dedicated_idle(void)
 		}
 
 		ppc64_runlatch_on();
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 6224f9dbbc1f..6fa987367ae6 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -91,10 +91,10 @@ static void default_idle(void)
 void cpu_idle(void)
 {
 	for (;;) {
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 		while (!need_resched())
 			default_idle();
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index 6015743020a0..ad58e7535a7c 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -89,7 +89,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 
 		while (!need_resched()) {
 			check_pgt_cache();
@@ -111,7 +111,7 @@ void cpu_idle(void)
 			start_critical_timings();
 		}
 
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 9c2795ba2cfe..4a0e7d79cb92 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -95,12 +95,12 @@ void cpu_idle(void)
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while(1) {
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 
 		while (!need_resched() && !cpu_is_offline(cpu))
 			sparc64_yield(cpu);
 
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 
 		preempt_enable_no_resched();
 
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 920e674aedb9..53ac89595ab1 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -85,7 +85,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 		while (!need_resched()) {
 			if (cpu_is_offline(cpu))
 				BUG();  /* no HOTPLUG_CPU */
@@ -105,7 +105,7 @@ void cpu_idle(void)
 				local_irq_enable();
 			current_thread_info()->status |= TS_POLLING;
 		}
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index cfb657e92849..55d2cf455f63 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -246,10 +246,10 @@ void default_idle(void)
 		if (need_resched())
 			schedule();
 
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 		nsecs = disable_timer();
 		idle_sleep(nsecs);
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 	}
 }
 
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
index 9999b9a84d46..095ff5a57928 100644
--- a/arch/unicore32/kernel/process.c
+++ b/arch/unicore32/kernel/process.c
@@ -55,7 +55,7 @@ void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 		while (!need_resched()) {
 			local_irq_disable();
 			stop_critical_timings();
@@ -63,7 +63,7 @@ void cpu_idle(void)
 			local_irq_enable();
 			start_critical_timings();
 		}
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 6d9d4d52cac5..f94da3920c36 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -99,7 +99,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 		while (!need_resched()) {
 
 			check_pgt_cache();
@@ -116,7 +116,7 @@ void cpu_idle(void)
 				pm_idle();
 			start_critical_timings();
 		}
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b069e9d7875f..18e8cf3581f6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -122,7 +122,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter();
+		tick_nohz_idle_enter_norcu();
 		while (!need_resched()) {
 
 			rmb();
@@ -149,7 +149,7 @@ void cpu_idle(void)
 			__exit_idle();
 		}
 
-		tick_nohz_idle_exit();
+		tick_nohz_idle_exit_norcu();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 0df1d50a408a..327434a05757 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -7,6 +7,7 @@
 #define _LINUX_TICK_H
 
 #include <linux/clockchips.h>
+#include <linux/irqflags.h>
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
@@ -121,18 +122,57 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
-extern void tick_nohz_idle_enter(void);
+extern void __tick_nohz_idle_enter(void);
+static inline void tick_nohz_idle_enter(void)
+{
+	local_irq_disable();
+	__tick_nohz_idle_enter();
+	local_irq_enable();
+}
 extern void tick_nohz_idle_exit(void);
+
+/*
+ * Call this pair of function if the arch doesn't make any use
+ * of RCU in-between. You won't need to call rcu_idle_enter() and
+ * rcu_idle_exit().
+ * Otherwise you need to call tick_nohz_idle_enter() and tick_nohz_idle_exit()
+ * and explicitly tell RCU about the window around the place the CPU enters low
+ * power mode where no RCU use is made. This is done by calling rcu_idle_enter()
+ * after the last use of RCU before the CPU is put to sleep and by calling
+ * rcu_idle_exit() before the first use of RCU after the CPU woke up.
+ */
+static inline void tick_nohz_idle_enter_norcu(void)
+{
+	/*
+	 * Also call rcu_idle_enter() in the irq disabled section even
+	 * if it disables irq itself.
+	 * Just an optimization that prevents from an interrupt happening
+	 * between it and __tick_nohz_idle_enter() to lose time to help
+	 * completing a grace period while we could be in extended grace
+	 * period already.
+	 */
+	local_irq_disable();
+	__tick_nohz_idle_enter();
+	rcu_idle_enter();
+	local_irq_enable();
+}
+static inline void tick_nohz_idle_exit_norcu(void)
+{
+	rcu_idle_exit();
+	tick_nohz_idle_exit();
+}
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 # else
-static inline void tick_nohz_idle_enter(void)
+static inline void tick_nohz_idle_enter(void) { }
+static inline void tick_nohz_idle_exit(void) { }
+static inline void tick_nohz_idle_enter_norcu(void)
 {
 	rcu_idle_enter();
 }
-static inline void tick_nohz_idle_exit(void)
+static inline void tick_nohz_idle_exit_norcu(void)
 {
 	rcu_idle_exit();
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 266c242dc354..c76aefe764b0 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -453,18 +453,22 @@ out:
  *
  * When the next event is more than a tick into the future, stop the idle tick
  * Called when we start the idle loop.
- * This also enters into RCU extended quiescent state so that this CPU doesn't
- * need anymore to be part of any global grace period completion. This way
- * the tick can be stopped safely as we don't need to report quiescent states.
+ *
+ * If no use of RCU is made in the idle loop between
+ * tick_nohz_idle_enter() and tick_nohz_idle_exit() calls, then
+ * tick_nohz_idle_enter_norcu() should be called instead and the arch
+ * doesn't need to call rcu_idle_enter() and rcu_idle_exit() explicitly.
+ *
+ * Otherwise the arch is responsible of calling:
+ *
+ * - rcu_idle_enter() after its last use of RCU before the CPU is put
+ *  to sleep.
+ * - rcu_idle_exit() before the first use of RCU after the CPU is woken up.
  */
-void tick_nohz_idle_enter(void)
+void __tick_nohz_idle_enter(void)
 {
 	struct tick_sched *ts;
 
-	WARN_ON_ONCE(irqs_disabled());
-
-	local_irq_disable();
-
 	ts = &__get_cpu_var(tick_cpu_sched);
 	/*
 	 * set ts->inidle unconditionally. even if the system did not
@@ -473,9 +477,6 @@ void tick_nohz_idle_enter(void)
 	 */
 	ts->inidle = 1;
 	tick_nohz_stop_sched_tick(ts);
-	rcu_idle_enter();
-
-	local_irq_enable();
 }
 
 /**
@@ -551,7 +552,7 @@ void tick_nohz_idle_exit(void)
 	ktime_t now;
 
 	local_irq_disable();
-	rcu_idle_exit();
+
 	if (ts->idle_active || (ts->inidle && ts->tick_stopped))
 		now = ktime_get();
 
-- 
cgit v1.2.3


From 0c53dd8b31404c1d7fd15be8f065ebaec615a562 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 9 Oct 2011 15:13:11 -0700
Subject: rcu: Introduce raw SRCU read-side primitives

The RCU implementations, including SRCU, are designed to be used in a
lock-like fashion, so that the read-side lock and unlock primitives must
execute in the same context for any given read-side critical section.
This constraint is enforced by lockdep-RCU.  However, there is a need
to enter an SRCU read-side critical section within the context of an
exception and then exit in the context of the task that encountered the
exception.  The cost of this capability is that the read-side operations
incur the overhead of disabling interrupts.

Note that although the current implementation allows a given read-side
critical section to be entered by one task and then exited by another, all
known possible implementations that allow this have scalability problems.
Therefore, a given read-side critical section must be exited by the same
task that entered it, though perhaps from an interrupt or exception
handler running within that task's context.  But if you are thinking
in terms of interrupt handlers, make sure that you have considered the
possibility of threaded interrupt handlers.

Credit goes to Peter Zijlstra for suggesting use of the existing _raw
suffix to indicate disabling lockdep over the earlier "bulkref" names.

Requested-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index d4b12443b2ef..1eb520cd1680 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -181,4 +181,47 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	__srcu_read_unlock(sp, idx);
 }
 
+/**
+ * srcu_read_lock_raw - register a new reader for an SRCU-protected structure.
+ * @sp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section.  Similar to srcu_read_lock(),
+ * but avoids the RCU-lockdep checking.  This means that it is legal to
+ * use srcu_read_lock_raw() in one context, for example, in an exception
+ * handler, and then have the matching srcu_read_unlock_raw() in another
+ * context, for example in the task that took the exception.
+ *
+ * However, the entire SRCU read-side critical section must reside within a
+ * single task.  For example, beware of using srcu_read_lock_raw() in
+ * a device interrupt handler and srcu_read_unlock() in the interrupted
+ * task:  This will not work if interrupts are threaded.
+ */
+static inline int srcu_read_lock_raw(struct srcu_struct *sp)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret =  __srcu_read_lock(sp);
+	local_irq_restore(flags);
+	return ret;
+}
+
+/**
+ * srcu_read_unlock_raw - unregister reader from an SRCU-protected structure.
+ * @sp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock_raw().
+ *
+ * Exit an SRCU read-side critical section without lockdep-RCU checking.
+ * See srcu_read_lock_raw() for more details.
+ */
+static inline void srcu_read_unlock_raw(struct srcu_struct *sp, int idx)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__srcu_read_unlock(sp, idx);
+	local_irq_restore(flags);
+}
+
 #endif
-- 
cgit v1.2.3


From c4f3060843506ba6d473ab9a0afe5bd5dc93a00d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Thu, 10 Nov 2011 12:41:56 -0800
Subject: sched: Add is_idle_task() to handle invalidated uses of idle_cpu()

Commit 908a3283 (Fix idle_cpu()) invalidated some uses of idle_cpu(),
which used to say whether or not the CPU was running the idle task,
but now instead says whether or not the CPU is running the idle task
in the absence of pending wakeups.  Although this new implementation
gives a better answer to the question "is this CPU idle?", it also
invalidates other uses that were made of idle_cpu().

This commit therefore introduces a new is_idle_task() API member
that determines whether or not the specified task is one of the
idle tasks, allowing open-coded "->pid == 0" sequences to be replaced
by something more meaningful.

Suggested-by: Josh Triplett <josh@joshtriplett.org>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/sched.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c4f3e9b9bc5..4a7e4d333a27 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2070,6 +2070,14 @@ extern int sched_setscheduler(struct task_struct *, int,
 extern int sched_setscheduler_nocheck(struct task_struct *, int,
 				      const struct sched_param *);
 extern struct task_struct *idle_task(int cpu);
+/**
+ * is_idle_task - is the specified task an idle task?
+ * @tsk: the task in question.
+ */
+static inline bool is_idle_task(struct task_struct *p)
+{
+	return p->pid == 0;
+}
 extern struct task_struct *curr_task(int cpu);
 extern void set_curr_task(int cpu, struct task_struct *p);
 
-- 
cgit v1.2.3


From 1268fbc746ea1cd279886a740dcbad4ba5232225 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 17 Nov 2011 18:48:14 +0100
Subject: nohz: Remove tick_nohz_idle_enter_norcu() /
 tick_nohz_idle_exit_norcu()

Those two APIs were provided to optimize the calls of
tick_nohz_idle_enter() and rcu_idle_enter() into a single
irq disabled section. This way no interrupt happening in-between would
needlessly process any RCU job.

Now we are talking about an optimization for which benefits
have yet to be measured. Let's start simple and completely decouple
idle rcu and dyntick idle logics to simplify.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 arch/arm/kernel/process.c              |  6 +++--
 arch/avr32/kernel/process.c            |  6 +++--
 arch/blackfin/kernel/process.c         |  6 +++--
 arch/microblaze/kernel/process.c       |  6 +++--
 arch/mips/kernel/process.c             |  6 +++--
 arch/openrisc/kernel/idle.c            |  6 +++--
 arch/powerpc/kernel/idle.c             | 15 +++++------
 arch/powerpc/platforms/iseries/setup.c | 12 ++++++---
 arch/s390/kernel/process.c             |  6 +++--
 arch/sh/kernel/idle.c                  |  6 +++--
 arch/sparc/kernel/process_64.c         |  6 +++--
 arch/tile/kernel/process.c             |  6 +++--
 arch/um/kernel/process.c               |  6 +++--
 arch/unicore32/kernel/process.c        |  6 +++--
 arch/x86/kernel/process_32.c           |  6 +++--
 include/linux/tick.h                   | 47 +---------------------------------
 kernel/time/tick-sched.c               | 15 ++++++-----
 17 files changed, 76 insertions(+), 91 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 47e34c091276..e8e8fe505df1 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -183,7 +183,8 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		leds_event(led_idle_start);
 		while (!need_resched()) {
 #ifdef CONFIG_HOTPLUG_CPU
@@ -213,7 +214,8 @@ void cpu_idle(void)
 			}
 		}
 		leds_event(led_idle_end);
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index 34c8c703bb16..ea3395750324 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -34,10 +34,12 @@ void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched())
 			cpu_idle_sleep();
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 57e07498a0e7..8dd0416673cb 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -88,10 +88,12 @@ void cpu_idle(void)
 #endif
 		if (!idle)
 			idle = default_idle;
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched())
 			idle();
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 13d59f34b94e..7dcb5bfffb75 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -103,10 +103,12 @@ void cpu_idle(void)
 		if (!idle)
 			idle = default_idle;
 
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched())
 			idle();
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 
 		preempt_enable_no_resched();
 		schedule();
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 17fb3a270160..7955409051c4 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -56,7 +56,8 @@ void __noreturn cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched() && cpu_online(cpu)) {
 #ifdef CONFIG_MIPS_MT_SMTC
 			extern void smtc_idle_loop_hook(void);
@@ -77,7 +78,8 @@ void __noreturn cpu_idle(void)
 		     system_state == SYSTEM_BOOTING))
 			play_dead();
 #endif
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/openrisc/kernel/idle.c b/arch/openrisc/kernel/idle.c
index 2e82cd0fa5e1..e5fc78877830 100644
--- a/arch/openrisc/kernel/idle.c
+++ b/arch/openrisc/kernel/idle.c
@@ -51,7 +51,8 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 
 		while (!need_resched()) {
 			check_pgt_cache();
@@ -69,7 +70,8 @@ void cpu_idle(void)
 			set_thread_flag(TIF_POLLING_NRFLAG);
 		}
 
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 3cd73d1fc427..9c3cd490b1bd 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -62,10 +62,10 @@ void cpu_idle(void)
 
 	set_thread_flag(TIF_POLLING_NRFLAG);
 	while (1) {
-		if (idle_uses_rcu)
-			tick_nohz_idle_enter();
-		else
-			tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		if (!idle_uses_rcu)
+			rcu_idle_enter();
+
 		while (!need_resched() && !cpu_should_die()) {
 			ppc64_runlatch_off();
 
@@ -102,10 +102,9 @@ void cpu_idle(void)
 
 		HMT_medium();
 		ppc64_runlatch_on();
-		if (idle_uses_rcu)
-			tick_nohz_idle_exit();
-		else
-			tick_nohz_idle_exit_norcu();
+		if (!idle_uses_rcu)
+			rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		if (cpu_should_die())
 			cpu_die();
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index d69d3d185e89..8fc62586a973 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -563,7 +563,8 @@ static void yield_shared_processor(void)
 static void iseries_shared_idle(void)
 {
 	while (1) {
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched() && !hvlpevent_is_pending()) {
 			local_irq_disable();
 			ppc64_runlatch_off();
@@ -577,7 +578,8 @@ static void iseries_shared_idle(void)
 		}
 
 		ppc64_runlatch_on();
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 
 		if (hvlpevent_is_pending())
 			process_iSeries_events();
@@ -593,7 +595,8 @@ static void iseries_dedicated_idle(void)
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while (1) {
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		if (!need_resched()) {
 			while (!need_resched()) {
 				ppc64_runlatch_off();
@@ -610,7 +613,8 @@ static void iseries_dedicated_idle(void)
 		}
 
 		ppc64_runlatch_on();
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 6fa987367ae6..3201ae447990 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -91,10 +91,12 @@ static void default_idle(void)
 void cpu_idle(void)
 {
 	for (;;) {
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched())
 			default_idle();
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index ad58e7535a7c..406508d4ce74 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -89,7 +89,8 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 
 		while (!need_resched()) {
 			check_pgt_cache();
@@ -111,7 +112,8 @@ void cpu_idle(void)
 			start_critical_timings();
 		}
 
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 4a0e7d79cb92..39d8b05201a2 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -95,12 +95,14 @@ void cpu_idle(void)
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while(1) {
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 
 		while (!need_resched() && !cpu_is_offline(cpu))
 			sparc64_yield(cpu);
 
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 
 		preempt_enable_no_resched();
 
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 53ac89595ab1..4c1ac6e5347a 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -85,7 +85,8 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched()) {
 			if (cpu_is_offline(cpu))
 				BUG();  /* no HOTPLUG_CPU */
@@ -105,7 +106,8 @@ void cpu_idle(void)
 				local_irq_enable();
 			current_thread_info()->status |= TS_POLLING;
 		}
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 55d2cf455f63..69f24905abdc 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -246,10 +246,12 @@ void default_idle(void)
 		if (need_resched())
 			schedule();
 
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		nsecs = disable_timer();
 		idle_sleep(nsecs);
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 	}
 }
 
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
index 095ff5a57928..52edc2b62873 100644
--- a/arch/unicore32/kernel/process.c
+++ b/arch/unicore32/kernel/process.c
@@ -55,7 +55,8 @@ void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched()) {
 			local_irq_disable();
 			stop_critical_timings();
@@ -63,7 +64,8 @@ void cpu_idle(void)
 			local_irq_enable();
 			start_critical_timings();
 		}
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index f94da3920c36..485204f58cda 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -99,7 +99,8 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_idle_enter_norcu();
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched()) {
 
 			check_pgt_cache();
@@ -116,7 +117,8 @@ void cpu_idle(void)
 				pm_idle();
 			start_critical_timings();
 		}
-		tick_nohz_idle_exit_norcu();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 327434a05757..ab8be90b5cc9 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -122,45 +122,8 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
-extern void __tick_nohz_idle_enter(void);
-static inline void tick_nohz_idle_enter(void)
-{
-	local_irq_disable();
-	__tick_nohz_idle_enter();
-	local_irq_enable();
-}
+extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
-
-/*
- * Call this pair of function if the arch doesn't make any use
- * of RCU in-between. You won't need to call rcu_idle_enter() and
- * rcu_idle_exit().
- * Otherwise you need to call tick_nohz_idle_enter() and tick_nohz_idle_exit()
- * and explicitly tell RCU about the window around the place the CPU enters low
- * power mode where no RCU use is made. This is done by calling rcu_idle_enter()
- * after the last use of RCU before the CPU is put to sleep and by calling
- * rcu_idle_exit() before the first use of RCU after the CPU woke up.
- */
-static inline void tick_nohz_idle_enter_norcu(void)
-{
-	/*
-	 * Also call rcu_idle_enter() in the irq disabled section even
-	 * if it disables irq itself.
-	 * Just an optimization that prevents from an interrupt happening
-	 * between it and __tick_nohz_idle_enter() to lose time to help
-	 * completing a grace period while we could be in extended grace
-	 * period already.
-	 */
-	local_irq_disable();
-	__tick_nohz_idle_enter();
-	rcu_idle_enter();
-	local_irq_enable();
-}
-static inline void tick_nohz_idle_exit_norcu(void)
-{
-	rcu_idle_exit();
-	tick_nohz_idle_exit();
-}
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
@@ -168,14 +131,6 @@ extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 # else
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
-static inline void tick_nohz_idle_enter_norcu(void)
-{
-	rcu_idle_enter();
-}
-static inline void tick_nohz_idle_exit_norcu(void)
-{
-	rcu_idle_exit();
-}
 
 static inline ktime_t tick_nohz_get_sleep_length(void)
 {
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c76aefe764b0..0ec8b832ab6b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -454,21 +454,20 @@ out:
  * When the next event is more than a tick into the future, stop the idle tick
  * Called when we start the idle loop.
  *
- * If no use of RCU is made in the idle loop between
- * tick_nohz_idle_enter() and tick_nohz_idle_exit() calls, then
- * tick_nohz_idle_enter_norcu() should be called instead and the arch
- * doesn't need to call rcu_idle_enter() and rcu_idle_exit() explicitly.
- *
- * Otherwise the arch is responsible of calling:
+ * The arch is responsible of calling:
  *
  * - rcu_idle_enter() after its last use of RCU before the CPU is put
  *  to sleep.
  * - rcu_idle_exit() before the first use of RCU after the CPU is woken up.
  */
-void __tick_nohz_idle_enter(void)
+void tick_nohz_idle_enter(void)
 {
 	struct tick_sched *ts;
 
+	WARN_ON_ONCE(irqs_disabled());
+
+	local_irq_disable();
+
 	ts = &__get_cpu_var(tick_cpu_sched);
 	/*
 	 * set ts->inidle unconditionally. even if the system did not
@@ -477,6 +476,8 @@ void __tick_nohz_idle_enter(void)
 	 */
 	ts->inidle = 1;
 	tick_nohz_stop_sched_tick(ts);
+
+	local_irq_enable();
 }
 
 /**
-- 
cgit v1.2.3


From 3842a0832a1d6eb0b31421f8810a813135967512 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 28 Nov 2011 10:42:42 -0800
Subject: rcu: Document same-context read-side constraints

The intent is that a given RCU read-side critical section be confined
to a single context.  For example, it is illegal to invoke rcu_read_lock()
in an exception handler and then invoke rcu_read_unlock() from the
context of the task that received the exception.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 15 +++++++++++++++
 include/linux/srcu.h     |  5 +++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 5dd6fd8b3203..81c04f4348ec 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -265,6 +265,11 @@ extern int debug_lockdep_rcu_enabled(void);
  *
  * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
  * and while lockdep is disabled.
+ *
+ * Note that rcu_read_lock() and the matching rcu_read_unlock() must
+ * occur in the same context, for example, it is illegal to invoke
+ * rcu_read_unlock() in process context if the matching rcu_read_lock()
+ * was invoked from within an irq handler.
  */
 static inline int rcu_read_lock_held(void)
 {
@@ -689,6 +694,11 @@ static inline void rcu_read_unlock(void)
  * critical sections in interrupt context can use just rcu_read_lock(),
  * though this should at least be commented to avoid confusing people
  * reading the code.
+ *
+ * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh()
+ * must occur in the same context, for example, it is illegal to invoke
+ * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh()
+ * was invoked from some other task.
  */
 static inline void rcu_read_lock_bh(void)
 {
@@ -716,6 +726,11 @@ static inline void rcu_read_unlock_bh(void)
  * are being done using call_rcu_sched() or synchronize_rcu_sched().
  * Read-side critical sections can also be introduced by anything that
  * disables preemption, including local_irq_disable() and friends.
+ *
+ * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched()
+ * must occur in the same context, for example, it is illegal to invoke
+ * rcu_read_unlock_sched() from process context if the matching
+ * rcu_read_lock_sched() was invoked from an NMI handler.
  */
 static inline void rcu_read_lock_sched(void)
 {
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 1eb520cd1680..e1b005918bbb 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -158,6 +158,11 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp)
  * one way to indirectly wait on an SRCU grace period is to acquire
  * a mutex that is held elsewhere while calling synchronize_srcu() or
  * synchronize_srcu_expedited().
+ *
+ * Note that srcu_read_lock() and the matching srcu_read_unlock() must
+ * occur in the same context, for example, it is illegal to invoke
+ * srcu_read_unlock() in an irq handler if the matching srcu_read_lock()
+ * was invoked in process context.
  */
 static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 {
-- 
cgit v1.2.3


From 2987557f52b97f679f0c324d8f51b8d66e1f2084 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Sat, 3 Dec 2011 13:06:50 -0800
Subject: driver-core/cpu: Expose hotpluggability to the rest of the kernel

When architectures register CPUs, they indicate whether the CPU allows
hotplugging; notably, x86 and ARM don't allow hotplugging CPU 0.
Userspace can easily query the hotpluggability of a CPU via sysfs;
however, the kernel has no convenient way of accessing that property in
an architecture-independent way.  While the kernel can simply try it and
see, some code needs to distinguish between "hotplug failed" and
"hotplug has no hope of working on this CPU"; for example, rcutorture's
CPU hotplug tests want to avoid drowning out real hotplug failures with
expected failures.

Expose this property via a new cpu_is_hotpluggable function, so that the
rest of the kernel can access it in an architecture-independent way.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 drivers/base/cpu.c  | 7 +++++++
 include/linux/cpu.h | 1 +
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 251acea3d359..3991502b21e5 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -247,6 +247,13 @@ struct sys_device *get_cpu_sysdev(unsigned cpu)
 }
 EXPORT_SYMBOL_GPL(get_cpu_sysdev);
 
+bool cpu_is_hotpluggable(unsigned cpu)
+{
+	struct sys_device *dev = get_cpu_sysdev(cpu);
+	return dev && container_of(dev, struct cpu, sysdev)->hotpluggable;
+}
+EXPORT_SYMBOL_GPL(cpu_is_hotpluggable);
+
 int __init cpu_dev_init(void)
 {
 	int err;
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 6cb60fd2ea84..305c263021e7 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -27,6 +27,7 @@ struct cpu {
 
 extern int register_cpu(struct cpu *cpu, int num);
 extern struct sys_device *get_cpu_sysdev(unsigned cpu);
+extern bool cpu_is_hotpluggable(unsigned cpu);
 
 extern int cpu_add_sysdev_attr(struct sysdev_attribute *attr);
 extern void cpu_remove_sysdev_attr(struct sysdev_attribute *attr);
-- 
cgit v1.2.3


From dfd56b8b38fff3586f36232db58e1e9f7885a605 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 10 Dec 2011 09:48:31 +0000
Subject: net: use IS_ENABLED(CONFIG_IPV6)

Instead of testing defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/errqueue.h               |  4 ++--
 include/linux/ipv6.h                   |  4 ++--
 include/linux/lockd/lockd.h            |  6 +++---
 include/linux/sunrpc/clnt.h            |  6 +++---
 include/net/inet6_hashtables.h         |  4 ++--
 include/net/inet_sock.h                |  6 +++---
 include/net/ip.h                       |  6 +++---
 include/net/net_namespace.h            |  2 +-
 include/net/netfilter/nf_tproxy_core.h |  2 +-
 include/net/netns/mib.h                |  2 +-
 include/net/netns/xfrm.h               |  2 +-
 include/net/protocol.h                 |  8 ++++----
 include/net/sctp/sctp.h                |  4 ++--
 include/net/sctp/structs.h             |  2 +-
 include/net/tcp.h                      |  6 +++---
 include/net/udp.h                      |  4 ++--
 net/bridge/br_multicast.c              | 32 ++++++++++++++++----------------
 net/bridge/br_private.h                |  2 +-
 net/core/secure_seq.c                  |  4 ++--
 net/dccp/dccp.h                        |  2 +-
 net/dccp/minisocks.c                   |  2 +-
 net/ipv4/inet_connection_sock.c        |  2 +-
 net/ipv4/inet_diag.c                   | 16 ++++++++--------
 net/ipv4/ip_gre.c                      |  8 ++++----
 net/ipv4/ip_sockglue.c                 |  6 +++---
 net/ipv4/tcp_input.c                   |  2 +-
 net/ipv4/tcp_minisocks.c               |  2 +-
 net/ipv4/tcp_timer.c                   |  2 +-
 net/ipv4/tunnel4.c                     | 10 +++++-----
 net/ipv4/xfrm4_tunnel.c                |  6 +++---
 net/key/af_key.c                       | 18 +++++++++---------
 net/netfilter/xt_TEE.c                 |  9 +++------
 net/netlabel/netlabel_addrlist.c       |  8 ++++----
 net/netlabel/netlabel_addrlist.h       |  2 +-
 net/netlabel/netlabel_domainhash.c     | 20 ++++++++++----------
 net/netlabel/netlabel_domainhash.h     |  2 +-
 net/netlabel/netlabel_kapi.c           | 18 +++++++++---------
 net/netlabel/netlabel_mgmt.c           |  6 +++---
 net/netlabel/netlabel_unlabeled.c      | 26 +++++++++++++-------------
 net/sctp/input.c                       |  2 +-
 net/sctp/protocol.c                    |  2 +-
 net/sctp/socket.c                      |  4 ++--
 net/sunrpc/addr.c                      |  8 ++++----
 net/sunrpc/svc.c                       |  8 ++++----
 net/sunrpc/svc_xprt.c                  |  8 ++++----
 net/sunrpc/svcauth_unix.c              |  2 +-
 net/xfrm/xfrm_policy.c                 |  6 +++---
 net/xfrm/xfrm_user.c                   | 12 ++++++------
 48 files changed, 161 insertions(+), 164 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h
index c9f522bd17e4..fd0628be45ce 100644
--- a/include/linux/errqueue.h
+++ b/include/linux/errqueue.h
@@ -25,7 +25,7 @@ struct sock_extended_err {
 #ifdef __KERNEL__
 
 #include <net/ip.h>
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <linux/ipv6.h>
 #endif
 
@@ -34,7 +34,7 @@ struct sock_extended_err {
 struct sock_exterr_skb {
 	union {
 		struct inet_skb_parm	h4;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		struct inet6_skb_parm	h6;
 #endif
 	} header;
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 0c997767429a..6318268dcaf5 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -404,7 +404,7 @@ struct tcp6_sock {
 
 extern int inet6_sk_rebuild_header(struct sock *sk);
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
 {
 	return inet_sk(__sk)->pinet6;
@@ -515,7 +515,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 #define inet6_rcv_saddr(__sk)	NULL
 #define tcp_twsk_ipv6only(__sk)		0
 #define inet_v6_ipv6only(__sk)		0
-#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif /* IS_ENABLED(CONFIG_IPV6) */
 
 #define INET6_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net)	&& \
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index ff9abff55aa0..90b0656a869e 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -301,7 +301,7 @@ static inline int __nlm_privileged_request4(const struct sockaddr *sap)
 	return ipv4_is_loopback(sin->sin_addr.s_addr);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static inline int __nlm_privileged_request6(const struct sockaddr *sap)
 {
 	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
@@ -314,12 +314,12 @@ static inline int __nlm_privileged_request6(const struct sockaddr *sap)
 
 	return ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK;
 }
-#else	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#else	/* IS_ENABLED(CONFIG_IPV6) */
 static inline int __nlm_privileged_request6(const struct sockaddr *sap)
 {
 	return 0;
 }
-#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif	/* IS_ENABLED(CONFIG_IPV6) */
 
 /*
  * Ensure incoming requests are from local privileged callers.
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index f15fd985b08a..2c5993a17c33 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -215,7 +215,7 @@ static inline bool __rpc_copy_addr4(struct sockaddr *dst,
 	return true;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
 				   const struct sockaddr *sap2)
 {
@@ -240,7 +240,7 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst,
 	dsin6->sin6_addr = ssin6->sin6_addr;
 	return true;
 }
-#else	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
+#else	/* !(IS_ENABLED(CONFIG_IPV6) */
 static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
 				   const struct sockaddr *sap2)
 {
@@ -252,7 +252,7 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst,
 {
 	return false;
 }
-#endif	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
+#endif	/* !(IS_ENABLED(CONFIG_IPV6) */
 
 /**
  * rpc_cmp_addr - compare the address portion of two sockaddrs.
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index e46674d5daea..00cbb4384c79 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -15,7 +15,7 @@
 #define _INET6_HASHTABLES_H
 
 
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <linux/in6.h>
 #include <linux/ipv6.h>
 #include <linux/types.h>
@@ -110,5 +110,5 @@ extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo
 				 const struct in6_addr *saddr, const __be16 sport,
 				 const struct in6_addr *daddr, const __be16 dport,
 				 const int dif);
-#endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */
+#endif /* IS_ENABLED(CONFIG_IPV6) */
 #endif /* _INET6_HASHTABLES_H */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index f941964a9931..e3e405106afe 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -71,7 +71,7 @@ struct ip_options_data {
 
 struct inet_request_sock {
 	struct request_sock	req;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	u16			inet6_rsk_offset;
 #endif
 	__be16			loc_port;
@@ -139,7 +139,7 @@ struct rtable;
 struct inet_sock {
 	/* sk and pinet6 has to be the first two members of inet_sock */
 	struct sock		sk;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct ipv6_pinfo	*pinet6;
 #endif
 	/* Socket demultiplex comparisons on incoming packets. */
@@ -188,7 +188,7 @@ static inline void __inet_sk_copy_descendant(struct sock *sk_to,
 	memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1,
 	       sk_from->sk_prot->obj_size - ancestor_size);
 }
-#if !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE))
+#if !(IS_ENABLED(CONFIG_IPV6))
 static inline void inet_sk_copy_descendant(struct sock *sk_to,
 					   const struct sock *sk_from)
 {
diff --git a/include/net/ip.h b/include/net/ip.h
index fd1561e88a1a..775009f9eaba 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -353,14 +353,14 @@ static inline void ip_ipgre_mc_map(__be32 naddr, const unsigned char *broadcast,
 		memcpy(buf, &naddr, sizeof(naddr));
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <linux/ipv6.h>
 #endif
 
 static __inline__ void inet_reset_saddr(struct sock *sk)
 {
 	inet_sk(sk)->inet_rcv_saddr = inet_sk(sk)->inet_saddr = 0;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (sk->sk_family == PF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
 
@@ -379,7 +379,7 @@ static inline int sk_mc_loop(struct sock *sk)
 	switch (sk->sk_family) {
 	case AF_INET:
 		return inet_sk(sk)->mc_loop;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		return inet6_sk(sk)->mc_loop;
 #endif
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 3bb6fa0eace0..ee547c149810 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -77,7 +77,7 @@ struct net {
 	struct netns_packet	packet;
 	struct netns_unix	unx;
 	struct netns_ipv4	ipv4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netns_ipv6	ipv6;
 #endif
 #if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h
index e505358d8999..75ca9291cf2c 100644
--- a/include/net/netfilter/nf_tproxy_core.h
+++ b/include/net/netfilter/nf_tproxy_core.h
@@ -131,7 +131,7 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
 	return sk;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static inline struct sock *
 nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
 		      const struct in6_addr *saddr, const struct in6_addr *daddr,
diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index 30f6728ee98c..d542a4b28cca 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -12,7 +12,7 @@ struct netns_mib {
 	DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
 	DEFINE_SNMP_STAT_ATOMIC(struct icmpmsg_mib, icmpmsg_statistics);
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct proc_dir_entry *proc_net_devsnmp6;
 	DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6);
 	DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 748f91f87cd5..5299e69a32af 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -56,7 +56,7 @@ struct netns_xfrm {
 #endif
 
 	struct dst_ops		xfrm4_dst_ops;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct dst_ops		xfrm6_dst_ops;
 #endif
 };
diff --git a/include/net/protocol.h b/include/net/protocol.h
index e182e13d6391..875f4895b033 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -25,7 +25,7 @@
 #define _PROTOCOL_H
 
 #include <linux/in6.h>
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <linux/ipv6.h>
 #endif
 
@@ -46,7 +46,7 @@ struct net_protocol {
 				netns_ok:1;
 };
 
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 struct inet6_protocol {
 	int	(*handler)(struct sk_buff *skb);
 
@@ -91,7 +91,7 @@ struct inet_protosw {
 
 extern const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS];
 
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 extern const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS];
 #endif
 
@@ -100,7 +100,7 @@ extern int	inet_del_protocol(const struct net_protocol *prot, unsigned char num)
 extern void	inet_register_protosw(struct inet_protosw *p);
 extern void	inet_unregister_protosw(struct inet_protosw *p);
 
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 extern int	inet6_add_protocol(const struct inet6_protocol *prot, unsigned char num);
 extern int	inet6_del_protocol(const struct inet6_protocol *prot, unsigned char num);
 extern int	inet6_register_protosw(struct inet_protosw *p);
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 6a72a58cde59..d3685615a8b0 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -71,7 +71,7 @@
 #include <linux/jiffies.h>
 #include <linux/idr.h>
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
 #endif
@@ -383,7 +383,7 @@ static inline void sctp_sysctl_unregister(void) { return; }
 /* Size of Supported Address Parameter for 'x' address types. */
 #define SCTP_SAT_LEN(x) (sizeof(struct sctp_paramhdr) + (x) * sizeof(__u16))
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 
 void sctp_v6_pf_init(void);
 void sctp_v6_pf_exit(void);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 3382615bd710..ad0e31bf7450 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -365,7 +365,7 @@ static inline struct sock *sctp_opt2sk(const struct sctp_sock *sp)
        return (struct sock *)sp;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 struct sctp6_sock {
        struct sctp_sock  sctp;
        struct ipv6_pinfo inet6;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 87e3c80bfa00..02f070d339ba 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -628,7 +628,7 @@ extern u32 __tcp_select_window(struct sock *sk);
 struct tcp_skb_cb {
 	union {
 		struct inet_skb_parm	h4;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		struct inet6_skb_parm	h6;
 #endif
 	} header;	/* For incoming frames		*/
@@ -1152,7 +1152,7 @@ struct tcp6_md5sig_key {
 /* - sock block */
 struct tcp_md5sig_info {
 	struct tcp4_md5sig_key	*keys4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct tcp6_md5sig_key	*keys6;
 	u32			entries6;
 	u32			alloced6;
@@ -1179,7 +1179,7 @@ struct tcp6_pseudohdr {
 
 union tcp_md5sum_block {
 	struct tcp4_pseudohdr ip4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct tcp6_pseudohdr ip6;
 #endif
 };
diff --git a/include/net/udp.h b/include/net/udp.h
index 1ffb39c9f324..e39592f682c3 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -41,7 +41,7 @@
 struct udp_skb_cb {
 	union {
 		struct inet_skb_parm	h4;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		struct inet6_skb_parm	h6;
 #endif
 	} header;
@@ -223,7 +223,7 @@ extern struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *sa
 	else	    SNMP_INC_STATS_USER((net)->mib.udp_stats_in6, field);      \
 } while(0)
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #define UDPX_INC_STATS_BH(sk, field) \
 	do { \
 		if ((sk)->sk_family == AF_INET) \
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 375417e633c9..568d5bf17534 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -24,7 +24,7 @@
 #include <linux/slab.h>
 #include <linux/timer.h>
 #include <net/ip.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
 #include <net/mld.h>
 #include <net/addrconf.h>
@@ -36,7 +36,7 @@
 #define mlock_dereference(X, br) \
 	rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static inline int ipv6_is_transient_multicast(const struct in6_addr *addr)
 {
 	if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr))
@@ -52,7 +52,7 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
 	switch (a->proto) {
 	case htons(ETH_P_IP):
 		return a->u.ip4 == b->u.ip4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
 		return ipv6_addr_equal(&a->u.ip6, &b->u.ip6);
 #endif
@@ -65,7 +65,7 @@ static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip)
 	return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb,
 				const struct in6_addr *ip)
 {
@@ -79,7 +79,7 @@ static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb,
 	switch (ip->proto) {
 	case htons(ETH_P_IP):
 		return __br_ip4_hash(mdb, ip->u.ip4);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
 		return __br_ip6_hash(mdb, &ip->u.ip6);
 #endif
@@ -121,7 +121,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip4_get(
 	return br_mdb_ip_get(mdb, &br_dst);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static struct net_bridge_mdb_entry *br_mdb_ip6_get(
 	struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst)
 {
@@ -152,7 +152,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 	case htons(ETH_P_IP):
 		ip.u.ip4 = ip_hdr(skb)->daddr;
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
 		ip.u.ip6 = ipv6_hdr(skb)->daddr;
 		break;
@@ -411,7 +411,7 @@ out:
 	return skb;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 						    const struct in6_addr *group)
 {
@@ -496,7 +496,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
 	switch (addr->proto) {
 	case htons(ETH_P_IP):
 		return br_ip4_multicast_alloc_query(br, addr->u.ip4);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
 		return br_ip6_multicast_alloc_query(br, &addr->u.ip6);
 #endif
@@ -773,7 +773,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
 	return br_multicast_add_group(br, port, &br_group);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static int br_ip6_multicast_add_group(struct net_bridge *br,
 				      struct net_bridge_port *port,
 				      const struct in6_addr *group)
@@ -845,7 +845,7 @@ static void br_multicast_send_query(struct net_bridge *br,
 	br_group.proto = htons(ETH_P_IP);
 	__br_multicast_send_query(br, port, &br_group);
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	br_group.proto = htons(ETH_P_IPV6);
 	__br_multicast_send_query(br, port, &br_group);
 #endif
@@ -989,7 +989,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
 	return err;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 					struct net_bridge_port *port,
 					struct sk_buff *skb)
@@ -1185,7 +1185,7 @@ out:
 	return err;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static int br_ip6_multicast_query(struct net_bridge *br,
 				  struct net_bridge_port *port,
 				  struct sk_buff *skb)
@@ -1334,7 +1334,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
 	br_multicast_leave_group(br, port, &br_group);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static void br_ip6_multicast_leave_group(struct net_bridge *br,
 					 struct net_bridge_port *port,
 					 const struct in6_addr *group)
@@ -1449,7 +1449,7 @@ err_out:
 	return err;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static int br_multicast_ipv6_rcv(struct net_bridge *br,
 				 struct net_bridge_port *port,
 				 struct sk_buff *skb)
@@ -1596,7 +1596,7 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
 		return br_multicast_ipv4_rcv(br, port, skb);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
 		return br_multicast_ipv6_rcv(br, port, skb);
 #endif
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 89969080c384..57dcd1489f3f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -56,7 +56,7 @@ struct br_ip
 {
 	union {
 		__be32	ip4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		struct in6_addr ip6;
 #endif
 	} u;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 925991ae6f52..9fbca46f3e74 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -36,7 +36,7 @@ static u32 seq_scale(u32 seq)
 }
 #endif
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 				   __be16 sport, __be16 dport)
 {
@@ -156,7 +156,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 }
 EXPORT_SYMBOL(secure_dccp_sequence_number);
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 				  __be16 sport, __be16 dport)
 {
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 583490aaf56f..5818032e35a9 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -357,7 +357,7 @@ static inline int dccp_bad_service_code(const struct sock *sk,
 struct dccp_skb_cb {
 	union {
 		struct inet_skb_parm	h4;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		struct inet6_skb_parm	h6;
 #endif
 	} header;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index b50d5fd3d696..5a7f90bbffac 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -53,7 +53,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
 	if (tw != NULL) {
 		const struct inet_connection_sock *icsk = inet_csk(sk);
 		const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
 			const struct ipv6_pinfo *np = inet6_sk(sk);
 			struct inet6_timewait_sock *tw6;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a598768c616c..2e4e24476c4c 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -418,7 +418,7 @@ static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
 	return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #define AF_INET_FAMILY(fam) ((fam) == AF_INET)
 #else
 #define AF_INET_FAMILY(fam) 1
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 9b3e0b179cd2..575e28c57cc9 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -116,7 +116,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 	if (ext & (1 << (INET_DIAG_TOS - 1)))
 		RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos);
 
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (r->idiag_family == AF_INET6) {
 		const struct ipv6_pinfo *np = inet6_sk(sk);
 
@@ -234,7 +234,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 	r->idiag_wqueue	      = 0;
 	r->idiag_uid	      = 0;
 	r->idiag_inode	      = 0;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (tw->tw_family == AF_INET6) {
 		const struct inet6_timewait_sock *tw6 =
 						inet6_twsk((struct sock *)tw);
@@ -286,7 +286,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
 				 req->id.idiag_dport, req->id.idiag_src[0],
 				 req->id.idiag_sport, req->id.idiag_if);
 	}
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	else if (req->sdiag_family == AF_INET6) {
 		sk = inet6_lookup(&init_net, hashinfo,
 				  (struct in6_addr *)req->id.idiag_dst,
@@ -473,7 +473,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
 		return 1;
 
 	entry.family = sk->sk_family;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (entry.family == AF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
 
@@ -571,7 +571,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
 		struct inet_diag_entry entry;
 
 		entry.family = tw->tw_family;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == AF_INET6) {
 			struct inet6_timewait_sock *tw6 =
 						inet6_twsk((struct sock *)tw);
@@ -633,7 +633,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 	r->idiag_wqueue = 0;
 	r->idiag_uid = sock_i_uid(sk);
 	r->idiag_inode = 0;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (r->idiag_family == AF_INET6) {
 		*(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr;
 		*(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr;
@@ -695,13 +695,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 
 			if (bc) {
 				entry.saddr =
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 					(entry.family == AF_INET6) ?
 					inet6_rsk(req)->loc_addr.s6_addr32 :
 #endif
 					&ireq->loc_addr;
 				entry.daddr =
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 					(entry.family == AF_INET6) ?
 					inet6_rsk(req)->rmt_addr.s6_addr32 :
 #endif
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index fe070c1593ab..2b53a1f7abf6 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -46,7 +46,7 @@
 #include <net/rtnetlink.h>
 #include <net/gre.h>
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
 #include <net/ip6_route.h>
@@ -729,7 +729,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			if ((dst = rt->rt_gateway) == 0)
 				goto tx_error_icmp;
 		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		else if (skb->protocol == htons(ETH_P_IPV6)) {
 			struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb));
 			const struct in6_addr *addr6;
@@ -799,7 +799,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			goto tx_error;
 		}
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
 		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
 
@@ -875,7 +875,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	if ((iph->ttl = tiph->ttl) == 0) {
 		if (skb->protocol == htons(ETH_P_IP))
 			iph->ttl = old_iph->ttl;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		else if (skb->protocol == htons(ETH_P_IPV6))
 			iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
 #endif
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 80d5fa450210..8aa87c19fa00 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -37,7 +37,7 @@
 #include <net/route.h>
 #include <net/xfrm.h>
 #include <net/compat.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <net/transp_v6.h>
 #endif
 
@@ -508,7 +508,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 						sock_owned_by_user(sk));
 		if (inet->is_icsk) {
 			struct inet_connection_sock *icsk = inet_csk(sk);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 			if (sk->sk_family == PF_INET ||
 			    (!((1 << sk->sk_state) &
 			       (TCPF_LISTEN | TCPF_CLOSE)) &&
@@ -519,7 +519,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 				if (opt)
 					icsk->icsk_ext_hdr_len += opt->opt.optlen;
 				icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 			}
 #endif
 		}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0cbb44076cfa..b9cbc351c511 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2663,7 +2663,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 		       tp->snd_ssthresh, tp->prior_ssthresh,
 		       tp->packets_out);
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	else if (sk->sk_family == AF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
 		printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 9dc146e5ed65..550e755747e0 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -336,7 +336,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		tcptw->tw_ts_recent	= tp->rx_opt.ts_recent;
 		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 			struct inet6_timewait_sock *tw6;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 2e0f0af76c19..aa39a692f4c8 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -340,7 +340,7 @@ void tcp_retransmit_timer(struct sock *sk)
 			       &inet->inet_daddr, ntohs(inet->inet_dport),
 			       inet->inet_num, tp->snd_una, tp->snd_nxt);
 		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		else if (sk->sk_family == AF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index ac3b3ee4b07c..01775983b997 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -105,7 +105,7 @@ drop:
 	return 0;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static int tunnel64_rcv(struct sk_buff *skb)
 {
 	struct xfrm_tunnel *handler;
@@ -134,7 +134,7 @@ static void tunnel4_err(struct sk_buff *skb, u32 info)
 			break;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static void tunnel64_err(struct sk_buff *skb, u32 info)
 {
 	struct xfrm_tunnel *handler;
@@ -152,7 +152,7 @@ static const struct net_protocol tunnel4_protocol = {
 	.netns_ok	=	1,
 };
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static const struct net_protocol tunnel64_protocol = {
 	.handler	=	tunnel64_rcv,
 	.err_handler	=	tunnel64_err,
@@ -167,7 +167,7 @@ static int __init tunnel4_init(void)
 		printk(KERN_ERR "tunnel4 init: can't add protocol\n");
 		return -EAGAIN;
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) {
 		printk(KERN_ERR "tunnel64 init: can't add protocol\n");
 		inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
@@ -179,7 +179,7 @@ static int __init tunnel4_init(void)
 
 static void __exit tunnel4_fini(void)
 {
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6))
 		printk(KERN_ERR "tunnel64 close: can't remove protocol\n");
 #endif
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 82806455e859..9247d9d70e9d 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -64,7 +64,7 @@ static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = {
 	.priority	=	2,
 };
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = {
 	.handler	=	xfrm_tunnel_rcv,
 	.err_handler	=	xfrm_tunnel_err,
@@ -84,7 +84,7 @@ static int __init ipip_init(void)
 		xfrm_unregister_type(&ipip_type, AF_INET);
 		return -EAGAIN;
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (xfrm4_tunnel_register(&xfrm64_tunnel_handler, AF_INET6)) {
 		printk(KERN_INFO "ipip init: can't add xfrm handler for AF_INET6\n");
 		xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET);
@@ -97,7 +97,7 @@ static int __init ipip_init(void)
 
 static void __exit ipip_fini(void)
 {
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (xfrm4_tunnel_deregister(&xfrm64_tunnel_handler, AF_INET6))
 		printk(KERN_INFO "ipip close: can't remove xfrm handler for AF_INET6\n");
 #endif
diff --git a/net/key/af_key.c b/net/key/af_key.c
index bfc0bef170cb..11dbb2255ccb 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -375,7 +375,7 @@ static int verify_address_len(const void *p)
 	const struct sadb_address *sp = p;
 	const struct sockaddr *addr = (const struct sockaddr *)(sp + 1);
 	const struct sockaddr_in *sin;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	const struct sockaddr_in6 *sin6;
 #endif
 	int len;
@@ -387,7 +387,7 @@ static int verify_address_len(const void *p)
 		    sp->sadb_address_prefixlen > 32)
 			return -EINVAL;
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin6), sizeof(uint64_t));
 		if (sp->sadb_address_len != len ||
@@ -469,7 +469,7 @@ static int present_and_same_family(const struct sadb_address *src,
 	if (s_addr->sa_family != d_addr->sa_family)
 		return 0;
 	if (s_addr->sa_family != AF_INET
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	    && s_addr->sa_family != AF_INET6
 #endif
 		)
@@ -579,7 +579,7 @@ static inline int pfkey_sockaddr_len(sa_family_t family)
 	switch (family) {
 	case AF_INET:
 		return sizeof(struct sockaddr_in);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		return sizeof(struct sockaddr_in6);
 #endif
@@ -595,7 +595,7 @@ int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr)
 		xaddr->a4 =
 			((struct sockaddr_in *)sa)->sin_addr.s_addr;
 		return AF_INET;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		memcpy(xaddr->a6,
 		       &((struct sockaddr_in6 *)sa)->sin6_addr,
@@ -639,7 +639,7 @@ static struct  xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct
 	case AF_INET:
 		xaddr = (xfrm_address_t *)&((const struct sockaddr_in *)(addr + 1))->sin_addr;
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		xaddr = (xfrm_address_t *)&((const struct sockaddr_in6 *)(addr + 1))->sin6_addr;
 		break;
@@ -705,7 +705,7 @@ static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port
 		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 		return 32;
 	    }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 	    {
 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
@@ -1311,7 +1311,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
 		xdaddr = (xfrm_address_t *)&((struct sockaddr_in *)(daddr + 1))->sin_addr.s_addr;
 		xsaddr = (xfrm_address_t *)&((struct sockaddr_in *)(saddr + 1))->sin_addr.s_addr;
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		xdaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(daddr + 1))->sin6_addr;
 		xsaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(saddr + 1))->sin6_addr;
@@ -3146,7 +3146,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
 			return NULL;
 		}
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		if (opt != IPV6_IPSEC_POLICY) {
 			*dir = -EOPNOTSUPP;
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 5f054a0dbbb1..68349c31083c 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -29,9 +29,6 @@
 #	define WITH_CONNTRACK 1
 #	include <net/netfilter/nf_conntrack.h>
 #endif
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-#	define WITH_IPV6 1
-#endif
 
 struct xt_tee_priv {
 	struct notifier_block	notifier;
@@ -136,7 +133,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	return XT_CONTINUE;
 }
 
-#ifdef WITH_IPV6
+#if IS_ENABLED(CONFIG_IPV6)
 static bool
 tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 {
@@ -196,7 +193,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 	}
 	return XT_CONTINUE;
 }
-#endif /* WITH_IPV6 */
+#endif
 
 static int tee_netdev_event(struct notifier_block *this, unsigned long event,
 			    void *ptr)
@@ -276,7 +273,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
 		.destroy    = tee_tg_destroy,
 		.me         = THIS_MODULE,
 	},
-#ifdef WITH_IPV6
+#if IS_ENABLED(CONFIG_IPV6)
 	{
 		.name       = "TEE",
 		.revision   = 1,
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
index 96b749dacc34..6f1701322fb6 100644
--- a/net/netlabel/netlabel_addrlist.c
+++ b/net/netlabel/netlabel_addrlist.c
@@ -96,7 +96,7 @@ struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr,
 }
 
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * netlbl_af6list_search - Search for a matching IPv6 address entry
  * @addr: IPv6 address
@@ -185,7 +185,7 @@ int netlbl_af4list_add(struct netlbl_af4list *entry, struct list_head *head)
 	return 0;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * netlbl_af6list_add - Add a new IPv6 address entry to a list
  * @entry: address entry
@@ -263,7 +263,7 @@ struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask,
 	return entry;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * netlbl_af6list_remove_entry - Remove an IPv6 address entry
  * @entry: address entry
@@ -342,7 +342,7 @@ void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
 	}
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * netlbl_af6list_audit_addr - Audit an IPv6 address
  * @audit_buf: audit buffer
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index fdbc1d2c7352..a1287ce18130 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -133,7 +133,7 @@ static inline void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
 }
 #endif
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 
 #define __af6list_entry(ptr) container_of(ptr, struct netlbl_af6list, list)
 
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 3f905e5370c2..38204112b9f4 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -78,7 +78,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry)
 	struct netlbl_dom_map *ptr;
 	struct netlbl_af4list *iter4;
 	struct netlbl_af4list *tmp4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netlbl_af6list *iter6;
 	struct netlbl_af6list *tmp6;
 #endif /* IPv6 */
@@ -90,7 +90,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry)
 			netlbl_af4list_remove_entry(iter4);
 			kfree(netlbl_domhsh_addr4_entry(iter4));
 		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		netlbl_af6list_foreach_safe(iter6, tmp6,
 					    &ptr->type_def.addrsel->list6) {
 			netlbl_af6list_remove_entry(iter6);
@@ -217,7 +217,7 @@ static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry,
 			cipsov4 = map4->type_def.cipsov4;
 			netlbl_af4list_audit_addr(audit_buf, 0, NULL,
 						  addr4->addr, addr4->mask);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		} else if (addr6 != NULL) {
 			struct netlbl_domaddr6_map *map6;
 			map6 = netlbl_domhsh_addr6_entry(addr6);
@@ -306,7 +306,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 	struct netlbl_dom_map *entry_old;
 	struct netlbl_af4list *iter4;
 	struct netlbl_af4list *tmp4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netlbl_af6list *iter6;
 	struct netlbl_af6list *tmp6;
 #endif /* IPv6 */
@@ -338,7 +338,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 					       &entry->type_def.addrsel->list4)
 				netlbl_domhsh_audit_add(entry, iter4, NULL,
 							ret_val, audit_info);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 			netlbl_af6list_foreach_rcu(iter6,
 					       &entry->type_def.addrsel->list6)
 				netlbl_domhsh_audit_add(entry, NULL, iter6,
@@ -365,7 +365,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 				ret_val = -EEXIST;
 				goto add_return;
 			}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		netlbl_af6list_foreach_rcu(iter6,
 					   &entry->type_def.addrsel->list6)
 			if (netlbl_af6list_search_exact(&iter6->addr,
@@ -386,7 +386,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 			if (ret_val != 0)
 				goto add_return;
 		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		netlbl_af6list_foreach_safe(iter6, tmp6,
 					    &entry->type_def.addrsel->list6) {
 			netlbl_af6list_remove_entry(iter6);
@@ -510,7 +510,7 @@ int netlbl_domhsh_remove_af4(const char *domain,
 	struct netlbl_dom_map *entry_map;
 	struct netlbl_af4list *entry_addr;
 	struct netlbl_af4list *iter4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netlbl_af6list *iter6;
 #endif /* IPv6 */
 	struct netlbl_domaddr4_map *entry;
@@ -533,7 +533,7 @@ int netlbl_domhsh_remove_af4(const char *domain,
 		goto remove_af4_failure;
 	netlbl_af4list_foreach_rcu(iter4, &entry_map->type_def.addrsel->list4)
 		goto remove_af4_single_addr;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	netlbl_af6list_foreach_rcu(iter6, &entry_map->type_def.addrsel->list6)
 		goto remove_af4_single_addr;
 #endif /* IPv6 */
@@ -644,7 +644,7 @@ struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
 	return netlbl_domhsh_addr4_entry(addr_iter);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * netlbl_domhsh_getentry_af6 - Get an entry from the domain hash table
  * @domain: the domain name to search for
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index bfcc0f7024c5..90872c4ca30f 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -104,7 +104,7 @@ int netlbl_domhsh_walk(u32 *skip_bkt,
 		     int (*callback) (struct netlbl_dom_map *entry, void *arg),
 		     void *cb_arg);
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
 						  const struct in6_addr *addr);
 #endif /* IPv6 */
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 5952237c0c86..2560e7b441c6 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -147,7 +147,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
 				goto cfg_unlbl_map_add_failure;
 			break;
 			}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		case AF_INET6: {
 			const struct in6_addr *addr6 = addr;
 			const struct in6_addr *mask6 = mask;
@@ -227,7 +227,7 @@ int netlbl_cfg_unlbl_static_add(struct net *net,
 	case AF_INET:
 		addr_len = sizeof(struct in_addr);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		addr_len = sizeof(struct in6_addr);
 		break;
@@ -270,7 +270,7 @@ int netlbl_cfg_unlbl_static_del(struct net *net,
 	case AF_INET:
 		addr_len = sizeof(struct in_addr);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		addr_len = sizeof(struct in6_addr);
 		break;
@@ -673,7 +673,7 @@ int netlbl_sock_setattr(struct sock *sk,
 			ret_val = -ENOENT;
 		}
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		/* since we don't support any IPv6 labeling protocols right
 		 * now we can optimize everything away until we do */
@@ -724,7 +724,7 @@ int netlbl_sock_getattr(struct sock *sk,
 	case AF_INET:
 		ret_val = cipso_v4_sock_getattr(sk, secattr);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		ret_val = -ENOMSG;
 		break;
@@ -782,7 +782,7 @@ int netlbl_conn_setattr(struct sock *sk,
 			ret_val = -ENOENT;
 		}
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		/* since we don't support any IPv6 labeling protocols right
 		 * now we can optimize everything away until we do */
@@ -853,7 +853,7 @@ int netlbl_req_setattr(struct request_sock *req,
 			ret_val = -ENOENT;
 		}
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		/* since we don't support any IPv6 labeling protocols right
 		 * now we can optimize everything away until we do */
@@ -926,7 +926,7 @@ int netlbl_skbuff_setattr(struct sk_buff *skb,
 			ret_val = -ENOENT;
 		}
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		/* since we don't support any IPv6 labeling protocols right
 		 * now we can optimize everything away until we do */
@@ -965,7 +965,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
 		    cipso_v4_skbuff_getattr(skb, secattr) == 0)
 			return 0;
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		break;
 #endif /* IPv6 */
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 9879300beefd..4809e2e48b02 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -184,7 +184,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
 
 		entry->type = NETLBL_NLTYPE_ADDRSELECT;
 		entry->type_def.addrsel = addrmap;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	} else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) {
 		struct in6_addr *addr;
 		struct in6_addr *mask;
@@ -270,7 +270,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb,
 	struct nlattr *nla_a;
 	struct nlattr *nla_b;
 	struct netlbl_af4list *iter4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netlbl_af6list *iter6;
 #endif
 
@@ -324,7 +324,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb,
 
 			nla_nest_end(skb, nla_b);
 		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		netlbl_af6list_foreach_rcu(iter6,
 					   &entry->type_def.addrsel->list6) {
 			struct netlbl_domaddr6_map *map6;
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 049ccd2447d7..4b5fa0fe78fd 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -170,7 +170,7 @@ static void netlbl_unlhsh_free_iface(struct rcu_head *entry)
 	struct netlbl_unlhsh_iface *iface;
 	struct netlbl_af4list *iter4;
 	struct netlbl_af4list *tmp4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netlbl_af6list *iter6;
 	struct netlbl_af6list *tmp6;
 #endif /* IPv6 */
@@ -184,7 +184,7 @@ static void netlbl_unlhsh_free_iface(struct rcu_head *entry)
 		netlbl_af4list_remove_entry(iter4);
 		kfree(netlbl_unlhsh_addr4_entry(iter4));
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	netlbl_af6list_foreach_safe(iter6, tmp6, &iface->addr6_list) {
 		netlbl_af6list_remove_entry(iter6);
 		kfree(netlbl_unlhsh_addr6_entry(iter6));
@@ -274,7 +274,7 @@ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface,
 	return ret_val;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * netlbl_unlhsh_add_addr6 - Add a new IPv6 address entry to the hash table
  * @iface: the associated interface entry
@@ -436,7 +436,7 @@ int netlbl_unlhsh_add(struct net *net,
 						  mask4->s_addr);
 		break;
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case sizeof(struct in6_addr): {
 		const struct in6_addr *addr6 = addr;
 		const struct in6_addr *mask6 = mask;
@@ -531,7 +531,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
 	return 0;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * netlbl_unlhsh_remove_addr6 - Remove an IPv6 address entry
  * @net: network namespace
@@ -606,14 +606,14 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
 static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface)
 {
 	struct netlbl_af4list *iter4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netlbl_af6list *iter6;
 #endif /* IPv6 */
 
 	spin_lock(&netlbl_unlhsh_lock);
 	netlbl_af4list_foreach_rcu(iter4, &iface->addr4_list)
 		goto unlhsh_condremove_failure;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	netlbl_af6list_foreach_rcu(iter6, &iface->addr6_list)
 		goto unlhsh_condremove_failure;
 #endif /* IPv6 */
@@ -680,7 +680,7 @@ int netlbl_unlhsh_remove(struct net *net,
 						     iface, addr, mask,
 						     audit_info);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case sizeof(struct in6_addr):
 		ret_val = netlbl_unlhsh_remove_addr6(net,
 						     iface, addr, mask,
@@ -1196,7 +1196,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 	struct netlbl_unlhsh_iface *iface;
 	struct list_head *iter_list;
 	struct netlbl_af4list *addr4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netlbl_af6list *addr6;
 #endif
 
@@ -1228,7 +1228,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 					goto unlabel_staticlist_return;
 				}
 			}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 			netlbl_af6list_foreach_rcu(addr6,
 						   &iface->addr6_list) {
 				if (iter_addr6++ < skip_addr6)
@@ -1277,7 +1277,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
 	u32 skip_addr6 = cb->args[1];
 	u32 iter_addr4 = 0;
 	struct netlbl_af4list *addr4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	u32 iter_addr6 = 0;
 	struct netlbl_af6list *addr6;
 #endif
@@ -1303,7 +1303,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
 			goto unlabel_staticlistdef_return;
 		}
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) {
 		if (iter_addr6++ < skip_addr6)
 			continue;
@@ -1494,7 +1494,7 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb,
 		secattr->attr.secid = netlbl_unlhsh_addr4_entry(addr4)->secid;
 		break;
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case PF_INET6: {
 		struct ipv6hdr *hdr6;
 		struct netlbl_af6list *addr6;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index b7692aab6e9c..80f71af71384 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -105,7 +105,7 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb)
 struct sctp_input_cb {
 	union {
 		struct inet_skb_parm	h4;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		struct inet6_skb_parm	h6;
 #endif
 	} header;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 61b9fca5a173..544a9b68eb53 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -637,7 +637,7 @@ void sctp_addr_wq_timeout_handler(unsigned long arg)
 		    " for cmd %d at entry %p\n", &sctp_addr_waitq, &addrw->a, addrw->state,
 		    addrw);
 
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		/* Now we send an ASCONF for each association */
 		/* Note. we currently don't handle link local IPv6 addressees */
 		if (addrw->a.sa.sa_family == AF_INET6) {
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d56c07a3d435..db0308344d07 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6841,7 +6841,7 @@ struct proto sctp_prot = {
 	.sockets_allocated = &sctp_sockets_allocated,
 };
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 
 struct proto sctpv6_prot = {
 	.name		= "SCTPv6",
@@ -6872,4 +6872,4 @@ struct proto sctpv6_prot = {
 	.memory_allocated = &sctp_memory_allocated,
 	.sockets_allocated = &sctp_sockets_allocated,
 };
-#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif /* IS_ENABLED(CONFIG_IPV6) */
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index 67a655ee82a9..ee77742e0ed6 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -21,7 +21,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 
 static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap,
 				  char *buf, const int buflen)
@@ -91,7 +91,7 @@ static size_t rpc_ntop6(const struct sockaddr *sap,
 	return len;
 }
 
-#else	/* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */
+#else	/* !IS_ENABLED(CONFIG_IPV6) */
 
 static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap,
 				  char *buf, const int buflen)
@@ -105,7 +105,7 @@ static size_t rpc_ntop6(const struct sockaddr *sap,
 	return 0;
 }
 
-#endif	/* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */
+#endif	/* !IS_ENABLED(CONFIG_IPV6) */
 
 static int rpc_ntop4(const struct sockaddr *sap,
 		     char *buf, const size_t buflen)
@@ -155,7 +155,7 @@ static size_t rpc_pton4(const char *buf, const size_t buflen,
 	return sizeof(struct sockaddr_in);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static int rpc_parse_scope_id(const char *buf, const size_t buflen,
 			      const char *delim, struct sockaddr_in6 *sin6)
 {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6e038884ae0c..9d01d46b05f3 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -826,7 +826,7 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
 	return error;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /*
  * Register an "inet6" protocol family netid with the local
  * rpcbind daemon via an rpcbind v4 SET request.
@@ -872,7 +872,7 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
 
 	return error;
 }
-#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif	/* IS_ENABLED(CONFIG_IPV6) */
 
 /*
  * Register a kernel RPC service via rpcbind version 4.
@@ -893,11 +893,11 @@ static int __svc_register(const char *progname,
 		error = __svc_rpcb_register4(program, version,
 						protocol, port);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case PF_INET6:
 		error = __svc_rpcb_register6(program, version,
 						protocol, port);
-#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif
 	}
 
 	if (error < 0)
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 447cd0eb415c..38649cfa4e81 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -179,13 +179,13 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
 		.sin_addr.s_addr	= htonl(INADDR_ANY),
 		.sin_port		= htons(port),
 	};
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct sockaddr_in6 sin6 = {
 		.sin6_family		= AF_INET6,
 		.sin6_addr		= IN6ADDR_ANY_INIT,
 		.sin6_port		= htons(port),
 	};
-#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif
 	struct sockaddr *sap;
 	size_t len;
 
@@ -194,12 +194,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
 		sap = (struct sockaddr *)&sin;
 		len = sizeof(sin);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case PF_INET6:
 		sap = (struct sockaddr *)&sin6;
 		len = sizeof(sin6);
 		break;
-#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif
 	default:
 		return ERR_PTR(-EAFNOSUPPORT);
 	}
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index fe258fc37f50..01153ead1dba 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -220,7 +220,7 @@ static int ip_map_parse(struct cache_detail *cd,
 		ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr,
 				&sin6.sin6_addr);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		memcpy(&sin6, &address.s6, sizeof(sin6));
 		break;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 82e803b56952..eb6b0b7781a5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1340,7 +1340,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 	case AF_INET:
 		dst_ops = &net->xfrm.xfrm4_dst_ops;
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		dst_ops = &net->xfrm.xfrm6_dst_ops;
 		break;
@@ -2435,7 +2435,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 		case AF_INET:
 			xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
 			break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		case AF_INET6:
 			xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
 			break;
@@ -2485,7 +2485,7 @@ static void __net_init xfrm_dst_ops_init(struct net *net)
 	afinfo = xfrm_policy_afinfo[AF_INET];
 	if (afinfo)
 		net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	afinfo = xfrm_policy_afinfo[AF_INET6];
 	if (afinfo)
 		net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d0a42df5160e..e0d747a2e803 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -28,7 +28,7 @@
 #include <net/netlink.h>
 #include <net/ah.h>
 #include <asm/uaccess.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <linux/in6.h>
 #endif
 
@@ -150,7 +150,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		break;
 
 	case AF_INET6:
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		break;
 #else
 		err = -EAFNOSUPPORT;
@@ -201,7 +201,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 			goto out;
 		break;
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case IPPROTO_DSTOPTS:
 	case IPPROTO_ROUTING:
 		if (attrs[XFRMA_ALG_COMP]	||
@@ -1160,7 +1160,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 		break;
 
 	case AF_INET6:
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		break;
 #else
 		return  -EAFNOSUPPORT;
@@ -1231,7 +1231,7 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
 		switch (ut[i].family) {
 		case AF_INET:
 			break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		case AF_INET6:
 			break;
 #endif
@@ -2604,7 +2604,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
 			return NULL;
 		}
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		if (opt != IPV6_XFRM_POLICY) {
 			*dir = -EOPNOTSUPP;
-- 
cgit v1.2.3


From a469ebd56f8bee8d5352b1a284ea39d23ba02430 Mon Sep 17 00:00:00 2001
From: Mark Einon <mark.einon@gmail.com>
Date: Tue, 6 Dec 2011 23:18:14 +0000
Subject: types.h: fix comment spelling for 'architectures'

Spelling change, architetures -> architectures

Signed-off-by: Mark Einon <mark.einon@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/types.h b/include/linux/types.h
index 57a97234bec1..cbcef6ebeba9 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -188,7 +188,7 @@ typedef __u32 __bitwise __wsum;
  * aligned_u64 should be used in defining kernel<->userspace ABIs to avoid
  * common 32/64-bit compat problems.
  * 64-bit values align to 4-byte boundaries on x86_32 (and possibly other
- * architectures) and to 8-byte boundaries on 64-bit architetures.  The new
+ * architectures) and to 8-byte boundaries on 64-bit architectures.  The new
  * aligned_64 type enforces 8-byte alignment so that structs containing
  * aligned_64 values have the same alignment on 32-bit and 64-bit architectures.
  * No conversions are necessary between 32-bit user-space and a 64-bit kernel.
-- 
cgit v1.2.3


From a49a88f108516fd5ae24e26df5a63beb847807df Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Sun, 23 Oct 2011 19:57:02 -0700
Subject: usb: gadget: renesas_usbhs: tidyup the unit of detection_delay

detection_delay was assumed as msec

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/renesas_usbhs/common.c | 3 ++-
 include/linux/usb/renesas_usbhs.h  | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c
index 1ce32d92e720..b4cf555ce58e 100644
--- a/drivers/usb/renesas_usbhs/common.c
+++ b/drivers/usb/renesas_usbhs/common.c
@@ -388,7 +388,8 @@ int usbhsc_drvcllbck_notify_hotplug(struct platform_device *pdev)
 	 * To make sure safety context,
 	 * use workqueue for usbhs_notify_hotplug
 	 */
-	schedule_delayed_work(&priv->notify_hotplug_work, delay);
+	schedule_delayed_work(&priv->notify_hotplug_work,
+			      msecs_to_jiffies(delay));
 	return 0;
 }
 
diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h
index e5a40c318548..c9fceb9f7690 100644
--- a/include/linux/usb/renesas_usbhs.h
+++ b/include/linux/usb/renesas_usbhs.h
@@ -118,7 +118,7 @@ struct renesas_usbhs_driver_param {
 	 *
 	 * delay time from notify_hotplug callback
 	 */
-	int detection_delay;
+	int detection_delay; /* msec */
 
 	/*
 	 * option:
-- 
cgit v1.2.3


From f1ee56a0004c4a5974e7a69665330b6ff818bf92 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Sun, 23 Oct 2011 19:57:10 -0700
Subject: usb: gadget: renesas_usbhs: add platform power control function

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/renesas_usbhs/common.c | 7 +++++++
 include/linux/usb/renesas_usbhs.h  | 8 ++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c
index b4cf555ce58e..17bf1f74377a 100644
--- a/drivers/usb/renesas_usbhs/common.c
+++ b/drivers/usb/renesas_usbhs/common.c
@@ -291,18 +291,25 @@ static u32 usbhsc_default_pipe_type[] = {
  */
 static void usbhsc_power_ctrl(struct usbhs_priv *priv, int enable)
 {
+	struct platform_device *pdev = usbhs_priv_to_pdev(priv);
 	struct device *dev = usbhs_priv_to_dev(priv);
 
 	if (enable) {
 		/* enable PM */
 		pm_runtime_get_sync(dev);
 
+		/* enable platform power */
+		usbhs_platform_call(priv, power_ctrl, pdev, priv->base, enable);
+
 		/* USB on */
 		usbhs_sys_clock_ctrl(priv, enable);
 	} else {
 		/* USB off */
 		usbhs_sys_clock_ctrl(priv, enable);
 
+		/* disable platform power */
+		usbhs_platform_call(priv, power_ctrl, pdev, priv->base, enable);
+
 		/* disable PM */
 		pm_runtime_put_sync(dev);
 	}
diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h
index c9fceb9f7690..0d3f98879256 100644
--- a/include/linux/usb/renesas_usbhs.h
+++ b/include/linux/usb/renesas_usbhs.h
@@ -64,6 +64,14 @@ struct renesas_usbhs_platform_callback {
 	 */
 	void (*hardware_exit)(struct platform_device *pdev);
 
+	/*
+	 * option:
+	 *
+	 * for board specific clock control
+	 */
+	void (*power_ctrl)(struct platform_device *pdev,
+			   void __iomem *base, int enable);
+
 	/*
 	 * option:
 	 *
-- 
cgit v1.2.3


From d327ab5b6d660d6fe22b073b743fde1668e593bb Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Sat, 19 Nov 2011 18:27:37 +0100
Subject: usb: gadget: replace usb_gadget::is_dualspeed with max_speed

This commit replaces usb_gadget's is_dualspeed field with
a max_speed field.

[ balbi@ti.com : Fixed DWC3 driver ]

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 Documentation/feature-removal-schedule.txt | 14 ++++++++++++++
 drivers/usb/dwc3/gadget.c                  |  2 +-
 drivers/usb/gadget/amd5536udc.c            |  2 +-
 drivers/usb/gadget/atmel_usba_udc.c        |  2 +-
 drivers/usb/gadget/ci13xxx_udc.c           |  7 +++++--
 drivers/usb/gadget/dummy_hcd.c             |  2 +-
 drivers/usb/gadget/epautoconf.c            |  6 +++---
 drivers/usb/gadget/fsl_udc_core.c          |  2 +-
 drivers/usb/gadget/fusb300_udc.c           |  2 +-
 drivers/usb/gadget/goku_udc.c              |  1 +
 drivers/usb/gadget/langwell_udc.c          |  2 +-
 drivers/usb/gadget/m66592-udc.c            |  2 +-
 drivers/usb/gadget/mv_udc_core.c           |  2 +-
 drivers/usb/gadget/net2272.c               |  2 +-
 drivers/usb/gadget/net2280.c               |  2 +-
 drivers/usb/gadget/omap_udc.c              |  1 +
 drivers/usb/gadget/pch_udc.c               |  2 +-
 drivers/usb/gadget/printer.c               |  4 ++--
 drivers/usb/gadget/r8a66597-udc.c          |  2 +-
 drivers/usb/gadget/s3c-hsotg.c             |  2 +-
 drivers/usb/gadget/s3c-hsudc.c             |  2 +-
 drivers/usb/gadget/udc-core.c              | 26 ++++++++++++++++++++------
 drivers/usb/musb/musb_gadget.c             |  2 +-
 drivers/usb/renesas_usbhs/mod_gadget.c     |  2 +-
 include/linux/usb/gadget.h                 | 10 +++++-----
 25 files changed, 68 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 3d849122b5b1..a7e4ac1202d6 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -535,6 +535,20 @@ Why:    In 3.0, we can now autodetect internal 3G device and already have
 	information log when acer-wmi initial.
 Who:    Lee, Chun-Yi <jlee@novell.com>
 
+---------------------------
+
+What:	/sys/devices/platform/_UDC_/udc/_UDC_/is_dualspeed file and
+	is_dualspeed line in /sys/devices/platform/ci13xxx_*/udc/device file.
+When:	3.8
+Why:	The is_dualspeed file is superseded by maximum_speed in the same
+	directory and is_dualspeed line in device file is superseded by
+	max_speed line in the same file.
+
+	The maximum_speed/max_speed specifies maximum speed supported by UDC.
+	To check if dualspeeed is supported, check if the value is >= 3.
+	Various possible speeds are defined in <linux/usb/ch9.h>.
+Who:	Michal Nazarewicz <mina86@mina86.com>
+
 ----------------------------
 
 What:	The XFS nodelaylog mount option
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 25dbd8614e72..580272042a33 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1986,7 +1986,7 @@ int __devinit dwc3_gadget_init(struct dwc3 *dwc)
 	dev_set_name(&dwc->gadget.dev, "gadget");
 
 	dwc->gadget.ops			= &dwc3_gadget_ops;
-	dwc->gadget.is_dualspeed	= true;
+	dwc->gadget.max_speed		= USB_SPEED_SUPER;
 	dwc->gadget.speed		= USB_SPEED_UNKNOWN;
 	dwc->gadget.dev.parent		= dwc->dev;
 
diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c
index 45f422ac103f..e69cdbca8017 100644
--- a/drivers/usb/gadget/amd5536udc.c
+++ b/drivers/usb/gadget/amd5536udc.c
@@ -3349,7 +3349,7 @@ static int udc_probe(struct udc *dev)
 	dev_set_name(&dev->gadget.dev, "gadget");
 	dev->gadget.dev.release = gadget_release;
 	dev->gadget.name = name;
-	dev->gadget.is_dualspeed = 1;
+	dev->gadget.max_speed = USB_SPEED_HIGH;
 
 	/* init registers, interrupts, ... */
 	startup_registers(dev);
diff --git a/drivers/usb/gadget/atmel_usba_udc.c b/drivers/usb/gadget/atmel_usba_udc.c
index 271a9d873608..e2fb6d583bd9 100644
--- a/drivers/usb/gadget/atmel_usba_udc.c
+++ b/drivers/usb/gadget/atmel_usba_udc.c
@@ -1038,7 +1038,7 @@ static struct usba_udc the_udc = {
 	.gadget	= {
 		.ops		= &usba_udc_ops,
 		.ep_list	= LIST_HEAD_INIT(the_udc.gadget.ep_list),
-		.is_dualspeed	= 1,
+		.max_speed	= USB_SPEED_HIGH,
 		.name		= "atmel_usba_udc",
 		.dev	= {
 			.init_name	= "gadget",
diff --git a/drivers/usb/gadget/ci13xxx_udc.c b/drivers/usb/gadget/ci13xxx_udc.c
index 8956a2448b33..9767d9170fbd 100644
--- a/drivers/usb/gadget/ci13xxx_udc.c
+++ b/drivers/usb/gadget/ci13xxx_udc.c
@@ -766,8 +766,11 @@ static ssize_t show_device(struct device *dev, struct device_attribute *attr,
 
 	n += scnprintf(buf + n, PAGE_SIZE - n, "speed             = %d\n",
 		       gadget->speed);
+	n += scnprintf(buf + n, PAGE_SIZE - n, "max_speed         = %d\n",
+		       gadget->max_speed);
+	/* TODO: Scheduled for removal in 3.8. */
 	n += scnprintf(buf + n, PAGE_SIZE - n, "is_dualspeed      = %d\n",
-		       gadget->is_dualspeed);
+		       gadget_is_dualspeed(gadget));
 	n += scnprintf(buf + n, PAGE_SIZE - n, "is_otg            = %d\n",
 		       gadget->is_otg);
 	n += scnprintf(buf + n, PAGE_SIZE - n, "is_a_peripheral   = %d\n",
@@ -2880,7 +2883,7 @@ static int udc_probe(struct ci13xxx_udc_driver *driver, struct device *dev,
 
 	udc->gadget.ops          = &usb_gadget_ops;
 	udc->gadget.speed        = USB_SPEED_UNKNOWN;
-	udc->gadget.is_dualspeed = 1;
+	udc->gadget.max_speed    = USB_SPEED_HIGH;
 	udc->gadget.is_otg       = 0;
 	udc->gadget.name         = driver->name;
 
diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c
index ab8f1b488d54..cf235d84d8b7 100644
--- a/drivers/usb/gadget/dummy_hcd.c
+++ b/drivers/usb/gadget/dummy_hcd.c
@@ -977,7 +977,7 @@ static int dummy_udc_probe (struct platform_device *pdev)
 
 	dum->gadget.name = gadget_name;
 	dum->gadget.ops = &dummy_ops;
-	dum->gadget.is_dualspeed = 1;
+	dum->gadget.max_speed = USB_SPEED_SUPER;
 
 	dev_set_name(&dum->gadget.dev, "gadget");
 	dum->gadget.dev.parent = &pdev->dev;
diff --git a/drivers/usb/gadget/epautoconf.c b/drivers/usb/gadget/epautoconf.c
index 596a0b464e61..38bcbfb91f62 100644
--- a/drivers/usb/gadget/epautoconf.c
+++ b/drivers/usb/gadget/epautoconf.c
@@ -152,7 +152,7 @@ ep_matches (
 	switch (type) {
 	case USB_ENDPOINT_XFER_INT:
 		/* INT:  limit 64 bytes full speed, 1024 high/super speed */
-		if (!gadget->is_dualspeed && max > 64)
+		if (!gadget_is_dualspeed(gadget) && max > 64)
 			return 0;
 		/* FALLTHROUGH */
 
@@ -160,12 +160,12 @@ ep_matches (
 		/* ISO:  limit 1023 bytes full speed, 1024 high/super speed */
 		if (ep->maxpacket < max)
 			return 0;
-		if (!gadget->is_dualspeed && max > 1023)
+		if (!gadget_is_dualspeed(gadget) && max > 1023)
 			return 0;
 
 		/* BOTH:  "high bandwidth" works only at high speed */
 		if ((desc->wMaxPacketSize & cpu_to_le16(3<<11))) {
-			if (!gadget->is_dualspeed)
+			if (!gadget_is_dualspeed(gadget))
 				return 0;
 			/* configure your hardware with enough buffering!! */
 		}
diff --git a/drivers/usb/gadget/fsl_udc_core.c b/drivers/usb/gadget/fsl_udc_core.c
index dd28ef3def71..6de8ec753818 100644
--- a/drivers/usb/gadget/fsl_udc_core.c
+++ b/drivers/usb/gadget/fsl_udc_core.c
@@ -2525,7 +2525,7 @@ static int __init fsl_udc_probe(struct platform_device *pdev)
 
 	/* Setup gadget structure */
 	udc_controller->gadget.ops = &fsl_gadget_ops;
-	udc_controller->gadget.is_dualspeed = 1;
+	udc_controller->gadget.max_speed = USB_SPEED_HIGH;
 	udc_controller->gadget.ep0 = &udc_controller->eps[0].ep;
 	INIT_LIST_HEAD(&udc_controller->gadget.ep_list);
 	udc_controller->gadget.speed = USB_SPEED_UNKNOWN;
diff --git a/drivers/usb/gadget/fusb300_udc.c b/drivers/usb/gadget/fusb300_udc.c
index 74da206c8406..6e32a60ab3b7 100644
--- a/drivers/usb/gadget/fusb300_udc.c
+++ b/drivers/usb/gadget/fusb300_udc.c
@@ -1463,7 +1463,7 @@ static int __init fusb300_probe(struct platform_device *pdev)
 
 	dev_set_name(&fusb300->gadget.dev, "gadget");
 
-	fusb300->gadget.is_dualspeed = 1;
+	fusb300->gadget.max_speed = USB_SPEED_HIGH;
 	fusb300->gadget.dev.parent = &pdev->dev;
 	fusb300->gadget.dev.dma_mask = pdev->dev.dma_mask;
 	fusb300->gadget.dev.release = pdev->dev.release;
diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c
index 7f87805cddc4..ab9c924eee76 100644
--- a/drivers/usb/gadget/goku_udc.c
+++ b/drivers/usb/gadget/goku_udc.c
@@ -1796,6 +1796,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	spin_lock_init(&dev->lock);
 	dev->pdev = pdev;
 	dev->gadget.ops = &goku_ops;
+	dev->gadget.max_speed = USB_SPEED_FULL;
 
 	/* the "gadget" abstracts/virtualizes the controller */
 	dev_set_name(&dev->gadget.dev, "gadget");
diff --git a/drivers/usb/gadget/langwell_udc.c b/drivers/usb/gadget/langwell_udc.c
index c9fa3bf5b377..fa0fcc11263f 100644
--- a/drivers/usb/gadget/langwell_udc.c
+++ b/drivers/usb/gadget/langwell_udc.c
@@ -3267,7 +3267,7 @@ static int langwell_udc_probe(struct pci_dev *pdev,
 	dev->gadget.ep0 = &dev->ep[0].ep;	/* gadget ep0 */
 	INIT_LIST_HEAD(&dev->gadget.ep_list);	/* ep_list */
 	dev->gadget.speed = USB_SPEED_UNKNOWN;	/* speed */
-	dev->gadget.is_dualspeed = 1;		/* support dual speed */
+	dev->gadget.max_speed = USB_SPEED_HIGH;	/* support dual speed */
 #ifdef	OTG_TRANSCEIVER
 	dev->gadget.is_otg = 1;			/* support otg mode */
 #endif
diff --git a/drivers/usb/gadget/m66592-udc.c b/drivers/usb/gadget/m66592-udc.c
index 9aa1cbbee45b..a7692c208ea7 100644
--- a/drivers/usb/gadget/m66592-udc.c
+++ b/drivers/usb/gadget/m66592-udc.c
@@ -1653,7 +1653,7 @@ static int __init m66592_probe(struct platform_device *pdev)
 	m66592->gadget.ops = &m66592_gadget_ops;
 	device_initialize(&m66592->gadget.dev);
 	dev_set_name(&m66592->gadget.dev, "gadget");
-	m66592->gadget.is_dualspeed = 1;
+	m66592->gadget.max_speed = USB_SPEED_HIGH;
 	m66592->gadget.dev.parent = &pdev->dev;
 	m66592->gadget.dev.dma_mask = pdev->dev.dma_mask;
 	m66592->gadget.dev.release = pdev->dev.release;
diff --git a/drivers/usb/gadget/mv_udc_core.c b/drivers/usb/gadget/mv_udc_core.c
index 892412103dd8..9376a7483c9b 100644
--- a/drivers/usb/gadget/mv_udc_core.c
+++ b/drivers/usb/gadget/mv_udc_core.c
@@ -2312,7 +2312,7 @@ static int __devinit mv_udc_probe(struct platform_device *dev)
 	udc->gadget.ep0 = &udc->eps[0].ep;	/* gadget ep0 */
 	INIT_LIST_HEAD(&udc->gadget.ep_list);	/* ep_list */
 	udc->gadget.speed = USB_SPEED_UNKNOWN;	/* speed */
-	udc->gadget.is_dualspeed = 1;		/* support dual speed */
+	udc->gadget.max_speed = USB_SPEED_HIGH;	/* support dual speed */
 
 	/* the "gadget" abstracts/virtualizes the controller */
 	dev_set_name(&udc->gadget.dev, "gadget");
diff --git a/drivers/usb/gadget/net2272.c b/drivers/usb/gadget/net2272.c
index d1b76368472f..d5050f4e8449 100644
--- a/drivers/usb/gadget/net2272.c
+++ b/drivers/usb/gadget/net2272.c
@@ -2235,7 +2235,7 @@ net2272_probe_init(struct device *dev, unsigned int irq)
 	ret->irq = irq;
 	ret->dev = dev;
 	ret->gadget.ops = &net2272_ops;
-	ret->gadget.is_dualspeed = 1;
+	ret->gadget.max_speed = USB_SPEED_HIGH;
 
 	/* the "gadget" abstracts/virtualizes the controller */
 	dev_set_name(&ret->gadget.dev, "gadget");
diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c
index da2b9d0be3ca..9ee36fd5adae 100644
--- a/drivers/usb/gadget/net2280.c
+++ b/drivers/usb/gadget/net2280.c
@@ -2698,7 +2698,7 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id)
 	spin_lock_init (&dev->lock);
 	dev->pdev = pdev;
 	dev->gadget.ops = &net2280_ops;
-	dev->gadget.is_dualspeed = 1;
+	dev->gadget.max_speed = USB_SPEED_HIGH;
 
 	/* the "gadget" abstracts/virtualizes the controller */
 	dev_set_name(&dev->gadget.dev, "gadget");
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index 788989a10223..ed01a0f5f119 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -2676,6 +2676,7 @@ omap_udc_setup(struct platform_device *odev, struct otg_transceiver *xceiv)
 	INIT_LIST_HEAD(&udc->gadget.ep_list);
 	INIT_LIST_HEAD(&udc->iso);
 	udc->gadget.speed = USB_SPEED_UNKNOWN;
+	udc->gadget.max_speed = USB_SPEED_FULL;
 	udc->gadget.name = driver_name;
 
 	device_initialize(&udc->gadget.dev);
diff --git a/drivers/usb/gadget/pch_udc.c b/drivers/usb/gadget/pch_udc.c
index 5048a0c07640..7f97b4adba3e 100644
--- a/drivers/usb/gadget/pch_udc.c
+++ b/drivers/usb/gadget/pch_udc.c
@@ -2941,7 +2941,7 @@ static int pch_udc_probe(struct pci_dev *pdev,
 	dev->gadget.dev.dma_mask = pdev->dev.dma_mask;
 	dev->gadget.dev.release = gadget_release;
 	dev->gadget.name = KBUILD_MODNAME;
-	dev->gadget.is_dualspeed = 1;
+	dev->gadget.max_speed = USB_SPEED_HIGH;
 
 	retval = device_register(&dev->gadget.dev);
 	if (retval)
diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c
index 65a8834f274b..b74f49ac95e5 100644
--- a/drivers/usb/gadget/printer.c
+++ b/drivers/usb/gadget/printer.c
@@ -1141,7 +1141,7 @@ printer_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 				break;
 #ifdef CONFIG_USB_GADGET_DUALSPEED
 			case USB_DT_DEVICE_QUALIFIER:
-				if (!gadget->is_dualspeed)
+				if (!gadget_is_dualspeed(gadget))
 					break;
 				/*
 				 * assumes ep0 uses the same value for both
@@ -1155,7 +1155,7 @@ printer_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 				break;
 
 			case USB_DT_OTHER_SPEED_CONFIG:
-				if (!gadget->is_dualspeed)
+				if (!gadget_is_dualspeed(gadget))
 					break;
 				/* FALLTHROUGH */
 #endif /* CONFIG_USB_GADGET_DUALSPEED */
diff --git a/drivers/usb/gadget/r8a66597-udc.c b/drivers/usb/gadget/r8a66597-udc.c
index fc719a3f8557..3666d7c54e24 100644
--- a/drivers/usb/gadget/r8a66597-udc.c
+++ b/drivers/usb/gadget/r8a66597-udc.c
@@ -1911,7 +1911,7 @@ static int __init r8a66597_probe(struct platform_device *pdev)
 
 	r8a66597->gadget.ops = &r8a66597_gadget_ops;
 	dev_set_name(&r8a66597->gadget.dev, "gadget");
-	r8a66597->gadget.is_dualspeed = 1;
+	r8a66597->gadget.max_speed = USB_SPEED_HIGH;
 	r8a66597->gadget.dev.parent = &pdev->dev;
 	r8a66597->gadget.dev.dma_mask = pdev->dev.dma_mask;
 	r8a66597->gadget.dev.release = pdev->dev.release;
diff --git a/drivers/usb/gadget/s3c-hsotg.c b/drivers/usb/gadget/s3c-hsotg.c
index b31448229f0b..6bc7ad87c306 100644
--- a/drivers/usb/gadget/s3c-hsotg.c
+++ b/drivers/usb/gadget/s3c-hsotg.c
@@ -3362,7 +3362,7 @@ static int __devinit s3c_hsotg_probe(struct platform_device *pdev)
 
 	dev_set_name(&hsotg->gadget.dev, "gadget");
 
-	hsotg->gadget.is_dualspeed = 1;
+	hsotg->gadget.max_speed = USB_SPEED_HIGH;
 	hsotg->gadget.ops = &s3c_hsotg_gadget_ops;
 	hsotg->gadget.name = dev_name(dev);
 
diff --git a/drivers/usb/gadget/s3c-hsudc.c b/drivers/usb/gadget/s3c-hsudc.c
index 20a553b46aed..09ea96558b91 100644
--- a/drivers/usb/gadget/s3c-hsudc.c
+++ b/drivers/usb/gadget/s3c-hsudc.c
@@ -1310,7 +1310,7 @@ static int s3c_hsudc_probe(struct platform_device *pdev)
 	device_initialize(&hsudc->gadget.dev);
 	dev_set_name(&hsudc->gadget.dev, "gadget");
 
-	hsudc->gadget.is_dualspeed = 1;
+	hsudc->gadget.max_speed = USB_SPEED_HIGH;
 	hsudc->gadget.ops = &s3c_hsudc_gadget_ops;
 	hsudc->gadget.name = dev_name(dev);
 	hsudc->gadget.dev.parent = dev;
diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c
index 6939e17f4580..0b0d12ccc487 100644
--- a/drivers/usb/gadget/udc-core.c
+++ b/drivers/usb/gadget/udc-core.c
@@ -371,14 +371,28 @@ static ssize_t usb_udc_softconn_store(struct device *dev,
 }
 static DEVICE_ATTR(soft_connect, S_IWUSR, NULL, usb_udc_softconn_store);
 
-static ssize_t usb_udc_speed_show(struct device *dev,
+#define USB_UDC_SPEED_ATTR(name, param)					\
+ssize_t usb_udc_##param##_show(struct device *dev,			\
+		struct device_attribute *attr, char *buf)		\
+{									\
+	struct usb_udc *udc = container_of(dev, struct usb_udc, dev);	\
+	return snprintf(buf, PAGE_SIZE, "%s\n",				\
+			usb_speed_string(udc->gadget->param));		\
+}									\
+static DEVICE_ATTR(name, S_IRUSR, usb_udc_##param##_show, NULL)
+
+static USB_UDC_SPEED_ATTR(current_speed, speed);
+static USB_UDC_SPEED_ATTR(maximum_speed, max_speed);
+
+/* TODO: Scheduled for removal in 3.8. */
+static ssize_t usb_udc_is_dualspeed_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
 	struct usb_udc		*udc = container_of(dev, struct usb_udc, dev);
-	return snprintf(buf, PAGE_SIZE, "%s\n",
-			usb_speed_string(udc->gadget->speed));
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			gadget_is_dualspeed(udc->gadget));
 }
-static DEVICE_ATTR(speed, S_IRUGO, usb_udc_speed_show, NULL);
+static DEVICE_ATTR(is_dualspeed, S_IRUSR, usb_udc_is_dualspeed_show, NULL);
 
 #define USB_UDC_ATTR(name)					\
 ssize_t usb_udc_##name##_show(struct device *dev,		\
@@ -391,7 +405,6 @@ ssize_t usb_udc_##name##_show(struct device *dev,		\
 }								\
 static DEVICE_ATTR(name, S_IRUGO, usb_udc_##name##_show, NULL)
 
-static USB_UDC_ATTR(is_dualspeed);
 static USB_UDC_ATTR(is_otg);
 static USB_UDC_ATTR(is_a_peripheral);
 static USB_UDC_ATTR(b_hnp_enable);
@@ -401,7 +414,8 @@ static USB_UDC_ATTR(a_alt_hnp_support);
 static struct attribute *usb_udc_attrs[] = {
 	&dev_attr_srp.attr,
 	&dev_attr_soft_connect.attr,
-	&dev_attr_speed.attr,
+	&dev_attr_current_speed.attr,
+	&dev_attr_maximum_speed.attr,
 
 	&dev_attr_is_dualspeed.attr,
 	&dev_attr_is_otg.attr,
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 922148ff8d29..47a3d1e5b28b 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1844,7 +1844,7 @@ int __init musb_gadget_setup(struct musb *musb)
 	 */
 
 	musb->g.ops = &musb_gadget_operations;
-	musb->g.is_dualspeed = 1;
+	musb->g.max_speed = USB_SPEED_HIGH;
 	musb->g.speed = USB_SPEED_UNKNOWN;
 
 	/* this "gadget" abstracts/virtualizes the controller */
diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
index 8fb9056ff48d..43c67e5cde26 100644
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -862,7 +862,7 @@ int usbhs_mod_gadget_probe(struct usbhs_priv *priv)
 	gpriv->gadget.dev.parent	= dev;
 	gpriv->gadget.name		= "renesas_usbhs_udc";
 	gpriv->gadget.ops		= &usbhsg_gadget_ops;
-	gpriv->gadget.is_dualspeed	= 1;
+	gpriv->gadget.max_speed		= USB_SPEED_HIGH;
 	ret = device_register(&gpriv->gadget.dev);
 	if (ret < 0)
 		goto err_add_udc;
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 1d3a67523ffc..98dc306898b5 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -477,8 +477,8 @@ struct usb_gadget_ops {
  *	driver setup() requests
  * @ep_list: List of other endpoints supported by the device.
  * @speed: Speed of current connection to USB host.
- * @is_dualspeed: True if the controller supports both high and full speed
- *	operation.  If it does, the gadget driver must also support both.
+ * @max_speed: Maximal speed the UDC can handle.  UDC must support this
+ *      and all slower speeds.
  * @is_otg: True if the USB device port uses a Mini-AB jack, so that the
  *	gadget driver must provide a USB OTG descriptor.
  * @is_a_peripheral: False unless is_otg, the "A" end of a USB cable
@@ -518,7 +518,7 @@ struct usb_gadget {
 	struct usb_ep			*ep0;
 	struct list_head		ep_list;	/* of usb_ep */
 	enum usb_device_speed		speed;
-	unsigned			is_dualspeed:1;
+	enum usb_device_speed		max_speed;
 	unsigned			is_otg:1;
 	unsigned			is_a_peripheral:1;
 	unsigned			b_hnp_enable:1;
@@ -549,7 +549,7 @@ static inline struct usb_gadget *dev_to_usb_gadget(struct device *dev)
 static inline int gadget_is_dualspeed(struct usb_gadget *g)
 {
 #ifdef CONFIG_USB_GADGET_DUALSPEED
-	/* runtime test would check "g->is_dualspeed" ... that might be
+	/* runtime test would check "g->max_speed" ... that might be
 	 * useful to work around hardware bugs, but is mostly pointless
 	 */
 	return 1;
@@ -567,7 +567,7 @@ static inline int gadget_is_superspeed(struct usb_gadget *g)
 {
 #ifdef CONFIG_USB_GADGET_SUPERSPEED
 	/*
-	 * runtime test would check "g->is_superspeed" ... that might be
+	 * runtime test would check "g->max_speed" ... that might be
 	 * useful to work around hardware bugs, but is mostly pointless
 	 */
 	return 1;
-- 
cgit v1.2.3


From 7177aed44f515d949f587170e0e177ce17e74793 Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Sat, 19 Nov 2011 18:27:38 +0100
Subject: usb: gadget: rename usb_gadget_driver::speed to max_speed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit renames the “speed” field of the usb_gadget_driver
structure to “max_speed”.  This is so that to make it more
apparent that the field represents the maximum speed gadget
driver can support.

This also make the field look more like fields with the same
name in usb_gadget and usb_composite_driver structures.  All
of those represent the *maximal* speed given entity supports.

After this commit, there are the following fields in various
structures:
* usb_gadget::speed - the current connection speed,
* usb_gadget::max_speed - maximal speed UDC supports,
* usb_gadget_driver::max_speed - maximal speed gadget driver
  supports, and
* usb_composite_driver::max_speed - maximal speed composite
  gadget supports.

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/amd5536udc.c        |  2 +-
 drivers/usb/gadget/at91_udc.c          |  2 +-
 drivers/usb/gadget/ci13xxx_udc.c       |  2 +-
 drivers/usb/gadget/composite.c         |  8 ++++----
 drivers/usb/gadget/dbgp.c              |  2 +-
 drivers/usb/gadget/dummy_hcd.c         | 13 ++++++-------
 drivers/usb/gadget/file_storage.c      |  2 +-
 drivers/usb/gadget/fsl_qe_udc.c        |  4 ++--
 drivers/usb/gadget/fsl_udc_core.c      |  2 +-
 drivers/usb/gadget/fusb300_udc.c       |  2 +-
 drivers/usb/gadget/goku_udc.c          |  2 +-
 drivers/usb/gadget/imx_udc.c           |  2 +-
 drivers/usb/gadget/inode.c             |  6 +++---
 drivers/usb/gadget/m66592-udc.c        |  2 +-
 drivers/usb/gadget/net2272.c           |  2 +-
 drivers/usb/gadget/net2280.c           |  2 +-
 drivers/usb/gadget/omap_udc.c          |  2 +-
 drivers/usb/gadget/pch_udc.c           |  2 +-
 drivers/usb/gadget/printer.c           |  2 +-
 drivers/usb/gadget/pxa25x_udc.c        |  2 +-
 drivers/usb/gadget/pxa27x_udc.c        |  2 +-
 drivers/usb/gadget/r8a66597-udc.c      |  2 +-
 drivers/usb/gadget/s3c-hsotg.c         |  2 +-
 drivers/usb/gadget/s3c-hsudc.c         |  2 +-
 drivers/usb/gadget/s3c2410_udc.c       |  4 ++--
 drivers/usb/musb/musb_gadget.c         |  2 +-
 drivers/usb/renesas_usbhs/mod_gadget.c |  2 +-
 include/linux/usb/gadget.h             |  4 ++--
 28 files changed, 41 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c
index e69cdbca8017..e9a2c5c44454 100644
--- a/drivers/usb/gadget/amd5536udc.c
+++ b/drivers/usb/gadget/amd5536udc.c
@@ -1959,7 +1959,7 @@ static int amd5536_start(struct usb_gadget_driver *driver,
 	u32 tmp;
 
 	if (!driver || !bind || !driver->setup
-			|| driver->speed < USB_SPEED_HIGH)
+			|| driver->max_speed < USB_SPEED_HIGH)
 		return -EINVAL;
 	if (!dev)
 		return -ENODEV;
diff --git a/drivers/usb/gadget/at91_udc.c b/drivers/usb/gadget/at91_udc.c
index 8efe0fa9228d..ac41f71bf9ca 100644
--- a/drivers/usb/gadget/at91_udc.c
+++ b/drivers/usb/gadget/at91_udc.c
@@ -1633,7 +1633,7 @@ static int at91_start(struct usb_gadget_driver *driver,
 	unsigned long	flags;
 
 	if (!driver
-			|| driver->speed < USB_SPEED_FULL
+			|| driver->max_speed < USB_SPEED_FULL
 			|| !bind
 			|| !driver->setup) {
 		DBG("bad parameter.\n");
diff --git a/drivers/usb/gadget/ci13xxx_udc.c b/drivers/usb/gadget/ci13xxx_udc.c
index 9767d9170fbd..27e313718422 100644
--- a/drivers/usb/gadget/ci13xxx_udc.c
+++ b/drivers/usb/gadget/ci13xxx_udc.c
@@ -813,7 +813,7 @@ static ssize_t show_driver(struct device *dev, struct device_attribute *attr,
 	n += scnprintf(buf + n, PAGE_SIZE - n, "function  = %s\n",
 		       (driver->function ? driver->function : ""));
 	n += scnprintf(buf + n, PAGE_SIZE - n, "max speed = %d\n",
-		       driver->speed);
+		       driver->max_speed);
 
 	return n;
 }
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index f71b0787983f..a95de6a4a134 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1535,9 +1535,9 @@ composite_resume(struct usb_gadget *gadget)
 
 static struct usb_gadget_driver composite_driver = {
 #ifdef CONFIG_USB_GADGET_SUPERSPEED
-	.speed		= USB_SPEED_SUPER,
+	.max_speed	= USB_SPEED_SUPER,
 #else
-	.speed		= USB_SPEED_HIGH,
+	.max_speed	= USB_SPEED_HIGH,
 #endif
 
 	.unbind		= composite_unbind,
@@ -1584,8 +1584,8 @@ int usb_composite_probe(struct usb_composite_driver *driver,
 		driver->iProduct = driver->name;
 	composite_driver.function =  (char *) driver->name;
 	composite_driver.driver.name = driver->name;
-	composite_driver.speed = min((u8)composite_driver.speed,
-				     (u8)driver->max_speed);
+	composite_driver.max_speed =
+		min_t(u8, composite_driver.max_speed, driver->max_speed);
 	composite = driver;
 	composite_gadget_bind = bind;
 
diff --git a/drivers/usb/gadget/dbgp.c b/drivers/usb/gadget/dbgp.c
index 6256420089f3..19d7bb0df75a 100644
--- a/drivers/usb/gadget/dbgp.c
+++ b/drivers/usb/gadget/dbgp.c
@@ -404,7 +404,7 @@ fail:
 
 static struct usb_gadget_driver dbgp_driver = {
 	.function = "dbgp",
-	.speed = USB_SPEED_HIGH,
+	.max_speed = USB_SPEED_HIGH,
 	.unbind = dbgp_unbind,
 	.setup = dbgp_setup,
 	.disconnect = dbgp_disconnect,
diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c
index cf235d84d8b7..db815c2da7ed 100644
--- a/drivers/usb/gadget/dummy_hcd.c
+++ b/drivers/usb/gadget/dummy_hcd.c
@@ -823,19 +823,18 @@ static int dummy_pullup (struct usb_gadget *_gadget, int value)
 
 	if (value && dum->driver) {
 		if (mod_data.is_super_speed)
-			dum->gadget.speed = dum->driver->speed;
+			dum->gadget.speed = dum->driver->max_speed;
 		else if (mod_data.is_high_speed)
 			dum->gadget.speed = min_t(u8, USB_SPEED_HIGH,
-					dum->driver->speed);
+					dum->driver->max_speed);
 		else
 			dum->gadget.speed = USB_SPEED_FULL;
 		dummy_udc_udpate_ep0(dum);
 
-		if (dum->gadget.speed < dum->driver->speed)
+		if (dum->gadget.speed < dum->driver->max_speed)
 			dev_dbg(udc_dev(dum), "This device can perform faster"
-					" if you connect it to a %s port...\n",
-					(dum->driver->speed == USB_SPEED_SUPER ?
-					 "SuperSpeed" : "HighSpeed"));
+				" if you connect it to a %s port...\n",
+				usb_speed_string(dum->driver->max_speed));
 	}
 	dum_hcd = gadget_to_dummy_hcd(_gadget);
 
@@ -898,7 +897,7 @@ static int dummy_udc_start(struct usb_gadget *g,
 	struct dummy_hcd	*dum_hcd = gadget_to_dummy_hcd(g);
 	struct dummy		*dum = dum_hcd->dum;
 
-	if (driver->speed == USB_SPEED_UNKNOWN)
+	if (driver->max_speed == USB_SPEED_UNKNOWN)
 		return -EINVAL;
 
 	/*
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index e8ff2f1c06cc..e0f30fc70e45 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -3604,7 +3604,7 @@ static void fsg_resume(struct usb_gadget *gadget)
 /*-------------------------------------------------------------------------*/
 
 static struct usb_gadget_driver		fsg_driver = {
-	.speed		= USB_SPEED_SUPER,
+	.max_speed	= USB_SPEED_SUPER,
 	.function	= (char *) fsg_string_product,
 	.unbind		= fsg_unbind,
 	.disconnect	= fsg_disconnect,
diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c
index e00cf92409ce..b7a1efef938d 100644
--- a/drivers/usb/gadget/fsl_qe_udc.c
+++ b/drivers/usb/gadget/fsl_qe_udc.c
@@ -2336,7 +2336,7 @@ static int fsl_qe_start(struct usb_gadget_driver *driver,
 	if (!udc_controller)
 		return -ENODEV;
 
-	if (!driver || driver->speed < USB_SPEED_FULL
+	if (!driver || driver->max_speed < USB_SPEED_FULL
 			|| !bind || !driver->disconnect || !driver->setup)
 		return -EINVAL;
 
@@ -2350,7 +2350,7 @@ static int fsl_qe_start(struct usb_gadget_driver *driver,
 	/* hook up the driver */
 	udc_controller->driver = driver;
 	udc_controller->gadget.dev.driver = &driver->driver;
-	udc_controller->gadget.speed = (enum usb_device_speed)(driver->speed);
+	udc_controller->gadget.speed = driver->max_speed;
 	spin_unlock_irqrestore(&udc_controller->lock, flags);
 
 	retval = bind(&udc_controller->gadget);
diff --git a/drivers/usb/gadget/fsl_udc_core.c b/drivers/usb/gadget/fsl_udc_core.c
index 6de8ec753818..d7ea6c076ce9 100644
--- a/drivers/usb/gadget/fsl_udc_core.c
+++ b/drivers/usb/gadget/fsl_udc_core.c
@@ -1934,7 +1934,7 @@ static int fsl_start(struct usb_gadget_driver *driver,
 	if (!udc_controller)
 		return -ENODEV;
 
-	if (!driver || driver->speed < USB_SPEED_FULL
+	if (!driver || driver->max_speed < USB_SPEED_FULL
 			|| !bind || !driver->disconnect || !driver->setup)
 		return -EINVAL;
 
diff --git a/drivers/usb/gadget/fusb300_udc.c b/drivers/usb/gadget/fusb300_udc.c
index 6e32a60ab3b7..5831cb4a0b35 100644
--- a/drivers/usb/gadget/fusb300_udc.c
+++ b/drivers/usb/gadget/fusb300_udc.c
@@ -1317,7 +1317,7 @@ static int fusb300_udc_start(struct usb_gadget_driver *driver,
 	int retval;
 
 	if (!driver
-			|| driver->speed < USB_SPEED_FULL
+			|| driver->max_speed < USB_SPEED_FULL
 			|| !bind
 			|| !driver->setup)
 		return -EINVAL;
diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c
index ab9c924eee76..5af70fcce139 100644
--- a/drivers/usb/gadget/goku_udc.c
+++ b/drivers/usb/gadget/goku_udc.c
@@ -1357,7 +1357,7 @@ static int goku_start(struct usb_gadget_driver *driver,
 	int			retval;
 
 	if (!driver
-			|| driver->speed < USB_SPEED_FULL
+			|| driver->max_speed < USB_SPEED_FULL
 			|| !bind
 			|| !driver->disconnect
 			|| !driver->setup)
diff --git a/drivers/usb/gadget/imx_udc.c b/drivers/usb/gadget/imx_udc.c
index 2d978c0e7ced..8d1c75abd73d 100644
--- a/drivers/usb/gadget/imx_udc.c
+++ b/drivers/usb/gadget/imx_udc.c
@@ -1336,7 +1336,7 @@ static int imx_udc_start(struct usb_gadget_driver *driver,
 	int retval;
 
 	if (!driver
-		|| driver->speed < USB_SPEED_FULL
+		|| driver->max_speed < USB_SPEED_FULL
 		|| !bind
 		|| !driver->disconnect
 		|| !driver->setup)
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index 6ccae2707e59..93612519b53a 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -1766,9 +1766,9 @@ gadgetfs_suspend (struct usb_gadget *gadget)
 
 static struct usb_gadget_driver gadgetfs_driver = {
 #ifdef	CONFIG_USB_GADGET_DUALSPEED
-	.speed		= USB_SPEED_HIGH,
+	.max_speed	= USB_SPEED_HIGH,
 #else
-	.speed		= USB_SPEED_FULL,
+	.max_speed	= USB_SPEED_FULL,
 #endif
 	.function	= (char *) driver_desc,
 	.unbind		= gadgetfs_unbind,
@@ -1792,7 +1792,7 @@ static int gadgetfs_probe (struct usb_gadget *gadget)
 }
 
 static struct usb_gadget_driver probe_driver = {
-	.speed		= USB_SPEED_HIGH,
+	.max_speed	= USB_SPEED_HIGH,
 	.unbind		= gadgetfs_nop,
 	.setup		= (void *)gadgetfs_nop,
 	.disconnect	= gadgetfs_nop,
diff --git a/drivers/usb/gadget/m66592-udc.c b/drivers/usb/gadget/m66592-udc.c
index a7692c208ea7..3608b3bd5732 100644
--- a/drivers/usb/gadget/m66592-udc.c
+++ b/drivers/usb/gadget/m66592-udc.c
@@ -1472,7 +1472,7 @@ static int m66592_start(struct usb_gadget_driver *driver,
 	int retval;
 
 	if (!driver
-			|| driver->speed < USB_SPEED_HIGH
+			|| driver->max_speed < USB_SPEED_HIGH
 			|| !bind
 			|| !driver->setup)
 		return -EINVAL;
diff --git a/drivers/usb/gadget/net2272.c b/drivers/usb/gadget/net2272.c
index d5050f4e8449..4c81d540bc26 100644
--- a/drivers/usb/gadget/net2272.c
+++ b/drivers/usb/gadget/net2272.c
@@ -1459,7 +1459,7 @@ static int net2272_start(struct usb_gadget *_gadget,
 	unsigned i;
 
 	if (!driver || !driver->unbind || !driver->setup ||
-	    driver->speed != USB_SPEED_HIGH)
+	    driver->max_speed != USB_SPEED_HIGH)
 		return -EINVAL;
 
 	dev = container_of(_gadget, struct net2272, gadget);
diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c
index 9ee36fd5adae..cf1f36454d08 100644
--- a/drivers/usb/gadget/net2280.c
+++ b/drivers/usb/gadget/net2280.c
@@ -1881,7 +1881,7 @@ static int net2280_start(struct usb_gadget *_gadget,
 	 * (dev->usb->xcvrdiag & FORCE_FULL_SPEED_MODE)
 	 * "must not be used in normal operation"
 	 */
-	if (!driver || driver->speed < USB_SPEED_HIGH
+	if (!driver || driver->max_speed < USB_SPEED_HIGH
 			|| !driver->setup)
 		return -EINVAL;
 
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index ed01a0f5f119..7db5bbe6251b 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -2110,7 +2110,7 @@ static int omap_udc_start(struct usb_gadget_driver *driver,
 		return -ENODEV;
 	if (!driver
 			// FIXME if otg, check:  driver->is_otg
-			|| driver->speed < USB_SPEED_FULL
+			|| driver->max_speed < USB_SPEED_FULL
 			|| !bind || !driver->setup)
 		return -EINVAL;
 
diff --git a/drivers/usb/gadget/pch_udc.c b/drivers/usb/gadget/pch_udc.c
index 7f97b4adba3e..dd2313cce1d3 100644
--- a/drivers/usb/gadget/pch_udc.c
+++ b/drivers/usb/gadget/pch_udc.c
@@ -2693,7 +2693,7 @@ static int pch_udc_start(struct usb_gadget_driver *driver,
 	struct pch_udc_dev	*dev = pch_udc;
 	int			retval;
 
-	if (!driver || (driver->speed == USB_SPEED_UNKNOWN) || !bind ||
+	if (!driver || (driver->max_speed == USB_SPEED_UNKNOWN) || !bind ||
 	    !driver->setup || !driver->unbind || !driver->disconnect) {
 		dev_err(&dev->pdev->dev,
 			"%s: invalid driver parameter\n", __func__);
diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c
index b74f49ac95e5..d83134b0f78a 100644
--- a/drivers/usb/gadget/printer.c
+++ b/drivers/usb/gadget/printer.c
@@ -1535,7 +1535,7 @@ fail:
 /*-------------------------------------------------------------------------*/
 
 static struct usb_gadget_driver printer_driver = {
-	.speed		= DEVSPEED,
+	.max_speed	= DEVSPEED,
 
 	.function	= (char *) driver_desc,
 	.unbind		= printer_unbind,
diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c
index c090a7e3ecf8..dd470635f4f7 100644
--- a/drivers/usb/gadget/pxa25x_udc.c
+++ b/drivers/usb/gadget/pxa25x_udc.c
@@ -1264,7 +1264,7 @@ static int pxa25x_start(struct usb_gadget_driver *driver,
 	int			retval;
 
 	if (!driver
-			|| driver->speed < USB_SPEED_FULL
+			|| driver->max_speed < USB_SPEED_FULL
 			|| !bind
 			|| !driver->disconnect
 			|| !driver->setup)
diff --git a/drivers/usb/gadget/pxa27x_udc.c b/drivers/usb/gadget/pxa27x_udc.c
index 18b6b091f2a6..f4c44eb806c3 100644
--- a/drivers/usb/gadget/pxa27x_udc.c
+++ b/drivers/usb/gadget/pxa27x_udc.c
@@ -1807,7 +1807,7 @@ static int pxa27x_udc_start(struct usb_gadget_driver *driver,
 	struct pxa_udc *udc = the_controller;
 	int retval;
 
-	if (!driver || driver->speed < USB_SPEED_FULL || !bind
+	if (!driver || driver->max_speed < USB_SPEED_FULL || !bind
 			|| !driver->disconnect || !driver->setup)
 		return -EINVAL;
 	if (!udc)
diff --git a/drivers/usb/gadget/r8a66597-udc.c b/drivers/usb/gadget/r8a66597-udc.c
index 3666d7c54e24..f5b8d215e1d5 100644
--- a/drivers/usb/gadget/r8a66597-udc.c
+++ b/drivers/usb/gadget/r8a66597-udc.c
@@ -1746,7 +1746,7 @@ static int r8a66597_start(struct usb_gadget *gadget,
 	struct r8a66597 *r8a66597 = gadget_to_r8a66597(gadget);
 
 	if (!driver
-			|| driver->speed < USB_SPEED_HIGH
+			|| driver->max_speed < USB_SPEED_HIGH
 			|| !driver->setup)
 		return -EINVAL;
 	if (!r8a66597)
diff --git a/drivers/usb/gadget/s3c-hsotg.c b/drivers/usb/gadget/s3c-hsotg.c
index 6bc7ad87c306..d098c36cb587 100644
--- a/drivers/usb/gadget/s3c-hsotg.c
+++ b/drivers/usb/gadget/s3c-hsotg.c
@@ -2586,7 +2586,7 @@ static int s3c_hsotg_start(struct usb_gadget_driver *driver,
 		return -EINVAL;
 	}
 
-	if (driver->speed < USB_SPEED_FULL)
+	if (driver->max_speed < USB_SPEED_FULL)
 		dev_err(hsotg->dev, "%s: bad speed\n", __func__);
 
 	if (!bind || !driver->setup) {
diff --git a/drivers/usb/gadget/s3c-hsudc.c b/drivers/usb/gadget/s3c-hsudc.c
index 09ea96558b91..f398b8590f9c 100644
--- a/drivers/usb/gadget/s3c-hsudc.c
+++ b/drivers/usb/gadget/s3c-hsudc.c
@@ -1142,7 +1142,7 @@ static int s3c_hsudc_start(struct usb_gadget_driver *driver,
 	int ret;
 
 	if (!driver
-		|| driver->speed < USB_SPEED_FULL
+		|| driver->max_speed < USB_SPEED_FULL
 		|| !bind
 		|| !driver->unbind || !driver->disconnect || !driver->setup)
 		return -EINVAL;
diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c
index b8643771fa80..4d860e9460a4 100644
--- a/drivers/usb/gadget/s3c2410_udc.c
+++ b/drivers/usb/gadget/s3c2410_udc.c
@@ -1683,9 +1683,9 @@ static int s3c2410_udc_start(struct usb_gadget_driver *driver,
 	if (udc->driver)
 		return -EBUSY;
 
-	if (!bind || !driver->setup || driver->speed < USB_SPEED_FULL) {
+	if (!bind || !driver->setup || driver->max_speed < USB_SPEED_FULL) {
 		printk(KERN_ERR "Invalid driver: bind %p setup %p speed %d\n",
-			bind, driver->setup, driver->speed);
+			bind, driver->setup, driver->max_speed);
 		return -EINVAL;
 	}
 #if defined(MODULE)
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 47a3d1e5b28b..3148461287b1 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1903,7 +1903,7 @@ static int musb_gadget_start(struct usb_gadget *g,
 	unsigned long		flags;
 	int			retval = -EINVAL;
 
-	if (driver->speed < USB_SPEED_HIGH)
+	if (driver->max_speed < USB_SPEED_HIGH)
 		goto err0;
 
 	pm_runtime_get_sync(musb->controller);
diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
index 43c67e5cde26..c307c8f5bd3a 100644
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -751,7 +751,7 @@ static int usbhsg_gadget_start(struct usb_gadget *gadget,
 
 	if (!driver		||
 	    !driver->setup	||
-	    driver->speed < USB_SPEED_FULL)
+	    driver->max_speed < USB_SPEED_FULL)
 		return -EINVAL;
 
 	/* first hook up the driver ... */
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 98dc306898b5..317d8925387c 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -760,7 +760,7 @@ static inline int usb_gadget_disconnect(struct usb_gadget *gadget)
 /**
  * struct usb_gadget_driver - driver for usb 'slave' devices
  * @function: String describing the gadget's function
- * @speed: Highest speed the driver handles.
+ * @max_speed: Highest speed the driver handles.
  * @setup: Invoked for ep0 control requests that aren't handled by
  *	the hardware level driver. Most calls must be handled by
  *	the gadget driver, including descriptor and configuration
@@ -824,7 +824,7 @@ static inline int usb_gadget_disconnect(struct usb_gadget *gadget)
  */
 struct usb_gadget_driver {
 	char			*function;
-	enum usb_device_speed	speed;
+	enum usb_device_speed	max_speed;
 	void			(*unbind)(struct usb_gadget *);
 	int			(*setup)(struct usb_gadget *,
 					const struct usb_ctrlrequest *);
-- 
cgit v1.2.3


From 267aed9dfeb2225960043f89db1f58d8ab522797 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 12 Dec 2011 13:54:36 +0000
Subject: UAPI: elf_read_implies_exec() is a kernel-only feature - so hide from
 userspace

elf_read_implies_exec() is a kernel-only feature as the second parameter is a
constant that isn't exported to userspace.  Not only that, but the
arch-specific overrides are not exported either.

So hide the macro from userspace.

Similarly, struct file should not be predeclared in userspace.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/elf.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/elf.h b/include/linux/elf.h
index 31f0508d7da7..999b4f52e8e5 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -7,15 +7,6 @@
 #include <asm/elf.h>
 #endif
 
-struct file;
-
-#ifndef elf_read_implies_exec
-  /* Executables for which elf_read_implies_exec() returns TRUE will
-     have the READ_IMPLIES_EXEC personality flag set automatically.
-     Override in asm/elf.h as needed.  */
-# define elf_read_implies_exec(ex, have_pt_gnu_stack)	0
-#endif
-
 /* 32-bit ELF base types. */
 typedef __u32	Elf32_Addr;
 typedef __u16	Elf32_Half;
@@ -414,6 +405,13 @@ typedef struct elf64_note {
 } Elf64_Nhdr;
 
 #ifdef __KERNEL__
+#ifndef elf_read_implies_exec
+  /* Executables for which elf_read_implies_exec() returns TRUE will
+     have the READ_IMPLIES_EXEC personality flag set automatically.
+     Override in asm/elf.h as needed.  */
+# define elf_read_implies_exec(ex, have_pt_gnu_stack)	0
+#endif
+
 #if ELF_CLASS == ELFCLASS32
 
 extern Elf32_Dyn _DYNAMIC [];
@@ -437,6 +435,8 @@ extern Elf64_Dyn _DYNAMIC [];
 #endif
 
 /* Optional callbacks to write extra ELF notes. */
+struct file;
+
 #ifndef ARCH_HAVE_EXTRA_ELF_NOTES
 static inline int elf_coredump_extra_notes_size(void) { return 0; }
 static inline int elf_coredump_extra_notes_write(struct file *file,
-- 
cgit v1.2.3


From 6a113ddc03bcc32d3d440dce42b445868d5be093 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 1 Dec 2011 12:04:58 +0100
Subject: iommu/amd: Add device errata handling

Add infrastructure for errata-handling and handle two known
erratas in the IOMMUv2 code.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu.c       | 57 ++++++++++++++++++++++++++++++++++++++---
 drivers/iommu/amd_iommu_types.h |  1 +
 include/linux/amd-iommu.h       | 18 +++++++++++++
 3 files changed, 73 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 71773d0fb769..e453bbd09445 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -172,6 +172,15 @@ static bool pci_iommuv2_capable(struct pci_dev *pdev)
 	return true;
 }
 
+static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
+{
+	struct iommu_dev_data *dev_data;
+
+	dev_data = get_dev_data(&pdev->dev);
+
+	return dev_data->errata & (1 << erratum) ? true : false;
+}
+
 /*
  * In this function the list of preallocated protection domains is traversed to
  * find the domain for a specific device
@@ -1934,9 +1943,33 @@ static void pdev_iommuv2_disable(struct pci_dev *pdev)
 	pci_disable_pasid(pdev);
 }
 
+/* FIXME: Change generic reset-function to do the same */
+static int pri_reset_while_enabled(struct pci_dev *pdev)
+{
+	u16 control;
+	int pos;
+
+	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	if (!pos)
+		return -EINVAL;
+
+	pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
+	control |= PCI_PRI_RESET;
+	pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
+
+	return 0;
+}
+
 static int pdev_iommuv2_enable(struct pci_dev *pdev)
 {
-	int ret;
+	bool reset_enable;
+	int reqs, ret;
+
+	/* FIXME: Hardcode number of outstanding requests for now */
+	reqs = 32;
+	if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE))
+		reqs = 1;
+	reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET);
 
 	/* Only allow access to user-accessible pages */
 	ret = pci_enable_pasid(pdev, 0);
@@ -1948,11 +1981,17 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev)
 	if (ret)
 		goto out_err;
 
-	/* FIXME: Hardcode number of outstanding requests for now */
-	ret = pci_enable_pri(pdev, 32);
+	/* Enable PRI */
+	ret = pci_enable_pri(pdev, reqs);
 	if (ret)
 		goto out_err;
 
+	if (reset_enable) {
+		ret = pri_reset_while_enabled(pdev);
+		if (ret)
+			goto out_err;
+	}
+
 	ret = pci_enable_ats(pdev, PAGE_SHIFT);
 	if (ret)
 		goto out_err;
@@ -3481,3 +3520,15 @@ struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
 	return domain->iommu_domain;
 }
 EXPORT_SYMBOL(amd_iommu_get_v2_domain);
+
+void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum)
+{
+	struct iommu_dev_data *dev_data;
+
+	if (!amd_iommu_v2_supported())
+		return;
+
+	dev_data = get_dev_data(&pdev->dev);
+	dev_data->errata |= (1 << erratum);
+}
+EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index c39988fbcbbb..6ad8b10b3130 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -404,6 +404,7 @@ struct iommu_dev_data {
 	} ats;				  /* ATS state */
 	bool pri_tlp;			  /* PASID TLB required for
 					     PPR completions */
+	u32 errata;			  /* Bitmap for errata to apply */
 };
 
 /*
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index a6863a2dec1f..4152c3073db4 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -26,6 +26,24 @@
 
 extern int amd_iommu_detect(void);
 
+
+/**
+ * amd_iommu_enable_device_erratum() - Enable erratum workaround for device
+ *				       in the IOMMUv2 driver
+ * @pdev: The PCI device the workaround is necessary for
+ * @erratum: The erratum workaround to enable
+ *
+ * Possible values for the erratum number are for now:
+ * - AMD_PRI_DEV_ERRATUM_ENABLE_RESET - Reset PRI capability when PRI
+ *					is enabled
+ * - AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE - Limit number of outstanding PRI
+ *					 requests to one
+ */
+#define AMD_PRI_DEV_ERRATUM_ENABLE_RESET		0
+#define AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE		1
+
+extern void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum);
+
 #else
 
 static inline int amd_iommu_detect(void) { return -ENODEV; }
-- 
cgit v1.2.3


From ed96f228ba9725edf69385bffdc19ee5bb0ec641 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 23 Nov 2011 17:30:39 +0100
Subject: iommu/amd: Implement device aquisition code for IOMMUv2

This patch adds the amd_iommu_init_device() and
amd_iommu_free_device() functions which make a device and
the IOMMU ready for IOMMUv2 usage.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu_v2.c | 210 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/amd-iommu.h    |  23 ++++-
 2 files changed, 232 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index a19e07d11d12..bfceed25c186 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -16,20 +16,230 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
+#include <linux/amd-iommu.h>
+#include <linux/mm_types.h>
 #include <linux/module.h>
+#include <linux/iommu.h>
+#include <linux/pci.h>
+#include <linux/gfp.h>
+
+#include "amd_iommu_proto.h"
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>");
 
+#define MAX_DEVICES		0x10000
+#define PRI_QUEUE_SIZE		512
+
+struct pri_queue {
+	atomic_t inflight;
+	bool finish;
+};
+
+struct pasid_state {
+	struct list_head list;			/* For global state-list */
+	atomic_t count;				/* Reference count */
+	struct task_struct *task;		/* Task bound to this PASID */
+	struct mm_struct *mm;			/* mm_struct for the faults */
+	struct pri_queue pri[PRI_QUEUE_SIZE];	/* PRI tag states */
+	struct device_state *device_state;	/* Link to our device_state */
+	int pasid;				/* PASID index */
+};
+
+struct device_state {
+	atomic_t count;
+	struct pci_dev *pdev;
+	struct pasid_state **states;
+	struct iommu_domain *domain;
+	int pasid_levels;
+	int max_pasids;
+	spinlock_t lock;
+};
+
+struct device_state **state_table;
+static spinlock_t state_lock;
+
+/* List and lock for all pasid_states */
+static LIST_HEAD(pasid_state_list);
+
+static u16 device_id(struct pci_dev *pdev)
+{
+	u16 devid;
+
+	devid = pdev->bus->number;
+	devid = (devid << 8) | pdev->devfn;
+
+	return devid;
+}
+
+static struct device_state *get_device_state(u16 devid)
+{
+	struct device_state *dev_state;
+	unsigned long flags;
+
+	spin_lock_irqsave(&state_lock, flags);
+	dev_state = state_table[devid];
+	if (dev_state != NULL)
+		atomic_inc(&dev_state->count);
+	spin_unlock_irqrestore(&state_lock, flags);
+
+	return dev_state;
+}
+
+static void free_device_state(struct device_state *dev_state)
+{
+	iommu_detach_device(dev_state->domain, &dev_state->pdev->dev);
+	iommu_domain_free(dev_state->domain);
+	kfree(dev_state);
+}
+
+static void put_device_state(struct device_state *dev_state)
+{
+	if (atomic_dec_and_test(&dev_state->count))
+		free_device_state(dev_state);
+}
+
+int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
+{
+	struct device_state *dev_state;
+	unsigned long flags;
+	int ret, tmp;
+	u16 devid;
+
+	might_sleep();
+
+	if (!amd_iommu_v2_supported())
+		return -ENODEV;
+
+	if (pasids <= 0 || pasids > (PASID_MASK + 1))
+		return -EINVAL;
+
+	devid = device_id(pdev);
+
+	dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
+	if (dev_state == NULL)
+		return -ENOMEM;
+
+	spin_lock_init(&dev_state->lock);
+	dev_state->pdev = pdev;
+
+	tmp = pasids;
+	for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
+		dev_state->pasid_levels += 1;
+
+	atomic_set(&dev_state->count, 1);
+	dev_state->max_pasids = pasids;
+
+	ret = -ENOMEM;
+	dev_state->states = (void *)get_zeroed_page(GFP_KERNEL);
+	if (dev_state->states == NULL)
+		goto out_free_dev_state;
+
+	dev_state->domain = iommu_domain_alloc(&pci_bus_type);
+	if (dev_state->domain == NULL)
+		goto out_free_states;
+
+	amd_iommu_domain_direct_map(dev_state->domain);
+
+	ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
+	if (ret)
+		goto out_free_domain;
+
+	ret = iommu_attach_device(dev_state->domain, &pdev->dev);
+	if (ret != 0)
+		goto out_free_domain;
+
+	spin_lock_irqsave(&state_lock, flags);
+
+	if (state_table[devid] != NULL) {
+		spin_unlock_irqrestore(&state_lock, flags);
+		ret = -EBUSY;
+		goto out_free_domain;
+	}
+
+	state_table[devid] = dev_state;
+
+	spin_unlock_irqrestore(&state_lock, flags);
+
+	return 0;
+
+out_free_domain:
+	iommu_domain_free(dev_state->domain);
+
+out_free_states:
+	free_page((unsigned long)dev_state->states);
+
+out_free_dev_state:
+	kfree(dev_state);
+
+	return ret;
+}
+EXPORT_SYMBOL(amd_iommu_init_device);
+
+void amd_iommu_free_device(struct pci_dev *pdev)
+{
+	struct device_state *dev_state;
+	unsigned long flags;
+	u16 devid;
+
+	if (!amd_iommu_v2_supported())
+		return;
+
+	devid = device_id(pdev);
+
+	spin_lock_irqsave(&state_lock, flags);
+
+	dev_state = state_table[devid];
+	if (dev_state == NULL) {
+		spin_unlock_irqrestore(&state_lock, flags);
+		return;
+	}
+
+	state_table[devid] = NULL;
+
+	spin_unlock_irqrestore(&state_lock, flags);
+
+	put_device_state(dev_state);
+}
+EXPORT_SYMBOL(amd_iommu_free_device);
+
 static int __init amd_iommu_v2_init(void)
 {
+	size_t state_table_size;
+
 	pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>");
 
+	spin_lock_init(&state_lock);
+
+	state_table_size = MAX_DEVICES * sizeof(struct device_state *);
+	state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					       get_order(state_table_size));
+	if (state_table == NULL)
+		return -ENOMEM;
+
 	return 0;
 }
 
 static void __exit amd_iommu_v2_exit(void)
 {
+	struct device_state *dev_state;
+	size_t state_table_size;
+	int i;
+
+	for (i = 0; i < MAX_DEVICES; ++i) {
+		dev_state = get_device_state(i);
+
+		if (dev_state == NULL)
+			continue;
+
+		WARN_ON_ONCE(1);
+
+		amd_iommu_free_device(dev_state->pdev);
+		put_device_state(dev_state);
+	}
+
+	state_table_size = MAX_DEVICES * sizeof(struct device_state *);
+	free_pages((unsigned long)state_table, get_order(state_table_size));
 }
 
 module_init(amd_iommu_v2_init);
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 4152c3073db4..e8c7a2ec86b3 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -20,10 +20,12 @@
 #ifndef _ASM_X86_AMD_IOMMU_H
 #define _ASM_X86_AMD_IOMMU_H
 
-#include <linux/irqreturn.h>
+#include <linux/types.h>
 
 #ifdef CONFIG_AMD_IOMMU
 
+struct pci_dev;
+
 extern int amd_iommu_detect(void);
 
 
@@ -33,6 +35,7 @@ extern int amd_iommu_detect(void);
  * @pdev: The PCI device the workaround is necessary for
  * @erratum: The erratum workaround to enable
  *
+ * The function needs to be called before amd_iommu_init_device().
  * Possible values for the erratum number are for now:
  * - AMD_PRI_DEV_ERRATUM_ENABLE_RESET - Reset PRI capability when PRI
  *					is enabled
@@ -44,6 +47,24 @@ extern int amd_iommu_detect(void);
 
 extern void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum);
 
+/**
+ * amd_iommu_init_device() - Init device for use with IOMMUv2 driver
+ * @pdev: The PCI device to initialize
+ * @pasids: Number of PASIDs to support for this device
+ *
+ * This function does all setup for the device pdev so that it can be
+ * used with IOMMUv2.
+ * Returns 0 on success or negative value on error.
+ */
+extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids);
+
+/**
+ * amd_iommu_free_device() - Free all IOMMUv2 related device resources
+ *			     and disable IOMMUv2 usage for this device
+ * @pdev: The PCI device to disable IOMMUv2 usage for'
+ */
+extern void amd_iommu_free_device(struct pci_dev *pdev);
+
 #else
 
 static inline int amd_iommu_detect(void) { return -ENODEV; }
-- 
cgit v1.2.3


From 2d5503b624736abfe0e0bad281f9b8d8a705b930 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 24 Nov 2011 10:41:57 +0100
Subject: iommu/amd: Add routines to bind/unbind a pasid

This patch adds routines to bind a specific process
address-space to a given PASID.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu_v2.c | 306 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/amd-iommu.h    |  26 ++++
 2 files changed, 332 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index bfceed25c186..b5ee09ece651 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -19,6 +19,7 @@
 #include <linux/amd-iommu.h>
 #include <linux/mm_types.h>
 #include <linux/module.h>
+#include <linux/sched.h>
 #include <linux/iommu.h>
 #include <linux/pci.h>
 #include <linux/gfp.h>
@@ -61,6 +62,10 @@ static spinlock_t state_lock;
 
 /* List and lock for all pasid_states */
 static LIST_HEAD(pasid_state_list);
+static DEFINE_SPINLOCK(ps_lock);
+
+static void free_pasid_states(struct device_state *dev_state);
+static void unbind_pasid(struct device_state *dev_state, int pasid);
 
 static u16 device_id(struct pci_dev *pdev)
 {
@@ -88,8 +93,16 @@ static struct device_state *get_device_state(u16 devid)
 
 static void free_device_state(struct device_state *dev_state)
 {
+	/*
+	 * First detach device from domain - No more PRI requests will arrive
+	 * from that device after it is unbound from the IOMMUv2 domain.
+	 */
 	iommu_detach_device(dev_state->domain, &dev_state->pdev->dev);
+
+	/* Everything is down now, free the IOMMUv2 domain */
 	iommu_domain_free(dev_state->domain);
+
+	/* Finally get rid of the device-state */
 	kfree(dev_state);
 }
 
@@ -99,6 +112,296 @@ static void put_device_state(struct device_state *dev_state)
 		free_device_state(dev_state);
 }
 
+static void link_pasid_state(struct pasid_state *pasid_state)
+{
+	spin_lock(&ps_lock);
+	list_add_tail(&pasid_state->list, &pasid_state_list);
+	spin_unlock(&ps_lock);
+}
+
+static void __unlink_pasid_state(struct pasid_state *pasid_state)
+{
+	list_del(&pasid_state->list);
+}
+
+static void unlink_pasid_state(struct pasid_state *pasid_state)
+{
+	spin_lock(&ps_lock);
+	__unlink_pasid_state(pasid_state);
+	spin_unlock(&ps_lock);
+}
+
+/* Must be called under dev_state->lock */
+static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
+						  int pasid, bool alloc)
+{
+	struct pasid_state **root, **ptr;
+	int level, index;
+
+	level = dev_state->pasid_levels;
+	root  = dev_state->states;
+
+	while (true) {
+
+		index = (pasid >> (9 * level)) & 0x1ff;
+		ptr   = &root[index];
+
+		if (level == 0)
+			break;
+
+		if (*ptr == NULL) {
+			if (!alloc)
+				return NULL;
+
+			*ptr = (void *)get_zeroed_page(GFP_ATOMIC);
+			if (*ptr == NULL)
+				return NULL;
+		}
+
+		root   = (struct pasid_state **)*ptr;
+		level -= 1;
+	}
+
+	return ptr;
+}
+
+static int set_pasid_state(struct device_state *dev_state,
+			   struct pasid_state *pasid_state,
+			   int pasid)
+{
+	struct pasid_state **ptr;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&dev_state->lock, flags);
+	ptr = __get_pasid_state_ptr(dev_state, pasid, true);
+
+	ret = -ENOMEM;
+	if (ptr == NULL)
+		goto out_unlock;
+
+	ret = -ENOMEM;
+	if (*ptr != NULL)
+		goto out_unlock;
+
+	*ptr = pasid_state;
+
+	ret = 0;
+
+out_unlock:
+	spin_unlock_irqrestore(&dev_state->lock, flags);
+
+	return ret;
+}
+
+static void clear_pasid_state(struct device_state *dev_state, int pasid)
+{
+	struct pasid_state **ptr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_state->lock, flags);
+	ptr = __get_pasid_state_ptr(dev_state, pasid, true);
+
+	if (ptr == NULL)
+		goto out_unlock;
+
+	*ptr = NULL;
+
+out_unlock:
+	spin_unlock_irqrestore(&dev_state->lock, flags);
+}
+
+static struct pasid_state *get_pasid_state(struct device_state *dev_state,
+					   int pasid)
+{
+	struct pasid_state **ptr, *ret = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_state->lock, flags);
+	ptr = __get_pasid_state_ptr(dev_state, pasid, false);
+
+	if (ptr == NULL)
+		goto out_unlock;
+
+	ret = *ptr;
+	if (ret)
+		atomic_inc(&ret->count);
+
+out_unlock:
+	spin_unlock_irqrestore(&dev_state->lock, flags);
+
+	return ret;
+}
+
+static void free_pasid_state(struct pasid_state *pasid_state)
+{
+	kfree(pasid_state);
+}
+
+static void put_pasid_state(struct pasid_state *pasid_state)
+{
+	if (atomic_dec_and_test(&pasid_state->count)) {
+		put_device_state(pasid_state->device_state);
+		mmput(pasid_state->mm);
+		free_pasid_state(pasid_state);
+	}
+}
+
+static void unbind_pasid(struct device_state *dev_state, int pasid)
+{
+	struct pasid_state *pasid_state;
+
+	pasid_state = get_pasid_state(dev_state, pasid);
+	if (pasid_state == NULL)
+		return;
+
+	unlink_pasid_state(pasid_state);
+
+	amd_iommu_domain_clear_gcr3(dev_state->domain, pasid);
+	clear_pasid_state(dev_state, pasid);
+
+	put_pasid_state(pasid_state); /* Reference taken in this function */
+	put_pasid_state(pasid_state); /* Reference taken in bind() function */
+}
+
+static void free_pasid_states_level1(struct pasid_state **tbl)
+{
+	int i;
+
+	for (i = 0; i < 512; ++i) {
+		if (tbl[i] == NULL)
+			continue;
+
+		free_page((unsigned long)tbl[i]);
+	}
+}
+
+static void free_pasid_states_level2(struct pasid_state **tbl)
+{
+	struct pasid_state **ptr;
+	int i;
+
+	for (i = 0; i < 512; ++i) {
+		if (tbl[i] == NULL)
+			continue;
+
+		ptr = (struct pasid_state **)tbl[i];
+		free_pasid_states_level1(ptr);
+	}
+}
+
+static void free_pasid_states(struct device_state *dev_state)
+{
+	struct pasid_state *pasid_state;
+	int i;
+
+	for (i = 0; i < dev_state->max_pasids; ++i) {
+		pasid_state = get_pasid_state(dev_state, i);
+		if (pasid_state == NULL)
+			continue;
+
+		unbind_pasid(dev_state, i);
+		put_pasid_state(pasid_state);
+	}
+
+	if (dev_state->pasid_levels == 2)
+		free_pasid_states_level2(dev_state->states);
+	else if (dev_state->pasid_levels == 1)
+		free_pasid_states_level1(dev_state->states);
+	else if (dev_state->pasid_levels != 0)
+		BUG();
+
+	free_page((unsigned long)dev_state->states);
+}
+
+int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+			 struct task_struct *task)
+{
+	struct pasid_state *pasid_state;
+	struct device_state *dev_state;
+	u16 devid;
+	int ret;
+
+	might_sleep();
+
+	if (!amd_iommu_v2_supported())
+		return -ENODEV;
+
+	devid     = device_id(pdev);
+	dev_state = get_device_state(devid);
+
+	if (dev_state == NULL)
+		return -EINVAL;
+
+	ret = -EINVAL;
+	if (pasid < 0 || pasid >= dev_state->max_pasids)
+		goto out;
+
+	ret = -ENOMEM;
+	pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL);
+	if (pasid_state == NULL)
+		goto out;
+
+	atomic_set(&pasid_state->count, 1);
+	pasid_state->task         = task;
+	pasid_state->mm           = get_task_mm(task);
+	pasid_state->device_state = dev_state;
+	pasid_state->pasid        = pasid;
+
+	if (pasid_state->mm == NULL)
+		goto out_free;
+
+	ret = set_pasid_state(dev_state, pasid_state, pasid);
+	if (ret)
+		goto out_free;
+
+	ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
+					__pa(pasid_state->mm->pgd));
+	if (ret)
+		goto out_clear_state;
+
+	link_pasid_state(pasid_state);
+
+	return 0;
+
+out_clear_state:
+	clear_pasid_state(dev_state, pasid);
+
+out_free:
+	put_pasid_state(pasid_state);
+
+out:
+	put_device_state(dev_state);
+
+	return ret;
+}
+EXPORT_SYMBOL(amd_iommu_bind_pasid);
+
+void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
+{
+	struct device_state *dev_state;
+	u16 devid;
+
+	might_sleep();
+
+	if (!amd_iommu_v2_supported())
+		return;
+
+	devid = device_id(pdev);
+	dev_state = get_device_state(devid);
+	if (dev_state == NULL)
+		return;
+
+	if (pasid < 0 || pasid >= dev_state->max_pasids)
+		goto out;
+
+	unbind_pasid(dev_state, pasid);
+
+out:
+	put_device_state(dev_state);
+}
+EXPORT_SYMBOL(amd_iommu_unbind_pasid);
+
 int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
 {
 	struct device_state *dev_state;
@@ -199,6 +502,9 @@ void amd_iommu_free_device(struct pci_dev *pdev)
 
 	spin_unlock_irqrestore(&state_lock, flags);
 
+	/* Get rid of any remaining pasid states */
+	free_pasid_states(dev_state);
+
 	put_device_state(dev_state);
 }
 EXPORT_SYMBOL(amd_iommu_free_device);
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index e8c7a2ec86b3..23e21e15dfab 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -24,9 +24,13 @@
 
 #ifdef CONFIG_AMD_IOMMU
 
+struct task_struct;
 struct pci_dev;
 
 extern int amd_iommu_detect(void);
+extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+				struct task_struct *task);
+extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
 
 
 /**
@@ -65,6 +69,28 @@ extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids);
  */
 extern void amd_iommu_free_device(struct pci_dev *pdev);
 
+/**
+ * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device
+ * @pdev: The PCI device to bind the task to
+ * @pasid: The PASID on the device the task should be bound to
+ * @task: the task to bind
+ *
+ * The function returns 0 on success or a negative value on error.
+ */
+extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+				struct task_struct *task);
+
+/**
+ * amd_iommu_unbind_pasid() - Unbind a PASID from its task on
+ *			      a device
+ * @pdev: The device of the PASID
+ * @pasid: The PASID to unbind
+ *
+ * When this function returns the device is no longer using the PASID
+ * and the PASID is no longer bound to its task.
+ */
+extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
+
 #else
 
 static inline int amd_iommu_detect(void) { return -ENODEV; }
-- 
cgit v1.2.3


From 26c34c25e54b4a352596d88c6e44a239dab8e1c5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 3 Nov 2011 13:20:38 +0000
Subject: mfd: Disable more pulls on WM8994

Disable more pulls by default on WM8994 for a small current saving. Since
some designs do leave SPKMODE floating provide platform data to allow that
to be left enabled.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8994-core.c        | 11 ++++++++---
 include/linux/mfd/wm8994/pdata.h |  6 ++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index a6846b04e156..c8956f2cd280 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -373,6 +373,7 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 	struct wm8994_pdata *pdata = wm8994->dev->platform_data;
 	const char *devname;
 	int ret, i;
+	int pulls = 0;
 
 	dev_set_drvdata(wm8994->dev, wm8994);
 
@@ -515,12 +516,16 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 		}
 
 		wm8994->ldo_ena_always_driven = pdata->ldo_ena_always_driven;
+
+		if (pdata->spkmode_pu)
+			pulls |= WM8994_SPKMODE_PU;
 	}
 
-	/* Disable LDO pulldowns while the device is active */
+	/* Disable unneeded pulls */
 	wm8994_set_bits(wm8994, WM8994_PULL_CONTROL_2,
-			WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD,
-			0);
+			WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD |
+			WM8994_SPKMODE_PU | WM8994_CSNADDR_PD,
+			pulls);
 
 	/* In some system designs where the regulators are not in use,
 	 * we can achieve a small reduction in leakage currents by
diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index ea32f306dca6..54e2fef587d5 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -174,6 +174,12 @@ struct wm8994_pdata {
 	 * consumption will rise.
 	 */
 	bool ldo_ena_always_driven;
+
+	/*
+	 * SPKMODE must be pulled internally by the device on this
+	 * system.
+	 */
+	bool spkmode_pu;
 };
 
 #endif
-- 
cgit v1.2.3


From 4de45284d3927b5068de6ed972b11627a3428427 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 25 Oct 2011 15:44:12 +0200
Subject: mfd: Define some additional wm8994 registers

Add a bunch of definitions for wm8994 registers that are not currently
used by software.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8994/registers.h | 96 ++++++++++++++++++++++++++++++++++++
 sound/soc/codecs/wm8994-tables.c     | 12 ++---
 2 files changed, 102 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/registers.h b/include/linux/mfd/wm8994/registers.h
index 83a9caec0e43..8317b19a4972 100644
--- a/include/linux/mfd/wm8994/registers.h
+++ b/include/linux/mfd/wm8994/registers.h
@@ -95,11 +95,15 @@
 #define WM8994_FLL1_CONTROL_3                   0x222
 #define WM8994_FLL1_CONTROL_4                   0x223
 #define WM8994_FLL1_CONTROL_5                   0x224
+#define WM8958_FLL1_EFS_1                       0x226
+#define WM8958_FLL1_EFS_2                       0x227
 #define WM8994_FLL2_CONTROL_1                   0x240
 #define WM8994_FLL2_CONTROL_2                   0x241
 #define WM8994_FLL2_CONTROL_3                   0x242
 #define WM8994_FLL2_CONTROL_4                   0x243
 #define WM8994_FLL2_CONTROL_5                   0x244
+#define WM8958_FLL2_EFS_1                       0x246
+#define WM8958_FLL2_EFS_2                       0x247
 #define WM8994_AIF1_CONTROL_1                   0x300
 #define WM8994_AIF1_CONTROL_2                   0x301
 #define WM8994_AIF1_MASTER_SLAVE                0x302
@@ -116,6 +120,7 @@
 #define WM8994_AIF2DAC_LRCLK                    0x315
 #define WM8994_AIF2DAC_DATA                     0x316
 #define WM8994_AIF2ADC_DATA                     0x317
+#define WM1811_AIF2TX_CONTROL                   0x318
 #define WM8958_AIF3_CONTROL_1                   0x320
 #define WM8958_AIF3_CONTROL_2                   0x321
 #define WM8958_AIF3DAC_DATA                     0x322
@@ -166,6 +171,7 @@
 #define WM8994_AIF1_DAC1_EQ_BAND_5_A            0x491
 #define WM8994_AIF1_DAC1_EQ_BAND_5_B            0x492
 #define WM8994_AIF1_DAC1_EQ_BAND_5_PG           0x493
+#define WM8994_AIF1_DAC1_EQ_BAND_1_C            0x494
 #define WM8994_AIF1_DAC2_EQ_GAINS_1             0x4A0
 #define WM8994_AIF1_DAC2_EQ_GAINS_2             0x4A1
 #define WM8994_AIF1_DAC2_EQ_BAND_1_A            0x4A2
@@ -186,6 +192,7 @@
 #define WM8994_AIF1_DAC2_EQ_BAND_5_A            0x4B1
 #define WM8994_AIF1_DAC2_EQ_BAND_5_B            0x4B2
 #define WM8994_AIF1_DAC2_EQ_BAND_5_PG           0x4B3
+#define WM8994_AIF1_DAC2_EQ_BAND_1_C            0x4B4
 #define WM8994_AIF2_ADC_LEFT_VOLUME             0x500
 #define WM8994_AIF2_ADC_RIGHT_VOLUME            0x501
 #define WM8994_AIF2_DAC_LEFT_VOLUME             0x502
@@ -219,6 +226,7 @@
 #define WM8994_AIF2_EQ_BAND_5_A                 0x591
 #define WM8994_AIF2_EQ_BAND_5_B                 0x592
 #define WM8994_AIF2_EQ_BAND_5_PG                0x593
+#define WM8994_AIF2_EQ_BAND_1_C                 0x594
 #define WM8994_DAC1_MIXER_VOLUMES               0x600
 #define WM8994_DAC1_LEFT_MIXER_ROUTING          0x601
 #define WM8994_DAC1_RIGHT_MIXER_ROUTING         0x602
@@ -264,7 +272,43 @@
 #define WM8958_DSP2_RELEASETIME                 0xA03
 #define WM8958_DSP2_VERMAJMIN                   0xA04
 #define WM8958_DSP2_VERBUILD                    0xA05
+#define WM8958_DSP2_TESTREG                     0xA06
+#define WM8958_DSP2_XORREG                      0xA07
+#define WM8958_DSP2_SHIFTMAXX                   0xA08
+#define WM8958_DSP2_SHIFTMAXY                   0xA09
+#define WM8958_DSP2_SHIFTMAXZ                   0xA0A
+#define WM8958_DSP2_SHIFTMAXEXTLO               0xA0B
+#define WM8958_DSP2_AESSELECT                   0xA0C
 #define WM8958_DSP2_EXECCONTROL                 0xA0D
+#define WM8958_DSP2_SAMPLEBREAK                 0xA0E
+#define WM8958_DSP2_COUNTBREAK                  0xA0F
+#define WM8958_DSP2_INTSTATUS                   0xA10
+#define WM8958_DSP2_EVENTSTATUS                 0xA11
+#define WM8958_DSP2_INTMASK                     0xA12
+#define WM8958_DSP2_CONFIGDWIDTH                0xA13
+#define WM8958_DSP2_CONFIGINSTR                 0xA14
+#define WM8958_DSP2_CONFIGDMEM                  0xA15
+#define WM8958_DSP2_CONFIGDELAYS                0xA16
+#define WM8958_DSP2_CONFIGNUMIO                 0xA17
+#define WM8958_DSP2_CONFIGEXTDEPTH              0xA18
+#define WM8958_DSP2_CONFIGMULTIPLIER            0xA19
+#define WM8958_DSP2_CONFIGCTRLDWIDTH            0xA1A
+#define WM8958_DSP2_CONFIGPIPELINE              0xA1B
+#define WM8958_DSP2_SHIFTMAXEXTHI               0xA1C
+#define WM8958_DSP2_SWVERSIONREG                0xA1D
+#define WM8958_DSP2_CONFIGXMEM                  0xA1E
+#define WM8958_DSP2_CONFIGYMEM                  0xA1F
+#define WM8958_DSP2_CONFIGZMEM                  0xA20
+#define WM8958_FW_BUILD_1                       0x2000
+#define WM8958_FW_BUILD_0                       0x2001
+#define WM8958_FW_ID_1                          0x2002
+#define WM8958_FW_ID_0                          0x2003
+#define WM8958_FW_MAJOR_1                       0x2004
+#define WM8958_FW_MAJOR_0                       0x2005
+#define WM8958_FW_MINOR_1                       0x2006
+#define WM8958_FW_MINOR_0                       0x2007
+#define WM8958_FW_PATCH_1                       0x2008
+#define WM8958_FW_PATCH_0                       0x2009
 #define WM8958_MBC_BAND_2_LOWER_CUTOFF_C1_1     0x2200
 #define WM8958_MBC_BAND_2_LOWER_CUTOFF_C1_2     0x2201
 #define WM8958_MBC_BAND_2_LOWER_CUTOFF_C2_1     0x2202
@@ -333,6 +377,14 @@
 #define WM8958_MBC_B2_PG2_2                     0x242D
 #define WM8958_MBC_B1_PG2_1                     0x242E
 #define WM8958_MBC_B1_PG2_2                     0x242F
+#define WM8958_MBC_CROSSOVER_1                  0x2600
+#define WM8958_MBC_CROSSOVER_2                  0x2601
+#define WM8958_MBC_HPF_1                        0x2602
+#define WM8958_MBC_HPF_2                        0x2603
+#define WM8958_MBC_LPF_1                        0x2606
+#define WM8958_MBC_LPF_2                        0x2607
+#define WM8958_MBC_RMS_LIMIT_1                  0x260A
+#define WM8958_MBC_RMS_LIMIT_2                  0x260B
 #define WM8994_WRITE_SEQUENCER_0                0x3000
 #define WM8994_WRITE_SEQUENCER_1                0x3001
 #define WM8994_WRITE_SEQUENCER_2                0x3002
@@ -2389,6 +2441,10 @@
 /*
  * R548 (0x224) - FLL1 Control (5)
  */
+#define WM8958_FLL1_BYP                         0x8000  /* FLL1_BYP */
+#define WM8958_FLL1_BYP_MASK                    0x8000  /* FLL1_BYP */
+#define WM8958_FLL1_BYP_SHIFT                       15  /* FLL1_BYP */
+#define WM8958_FLL1_BYP_WIDTH                        1  /* FLL1_BYP */
 #define WM8994_FLL1_FRC_NCO_VAL_MASK            0x1F80  /* FLL1_FRC_NCO_VAL - [12:7] */
 #define WM8994_FLL1_FRC_NCO_VAL_SHIFT                7  /* FLL1_FRC_NCO_VAL - [12:7] */
 #define WM8994_FLL1_FRC_NCO_VAL_WIDTH                6  /* FLL1_FRC_NCO_VAL - [12:7] */
@@ -2403,6 +2459,24 @@
 #define WM8994_FLL1_REFCLK_SRC_SHIFT                 0  /* FLL1_REFCLK_SRC - [1:0] */
 #define WM8994_FLL1_REFCLK_SRC_WIDTH                 2  /* FLL1_REFCLK_SRC - [1:0] */
 
+/*
+ * R550 (0x226) - FLL1 EFS 1
+ */
+#define WM8958_FLL1_LAMBDA_MASK                 0xFFFF  /* FLL1_LAMBDA - [15:0] */
+#define WM8958_FLL1_LAMBDA_SHIFT                     0  /* FLL1_LAMBDA - [15:0] */
+#define WM8958_FLL1_LAMBDA_WIDTH                    16  /* FLL1_LAMBDA - [15:0] */
+
+/*
+ * R551 (0x227) - FLL1 EFS 2
+ */
+#define WM8958_FLL1_LFSR_SEL_MASK               0x0006  /* FLL1_LFSR_SEL - [2:1] */
+#define WM8958_FLL1_LFSR_SEL_SHIFT                   1  /* FLL1_LFSR_SEL - [2:1] */
+#define WM8958_FLL1_LFSR_SEL_WIDTH                   2  /* FLL1_LFSR_SEL - [2:1] */
+#define WM8958_FLL1_EFS_ENA                     0x0001  /* FLL1_EFS_ENA */
+#define WM8958_FLL1_EFS_ENA_MASK                0x0001  /* FLL1_EFS_ENA */
+#define WM8958_FLL1_EFS_ENA_SHIFT                    0  /* FLL1_EFS_ENA */
+#define WM8958_FLL1_EFS_ENA_WIDTH                    1  /* FLL1_EFS_ENA */
+
 /*
  * R576 (0x240) - FLL2 Control (1)
  */
@@ -2452,6 +2526,10 @@
 /*
  * R580 (0x244) - FLL2 Control (5)
  */
+#define WM8958_FLL2_BYP                         0x8000  /* FLL2_BYP */
+#define WM8958_FLL2_BYP_MASK                    0x8000  /* FLL2_BYP */
+#define WM8958_FLL2_BYP_SHIFT                       15  /* FLL2_BYP */
+#define WM8958_FLL2_BYP_WIDTH                        1  /* FLL2_BYP */
 #define WM8994_FLL2_FRC_NCO_VAL_MASK            0x1F80  /* FLL2_FRC_NCO_VAL - [12:7] */
 #define WM8994_FLL2_FRC_NCO_VAL_SHIFT                7  /* FLL2_FRC_NCO_VAL - [12:7] */
 #define WM8994_FLL2_FRC_NCO_VAL_WIDTH                6  /* FLL2_FRC_NCO_VAL - [12:7] */
@@ -2466,6 +2544,24 @@
 #define WM8994_FLL2_REFCLK_SRC_SHIFT                 0  /* FLL2_REFCLK_SRC - [1:0] */
 #define WM8994_FLL2_REFCLK_SRC_WIDTH                 2  /* FLL2_REFCLK_SRC - [1:0] */
 
+/*
+ * R582 (0x246) - FLL2 EFS 1
+ */
+#define WM8958_FLL2_LAMBDA_MASK                 0xFFFF  /* FLL2_LAMBDA - [15:0] */
+#define WM8958_FLL2_LAMBDA_SHIFT                     0  /* FLL2_LAMBDA - [15:0] */
+#define WM8958_FLL2_LAMBDA_WIDTH                    16  /* FLL2_LAMBDA - [15:0] */
+
+/*
+ * R583 (0x247) - FLL2 EFS 2
+ */
+#define WM8958_FLL2_LFSR_SEL_MASK               0x0006  /* FLL2_LFSR_SEL - [2:1] */
+#define WM8958_FLL2_LFSR_SEL_SHIFT                   1  /* FLL2_LFSR_SEL - [2:1] */
+#define WM8958_FLL2_LFSR_SEL_WIDTH                   2  /* FLL2_LFSR_SEL - [2:1] */
+#define WM8958_FLL2_EFS_ENA                     0x0001  /* FLL2_EFS_ENA */
+#define WM8958_FLL2_EFS_ENA_MASK                0x0001  /* FLL2_EFS_ENA */
+#define WM8958_FLL2_EFS_ENA_SHIFT                    0  /* FLL2_EFS_ENA */
+#define WM8958_FLL2_EFS_ENA_WIDTH                    1  /* FLL2_EFS_ENA */
+
 /*
  * R768 (0x300) - AIF1 Control (1)
  */
diff --git a/sound/soc/codecs/wm8994-tables.c b/sound/soc/codecs/wm8994-tables.c
index df5a8b9a250f..6ed19d9e7454 100644
--- a/sound/soc/codecs/wm8994-tables.c
+++ b/sound/soc/codecs/wm8994-tables.c
@@ -78,7 +78,7 @@ const struct wm8994_access_mask wm8994_access_masks[WM8994_CACHE_SIZE] = {
 	{ 0x0000, 0x0000 }, /* R74 */
 	{ 0x0000, 0x0000 }, /* R75 */
 	{ 0x8000, 0x8000 }, /* R76    - Charge Pump (1) */
-	{ 0x0000, 0x0000 }, /* R77 */
+	{ 0x8000, 0x8000 }, /* R77    - Charge Pump (2) */
 	{ 0x0000, 0x0000 }, /* R78 */
 	{ 0x0000, 0x0000 }, /* R79 */
 	{ 0x0000, 0x0000 }, /* R80 */
@@ -1651,7 +1651,7 @@ const u16 wm8994_reg_defaults[WM8994_CACHE_SIZE] = {
 	0x0000,     /* R74 */
 	0x0000,     /* R75 */
 	0x1F25,     /* R76    - Charge Pump (1) */
-	0x0000,     /* R77 */
+	0xAB19,     /* R77    - Charge Pump (2) */
 	0x0000,     /* R78 */
 	0x0000,     /* R79 */
 	0x0000,     /* R80 */
@@ -2124,8 +2124,8 @@ const u16 wm8994_reg_defaults[WM8994_CACHE_SIZE] = {
 	0x0000,     /* R547   - FLL1 Control (4) */
 	0x0C80,     /* R548   - FLL1 Control (5) */
 	0x0000,     /* R549 */
-	0x0000,     /* R550 */
-	0x0000,     /* R551 */
+	0x0000,     /* R550   - FLL1 EFS 1 */
+	0x0006,     /* R551   - FLL1 EFS 2 */
 	0x0000,     /* R552 */
 	0x0000,     /* R553 */
 	0x0000,     /* R554 */
@@ -2156,8 +2156,8 @@ const u16 wm8994_reg_defaults[WM8994_CACHE_SIZE] = {
 	0x0000,     /* R579   - FLL2 Control (4) */
 	0x0C80,     /* R580   - FLL2 Control (5) */
 	0x0000,     /* R581 */
-	0x0000,     /* R582 */
-	0x0000,     /* R583 */
+	0x0000,     /* R582   - FLL2 EFS 1 */
+	0x0006,     /* R583   - FLL2 EFS 2 */
 	0x0000,     /* R584 */
 	0x0000,     /* R585 */
 	0x0000,     /* R586 */
-- 
cgit v1.2.3


From c3f1386171a100d27d9fb978f474a6a330888af5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 25 Oct 2011 14:23:53 +0200
Subject: mfd: Enable register cache for wm8994 devices

As part of this we provide information about the registers that exist in
the device to the regmap core, drop the small amount of cache that the
core had been using and let regmap do the sync.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8994-core.c       | 82 +++++++++++++++--------------------------
 include/linux/mfd/wm8994/core.h |  2 -
 2 files changed, 30 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index 016769475ffb..aafac5b5f3a5 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -28,11 +28,7 @@
 #include <linux/mfd/wm8994/pdata.h>
 #include <linux/mfd/wm8994/registers.h>
 
-static int wm8994_read(struct wm8994 *wm8994, unsigned short reg,
-		       int bytes, void *dest)
-{
-	return regmap_raw_read(wm8994->regmap, reg, dest, bytes);
-}
+#include "wm8994.h"
 
 /**
  * wm8994_reg_read: Read a single WM8994 register.
@@ -68,12 +64,6 @@ int wm8994_bulk_read(struct wm8994 *wm8994, unsigned short reg,
 	return regmap_bulk_read(wm8994->regmap, reg, buf, count);
 }
 
-static int wm8994_write(struct wm8994 *wm8994, unsigned short reg,
-			int bytes, const void *src)
-{
-	return regmap_raw_write(wm8994->regmap, reg, src, bytes);
-}
-
 /**
  * wm8994_reg_write: Write a single WM8994 register.
  *
@@ -258,27 +248,14 @@ static int wm8994_suspend(struct device *dev)
 				WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD,
 				WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD);
 
-	/* GPIO configuration state is saved here since we may be configuring
-	 * the GPIO alternate functions even if we're not using the gpiolib
-	 * driver for them.
-	 */
-	ret = wm8994_read(wm8994, WM8994_GPIO_1, WM8994_NUM_GPIO_REGS * 2,
-			  &wm8994->gpio_regs);
-	if (ret < 0)
-		dev_err(dev, "Failed to save GPIO registers: %d\n", ret);
-
-	/* For similar reasons we also stash the regulator states */
-	ret = wm8994_read(wm8994, WM8994_LDO_1, WM8994_NUM_LDO_REGS * 2,
-			  &wm8994->ldo_regs);
-	if (ret < 0)
-		dev_err(dev, "Failed to save LDO registers: %d\n", ret);
-
 	/* Explicitly put the device into reset in case regulators
 	 * don't get disabled in order to ensure consistent restart.
 	 */
 	wm8994_reg_write(wm8994, WM8994_SOFTWARE_RESET,
 			 wm8994_reg_read(wm8994, WM8994_SOFTWARE_RESET));
 
+	regcache_mark_dirty(wm8994->regmap);
+
 	wm8994->suspended = true;
 
 	ret = regulator_bulk_disable(wm8994->num_supplies,
@@ -294,7 +271,7 @@ static int wm8994_suspend(struct device *dev)
 static int wm8994_resume(struct device *dev)
 {
 	struct wm8994 *wm8994 = dev_get_drvdata(dev);
-	int ret, i;
+	int ret;
 
 	/* We may have lied to the PM core about suspending */
 	if (!wm8994->suspended)
@@ -307,27 +284,12 @@ static int wm8994_resume(struct device *dev)
 		return ret;
 	}
 
-	/* Write register at a time as we use the cache on the CPU so store
-	 * it in native endian.
-	 */
-	for (i = 0; i < ARRAY_SIZE(wm8994->irq_masks_cur); i++) {
-		ret = wm8994_reg_write(wm8994, WM8994_INTERRUPT_STATUS_1_MASK
-				       + i, wm8994->irq_masks_cur[i]);
-		if (ret < 0)
-			dev_err(dev, "Failed to restore interrupt masks: %d\n",
-				ret);
+	ret = regcache_sync(wm8994->regmap);
+	if (ret != 0) {
+		dev_err(dev, "Failed to restore register map: %d\n", ret);
+		goto err_enable;
 	}
 
-	ret = wm8994_write(wm8994, WM8994_LDO_1, WM8994_NUM_LDO_REGS * 2,
-			   &wm8994->ldo_regs);
-	if (ret < 0)
-		dev_err(dev, "Failed to restore LDO registers: %d\n", ret);
-
-	ret = wm8994_write(wm8994, WM8994_GPIO_1, WM8994_NUM_GPIO_REGS * 2,
-			   &wm8994->gpio_regs);
-	if (ret < 0)
-		dev_err(dev, "Failed to restore GPIO registers: %d\n", ret);
-
 	/* Disable LDO pulldowns while the device is active */
 	wm8994_set_bits(wm8994, WM8994_PULL_CONTROL_2,
 			WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD,
@@ -336,6 +298,11 @@ static int wm8994_resume(struct device *dev)
 	wm8994->suspended = false;
 
 	return 0;
+
+err_enable:
+	regulator_bulk_disable(wm8994->num_supplies, wm8994->supplies);
+
+	return ret;
 }
 #endif
 
@@ -361,11 +328,6 @@ static int wm8994_ldo_in_use(struct wm8994_pdata *pdata, int ldo)
 }
 #endif
 
-static struct regmap_config wm8994_regmap_config = {
-	.reg_bits = 16,
-	.val_bits = 16,
-};
-
 /*
  * Instantiate the generic non-control parts of the device.
  */
@@ -594,6 +556,7 @@ static int wm8994_i2c_probe(struct i2c_client *i2c,
 			    const struct i2c_device_id *id)
 {
 	struct wm8994 *wm8994;
+	struct regmap_config *regmap_config;
 	int ret;
 
 	wm8994 = devm_kzalloc(&i2c->dev, sizeof(struct wm8994), GFP_KERNEL);
@@ -605,7 +568,22 @@ static int wm8994_i2c_probe(struct i2c_client *i2c,
 	wm8994->irq = i2c->irq;
 	wm8994->type = id->driver_data;
 
-	wm8994->regmap = regmap_init_i2c(i2c, &wm8994_regmap_config);
+	switch (wm8994->type) {
+	case WM1811:
+		regmap_config = &wm1811_regmap_config;
+		break;
+	case WM8994:
+		regmap_config = &wm8994_regmap_config;
+		break;
+	case WM8958:
+		regmap_config = &wm8958_regmap_config;
+		break;
+	default:
+		dev_err(wm8994->dev, "Unknown device type %d\n", wm8994->type);
+		return -EINVAL;
+	}
+
+	wm8994->regmap = regmap_init_i2c(i2c, regmap_config);
 	if (IS_ERR(wm8994->regmap)) {
 		ret = PTR_ERR(wm8994->regmap);
 		dev_err(wm8994->dev, "Failed to allocate register map: %d\n",
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index f44bdb7273bd..d98593d52e7c 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -70,8 +70,6 @@ struct wm8994 {
 
 	/* Used over suspend/resume */
 	bool suspended;
-	u16 ldo_regs[WM8994_NUM_LDO_REGS];
-	u16 gpio_regs[WM8994_NUM_GPIO_REGS];
 
 	struct regulator_dev *dbvdd;
 	int num_supplies;
-- 
cgit v1.2.3


From 43913e5ef9e9e05141418577523456e6b23777eb Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 28 Nov 2011 18:48:14 +0000
Subject: mfd: Constify WM8994 regulator_init_data

The driver has no need to modify the regulator_init_data so declare it
const to allow machine code to do so.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8994/pdata.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index 54e2fef587d5..b00897a6c461 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -23,7 +23,7 @@ struct wm8994_ldo_pdata {
 	int enable;
 
 	const char *supply;
-	struct regulator_init_data *init_data;
+	const struct regulator_init_data *init_data;
 };
 
 #define WM8994_CONFIGURE_GPIO 0x10000
-- 
cgit v1.2.3


From 19f9557174d61fcfe132a6846a83c36437ff014e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 1 Dec 2011 13:53:18 +0000
Subject: mfd: Add missing mutex.h inclusion to WM8994 core.h

struct wm8994 includes a mutex so we need to include mutex.h before we
declare it. All current users rely on this being done implicitly.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8994/core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index d98593d52e7c..f537d2eae390 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -15,6 +15,7 @@
 #ifndef __MFD_WM8994_CORE_H__
 #define __MFD_WM8994_CORE_H__
 
+#include <linux/mutex.h>
 #include <linux/interrupt.h>
 
 enum wm8994_type {
-- 
cgit v1.2.3


From 7ed5849c2861faf9c13f027868f635bd782a50e5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 1 Dec 2011 13:55:49 +0000
Subject: mfd: Mark WM1811 GPIO6 register volatile for later revisions

For later chip revisions the WM1811 GPIO6 register is always volatile so
store the device revision when initialising the driver and then check at
runtime if we're running on a newer device.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8994-core.c       | 12 +++++++-----
 drivers/mfd/wm8994-regmap.c     | 19 +++++++++++++++++--
 include/linux/mfd/wm8994/core.h |  1 +
 3 files changed, 25 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index 8b4f22a88e22..93f8599aba32 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -446,15 +446,16 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 			ret);
 		goto err_enable;
 	}
+	wm8994->revision = ret;
 
 	switch (wm8994->type) {
 	case WM8994:
-		switch (ret) {
+		switch (wm8994->revision) {
 		case 0:
 		case 1:
 			dev_warn(wm8994->dev,
 				 "revision %c not fully supported\n",
-				 'A' + ret);
+				 'A' + wm8994->revision);
 			break;
 		default:
 			break;
@@ -462,14 +463,15 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 		break;
 	case WM1811:
 		/* Revision C did not change the relevant layer */
-		if (ret > 1)
-			ret++;
+		if (wm8994->revision > 1)
+			wm8994->revision++;
 		break;
 	default:
 		break;
 	}
 
-	dev_info(wm8994->dev, "%s revision %c\n", devname, 'A' + ret);
+	dev_info(wm8994->dev, "%s revision %c\n", devname,
+		 'A' + wm8994->revision);
 
 	switch (wm8994->type) {
 	case WM1811:
diff --git a/drivers/mfd/wm8994-regmap.c b/drivers/mfd/wm8994-regmap.c
index 03594c257ef2..c598ae69b8ff 100644
--- a/drivers/mfd/wm8994-regmap.c
+++ b/drivers/mfd/wm8994-regmap.c
@@ -12,6 +12,7 @@
  *
  */
 
+#include <linux/mfd/wm8994/core.h>
 #include <linux/mfd/wm8994/registers.h>
 #include <linux/regmap.h>
 
@@ -210,7 +211,6 @@ static struct reg_default wm1811_defaults[] = {
 	{ 0x0702, 0xA101 },    /* R1794 - Pull Control (BCLK2) */
 	{ 0x0703, 0xA101 },    /* R1795 - Pull Control (DACLRCLK2) */
 	{ 0x0704, 0xA101 },    /* R1796 - Pull Control (DACDAT2) */
-	{ 0x0705, 0xA101 },    /* R1797 - GPIO 6 */
 	{ 0x0707, 0xA101 },    /* R1799 - GPIO 8 */
 	{ 0x0708, 0xA101 },    /* R1800 - GPIO 9 */
 	{ 0x0709, 0xA101 },    /* R1801 - GPIO 10 */
@@ -1145,6 +1145,21 @@ static bool wm8994_volatile_register(struct device *dev, unsigned int reg)
 	}
 }
 
+static bool wm1811_volatile_register(struct device *dev, unsigned int reg)
+{
+	struct wm8994 *wm8994 = dev_get_drvdata(dev);
+
+	switch (reg) {
+	case WM8994_GPIO_6:
+		if (wm8994->revision > 1)
+			return true;
+		else
+			return false;
+	default:
+		return wm8994_volatile_register(dev, reg);
+	}
+}
+
 static bool wm8958_volatile_register(struct device *dev, unsigned int reg)
 {
 	switch (reg) {
@@ -1185,7 +1200,7 @@ struct regmap_config wm1811_regmap_config = {
 	.num_reg_defaults = ARRAY_SIZE(wm1811_defaults),
 
 	.max_register = WM8994_MAX_REGISTER,
-	.volatile_reg = wm8994_volatile_register,
+	.volatile_reg = wm1811_volatile_register,
 	.readable_reg = wm1811_readable_register,
 };
 
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index f537d2eae390..4dd4409678ce 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -56,6 +56,7 @@ struct wm8994 {
 	struct mutex irq_lock;
 
 	enum wm8994_type type;
+	int revision;
 
 	struct device *dev;
 	struct regmap *regmap;
-- 
cgit v1.2.3


From 8ab30691826fc05efa47c4ffba19b80496bb3a2c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 25 Oct 2011 10:19:04 +0200
Subject: mfd: Convert wm8994 to use generic regmap irq_chip

Factor out the irq_chip implementation, substantially reducing the code
size for the driver.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig             |   1 +
 drivers/mfd/wm8994-irq.c        | 196 +++++++---------------------------------
 include/linux/mfd/wm8994/core.h |   3 +-
 3 files changed, 35 insertions(+), 165 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index f1391c21ef26..017f6dbab333 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -477,6 +477,7 @@ config MFD_WM8994
 	bool "Support Wolfson Microelectronics WM8994"
 	select MFD_CORE
 	select REGMAP_I2C
+	select REGMAP_IRQ
 	depends on I2C=y && GENERIC_HARDIRQS
 	help
 	  The WM8994 is a highly integrated hi-fi CODEC designed for
diff --git a/drivers/mfd/wm8994-irq.c b/drivers/mfd/wm8994-irq.c
index f9dd6b691258..46b20c445ecf 100644
--- a/drivers/mfd/wm8994-irq.c
+++ b/drivers/mfd/wm8994-irq.c
@@ -18,238 +18,127 @@
 #include <linux/irq.h>
 #include <linux/mfd/core.h>
 #include <linux/interrupt.h>
+#include <linux/regmap.h>
 
 #include <linux/mfd/wm8994/core.h>
 #include <linux/mfd/wm8994/registers.h>
 
 #include <linux/delay.h>
 
-struct wm8994_irq_data {
-	int reg;
-	int mask;
-};
-
-static struct wm8994_irq_data wm8994_irqs[] = {
+static struct regmap_irq wm8994_irqs[] = {
 	[WM8994_IRQ_TEMP_SHUT] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_TEMP_SHUT_EINT,
 	},
 	[WM8994_IRQ_MIC1_DET] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_MIC1_DET_EINT,
 	},
 	[WM8994_IRQ_MIC1_SHRT] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_MIC1_SHRT_EINT,
 	},
 	[WM8994_IRQ_MIC2_DET] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_MIC2_DET_EINT,
 	},
 	[WM8994_IRQ_MIC2_SHRT] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_MIC2_SHRT_EINT,
 	},
 	[WM8994_IRQ_FLL1_LOCK] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_FLL1_LOCK_EINT,
 	},
 	[WM8994_IRQ_FLL2_LOCK] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_FLL2_LOCK_EINT,
 	},
 	[WM8994_IRQ_SRC1_LOCK] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_SRC1_LOCK_EINT,
 	},
 	[WM8994_IRQ_SRC2_LOCK] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_SRC2_LOCK_EINT,
 	},
 	[WM8994_IRQ_AIF1DRC1_SIG_DET] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_AIF1DRC1_SIG_DET,
 	},
 	[WM8994_IRQ_AIF1DRC2_SIG_DET] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_AIF1DRC2_SIG_DET_EINT,
 	},
 	[WM8994_IRQ_AIF2DRC_SIG_DET] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_AIF2DRC_SIG_DET_EINT,
 	},
 	[WM8994_IRQ_FIFOS_ERR] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_FIFOS_ERR_EINT,
 	},
 	[WM8994_IRQ_WSEQ_DONE] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_WSEQ_DONE_EINT,
 	},
 	[WM8994_IRQ_DCS_DONE] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_DCS_DONE_EINT,
 	},
 	[WM8994_IRQ_TEMP_WARN] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_TEMP_WARN_EINT,
 	},
 	[WM8994_IRQ_GPIO(1)] = {
-		.reg = 1,
 		.mask = WM8994_GP1_EINT,
 	},
 	[WM8994_IRQ_GPIO(2)] = {
-		.reg = 1,
 		.mask = WM8994_GP2_EINT,
 	},
 	[WM8994_IRQ_GPIO(3)] = {
-		.reg = 1,
 		.mask = WM8994_GP3_EINT,
 	},
 	[WM8994_IRQ_GPIO(4)] = {
-		.reg = 1,
 		.mask = WM8994_GP4_EINT,
 	},
 	[WM8994_IRQ_GPIO(5)] = {
-		.reg = 1,
 		.mask = WM8994_GP5_EINT,
 	},
 	[WM8994_IRQ_GPIO(6)] = {
-		.reg = 1,
 		.mask = WM8994_GP6_EINT,
 	},
 	[WM8994_IRQ_GPIO(7)] = {
-		.reg = 1,
 		.mask = WM8994_GP7_EINT,
 	},
 	[WM8994_IRQ_GPIO(8)] = {
-		.reg = 1,
 		.mask = WM8994_GP8_EINT,
 	},
 	[WM8994_IRQ_GPIO(9)] = {
-		.reg = 1,
 		.mask = WM8994_GP8_EINT,
 	},
 	[WM8994_IRQ_GPIO(10)] = {
-		.reg = 1,
 		.mask = WM8994_GP10_EINT,
 	},
 	[WM8994_IRQ_GPIO(11)] = {
-		.reg = 1,
 		.mask = WM8994_GP11_EINT,
 	},
 };
 
-static inline struct wm8994_irq_data *irq_to_wm8994_irq(struct wm8994 *wm8994,
-							int irq)
-{
-	return &wm8994_irqs[irq - wm8994->irq_base];
-}
-
-static void wm8994_irq_lock(struct irq_data *data)
-{
-	struct wm8994 *wm8994 = irq_data_get_irq_chip_data(data);
-
-	mutex_lock(&wm8994->irq_lock);
-}
-
-static void wm8994_irq_sync_unlock(struct irq_data *data)
-{
-	struct wm8994 *wm8994 = irq_data_get_irq_chip_data(data);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(wm8994->irq_masks_cur); i++) {
-		/* If there's been a change in the mask write it back
-		 * to the hardware. */
-		if (wm8994->irq_masks_cur[i] != wm8994->irq_masks_cache[i]) {
-			wm8994->irq_masks_cache[i] = wm8994->irq_masks_cur[i];
-			wm8994_reg_write(wm8994,
-					 WM8994_INTERRUPT_STATUS_1_MASK + i,
-					 wm8994->irq_masks_cur[i]);
-		}
-	}
-
-	mutex_unlock(&wm8994->irq_lock);
-}
-
-static void wm8994_irq_enable(struct irq_data *data)
-{
-	struct wm8994 *wm8994 = irq_data_get_irq_chip_data(data);
-	struct wm8994_irq_data *irq_data = irq_to_wm8994_irq(wm8994,
-							     data->irq);
-
-	wm8994->irq_masks_cur[irq_data->reg - 1] &= ~irq_data->mask;
-}
-
-static void wm8994_irq_disable(struct irq_data *data)
-{
-	struct wm8994 *wm8994 = irq_data_get_irq_chip_data(data);
-	struct wm8994_irq_data *irq_data = irq_to_wm8994_irq(wm8994,
-							     data->irq);
-
-	wm8994->irq_masks_cur[irq_data->reg - 1] |= irq_data->mask;
-}
+static struct regmap_irq_chip wm8994_irq_chip = {
+	.name = "wm8994",
+	.irqs = wm8994_irqs,
+	.num_irqs = ARRAY_SIZE(wm8994_irqs),
 
-static struct irq_chip wm8994_irq_chip = {
-	.name			= "wm8994",
-	.irq_bus_lock		= wm8994_irq_lock,
-	.irq_bus_sync_unlock	= wm8994_irq_sync_unlock,
-	.irq_disable		= wm8994_irq_disable,
-	.irq_enable		= wm8994_irq_enable,
+	.num_regs = 2,
+	.status_base = WM8994_INTERRUPT_STATUS_1,
+	.mask_base = WM8994_INTERRUPT_STATUS_1_MASK,
+	.ack_base = WM8994_INTERRUPT_STATUS_1,
 };
 
-/* The processing of the primary interrupt occurs in a thread so that
- * we can interact with the device over I2C or SPI. */
-static irqreturn_t wm8994_irq_thread(int irq, void *data)
-{
-	struct wm8994 *wm8994 = data;
-	unsigned int i;
-	u16 status[WM8994_NUM_IRQ_REGS];
-	int ret;
-
-	ret = wm8994_bulk_read(wm8994, WM8994_INTERRUPT_STATUS_1,
-			       WM8994_NUM_IRQ_REGS, status);
-	if (ret < 0) {
-		dev_err(wm8994->dev, "Failed to read interrupt status: %d\n",
-			ret);
-		return IRQ_NONE;
-	}
-
-	/* Bit swap and apply masking */
-	for (i = 0; i < WM8994_NUM_IRQ_REGS; i++) {
-		status[i] = be16_to_cpu(status[i]);
-		status[i] &= ~wm8994->irq_masks_cur[i];
-	}
-
-	/* Ack any unmasked IRQs */
-	for (i = 0; i < ARRAY_SIZE(status); i++) {
-		if (status[i])
-			wm8994_reg_write(wm8994, WM8994_INTERRUPT_STATUS_1 + i,
-					 status[i]);
-	}
-
-	/* Report */
-	for (i = 0; i < ARRAY_SIZE(wm8994_irqs); i++) {
-		if (status[wm8994_irqs[i].reg - 1] & wm8994_irqs[i].mask)
-			handle_nested_irq(wm8994->irq_base + i);
-	}
-
-	return IRQ_HANDLED;
-}
-
 int wm8994_irq_init(struct wm8994 *wm8994)
 {
-	int i, cur_irq, ret;
-
-	mutex_init(&wm8994->irq_lock);
-
-	/* Mask the individual interrupt sources */
-	for (i = 0; i < ARRAY_SIZE(wm8994->irq_masks_cur); i++) {
-		wm8994->irq_masks_cur[i] = 0xffff;
-		wm8994->irq_masks_cache[i] = 0xffff;
-		wm8994_reg_write(wm8994, WM8994_INTERRUPT_STATUS_1_MASK + i,
-				 0xffff);
-	}
+	int ret;
 
 	if (!wm8994->irq) {
 		dev_warn(wm8994->dev,
@@ -264,30 +153,12 @@ int wm8994_irq_init(struct wm8994 *wm8994)
 		return 0;
 	}
 
-	/* Register them with genirq */
-	for (cur_irq = wm8994->irq_base;
-	     cur_irq < ARRAY_SIZE(wm8994_irqs) + wm8994->irq_base;
-	     cur_irq++) {
-		irq_set_chip_data(cur_irq, wm8994);
-		irq_set_chip_and_handler(cur_irq, &wm8994_irq_chip,
-					 handle_edge_irq);
-		irq_set_nested_thread(cur_irq, 1);
-
-		/* ARM needs us to explicitly flag the IRQ as valid
-		 * and will set them noprobe when we do so. */
-#ifdef CONFIG_ARM
-		set_irq_flags(cur_irq, IRQF_VALID);
-#else
-		irq_set_noprobe(cur_irq);
-#endif
-	}
-
-	ret = request_threaded_irq(wm8994->irq, NULL, wm8994_irq_thread,
-				   IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
-				   "wm8994", wm8994);
+	ret = regmap_add_irq_chip(wm8994->regmap, wm8994->irq,
+				  IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+				  wm8994->irq_base, &wm8994_irq_chip,
+				  &wm8994->irq_data);
 	if (ret != 0) {
-		dev_err(wm8994->dev, "Failed to request IRQ %d: %d\n",
-			wm8994->irq, ret);
+		dev_err(wm8994->dev, "Failed to register IRQ chip: %d\n", ret);
 		return ret;
 	}
 
@@ -299,6 +170,5 @@ int wm8994_irq_init(struct wm8994 *wm8994)
 
 void wm8994_irq_exit(struct wm8994 *wm8994)
 {
-	if (wm8994->irq)
-		free_irq(wm8994->irq, wm8994);
+	regmap_del_irq_chip(wm8994->irq, wm8994->irq_data);
 }
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index 4dd4409678ce..9eff2a351ec5 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -67,8 +67,7 @@ struct wm8994 {
 	int irq_base;
 
 	int irq;
-	u16 irq_masks_cur[WM8994_NUM_IRQ_REGS];
-	u16 irq_masks_cache[WM8994_NUM_IRQ_REGS];
+	struct regmap_irq_chip_data *irq_data;
 
 	/* Used over suspend/resume */
 	bool suspended;
-- 
cgit v1.2.3


From ba2780c796badfc3741c7cb499a575ca49f17e6d Mon Sep 17 00:00:00 2001
From: Manu Abraham <abraham.manu@gmail.com>
Date: Sun, 13 Nov 2011 18:47:44 -0300
Subject: [media] DVB: Query DVB frontend delivery capabilities

 Currently, for any multi-standard frontend it is assumed that it just
 has a single standard capability. This is fine in some cases, but
 makes things hard when there are incompatible standards in conjuction.
 Eg: DVB-S can be seen as a subset of DVB-S2, but the same doesn't hold
 the same for DSS. This is not specific to any driver as it is, but a
 generic issue. This was handled correctly in the multiproto tree,
 while such functionality is missing from the v5 API update.

 http://www.linuxtv.org/pipermail/vdr/2008-November/018417.html

 Later on a FE_CAN_2G_MODULATION was added as a hack to workaround this
 issue in the v5 API, but that hack is incapable of addressing the
 issue, as it can be used to simply distinguish between DVB-S and
 DVB-S2 alone, or another X vs X2 modulation. If there are more systems,
 then you have a potential issue.

 An application needs to query the device capabilities before requesting
 any operation from the device.

Signed-off-by: Manu Abraham <abraham.manu@gmail.com>
Acked-by: Andreas Oberritter <obi@linuxtv.org>
Acked-by: Oliver Endriss <o.endriss@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb/dvb-core/dvb_frontend.c | 36 +++++++++++++++++++++++++++++++
 include/linux/dvb/frontend.h              |  4 +++-
 include/linux/dvb/version.h               |  2 +-
 3 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c
index c849455458ea..821b2250ec70 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.c
@@ -974,6 +974,8 @@ static struct dtv_cmds_h dtv_cmds[DTV_MAX_COMMAND + 1] = {
 	_DTV_CMD(DTV_GUARD_INTERVAL, 0, 0),
 	_DTV_CMD(DTV_TRANSMISSION_MODE, 0, 0),
 	_DTV_CMD(DTV_HIERARCHY, 0, 0),
+
+	_DTV_CMD(DTV_ENUM_DELSYS, 0, 0),
 };
 
 static void dtv_property_dump(struct dtv_property *tvp)
@@ -1209,6 +1211,37 @@ static int dvb_frontend_ioctl_legacy(struct file *file,
 static int dvb_frontend_ioctl_properties(struct file *file,
 			unsigned int cmd, void *parg);
 
+static void dtv_set_default_delivery_caps(const struct dvb_frontend *fe, struct dtv_property *p)
+{
+	const struct dvb_frontend_info *info = &fe->ops.info;
+	u32 ncaps = 0;
+
+	switch (info->type) {
+	case FE_QPSK:
+		p->u.buffer.data[ncaps++] = SYS_DVBS;
+		if (info->caps & FE_CAN_2G_MODULATION)
+			p->u.buffer.data[ncaps++] = SYS_DVBS2;
+		if (info->caps & FE_CAN_TURBO_FEC)
+			p->u.buffer.data[ncaps++] = SYS_TURBO;
+		break;
+	case FE_QAM:
+		p->u.buffer.data[ncaps++] = SYS_DVBC_ANNEX_AC;
+		break;
+	case FE_OFDM:
+		p->u.buffer.data[ncaps++] = SYS_DVBT;
+		if (info->caps & FE_CAN_2G_MODULATION)
+			p->u.buffer.data[ncaps++] = SYS_DVBT2;
+		break;
+	case FE_ATSC:
+		if (info->caps & (FE_CAN_8VSB | FE_CAN_16VSB))
+			p->u.buffer.data[ncaps++] = SYS_ATSC;
+		if (info->caps & (FE_CAN_QAM_16 | FE_CAN_QAM_64 | FE_CAN_QAM_128 | FE_CAN_QAM_256))
+			p->u.buffer.data[ncaps++] = SYS_DVBC_ANNEX_B;
+		break;
+	}
+	p->u.buffer.len = ncaps;
+}
+
 static int dtv_property_process_get(struct dvb_frontend *fe,
 				    struct dtv_property *tvp,
 				    struct file *file)
@@ -1229,6 +1262,9 @@ static int dtv_property_process_get(struct dvb_frontend *fe,
 	}
 
 	switch(tvp->cmd) {
+	case DTV_ENUM_DELSYS:
+		dtv_set_default_delivery_caps(fe, tvp);
+		break;
 	case DTV_FREQUENCY:
 		tvp->u.data = c->frequency;
 		break;
diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index d9251df867b5..cb114f52ccf7 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -316,7 +316,9 @@ struct dvb_frontend_event {
 
 #define DTV_DVBT2_PLP_ID	43
 
-#define DTV_MAX_COMMAND				DTV_DVBT2_PLP_ID
+#define DTV_ENUM_DELSYS		44
+
+#define DTV_MAX_COMMAND				DTV_ENUM_DELSYS
 
 typedef enum fe_pilot {
 	PILOT_ON,
diff --git a/include/linux/dvb/version.h b/include/linux/dvb/version.h
index 66594b1d5d7b..0559e2bd38f9 100644
--- a/include/linux/dvb/version.h
+++ b/include/linux/dvb/version.h
@@ -24,6 +24,6 @@
 #define _DVBVERSION_H_
 
 #define DVB_API_VERSION 5
-#define DVB_API_VERSION_MINOR 4
+#define DVB_API_VERSION_MINOR 5
 
 #endif /*_DVBVERSION_H_*/
-- 
cgit v1.2.3


From 15c9a0acc3f7873db4b7d35d016729b2dc229b49 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Mon, 12 Dec 2011 09:25:57 -0700
Subject: of: create of_phandle_args to simplify return of phandle parsing data

of_parse_phandle_with_args() needs to return quite a bit of data.  Rather
than making each datum a separate **out_ argument, this patch creates
struct of_phandle_args to contain all the returned data and reworks the
user of the function.  This patch also enables of_parse_phandle_with_args()
to return the device node pointer for the phandle node.

This patch also ends up being fairly major surgery to
of_parse_handle_with_args().  The existing structure didn't work well
when extending to use of_phandle_args, and I discovered bugs during testing.
I also took the opportunity to rename the function to be like the
existing of_parse_phandle().

v2: - moved declaration of of_phandle_args to fix compile on non-DT builds
    - fixed incorrect index in example usage
    - fixed incorrect return code handling for empty entries

Reviewed-by: Shawn Guo <shawn.guo@freescale.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/of/base.c          | 146 +++++++++++++++++++++++----------------------
 drivers/of/gpio.c          |  43 ++++++-------
 include/asm-generic/gpio.h |   5 +-
 include/linux/of.h         |  11 +++-
 include/linux/of_gpio.h    |  10 ++--
 5 files changed, 112 insertions(+), 103 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 9b6588ef0673..c6db9ab9046e 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -824,17 +824,19 @@ of_parse_phandle(struct device_node *np, const char *phandle_name, int index)
 EXPORT_SYMBOL(of_parse_phandle);
 
 /**
- * of_parse_phandles_with_args - Find a node pointed by phandle in a list
+ * of_parse_phandle_with_args() - Find a node pointed by phandle in a list
  * @np:		pointer to a device tree node containing a list
  * @list_name:	property name that contains a list
  * @cells_name:	property name that specifies phandles' arguments count
  * @index:	index of a phandle to parse out
- * @out_node:	optional pointer to device_node struct pointer (will be filled)
- * @out_args:	optional pointer to arguments pointer (will be filled)
+ * @out_args:	optional pointer to output arguments structure (will be filled)
  *
  * This function is useful to parse lists of phandles and their arguments.
- * Returns 0 on success and fills out_node and out_args, on error returns
- * appropriate errno value.
+ * Returns 0 on success and fills out_args, on error returns appropriate
+ * errno value.
+ *
+ * Caller is responsible to call of_node_put() on the returned out_args->node
+ * pointer.
  *
  * Example:
  *
@@ -851,94 +853,96 @@ EXPORT_SYMBOL(of_parse_phandle);
  * }
  *
  * To get a device_node of the `node2' node you may call this:
- * of_parse_phandles_with_args(node3, "list", "#list-cells", 2, &node2, &args);
+ * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args);
  */
-int of_parse_phandles_with_args(struct device_node *np, const char *list_name,
+int of_parse_phandle_with_args(struct device_node *np, const char *list_name,
 				const char *cells_name, int index,
-				struct device_node **out_node,
-				const void **out_args)
+				struct of_phandle_args *out_args)
 {
-	int ret = -EINVAL;
-	const __be32 *list;
-	const __be32 *list_end;
-	int size;
-	int cur_index = 0;
+	const __be32 *list, *list_end;
+	int size, cur_index = 0;
+	uint32_t count = 0;
 	struct device_node *node = NULL;
-	const void *args = NULL;
+	phandle phandle;
 
+	/* Retrieve the phandle list property */
 	list = of_get_property(np, list_name, &size);
-	if (!list) {
-		ret = -ENOENT;
-		goto err0;
-	}
+	if (!list)
+		return -EINVAL;
 	list_end = list + size / sizeof(*list);
 
+	/* Loop over the phandles until all the requested entry is found */
 	while (list < list_end) {
-		const __be32 *cells;
-		phandle phandle;
+		count = 0;
 
+		/*
+		 * If phandle is 0, then it is an empty entry with no
+		 * arguments.  Skip forward to the next entry.
+		 */
 		phandle = be32_to_cpup(list++);
-		args = list;
-
-		/* one cell hole in the list = <>; */
-		if (!phandle)
-			goto next;
-
-		node = of_find_node_by_phandle(phandle);
-		if (!node) {
-			pr_debug("%s: could not find phandle\n",
-				 np->full_name);
-			goto err0;
-		}
+		if (phandle) {
+			/*
+			 * Find the provider node and parse the #*-cells
+			 * property to determine the argument length
+			 */
+			node = of_find_node_by_phandle(phandle);
+			if (!node) {
+				pr_err("%s: could not find phandle\n",
+					 np->full_name);
+				break;
+			}
+			if (of_property_read_u32(node, cells_name, &count)) {
+				pr_err("%s: could not get %s for %s\n",
+					 np->full_name, cells_name,
+					 node->full_name);
+				break;
+			}
 
-		cells = of_get_property(node, cells_name, &size);
-		if (!cells || size != sizeof(*cells)) {
-			pr_debug("%s: could not get %s for %s\n",
-				 np->full_name, cells_name, node->full_name);
-			goto err1;
+			/*
+			 * Make sure that the arguments actually fit in the
+			 * remaining property data length
+			 */
+			if (list + count > list_end) {
+				pr_err("%s: arguments longer than property\n",
+					 np->full_name);
+				break;
+			}
 		}
 
-		list += be32_to_cpup(cells);
-		if (list > list_end) {
-			pr_debug("%s: insufficient arguments length\n",
-				 np->full_name);
-			goto err1;
+		/*
+		 * All of the error cases above bail out of the loop, so at
+		 * this point, the parsing is successful. If the requested
+		 * index matches, then fill the out_args structure and return,
+		 * or return -ENOENT for an empty entry.
+		 */
+		if (cur_index == index) {
+			if (!phandle)
+				return -ENOENT;
+
+			if (out_args) {
+				int i;
+				if (WARN_ON(count > MAX_PHANDLE_ARGS))
+					count = MAX_PHANDLE_ARGS;
+				out_args->np = node;
+				out_args->args_count = count;
+				for (i = 0; i < count; i++)
+					out_args->args[i] = be32_to_cpup(list++);
+			}
+			return 0;
 		}
-next:
-		if (cur_index == index)
-			break;
 
 		of_node_put(node);
 		node = NULL;
-		args = NULL;
+		list += count;
 		cur_index++;
 	}
 
-	if (!node) {
-		/*
-		 * args w/o node indicates that the loop above has stopped at
-		 * the 'hole' cell. Report this differently.
-		 */
-		if (args)
-			ret = -EEXIST;
-		else
-			ret = -ENOENT;
-		goto err0;
-	}
-
-	if (out_node)
-		*out_node = node;
-	if (out_args)
-		*out_args = args;
-
-	return 0;
-err1:
-	of_node_put(node);
-err0:
-	pr_debug("%s failed with status %d\n", __func__, ret);
-	return ret;
+	/* Loop exited without finding a valid entry; return an error */
+	if (node)
+		of_node_put(node);
+	return -EINVAL;
 }
-EXPORT_SYMBOL(of_parse_phandles_with_args);
+EXPORT_SYMBOL(of_parse_phandle_with_args);
 
 /**
  * prom_add_property - Add a property to a node
diff --git a/drivers/of/gpio.c b/drivers/of/gpio.c
index ea4f2faab222..7e62d15d60f6 100644
--- a/drivers/of/gpio.c
+++ b/drivers/of/gpio.c
@@ -35,32 +35,27 @@ int of_get_named_gpio_flags(struct device_node *np, const char *propname,
                            int index, enum of_gpio_flags *flags)
 {
 	int ret;
-	struct device_node *gpio_np;
 	struct gpio_chip *gc;
-	int size;
-	const void *gpio_spec;
-	const __be32 *gpio_cells;
+	struct of_phandle_args gpiospec;
 
-	ret = of_parse_phandles_with_args(np, propname, "#gpio-cells", index,
-					  &gpio_np, &gpio_spec);
+	ret = of_parse_phandle_with_args(np, propname, "#gpio-cells", index,
+					 &gpiospec);
 	if (ret) {
 		pr_debug("%s: can't parse gpios property\n", __func__);
 		goto err0;
 	}
 
-	gc = of_node_to_gpiochip(gpio_np);
+	gc = of_node_to_gpiochip(gpiospec.np);
 	if (!gc) {
 		pr_debug("%s: gpio controller %s isn't registered\n",
-			 np->full_name, gpio_np->full_name);
+			 np->full_name, gpiospec.np->full_name);
 		ret = -ENODEV;
 		goto err1;
 	}
 
-	gpio_cells = of_get_property(gpio_np, "#gpio-cells", &size);
-	if (!gpio_cells || size != sizeof(*gpio_cells) ||
-			be32_to_cpup(gpio_cells) != gc->of_gpio_n_cells) {
+	if (gpiospec.args_count != gc->of_gpio_n_cells) {
 		pr_debug("%s: wrong #gpio-cells for %s\n",
-			 np->full_name, gpio_np->full_name);
+			 np->full_name, gpiospec.np->full_name);
 		ret = -EINVAL;
 		goto err1;
 	}
@@ -69,13 +64,13 @@ int of_get_named_gpio_flags(struct device_node *np, const char *propname,
 	if (flags)
 		*flags = 0;
 
-	ret = gc->of_xlate(gc, np, gpio_spec, flags);
+	ret = gc->of_xlate(gc, &gpiospec, flags);
 	if (ret < 0)
 		goto err1;
 
 	ret += gc->base;
 err1:
-	of_node_put(gpio_np);
+	of_node_put(gpiospec.np);
 err0:
 	pr_debug("%s exited with status %d\n", __func__, ret);
 	return ret;
@@ -105,8 +100,8 @@ unsigned int of_gpio_count(struct device_node *np)
 	do {
 		int ret;
 
-		ret = of_parse_phandles_with_args(np, "gpios", "#gpio-cells",
-						  cnt, NULL, NULL);
+		ret = of_parse_phandle_with_args(np, "gpios", "#gpio-cells",
+						 cnt, NULL);
 		/* A hole in the gpios = <> counts anyway. */
 		if (ret < 0 && ret != -EEXIST)
 			break;
@@ -127,12 +122,9 @@ EXPORT_SYMBOL(of_gpio_count);
  * gpio chips. This function performs only one sanity check: whether gpio
  * is less than ngpios (that is specified in the gpio_chip).
  */
-int of_gpio_simple_xlate(struct gpio_chip *gc, struct device_node *np,
-			 const void *gpio_spec, u32 *flags)
+int of_gpio_simple_xlate(struct gpio_chip *gc,
+			 const struct of_phandle_args *gpiospec, u32 *flags)
 {
-	const __be32 *gpio = gpio_spec;
-	const u32 n = be32_to_cpup(gpio);
-
 	/*
 	 * We're discouraging gpio_cells < 2, since that way you'll have to
 	 * write your own xlate function (that will have to retrive the GPIO
@@ -144,13 +136,16 @@ int of_gpio_simple_xlate(struct gpio_chip *gc, struct device_node *np,
 		return -EINVAL;
 	}
 
-	if (n > gc->ngpio)
+	if (WARN_ON(gpiospec->args_count < gc->of_gpio_n_cells))
+		return -EINVAL;
+
+	if (gpiospec->args[0] > gc->ngpio)
 		return -EINVAL;
 
 	if (flags)
-		*flags = be32_to_cpu(gpio[1]);
+		*flags = gpiospec->args[1];
 
-	return n;
+	return gpiospec->args[0];
 }
 EXPORT_SYMBOL(of_gpio_simple_xlate);
 
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 6b10bdc105d6..d466c8d8826d 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -4,6 +4,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/errno.h>
+#include <linux/of.h>
 
 #ifdef CONFIG_GPIOLIB
 
@@ -128,8 +129,8 @@ struct gpio_chip {
 	 */
 	struct device_node *of_node;
 	int of_gpio_n_cells;
-	int (*of_xlate)(struct gpio_chip *gc, struct device_node *np,
-		        const void *gpio_spec, u32 *flags);
+	int (*of_xlate)(struct gpio_chip *gc,
+		        const struct of_phandle_args *gpiospec, u32 *flags);
 #endif
 };
 
diff --git a/include/linux/of.h b/include/linux/of.h
index 4948552d60f5..ea44fd72af5f 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -65,6 +65,13 @@ struct device_node {
 #endif
 };
 
+#define MAX_PHANDLE_ARGS 8
+struct of_phandle_args {
+	struct device_node *np;
+	int args_count;
+	uint32_t args[MAX_PHANDLE_ARGS];
+};
+
 #ifdef CONFIG_OF
 
 /* Pointer for first entry in chain of all nodes. */
@@ -230,9 +237,9 @@ extern int of_modalias_node(struct device_node *node, char *modalias, int len);
 extern struct device_node *of_parse_phandle(struct device_node *np,
 					    const char *phandle_name,
 					    int index);
-extern int of_parse_phandles_with_args(struct device_node *np,
+extern int of_parse_phandle_with_args(struct device_node *np,
 	const char *list_name, const char *cells_name, int index,
-	struct device_node **out_node, const void **out_args);
+	struct of_phandle_args *out_args);
 
 extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align));
 extern int of_alias_get_id(struct device_node *np, const char *stem);
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index 52280a2b5e63..b254052a49d7 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -18,6 +18,7 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/gpio.h>
+#include <linux/of.h>
 
 struct device_node;
 
@@ -57,8 +58,9 @@ extern int of_mm_gpiochip_add(struct device_node *np,
 extern void of_gpiochip_add(struct gpio_chip *gc);
 extern void of_gpiochip_remove(struct gpio_chip *gc);
 extern struct gpio_chip *of_node_to_gpiochip(struct device_node *np);
-extern int of_gpio_simple_xlate(struct gpio_chip *gc, struct device_node *np,
-				const void *gpio_spec, u32 *flags);
+extern int of_gpio_simple_xlate(struct gpio_chip *gc,
+				const struct of_phandle_args *gpiospec,
+				u32 *flags);
 
 #else /* CONFIG_OF_GPIO */
 
@@ -75,8 +77,8 @@ static inline unsigned int of_gpio_count(struct device_node *np)
 }
 
 static inline int of_gpio_simple_xlate(struct gpio_chip *gc,
-				       struct device_node *np,
-				       const void *gpio_spec, u32 *flags)
+				       const struct of_phandle_args *gpiospec,
+				       u32 *flags)
 {
 	return -ENOSYS;
 }
-- 
cgit v1.2.3


From c91043adaf50ef13609003120f3471783460fb71 Mon Sep 17 00:00:00 2001
From: Qinglin Ye <yestyle@gmail.com>
Date: Sun, 11 Dec 2011 16:40:22 +0800
Subject: USB: Remove the duplicate definition of HUB_SET_DEPTH

The macro HUB_SET_DEPTH is defined twice in ch11.h (introduced by
commit 0eadcc0 "usb: USB3.0 ch11 definitions" and dbe79bb "USB 3.0
Hub Changes"), so remove the duplicate one in the USB 2.0 part.

Signed-off-by: Qinglin Ye <yestyle@gmail.com>
Cc: John Youn <John.Youn@synopsys.com>
Cc: Sergei Shtylyov <sshtylyov@mvista.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/ch11.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/ch11.h b/include/linux/usb/ch11.h
index 0832eb841a30..31fdb4c6ee3d 100644
--- a/include/linux/usb/ch11.h
+++ b/include/linux/usb/ch11.h
@@ -26,7 +26,6 @@
 #define HUB_RESET_TT		9
 #define HUB_GET_TT_STATE	10
 #define HUB_STOP_TT		11
-#define HUB_SET_DEPTH		12
 
 /*
  * Hub class additional requests defined by USB 3.0 spec
-- 
cgit v1.2.3


From e1aab161e0135aafcd439be20b4f35e4b0922d95 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Sun, 11 Dec 2011 21:47:03 +0000
Subject: socket: initial cgroup code.

The goal of this work is to move the memory pressure tcp
controls to a cgroup, instead of just relying on global
conditions.

To avoid excessive overhead in the network fast paths,
the code that accounts allocated memory to a cgroup is
hidden inside a static_branch(). This branch is patched out
until the first non-root cgroup is created. So when nobody
is using cgroups, even if it is mounted, no significant performance
penalty should be seen.

This patch handles the generic part of the code, and has nothing
tcp-specific.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujtsu.com>
CC: Kirill A. Shutemov <kirill@shutemov.name>
CC: David S. Miller <davem@davemloft.net>
CC: Eric W. Biederman <ebiederm@xmission.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/cgroups/memory.txt |   4 +-
 include/linux/memcontrol.h       |  22 ++++++
 include/net/sock.h               | 156 +++++++++++++++++++++++++++++++++++++--
 mm/memcontrol.c                  |  46 +++++++++++-
 net/core/sock.c                  |  24 ++++--
 5 files changed, 235 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index f2453241142b..23a8dc5319a3 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -289,7 +289,9 @@ to trigger slab reclaim when those limits are reached.
 
 2.7.1 Current Kernel Memory resources accounted
 
-None
+* sockets memory pressure: some sockets protocols have memory pressure
+thresholds. The Memory Controller allows them to be controlled individually
+per cgroup, instead of globally.
 
 3. User Interface
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b87068a1a09e..f15021b9f734 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -85,6 +85,8 @@ extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
 extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
 
+extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
+
 static inline
 int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
 {
@@ -381,5 +383,25 @@ mem_cgroup_print_bad_page(struct page *page)
 }
 #endif
 
+#ifdef CONFIG_INET
+enum {
+	UNDER_LIMIT,
+	SOFT_LIMIT,
+	OVER_LIMIT,
+};
+
+struct sock;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+void sock_update_memcg(struct sock *sk);
+void sock_release_memcg(struct sock *sk);
+#else
+static inline void sock_update_memcg(struct sock *sk)
+{
+}
+static inline void sock_release_memcg(struct sock *sk)
+{
+}
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
+#endif /* CONFIG_INET */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/include/net/sock.h b/include/net/sock.h
index ed0dbf034539..d5eab256167c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -54,6 +54,7 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/memcontrol.h>
+#include <linux/res_counter.h>
 
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
@@ -168,6 +169,7 @@ struct sock_common {
 	/* public: */
 };
 
+struct cg_proto;
 /**
   *	struct sock - network layer representation of sockets
   *	@__sk_common: shared layout with inet_timewait_sock
@@ -228,6 +230,7 @@ struct sock_common {
   *	@sk_security: used by security modules
   *	@sk_mark: generic packet mark
   *	@sk_classid: this socket's cgroup classid
+  *	@sk_cgrp: this socket's cgroup-specific proto data
   *	@sk_write_pending: a write to stream socket waits to start
   *	@sk_state_change: callback to indicate change in the state of the sock
   *	@sk_data_ready: callback to indicate there is data to be processed
@@ -342,6 +345,7 @@ struct sock {
 #endif
 	__u32			sk_mark;
 	u32			sk_classid;
+	struct cg_proto		*sk_cgrp;
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk, int bytes);
 	void			(*sk_write_space)(struct sock *sk);
@@ -838,6 +842,37 @@ struct proto {
 #ifdef SOCK_REFCNT_DEBUG
 	atomic_t		socks;
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+	/*
+	 * cgroup specific init/deinit functions. Called once for all
+	 * protocols that implement it, from cgroups populate function.
+	 * This function has to setup any files the protocol want to
+	 * appear in the kmem cgroup filesystem.
+	 */
+	int			(*init_cgroup)(struct cgroup *cgrp,
+					       struct cgroup_subsys *ss);
+	void			(*destroy_cgroup)(struct cgroup *cgrp,
+						  struct cgroup_subsys *ss);
+	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
+#endif
+};
+
+struct cg_proto {
+	void			(*enter_memory_pressure)(struct sock *sk);
+	struct res_counter	*memory_allocated;	/* Current allocated memory. */
+	struct percpu_counter	*sockets_allocated;	/* Current number of sockets. */
+	int			*memory_pressure;
+	long			*sysctl_mem;
+	/*
+	 * memcg field is used to find which memcg we belong directly
+	 * Each memcg struct can hold more than one cg_proto, so container_of
+	 * won't really cut.
+	 *
+	 * The elegant solution would be having an inverse function to
+	 * proto_cgroup in struct proto, but that means polluting the structure
+	 * for everybody, instead of just for memcg users.
+	 */
+	struct mem_cgroup	*memcg;
 };
 
 extern int proto_register(struct proto *prot, int alloc_slab);
@@ -856,7 +891,7 @@ static inline void sk_refcnt_debug_dec(struct sock *sk)
 	       sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks));
 }
 
-static inline void sk_refcnt_debug_release(const struct sock *sk)
+inline void sk_refcnt_debug_release(const struct sock *sk)
 {
 	if (atomic_read(&sk->sk_refcnt) != 1)
 		printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
@@ -868,6 +903,24 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 #define sk_refcnt_debug_release(sk) do { } while (0)
 #endif /* SOCK_REFCNT_DEBUG */
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+extern struct jump_label_key memcg_socket_limit_enabled;
+static inline struct cg_proto *parent_cg_proto(struct proto *proto,
+					       struct cg_proto *cg_proto)
+{
+	return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg));
+}
+#define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled)
+#else
+#define mem_cgroup_sockets_enabled 0
+static inline struct cg_proto *parent_cg_proto(struct proto *proto,
+					       struct cg_proto *cg_proto)
+{
+	return NULL;
+}
+#endif
+
+
 static inline bool sk_has_memory_pressure(const struct sock *sk)
 {
 	return sk->sk_prot->memory_pressure != NULL;
@@ -877,6 +930,10 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
 {
 	if (!sk->sk_prot->memory_pressure)
 		return false;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		return !!*sk->sk_cgrp->memory_pressure;
+
 	return !!*sk->sk_prot->memory_pressure;
 }
 
@@ -884,52 +941,136 @@ static inline void sk_leave_memory_pressure(struct sock *sk)
 {
 	int *memory_pressure = sk->sk_prot->memory_pressure;
 
-	if (memory_pressure && *memory_pressure)
+	if (!memory_pressure)
+		return;
+
+	if (*memory_pressure)
 		*memory_pressure = 0;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+		struct proto *prot = sk->sk_prot;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			if (*cg_proto->memory_pressure)
+				*cg_proto->memory_pressure = 0;
+	}
+
 }
 
 static inline void sk_enter_memory_pressure(struct sock *sk)
 {
-	if (sk->sk_prot->enter_memory_pressure)
-		sk->sk_prot->enter_memory_pressure(sk);
+	if (!sk->sk_prot->enter_memory_pressure)
+		return;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+		struct proto *prot = sk->sk_prot;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			cg_proto->enter_memory_pressure(sk);
+	}
+
+	sk->sk_prot->enter_memory_pressure(sk);
 }
 
 static inline long sk_prot_mem_limits(const struct sock *sk, int index)
 {
 	long *prot = sk->sk_prot->sysctl_mem;
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		prot = sk->sk_cgrp->sysctl_mem;
 	return prot[index];
 }
 
+static inline void memcg_memory_allocated_add(struct cg_proto *prot,
+					      unsigned long amt,
+					      int *parent_status)
+{
+	struct res_counter *fail;
+	int ret;
+
+	ret = res_counter_charge(prot->memory_allocated,
+				 amt << PAGE_SHIFT, &fail);
+
+	if (ret < 0)
+		*parent_status = OVER_LIMIT;
+}
+
+static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
+					      unsigned long amt)
+{
+	res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT);
+}
+
+static inline u64 memcg_memory_allocated_read(struct cg_proto *prot)
+{
+	u64 ret;
+	ret = res_counter_read_u64(prot->memory_allocated, RES_USAGE);
+	return ret >> PAGE_SHIFT;
+}
+
 static inline long
 sk_memory_allocated(const struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		return memcg_memory_allocated_read(sk->sk_cgrp);
+
 	return atomic_long_read(prot->memory_allocated);
 }
 
 static inline long
-sk_memory_allocated_add(struct sock *sk, int amt)
+sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status);
+		/* update the root cgroup regardless */
+		atomic_long_add_return(amt, prot->memory_allocated);
+		return memcg_memory_allocated_read(sk->sk_cgrp);
+	}
+
 	return atomic_long_add_return(amt, prot->memory_allocated);
 }
 
 static inline void
-sk_memory_allocated_sub(struct sock *sk, int amt)
+sk_memory_allocated_sub(struct sock *sk, int amt, int parent_status)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
+	    parent_status != OVER_LIMIT) /* Otherwise was uncharged already */
+		memcg_memory_allocated_sub(sk->sk_cgrp, amt);
+
 	atomic_long_sub(amt, prot->memory_allocated);
 }
 
 static inline void sk_sockets_allocated_dec(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			percpu_counter_dec(cg_proto->sockets_allocated);
+	}
+
 	percpu_counter_dec(prot->sockets_allocated);
 }
 
 static inline void sk_sockets_allocated_inc(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			percpu_counter_inc(cg_proto->sockets_allocated);
+	}
+
 	percpu_counter_inc(prot->sockets_allocated);
 }
 
@@ -938,6 +1079,9 @@ sk_sockets_allocated_read_positive(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
 
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		return percpu_counter_sum_positive(sk->sk_cgrp->sockets_allocated);
+
 	return percpu_counter_sum_positive(prot->sockets_allocated);
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9fbcff71245e..3de3901ae0a7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -379,7 +379,48 @@ enum mem_type {
 
 static void mem_cgroup_get(struct mem_cgroup *memcg);
 static void mem_cgroup_put(struct mem_cgroup *memcg);
-static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
+
+/* Writing them here to avoid exposing memcg's inner layout */
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_INET
+#include <net/sock.h>
+
+static bool mem_cgroup_is_root(struct mem_cgroup *memcg);
+void sock_update_memcg(struct sock *sk)
+{
+	/* A socket spends its whole life in the same cgroup */
+	if (sk->sk_cgrp) {
+		WARN_ON(1);
+		return;
+	}
+	if (static_branch(&memcg_socket_limit_enabled)) {
+		struct mem_cgroup *memcg;
+
+		BUG_ON(!sk->sk_prot->proto_cgroup);
+
+		rcu_read_lock();
+		memcg = mem_cgroup_from_task(current);
+		if (!mem_cgroup_is_root(memcg)) {
+			mem_cgroup_get(memcg);
+			sk->sk_cgrp = sk->sk_prot->proto_cgroup(memcg);
+		}
+		rcu_read_unlock();
+	}
+}
+EXPORT_SYMBOL(sock_update_memcg);
+
+void sock_release_memcg(struct sock *sk)
+{
+	if (static_branch(&memcg_socket_limit_enabled) && sk->sk_cgrp) {
+		struct mem_cgroup *memcg;
+		WARN_ON(!sk->sk_cgrp->memcg);
+		memcg = sk->sk_cgrp->memcg;
+		mem_cgroup_put(memcg);
+	}
+}
+#endif /* CONFIG_INET */
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
+
 static void drain_all_stock_async(struct mem_cgroup *memcg);
 
 static struct mem_cgroup_per_zone *
@@ -4932,12 +4973,13 @@ static void mem_cgroup_put(struct mem_cgroup *memcg)
 /*
  * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
  */
-static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
+struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 {
 	if (!memcg->res.parent)
 		return NULL;
 	return mem_cgroup_from_res_counter(memcg->res.parent, res);
 }
+EXPORT_SYMBOL(parent_mem_cgroup);
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 static void __init enable_swap_cgroup(void)
diff --git a/net/core/sock.c b/net/core/sock.c
index a3d4205e7238..6a871b8fdd20 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -111,6 +111,7 @@
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/user_namespace.h>
+#include <linux/jump_label.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -142,6 +143,9 @@
 static struct lock_class_key af_family_keys[AF_MAX];
 static struct lock_class_key af_family_slock_keys[AF_MAX];
 
+struct jump_label_key memcg_socket_limit_enabled;
+EXPORT_SYMBOL(memcg_socket_limit_enabled);
+
 /*
  * Make lock validator output more readable. (we pre-construct these
  * strings build-time, so that runtime initialization of socket
@@ -1711,23 +1715,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 	struct proto *prot = sk->sk_prot;
 	int amt = sk_mem_pages(size);
 	long allocated;
+	int parent_status = UNDER_LIMIT;
 
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
 
-	allocated = sk_memory_allocated_add(sk, amt);
+	allocated = sk_memory_allocated_add(sk, amt, &parent_status);
 
 	/* Under limit. */
-	if (allocated <= sk_prot_mem_limits(sk, 0)) {
+	if (parent_status == UNDER_LIMIT &&
+			allocated <= sk_prot_mem_limits(sk, 0)) {
 		sk_leave_memory_pressure(sk);
 		return 1;
 	}
 
-	/* Under pressure. */
-	if (allocated > sk_prot_mem_limits(sk, 1))
+	/* Under pressure. (we or our parents) */
+	if ((parent_status > SOFT_LIMIT) ||
+			allocated > sk_prot_mem_limits(sk, 1))
 		sk_enter_memory_pressure(sk);
 
-	/* Over hard limit. */
-	if (allocated > sk_prot_mem_limits(sk, 2))
+	/* Over hard limit (we or our parents) */
+	if ((parent_status == OVER_LIMIT) ||
+			(allocated > sk_prot_mem_limits(sk, 2)))
 		goto suppress_allocation;
 
 	/* guarantee minimum buffer size under pressure */
@@ -1774,7 +1782,7 @@ suppress_allocation:
 	/* Alas. Undo changes. */
 	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
 
-	sk_memory_allocated_sub(sk, amt);
+	sk_memory_allocated_sub(sk, amt, parent_status);
 
 	return 0;
 }
@@ -1787,7 +1795,7 @@ EXPORT_SYMBOL(__sk_mem_schedule);
 void __sk_mem_reclaim(struct sock *sk)
 {
 	sk_memory_allocated_sub(sk,
-				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
+				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 0);
 	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
 
 	if (sk_under_memory_pressure(sk) &&
-- 
cgit v1.2.3


From d1a4c0b37c296e600ffe08edb0db2dc1b8f550d7 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Sun, 11 Dec 2011 21:47:04 +0000
Subject: tcp memory pressure controls

This patch introduces memory pressure controls for the tcp
protocol. It uses the generic socket memory pressure code
introduced in earlier patches, and fills in the
necessary data in cg_proto struct.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujtisu.com>
CC: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/cgroups/memory.txt |  2 ++
 include/linux/memcontrol.h       |  1 +
 include/net/sock.h               |  2 ++
 include/net/tcp_memcontrol.h     | 17 +++++++++
 mm/memcontrol.c                  | 40 +++++++++++++++++++++-
 net/core/sock.c                  | 43 +++++++++++++++++++++--
 net/ipv4/Makefile                |  1 +
 net/ipv4/tcp_ipv4.c              |  9 ++++-
 net/ipv4/tcp_memcontrol.c        | 74 ++++++++++++++++++++++++++++++++++++++++
 net/ipv6/tcp_ipv6.c              |  5 +++
 10 files changed, 189 insertions(+), 5 deletions(-)
 create mode 100644 include/net/tcp_memcontrol.h
 create mode 100644 net/ipv4/tcp_memcontrol.c

(limited to 'include/linux')

diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 23a8dc5319a3..687dea5bf1fd 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -293,6 +293,8 @@ to trigger slab reclaim when those limits are reached.
 thresholds. The Memory Controller allows them to be controlled individually
 per cgroup, instead of globally.
 
+* tcp memory pressure: sockets memory pressure for the tcp protocol.
+
 3. User Interface
 
 0. Configuration
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f15021b9f734..1513994ce207 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -86,6 +86,7 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
 
 extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
+extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont);
 
 static inline
 int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
diff --git a/include/net/sock.h b/include/net/sock.h
index d5eab256167c..18ecc9919d29 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -64,6 +64,8 @@
 #include <net/dst.h>
 #include <net/checksum.h>
 
+int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss);
+void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss);
 /*
  * This structure really needs to be cleaned up.
  * Most of it is for TCP, and not used by any of
diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h
new file mode 100644
index 000000000000..5f5e1582d764
--- /dev/null
+++ b/include/net/tcp_memcontrol.h
@@ -0,0 +1,17 @@
+#ifndef _TCP_MEMCG_H
+#define _TCP_MEMCG_H
+
+struct tcp_memcontrol {
+	struct cg_proto cg_proto;
+	/* per-cgroup tcp memory pressure knobs */
+	struct res_counter tcp_memory_allocated;
+	struct percpu_counter tcp_sockets_allocated;
+	/* those two are read-mostly, leave them at the end */
+	long tcp_prot_mem[3];
+	int tcp_memory_pressure;
+};
+
+struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg);
+int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss);
+void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss);
+#endif /* _TCP_MEMCG_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3de3901ae0a7..7266202fa7cf 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -50,6 +50,8 @@
 #include <linux/cpu.h>
 #include <linux/oom.h>
 #include "internal.h"
+#include <net/sock.h>
+#include <net/tcp_memcontrol.h>
 
 #include <asm/uaccess.h>
 
@@ -295,6 +297,10 @@ struct mem_cgroup {
 	 */
 	struct mem_cgroup_stat_cpu nocpu_base;
 	spinlock_t pcp_counter_lock;
+
+#ifdef CONFIG_INET
+	struct tcp_memcontrol tcp_mem;
+#endif
 };
 
 /* Stuffs for move charges at task migration. */
@@ -384,6 +390,7 @@ static void mem_cgroup_put(struct mem_cgroup *memcg);
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
 #ifdef CONFIG_INET
 #include <net/sock.h>
+#include <net/ip.h>
 
 static bool mem_cgroup_is_root(struct mem_cgroup *memcg);
 void sock_update_memcg(struct sock *sk)
@@ -418,6 +425,15 @@ void sock_release_memcg(struct sock *sk)
 		mem_cgroup_put(memcg);
 	}
 }
+
+struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
+{
+	if (!memcg || mem_cgroup_is_root(memcg))
+		return NULL;
+
+	return &memcg->tcp_mem.cg_proto;
+}
+EXPORT_SYMBOL(tcp_proto_cgroup);
 #endif /* CONFIG_INET */
 #endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
 
@@ -800,7 +816,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 	preempt_enable();
 }
 
-static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
+struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 {
 	return container_of(cgroup_subsys_state(cont,
 				mem_cgroup_subsys_id), struct mem_cgroup,
@@ -4732,14 +4748,34 @@ static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
 
 	ret = cgroup_add_files(cont, ss, kmem_cgroup_files,
 			       ARRAY_SIZE(kmem_cgroup_files));
+
+	/*
+	 * Part of this would be better living in a separate allocation
+	 * function, leaving us with just the cgroup tree population work.
+	 * We, however, depend on state such as network's proto_list that
+	 * is only initialized after cgroup creation. I found the less
+	 * cumbersome way to deal with it to defer it all to populate time
+	 */
+	if (!ret)
+		ret = mem_cgroup_sockets_init(cont, ss);
 	return ret;
 };
 
+static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
+				struct cgroup *cont)
+{
+	mem_cgroup_sockets_destroy(cont, ss);
+}
 #else
 static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
 {
 	return 0;
 }
+
+static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
+				struct cgroup *cont)
+{
+}
 #endif
 
 static struct cftype mem_cgroup_files[] = {
@@ -5098,6 +5134,8 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
+	kmem_cgroup_destroy(ss, cont);
+
 	mem_cgroup_put(memcg);
 }
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 6a871b8fdd20..5a6a90620656 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -136,6 +136,46 @@
 #include <net/tcp.h>
 #endif
 
+static DEFINE_RWLOCK(proto_list_lock);
+static LIST_HEAD(proto_list);
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+	struct proto *proto;
+	int ret = 0;
+
+	read_lock(&proto_list_lock);
+	list_for_each_entry(proto, &proto_list, node) {
+		if (proto->init_cgroup) {
+			ret = proto->init_cgroup(cgrp, ss);
+			if (ret)
+				goto out;
+		}
+	}
+
+	read_unlock(&proto_list_lock);
+	return ret;
+out:
+	list_for_each_entry_continue_reverse(proto, &proto_list, node)
+		if (proto->destroy_cgroup)
+			proto->destroy_cgroup(cgrp, ss);
+	read_unlock(&proto_list_lock);
+	return ret;
+}
+
+void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+	struct proto *proto;
+
+	read_lock(&proto_list_lock);
+	list_for_each_entry_reverse(proto, &proto_list, node)
+		if (proto->destroy_cgroup)
+			proto->destroy_cgroup(cgrp, ss);
+	read_unlock(&proto_list_lock);
+}
+#endif
+
 /*
  * Each address family might have different locking rules, so we have
  * one slock key per address family:
@@ -2291,9 +2331,6 @@ void sk_common_release(struct sock *sk)
 }
 EXPORT_SYMBOL(sk_common_release);
 
-static DEFINE_RWLOCK(proto_list_lock);
-static LIST_HEAD(proto_list);
-
 #ifdef CONFIG_PROC_FS
 #define PROTO_INUSE_NR	64	/* should be enough for the first time */
 struct prot_inuse {
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index e9d98e621112..ff75d3bbcd6a 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
+obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f48bf312cfe8..42714cb1fef3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -73,6 +73,7 @@
 #include <net/xfrm.h>
 #include <net/netdma.h>
 #include <net/secure_seq.h>
+#include <net/tcp_memcontrol.h>
 
 #include <linux/inet.h>
 #include <linux/ipv6.h>
@@ -1917,6 +1918,7 @@ static int tcp_v4_init_sock(struct sock *sk)
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
 	local_bh_disable();
+	sock_update_memcg(sk);
 	sk_sockets_allocated_inc(sk);
 	local_bh_enable();
 
@@ -1974,6 +1976,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 	}
 
 	sk_sockets_allocated_dec(sk);
+	sock_release_memcg(sk);
 }
 EXPORT_SYMBOL(tcp_v4_destroy_sock);
 
@@ -2634,10 +2637,14 @@ struct proto tcp_prot = {
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+	.init_cgroup		= tcp_init_cgroup,
+	.destroy_cgroup		= tcp_destroy_cgroup,
+	.proto_cgroup		= tcp_proto_cgroup,
+#endif
 };
 EXPORT_SYMBOL(tcp_prot);
 
-
 static int __net_init tcp_sk_init(struct net *net)
 {
 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
new file mode 100644
index 000000000000..4a68d2c24556
--- /dev/null
+++ b/net/ipv4/tcp_memcontrol.c
@@ -0,0 +1,74 @@
+#include <net/tcp.h>
+#include <net/tcp_memcontrol.h>
+#include <net/sock.h>
+#include <linux/memcontrol.h>
+#include <linux/module.h>
+
+static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
+{
+	return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
+}
+
+static void memcg_tcp_enter_memory_pressure(struct sock *sk)
+{
+	if (!sk->sk_cgrp->memory_pressure)
+		*sk->sk_cgrp->memory_pressure = 1;
+}
+EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
+
+int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+	/*
+	 * The root cgroup does not use res_counters, but rather,
+	 * rely on the data already collected by the network
+	 * subsystem
+	 */
+	struct res_counter *res_parent = NULL;
+	struct cg_proto *cg_proto, *parent_cg;
+	struct tcp_memcontrol *tcp;
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+
+	cg_proto = tcp_prot.proto_cgroup(memcg);
+	if (!cg_proto)
+		return 0;
+
+	tcp = tcp_from_cgproto(cg_proto);
+
+	tcp->tcp_prot_mem[0] = sysctl_tcp_mem[0];
+	tcp->tcp_prot_mem[1] = sysctl_tcp_mem[1];
+	tcp->tcp_prot_mem[2] = sysctl_tcp_mem[2];
+	tcp->tcp_memory_pressure = 0;
+
+	parent_cg = tcp_prot.proto_cgroup(parent);
+	if (parent_cg)
+		res_parent = parent_cg->memory_allocated;
+
+	res_counter_init(&tcp->tcp_memory_allocated, res_parent);
+	percpu_counter_init(&tcp->tcp_sockets_allocated, 0);
+
+	cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
+	cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
+	cg_proto->sysctl_mem = tcp->tcp_prot_mem;
+	cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
+	cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
+	cg_proto->memcg = memcg;
+
+	return 0;
+}
+EXPORT_SYMBOL(tcp_init_cgroup);
+
+void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+	struct cg_proto *cg_proto;
+	struct tcp_memcontrol *tcp;
+
+	cg_proto = tcp_prot.proto_cgroup(memcg);
+	if (!cg_proto)
+		return;
+
+	tcp = tcp_from_cgproto(cg_proto);
+	percpu_counter_destroy(&tcp->tcp_sockets_allocated);
+}
+EXPORT_SYMBOL(tcp_destroy_cgroup);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b69c7030aba9..95d3cfb65d39 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -62,6 +62,7 @@
 #include <net/netdma.h>
 #include <net/inet_common.h>
 #include <net/secure_seq.h>
+#include <net/tcp_memcontrol.h>
 
 #include <asm/uaccess.h>
 
@@ -1994,6 +1995,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
 	local_bh_disable();
+	sock_update_memcg(sk);
 	sk_sockets_allocated_inc(sk);
 	local_bh_enable();
 
@@ -2227,6 +2229,9 @@ struct proto tcpv6_prot = {
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+	.proto_cgroup		= tcp_proto_cgroup,
+#endif
 };
 
 static const struct inet6_protocol tcpv6_protocol = {
-- 
cgit v1.2.3


From 90b41a1cd44cc4e507b554ae5a36562a1ba9a4e8 Mon Sep 17 00:00:00 2001
From: Hagen Paul Pfeifer <hagen@jauu.net>
Date: Mon, 12 Dec 2011 14:30:00 +0000
Subject: netem: add cell concept to simulate special MAC behavior

This extension can be used to simulate special link layer
characteristics. Simulate because packet data is not modified, only the
calculation base is changed to delay a packet based on the original
packet size and artificial cell information.

packet_overhead can be used to simulate a link layer header compression
scheme (e.g. set packet_overhead to -20) or with a positive
packet_overhead value an additional MAC header can be simulated. It is
also possible to "replace" the 14 byte Ethernet header with something
else.

cell_size and cell_overhead can be used to simulate link layer schemes,
based on cells, like some TDMA schemes. Another application area are MAC
schemes using a link layer fragmentation with a (small) header each.
Cell size is the maximum amount of data bytes within one cell. Cell
overhead is an additional variable to change the per-cell-overhead
(e.g.  5 byte header per fragment).

Example (5 kbit/s, 20 byte per packet overhead, cell-size 100 byte, per
cell overhead 5 byte):

  tc qdisc add dev eth0 root netem rate 5kbit 20 100 5

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h |  3 +++
 net/sched/sch_netem.c     | 33 +++++++++++++++++++++++++++++----
 2 files changed, 32 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 8786ea741f52..8daced32a014 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -502,6 +502,9 @@ struct tc_netem_corrupt {
 
 struct tc_netem_rate {
 	__u32	rate;	/* byte/s */
+	__s32	packet_overhead;
+	__u32	cell_size;
+	__s32	cell_overhead;
 };
 
 enum {
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 3bfd73344f76..1fa2f903d221 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -22,6 +22,7 @@
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
 #include <linux/rtnetlink.h>
+#include <linux/reciprocal_div.h>
 
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
@@ -80,6 +81,10 @@ struct netem_sched_data {
 	u32 reorder;
 	u32 corrupt;
 	u32 rate;
+	s32 packet_overhead;
+	u32 cell_size;
+	u32 cell_size_reciprocal;
+	s32 cell_overhead;
 
 	struct crndstate {
 		u32 last;
@@ -299,11 +304,23 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
 }
 
-static psched_time_t packet_len_2_sched_time(unsigned int len, u32 rate)
+static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
 {
-	u64 ticks = (u64)len * NSEC_PER_SEC;
+	u64 ticks;
 
-	do_div(ticks, rate);
+	len += q->packet_overhead;
+
+	if (q->cell_size) {
+		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
+
+		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
+			cells++;
+		len = cells * (q->cell_size + q->cell_overhead);
+	}
+
+	ticks = (u64)len * NSEC_PER_SEC;
+
+	do_div(ticks, q->rate);
 	return PSCHED_NS2TICKS(ticks);
 }
 
@@ -384,7 +401,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		if (q->rate) {
 			struct sk_buff_head *list = &q->qdisc->q;
 
-			delay += packet_len_2_sched_time(skb->len, q->rate);
+			delay += packet_len_2_sched_time(skb->len, q);
 
 			if (!skb_queue_empty(list)) {
 				/*
@@ -568,6 +585,11 @@ static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
 	const struct tc_netem_rate *r = nla_data(attr);
 
 	q->rate = r->rate;
+	q->packet_overhead = r->packet_overhead;
+	q->cell_size = r->cell_size;
+	if (q->cell_size)
+		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
+	q->cell_overhead = r->cell_overhead;
 }
 
 static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
@@ -909,6 +931,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
 
 	rate.rate = q->rate;
+	rate.packet_overhead = q->packet_overhead;
+	rate.cell_size = q->cell_size;
+	rate.cell_overhead = q->cell_overhead;
 	NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);
 
 	if (dump_loss_model(q, skb) != 0)
-- 
cgit v1.2.3


From 257058ae2b971646b96ab3a15605ac69186e562a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 12 Dec 2011 18:12:21 -0800
Subject: threadgroup: rename signal->threadgroup_fork_lock to ->group_rwsem

Make the following renames to prepare for extension of threadgroup
locking.

* s/signal->threadgroup_fork_lock/signal->group_rwsem/
* s/threadgroup_fork_read_lock()/threadgroup_change_begin()/
* s/threadgroup_fork_read_unlock()/threadgroup_change_end()/
* s/threadgroup_fork_write_lock()/threadgroup_lock()/
* s/threadgroup_fork_write_unlock()/threadgroup_unlock()/

This patch doesn't cause any behavior change.

-v2: Rename threadgroup_change_done() to threadgroup_change_end() per
     KAMEZAWA's suggestion.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Menage <paul@paulmenage.org>
---
 include/linux/init_task.h |  9 ++++-----
 include/linux/sched.h     | 30 +++++++++++++++---------------
 kernel/cgroup.c           | 13 ++++++-------
 kernel/fork.c             |  8 ++++----
 4 files changed, 29 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 94b1e356c02a..f4544b99efe4 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -23,11 +23,10 @@ extern struct files_struct init_files;
 extern struct fs_struct init_fs;
 
 #ifdef CONFIG_CGROUPS
-#define INIT_THREADGROUP_FORK_LOCK(sig)					\
-	.threadgroup_fork_lock =					\
-		__RWSEM_INITIALIZER(sig.threadgroup_fork_lock),
+#define INIT_GROUP_RWSEM(sig)						\
+	.group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem),
 #else
-#define INIT_THREADGROUP_FORK_LOCK(sig)
+#define INIT_GROUP_RWSEM(sig)
 #endif
 
 #define INIT_SIGNALS(sig) {						\
@@ -46,7 +45,7 @@ extern struct fs_struct init_fs;
 	},								\
 	.cred_guard_mutex =						\
 		 __MUTEX_INITIALIZER(sig.cred_guard_mutex),		\
-	INIT_THREADGROUP_FORK_LOCK(sig)					\
+	INIT_GROUP_RWSEM(sig)						\
 }
 
 extern struct nsproxy init_nsproxy;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d81cce933869..8cd523202a3b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -635,13 +635,13 @@ struct signal_struct {
 #endif
 #ifdef CONFIG_CGROUPS
 	/*
-	 * The threadgroup_fork_lock prevents threads from forking with
+	 * The group_rwsem prevents threads from forking with
 	 * CLONE_THREAD while held for writing. Use this for fork-sensitive
 	 * threadgroup-wide operations. It's taken for reading in fork.c in
 	 * copy_process().
 	 * Currently only needed write-side by cgroups.
 	 */
-	struct rw_semaphore threadgroup_fork_lock;
+	struct rw_semaphore group_rwsem;
 #endif
 
 	int oom_adj;		/* OOM kill score adjustment (bit shift) */
@@ -2371,29 +2371,29 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
 	spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
 }
 
-/* See the declaration of threadgroup_fork_lock in signal_struct. */
+/* See the declaration of group_rwsem in signal_struct. */
 #ifdef CONFIG_CGROUPS
-static inline void threadgroup_fork_read_lock(struct task_struct *tsk)
+static inline void threadgroup_change_begin(struct task_struct *tsk)
 {
-	down_read(&tsk->signal->threadgroup_fork_lock);
+	down_read(&tsk->signal->group_rwsem);
 }
-static inline void threadgroup_fork_read_unlock(struct task_struct *tsk)
+static inline void threadgroup_change_end(struct task_struct *tsk)
 {
-	up_read(&tsk->signal->threadgroup_fork_lock);
+	up_read(&tsk->signal->group_rwsem);
 }
-static inline void threadgroup_fork_write_lock(struct task_struct *tsk)
+static inline void threadgroup_lock(struct task_struct *tsk)
 {
-	down_write(&tsk->signal->threadgroup_fork_lock);
+	down_write(&tsk->signal->group_rwsem);
 }
-static inline void threadgroup_fork_write_unlock(struct task_struct *tsk)
+static inline void threadgroup_unlock(struct task_struct *tsk)
 {
-	up_write(&tsk->signal->threadgroup_fork_lock);
+	up_write(&tsk->signal->group_rwsem);
 }
 #else
-static inline void threadgroup_fork_read_lock(struct task_struct *tsk) {}
-static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) {}
-static inline void threadgroup_fork_write_lock(struct task_struct *tsk) {}
-static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) {}
+static inline void threadgroup_change_begin(struct task_struct *tsk) {}
+static inline void threadgroup_change_end(struct task_struct *tsk) {}
+static inline void threadgroup_lock(struct task_struct *tsk) {}
+static inline void threadgroup_unlock(struct task_struct *tsk) {}
 #endif
 
 #ifndef __HAVE_THREAD_FUNCTIONS
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 6545fd61b10d..b409df3b2e9d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2003,8 +2003,8 @@ static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
  * @cgrp: the cgroup to attach to
  * @leader: the threadgroup leader task_struct of the group to be attached
  *
- * Call holding cgroup_mutex and the threadgroup_fork_lock of the leader. Will
- * take task_lock of each thread in leader's threadgroup individually in turn.
+ * Call holding cgroup_mutex and the group_rwsem of the leader. Will take
+ * task_lock of each thread in leader's threadgroup individually in turn.
  */
 int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 {
@@ -2030,8 +2030,8 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	 * step 0: in order to do expensive, possibly blocking operations for
 	 * every thread, we cannot iterate the thread group list, since it needs
 	 * rcu or tasklist locked. instead, build an array of all threads in the
-	 * group - threadgroup_fork_lock prevents new threads from appearing,
-	 * and if threads exit, this will just be an over-estimate.
+	 * group - group_rwsem prevents new threads from appearing, and if
+	 * threads exit, this will just be an over-estimate.
 	 */
 	group_size = get_nr_threads(leader);
 	/* flex_array supports very large thread-groups better than kmalloc. */
@@ -2249,7 +2249,6 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
 			cgroup_unlock();
 			return -ESRCH;
 		}
-
 		/*
 		 * even if we're attaching all tasks in the thread group, we
 		 * only need to check permissions on one of them.
@@ -2273,9 +2272,9 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
 	}
 
 	if (threadgroup) {
-		threadgroup_fork_write_lock(tsk);
+		threadgroup_lock(tsk);
 		ret = cgroup_attach_proc(cgrp, tsk);
-		threadgroup_fork_write_unlock(tsk);
+		threadgroup_unlock(tsk);
 	} else {
 		ret = cgroup_attach_task(cgrp, tsk);
 	}
diff --git a/kernel/fork.c b/kernel/fork.c
index 827808613847..d4ac9e3e0075 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -972,7 +972,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sched_autogroup_fork(sig);
 
 #ifdef CONFIG_CGROUPS
-	init_rwsem(&sig->threadgroup_fork_lock);
+	init_rwsem(&sig->group_rwsem);
 #endif
 
 	sig->oom_adj = current->signal->oom_adj;
@@ -1157,7 +1157,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->io_context = NULL;
 	p->audit_context = NULL;
 	if (clone_flags & CLONE_THREAD)
-		threadgroup_fork_read_lock(current);
+		threadgroup_change_begin(current);
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
@@ -1372,7 +1372,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
 	if (clone_flags & CLONE_THREAD)
-		threadgroup_fork_read_unlock(current);
+		threadgroup_change_end(current);
 	perf_event_fork(p);
 	return p;
 
@@ -1407,7 +1407,7 @@ bad_fork_cleanup_policy:
 bad_fork_cleanup_cgroup:
 #endif
 	if (clone_flags & CLONE_THREAD)
-		threadgroup_fork_read_unlock(current);
+		threadgroup_change_end(current);
 	cgroup_exit(p, cgroup_callbacks_done);
 	delayacct_tsk_free(p);
 	module_put(task_thread_info(p)->exec_domain->module);
-- 
cgit v1.2.3


From 77e4ef99d1c596a31747668e5fd837f77b6349b6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 12 Dec 2011 18:12:21 -0800
Subject: threadgroup: extend threadgroup_lock() to cover exit and exec

threadgroup_lock() protected only protected against new addition to
the threadgroup, which was inherently somewhat incomplete and
problematic for its only user cgroup.  On-going migration could race
against exec and exit leading to interesting problems - the symmetry
between various attach methods, task exiting during method execution,
->exit() racing against attach methods, migrating task switching basic
properties during exec and so on.

This patch extends threadgroup_lock() such that it protects against
all three threadgroup altering operations - fork, exit and exec.  For
exit, threadgroup_change_begin/end() calls are added to exit_signals
around assertion of PF_EXITING.  For exec, threadgroup_[un]lock() are
updated to also grab and release cred_guard_mutex.

With this change, threadgroup_lock() guarantees that the target
threadgroup will remain stable - no new task will be added, no new
PF_EXITING will be set and exec won't happen.

The next patch will update cgroup so that it can take full advantage
of this change.

-v2: beefed up comment as suggested by Frederic.

-v3: narrowed scope of protection in exit path as suggested by
     Frederic.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Menage <paul@paulmenage.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 47 +++++++++++++++++++++++++++++++++++++++++------
 kernel/signal.c       | 10 ++++++++++
 2 files changed, 51 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8cd523202a3b..c0c5876c52c0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -635,11 +635,13 @@ struct signal_struct {
 #endif
 #ifdef CONFIG_CGROUPS
 	/*
-	 * The group_rwsem prevents threads from forking with
-	 * CLONE_THREAD while held for writing. Use this for fork-sensitive
-	 * threadgroup-wide operations. It's taken for reading in fork.c in
-	 * copy_process().
-	 * Currently only needed write-side by cgroups.
+	 * group_rwsem prevents new tasks from entering the threadgroup and
+	 * member tasks from exiting,a more specifically, setting of
+	 * PF_EXITING.  fork and exit paths are protected with this rwsem
+	 * using threadgroup_change_begin/end().  Users which require
+	 * threadgroup to remain stable should use threadgroup_[un]lock()
+	 * which also takes care of exec path.  Currently, cgroup is the
+	 * only user.
 	 */
 	struct rw_semaphore group_rwsem;
 #endif
@@ -2371,7 +2373,6 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
 	spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
 }
 
-/* See the declaration of group_rwsem in signal_struct. */
 #ifdef CONFIG_CGROUPS
 static inline void threadgroup_change_begin(struct task_struct *tsk)
 {
@@ -2381,13 +2382,47 @@ static inline void threadgroup_change_end(struct task_struct *tsk)
 {
 	up_read(&tsk->signal->group_rwsem);
 }
+
+/**
+ * threadgroup_lock - lock threadgroup
+ * @tsk: member task of the threadgroup to lock
+ *
+ * Lock the threadgroup @tsk belongs to.  No new task is allowed to enter
+ * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or
+ * perform exec.  This is useful for cases where the threadgroup needs to
+ * stay stable across blockable operations.
+ *
+ * fork and exit paths explicitly call threadgroup_change_{begin|end}() for
+ * synchronization.  While held, no new task will be added to threadgroup
+ * and no existing live task will have its PF_EXITING set.
+ *
+ * During exec, a task goes and puts its thread group through unusual
+ * changes.  After de-threading, exclusive access is assumed to resources
+ * which are usually shared by tasks in the same group - e.g. sighand may
+ * be replaced with a new one.  Also, the exec'ing task takes over group
+ * leader role including its pid.  Exclude these changes while locked by
+ * grabbing cred_guard_mutex which is used to synchronize exec path.
+ */
 static inline void threadgroup_lock(struct task_struct *tsk)
 {
+	/*
+	 * exec uses exit for de-threading nesting group_rwsem inside
+	 * cred_guard_mutex. Grab cred_guard_mutex first.
+	 */
+	mutex_lock(&tsk->signal->cred_guard_mutex);
 	down_write(&tsk->signal->group_rwsem);
 }
+
+/**
+ * threadgroup_unlock - unlock threadgroup
+ * @tsk: member task of the threadgroup to unlock
+ *
+ * Reverse threadgroup_lock().
+ */
 static inline void threadgroup_unlock(struct task_struct *tsk)
 {
 	up_write(&tsk->signal->group_rwsem);
+	mutex_unlock(&tsk->signal->cred_guard_mutex);
 }
 #else
 static inline void threadgroup_change_begin(struct task_struct *tsk) {}
diff --git a/kernel/signal.c b/kernel/signal.c
index b3f78d09a105..399c184bf0ae 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2359,8 +2359,15 @@ void exit_signals(struct task_struct *tsk)
 	int group_stop = 0;
 	sigset_t unblocked;
 
+	/*
+	 * @tsk is about to have PF_EXITING set - lock out users which
+	 * expect stable threadgroup.
+	 */
+	threadgroup_change_begin(tsk);
+
 	if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
 		tsk->flags |= PF_EXITING;
+		threadgroup_change_end(tsk);
 		return;
 	}
 
@@ -2370,6 +2377,9 @@ void exit_signals(struct task_struct *tsk)
 	 * see wants_signal(), do_signal_stop().
 	 */
 	tsk->flags |= PF_EXITING;
+
+	threadgroup_change_end(tsk);
+
 	if (!signal_pending(tsk))
 		goto out;
 
-- 
cgit v1.2.3


From 2f7ee5691eecb67c8108b92001a85563ea336ac5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 12 Dec 2011 18:12:21 -0800
Subject: cgroup: introduce cgroup_taskset and use it in subsys->can_attach(),
 cancel_attach() and attach()

Currently, there's no way to pass multiple tasks to cgroup_subsys
methods necessitating the need for separate per-process and per-task
methods.  This patch introduces cgroup_taskset which can be used to
pass multiple tasks and their associated cgroups to cgroup_subsys
methods.

Three methods - can_attach(), cancel_attach() and attach() - are
converted to use cgroup_taskset.  This unifies passed parameters so
that all methods have access to all information.  Conversions in this
patchset are identical and don't introduce any behavior change.

-v2: documentation updated as per Paul Menage's suggestion.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul Menage <paul@paulmenage.org>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: James Morris <jmorris@namei.org>
---
 Documentation/cgroups/cgroups.txt | 31 ++++++++----
 include/linux/cgroup.h            | 28 +++++++++--
 kernel/cgroup.c                   | 99 +++++++++++++++++++++++++++++++++++----
 kernel/cgroup_freezer.c           |  2 +-
 kernel/cpuset.c                   | 18 +++----
 mm/memcontrol.c                   | 16 +++----
 security/device_cgroup.c          |  7 +--
 7 files changed, 158 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 9c452ef2328c..8a2f302327fa 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -594,15 +594,25 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be
 called multiple times against a cgroup.
 
 int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-	       struct task_struct *task)
+	       struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
 
-Called prior to moving a task into a cgroup; if the subsystem
-returns an error, this will abort the attach operation.  If a NULL
-task is passed, then a successful result indicates that *any*
-unspecified task can be moved into the cgroup. Note that this isn't
-called on a fork. If this method returns 0 (success) then this should
-remain valid while the caller holds cgroup_mutex and it is ensured that either
+Called prior to moving one or more tasks into a cgroup; if the
+subsystem returns an error, this will abort the attach operation.
+@tset contains the tasks to be attached and is guaranteed to have at
+least one task in it.
+
+If there are multiple tasks in the taskset, then:
+  - it's guaranteed that all are from the same thread group
+  - @tset contains all tasks from the thread group whether or not
+    they're switching cgroups
+  - the first task is the leader
+
+Each @tset entry also contains the task's old cgroup and tasks which
+aren't switching cgroup can be skipped easily using the
+cgroup_taskset_for_each() iterator. Note that this isn't called on a
+fork. If this method returns 0 (success) then this should remain valid
+while the caller holds cgroup_mutex and it is ensured that either
 attach() or cancel_attach() will be called in future.
 
 int can_attach_task(struct cgroup *cgrp, struct task_struct *tsk);
@@ -613,14 +623,14 @@ attached (possibly many when using cgroup_attach_proc). Called after
 can_attach.
 
 void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-	       struct task_struct *task, bool threadgroup)
+		   struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
 
 Called when a task attach operation has failed after can_attach() has succeeded.
 A subsystem whose can_attach() has some side-effects should provide this
 function, so that the subsystem can implement a rollback. If not, not necessary.
 This will be called only about subsystems whose can_attach() operation have
-succeeded.
+succeeded. The parameters are identical to can_attach().
 
 void pre_attach(struct cgroup *cgrp);
 (cgroup_mutex held by caller)
@@ -629,11 +639,12 @@ For any non-per-thread attachment work that needs to happen before
 attach_task. Needed by cpuset.
 
 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-	    struct cgroup *old_cgrp, struct task_struct *task)
+	    struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
 
 Called after the task has been attached to the cgroup, to allow any
 post-attachment activity that requires memory allocations or blocking.
+The parameters are identical to can_attach().
 
 void attach_task(struct cgroup *cgrp, struct task_struct *tsk);
 (cgroup_mutex held by caller)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1b7f9d525013..34256ad9e553 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -456,6 +456,28 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task);
 void cgroup_exclude_rmdir(struct cgroup_subsys_state *css);
 void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css);
 
+/*
+ * Control Group taskset, used to pass around set of tasks to cgroup_subsys
+ * methods.
+ */
+struct cgroup_taskset;
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
+struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset);
+int cgroup_taskset_size(struct cgroup_taskset *tset);
+
+/**
+ * cgroup_taskset_for_each - iterate cgroup_taskset
+ * @task: the loop cursor
+ * @skip_cgrp: skip if task's cgroup matches this, %NULL to iterate through all
+ * @tset: taskset to iterate
+ */
+#define cgroup_taskset_for_each(task, skip_cgrp, tset)			\
+	for ((task) = cgroup_taskset_first((tset)); (task);		\
+	     (task) = cgroup_taskset_next((tset)))			\
+		if (!(skip_cgrp) ||					\
+		    cgroup_taskset_cur_cgroup((tset)) != (skip_cgrp))
+
 /*
  * Control Group subsystem type.
  * See Documentation/cgroups/cgroups.txt for details
@@ -467,14 +489,14 @@ struct cgroup_subsys {
 	int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 	void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 	int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-			  struct task_struct *tsk);
+			  struct cgroup_taskset *tset);
 	int (*can_attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
 	void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-			      struct task_struct *tsk);
+			      struct cgroup_taskset *tset);
 	void (*pre_attach)(struct cgroup *cgrp);
 	void (*attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
 	void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-		       struct cgroup *old_cgrp, struct task_struct *tsk);
+		       struct cgroup_taskset *tset);
 	void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
 	void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp,
 			struct cgroup *old_cgrp, struct task_struct *task);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0f2d00519d37..41ee01e392e6 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1757,11 +1757,85 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 }
 EXPORT_SYMBOL_GPL(cgroup_path);
 
+/*
+ * Control Group taskset
+ */
 struct task_and_cgroup {
 	struct task_struct	*task;
 	struct cgroup		*cgrp;
 };
 
+struct cgroup_taskset {
+	struct task_and_cgroup	single;
+	struct flex_array	*tc_array;
+	int			tc_array_len;
+	int			idx;
+	struct cgroup		*cur_cgrp;
+};
+
+/**
+ * cgroup_taskset_first - reset taskset and return the first task
+ * @tset: taskset of interest
+ *
+ * @tset iteration is initialized and the first task is returned.
+ */
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
+{
+	if (tset->tc_array) {
+		tset->idx = 0;
+		return cgroup_taskset_next(tset);
+	} else {
+		tset->cur_cgrp = tset->single.cgrp;
+		return tset->single.task;
+	}
+}
+EXPORT_SYMBOL_GPL(cgroup_taskset_first);
+
+/**
+ * cgroup_taskset_next - iterate to the next task in taskset
+ * @tset: taskset of interest
+ *
+ * Return the next task in @tset.  Iteration must have been initialized
+ * with cgroup_taskset_first().
+ */
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
+{
+	struct task_and_cgroup *tc;
+
+	if (!tset->tc_array || tset->idx >= tset->tc_array_len)
+		return NULL;
+
+	tc = flex_array_get(tset->tc_array, tset->idx++);
+	tset->cur_cgrp = tc->cgrp;
+	return tc->task;
+}
+EXPORT_SYMBOL_GPL(cgroup_taskset_next);
+
+/**
+ * cgroup_taskset_cur_cgroup - return the matching cgroup for the current task
+ * @tset: taskset of interest
+ *
+ * Return the cgroup for the current (last returned) task of @tset.  This
+ * function must be preceded by either cgroup_taskset_first() or
+ * cgroup_taskset_next().
+ */
+struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset)
+{
+	return tset->cur_cgrp;
+}
+EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup);
+
+/**
+ * cgroup_taskset_size - return the number of tasks in taskset
+ * @tset: taskset of interest
+ */
+int cgroup_taskset_size(struct cgroup_taskset *tset)
+{
+	return tset->tc_array ? tset->tc_array_len : 1;
+}
+EXPORT_SYMBOL_GPL(cgroup_taskset_size);
+
+
 /*
  * cgroup_task_migrate - move a task from one cgroup to another.
  *
@@ -1842,6 +1916,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	struct cgroup_subsys *ss, *failed_ss = NULL;
 	struct cgroup *oldcgrp;
 	struct cgroupfs_root *root = cgrp->root;
+	struct cgroup_taskset tset = { };
 
 	/* @tsk either already exited or can't exit until the end */
 	if (tsk->flags & PF_EXITING)
@@ -1852,9 +1927,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	if (cgrp == oldcgrp)
 		return 0;
 
+	tset.single.task = tsk;
+	tset.single.cgrp = oldcgrp;
+
 	for_each_subsys(root, ss) {
 		if (ss->can_attach) {
-			retval = ss->can_attach(ss, cgrp, tsk);
+			retval = ss->can_attach(ss, cgrp, &tset);
 			if (retval) {
 				/*
 				 * Remember on which subsystem the can_attach()
@@ -1885,7 +1963,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 		if (ss->attach_task)
 			ss->attach_task(cgrp, tsk);
 		if (ss->attach)
-			ss->attach(ss, cgrp, oldcgrp, tsk);
+			ss->attach(ss, cgrp, &tset);
 	}
 
 	synchronize_rcu();
@@ -1907,7 +1985,7 @@ out:
 				 */
 				break;
 			if (ss->cancel_attach)
-				ss->cancel_attach(ss, cgrp, tsk);
+				ss->cancel_attach(ss, cgrp, &tset);
 		}
 	}
 	return retval;
@@ -2023,6 +2101,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	struct task_struct *tsk;
 	struct task_and_cgroup *tc;
 	struct flex_array *group;
+	struct cgroup_taskset tset = { };
 	/*
 	 * we need to make sure we have css_sets for all the tasks we're
 	 * going to move -before- we actually start moving them, so that in
@@ -2089,6 +2168,8 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	} while_each_thread(leader, tsk);
 	/* remember the number of threads in the array for later. */
 	group_size = i;
+	tset.tc_array = group;
+	tset.tc_array_len = group_size;
 	read_unlock(&tasklist_lock);
 
 	/* methods shouldn't be called if no task is actually migrating */
@@ -2101,7 +2182,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	 */
 	for_each_subsys(root, ss) {
 		if (ss->can_attach) {
-			retval = ss->can_attach(ss, cgrp, leader);
+			retval = ss->can_attach(ss, cgrp, &tset);
 			if (retval) {
 				failed_ss = ss;
 				goto out_cancel_attach;
@@ -2183,10 +2264,8 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	 * being moved, this call will need to be reworked to communicate that.
 	 */
 	for_each_subsys(root, ss) {
-		if (ss->attach) {
-			tc = flex_array_get(group, 0);
-			ss->attach(ss, cgrp, tc->cgrp, tc->task);
-		}
+		if (ss->attach)
+			ss->attach(ss, cgrp, &tset);
 	}
 
 	/*
@@ -2208,11 +2287,11 @@ out_cancel_attach:
 		for_each_subsys(root, ss) {
 			if (ss == failed_ss) {
 				if (cancel_failed_ss && ss->cancel_attach)
-					ss->cancel_attach(ss, cgrp, leader);
+					ss->cancel_attach(ss, cgrp, &tset);
 				break;
 			}
 			if (ss->cancel_attach)
-				ss->cancel_attach(ss, cgrp, leader);
+				ss->cancel_attach(ss, cgrp, &tset);
 		}
 	}
 out_put_tasks:
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e411a60cc2c8..e95c6fb65cc0 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -159,7 +159,7 @@ static void freezer_destroy(struct cgroup_subsys *ss,
  */
 static int freezer_can_attach(struct cgroup_subsys *ss,
 			      struct cgroup *new_cgroup,
-			      struct task_struct *task)
+			      struct cgroup_taskset *tset)
 {
 	struct freezer *freezer;
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 9fe58c46a426..512bd59e8627 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1371,10 +1371,10 @@ static int fmeter_getrate(struct fmeter *fmp)
 }
 
 /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
-static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-			     struct task_struct *tsk)
+static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+			     struct cgroup_taskset *tset)
 {
-	struct cpuset *cs = cgroup_cs(cont);
+	struct cpuset *cs = cgroup_cs(cgrp);
 
 	if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
 		return -ENOSPC;
@@ -1387,7 +1387,7 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
 	 * set_cpus_allowed_ptr() on all attached tasks before cpus_allowed may
 	 * be changed.
 	 */
-	if (tsk->flags & PF_THREAD_BOUND)
+	if (cgroup_taskset_first(tset)->flags & PF_THREAD_BOUND)
 		return -EINVAL;
 
 	return 0;
@@ -1437,12 +1437,14 @@ static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk)
 	cpuset_update_task_spread_flag(cs, tsk);
 }
 
-static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-			  struct cgroup *oldcont, struct task_struct *tsk)
+static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+			  struct cgroup_taskset *tset)
 {
 	struct mm_struct *mm;
-	struct cpuset *cs = cgroup_cs(cont);
-	struct cpuset *oldcs = cgroup_cs(oldcont);
+	struct task_struct *tsk = cgroup_taskset_first(tset);
+	struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset);
+	struct cpuset *cs = cgroup_cs(cgrp);
+	struct cpuset *oldcs = cgroup_cs(oldcgrp);
 
 	/*
 	 * Change mm, possibly for multiple threads in a threadgroup. This is
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6aff93c98aca..81640e74a709 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5298,8 +5298,9 @@ static void mem_cgroup_clear_mc(void)
 
 static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
 				struct cgroup *cgroup,
-				struct task_struct *p)
+				struct cgroup_taskset *tset)
 {
+	struct task_struct *p = cgroup_taskset_first(tset);
 	int ret = 0;
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
 
@@ -5337,7 +5338,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
 
 static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
 				struct cgroup *cgroup,
-				struct task_struct *p)
+				struct cgroup_taskset *tset)
 {
 	mem_cgroup_clear_mc();
 }
@@ -5454,9 +5455,9 @@ retry:
 
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 				struct cgroup *cont,
-				struct cgroup *old_cont,
-				struct task_struct *p)
+				struct cgroup_taskset *tset)
 {
+	struct task_struct *p = cgroup_taskset_first(tset);
 	struct mm_struct *mm = get_task_mm(p);
 
 	if (mm) {
@@ -5471,19 +5472,18 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 #else	/* !CONFIG_MMU */
 static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
 				struct cgroup *cgroup,
-				struct task_struct *p)
+				struct cgroup_taskset *tset)
 {
 	return 0;
 }
 static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
 				struct cgroup *cgroup,
-				struct task_struct *p)
+				struct cgroup_taskset *tset)
 {
 }
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 				struct cgroup *cont,
-				struct cgroup *old_cont,
-				struct task_struct *p)
+				struct cgroup_taskset *tset)
 {
 }
 #endif
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 4450fbeec411..8b5b5d8612c6 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -62,11 +62,12 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
 struct cgroup_subsys devices_subsys;
 
 static int devcgroup_can_attach(struct cgroup_subsys *ss,
-		struct cgroup *new_cgroup, struct task_struct *task)
+			struct cgroup *new_cgrp, struct cgroup_taskset *set)
 {
-	if (current != task && !capable(CAP_SYS_ADMIN))
-			return -EPERM;
+	struct task_struct *task = cgroup_taskset_first(set);
 
+	if (current != task && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 494c167cf76d02000adf740c215adc69a824ecc9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 12 Dec 2011 18:12:22 -0800
Subject: cgroup: kill subsys->can_attach_task(), pre_attach() and
 attach_task()

These three methods are no longer used.  Kill them.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul Menage <paul@paulmenage.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
---
 Documentation/cgroups/cgroups.txt | 20 ---------------
 include/linux/cgroup.h            |  3 ---
 kernel/cgroup.c                   | 52 ++++-----------------------------------
 3 files changed, 5 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 8a2f302327fa..a7c96ae5557c 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -615,13 +615,6 @@ fork. If this method returns 0 (success) then this should remain valid
 while the caller holds cgroup_mutex and it is ensured that either
 attach() or cancel_attach() will be called in future.
 
-int can_attach_task(struct cgroup *cgrp, struct task_struct *tsk);
-(cgroup_mutex held by caller)
-
-As can_attach, but for operations that must be run once per task to be
-attached (possibly many when using cgroup_attach_proc). Called after
-can_attach.
-
 void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 		   struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
@@ -632,12 +625,6 @@ function, so that the subsystem can implement a rollback. If not, not necessary.
 This will be called only about subsystems whose can_attach() operation have
 succeeded. The parameters are identical to can_attach().
 
-void pre_attach(struct cgroup *cgrp);
-(cgroup_mutex held by caller)
-
-For any non-per-thread attachment work that needs to happen before
-attach_task. Needed by cpuset.
-
 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	    struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
@@ -646,13 +633,6 @@ Called after the task has been attached to the cgroup, to allow any
 post-attachment activity that requires memory allocations or blocking.
 The parameters are identical to can_attach().
 
-void attach_task(struct cgroup *cgrp, struct task_struct *tsk);
-(cgroup_mutex held by caller)
-
-As attach, but for operations that must be run once per task to be attached,
-like can_attach_task. Called before attach. Currently does not support any
-subsystem that might need the old_cgrp for every thread in the group.
-
 void fork(struct cgroup_subsy *ss, struct task_struct *task)
 
 Called when a task is forked into a cgroup.
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 34256ad9e553..7ad5e406c421 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -490,11 +490,8 @@ struct cgroup_subsys {
 	void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 	int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
 			  struct cgroup_taskset *tset);
-	int (*can_attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
 	void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
 			      struct cgroup_taskset *tset);
-	void (*pre_attach)(struct cgroup *cgrp);
-	void (*attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
 	void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
 		       struct cgroup_taskset *tset);
 	void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 41ee01e392e6..1b3b84174ead 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1944,13 +1944,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 				goto out;
 			}
 		}
-		if (ss->can_attach_task) {
-			retval = ss->can_attach_task(cgrp, tsk);
-			if (retval) {
-				failed_ss = ss;
-				goto out;
-			}
-		}
 	}
 
 	retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
@@ -1958,10 +1951,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 		goto out;
 
 	for_each_subsys(root, ss) {
-		if (ss->pre_attach)
-			ss->pre_attach(cgrp);
-		if (ss->attach_task)
-			ss->attach_task(cgrp, tsk);
 		if (ss->attach)
 			ss->attach(ss, cgrp, &tset);
 	}
@@ -2093,7 +2082,6 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 {
 	int retval, i, group_size, nr_migrating_tasks;
 	struct cgroup_subsys *ss, *failed_ss = NULL;
-	bool cancel_failed_ss = false;
 	/* guaranteed to be initialized later, but the compiler needs this */
 	struct css_set *oldcg;
 	struct cgroupfs_root *root = cgrp->root;
@@ -2188,21 +2176,6 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 				goto out_cancel_attach;
 			}
 		}
-		/* a callback to be run on every thread in the threadgroup. */
-		if (ss->can_attach_task) {
-			/* run on each task in the threadgroup. */
-			for (i = 0; i < group_size; i++) {
-				tc = flex_array_get(group, i);
-				if (tc->cgrp == cgrp)
-					continue;
-				retval = ss->can_attach_task(cgrp, tc->task);
-				if (retval) {
-					failed_ss = ss;
-					cancel_failed_ss = true;
-					goto out_cancel_attach;
-				}
-			}
-		}
 	}
 
 	/*
@@ -2234,15 +2207,10 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	}
 
 	/*
-	 * step 3: now that we're guaranteed success wrt the css_sets, proceed
-	 * to move all tasks to the new cgroup, calling ss->attach_task for each
-	 * one along the way. there are no failure cases after here, so this is
-	 * the commit point.
+	 * step 3: now that we're guaranteed success wrt the css_sets,
+	 * proceed to move all tasks to the new cgroup.  There are no
+	 * failure cases after here, so this is the commit point.
 	 */
-	for_each_subsys(root, ss) {
-		if (ss->pre_attach)
-			ss->pre_attach(cgrp);
-	}
 	for (i = 0; i < group_size; i++) {
 		tc = flex_array_get(group, i);
 		/* leave current thread as it is if it's already there */
@@ -2250,18 +2218,11 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 			continue;
 		retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true);
 		BUG_ON(retval);
-		/* attach each task to each subsystem */
-		for_each_subsys(root, ss) {
-			if (ss->attach_task)
-				ss->attach_task(cgrp, tc->task);
-		}
 	}
 	/* nothing is sensitive to fork() after this point. */
 
 	/*
-	 * step 4: do expensive, non-thread-specific subsystem callbacks.
-	 * TODO: if ever a subsystem needs to know the oldcgrp for each task
-	 * being moved, this call will need to be reworked to communicate that.
+	 * step 4: do subsystem attach callbacks.
 	 */
 	for_each_subsys(root, ss) {
 		if (ss->attach)
@@ -2285,11 +2246,8 @@ out_cancel_attach:
 	/* same deal as in cgroup_attach_task */
 	if (retval) {
 		for_each_subsys(root, ss) {
-			if (ss == failed_ss) {
-				if (cancel_failed_ss && ss->cancel_attach)
-					ss->cancel_attach(ss, cgrp, &tset);
+			if (ss == failed_ss)
 				break;
-			}
 			if (ss->cancel_attach)
 				ss->cancel_attach(ss, cgrp, &tset);
 		}
-- 
cgit v1.2.3


From 13c07b0286d340275f2d97adf085cecda37ede37 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 12 Dec 2011 22:06:55 -0800
Subject: linux/log2.h: Fix rounddown_pow_of_two(1)

Exactly like roundup_pow_of_two(1), the rounddown version was buggy for
the case of a compile-time constant '1' argument.  Probably because it
originated from the same code, sharing history with the roundup version
from before the bugfix (for that one, see commit 1a06a52ee1b0: "Fix
roundup_pow_of_two(1)").

However, unlike the roundup version, the fix for rounddown is to just
remove the broken special case entirely.  It's simply not needed - the
generic code

    1UL << ilog2(n)

does the right thing for the constant '1' argment too.  The only reason
roundup needed that special case was because rounding up does so by
subtracting one from the argument (and then adding one to the result)
causing the obvious problems with "ilog2(0)".

But rounddown doesn't do any of that, since ilog2() naturally truncates
(ie "rounds down") to the right rounded down value.  And without the
ilog2(0) case, there's no reason for the special case that had the wrong
value.

tl;dr: rounddown_pow_of_two(1) should be 1, not 0.

Acked-by: Dmitry Torokhov <dtor@vmware.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/log2.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/log2.h b/include/linux/log2.h
index 25b808631cd9..fd7ff3d91e6a 100644
--- a/include/linux/log2.h
+++ b/include/linux/log2.h
@@ -185,7 +185,6 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
 #define rounddown_pow_of_two(n)			\
 (						\
 	__builtin_constant_p(n) ? (		\
-		(n == 1) ? 0 :			\
 		(1UL << ilog2(n))) :		\
 	__rounddown_pow_of_two(n)		\
  )
-- 
cgit v1.2.3


From 6682bb86fe1f3dba7e9cb1f0955775604599cea8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 09:26:45 +0000
Subject: UAPI: Guard linux/sound.h

Place reinclusion guards on linux/sound.h otherwise the UAPI splitter script
won't insert a #include to make the kernel header include the UAPI header.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/sound.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sound.h b/include/linux/sound.h
index 44dcf0570432..fae20ba01fbf 100644
--- a/include/linux/sound.h
+++ b/include/linux/sound.h
@@ -1,3 +1,5 @@
+#ifndef _LINUX_SOUND_H
+#define _LINUX_SOUND_H
 
 /*
  * Minor numbers for the sound driver.
@@ -42,3 +44,5 @@ extern void unregister_sound_mixer(int unit);
 extern void unregister_sound_midi(int unit);
 extern void unregister_sound_dsp(int unit);
 #endif /* __KERNEL__ */
+
+#endif /* _LINUX_SOUND_H */
-- 
cgit v1.2.3


From c15a48d60697f06a847c5862b94a6e24d1b48a7b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 09:26:45 +0000
Subject: UAPI: Guard linux/isdn_divertif.h

Place reinclusion guards on linux/isdn_divertif.h otherwise the UAPI splitter
script won't insert the #include to include the UAPI header from the kernel
header.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/isdn_divertif.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/isdn_divertif.h b/include/linux/isdn_divertif.h
index 07821ca5955f..a5a50f523807 100644
--- a/include/linux/isdn_divertif.h
+++ b/include/linux/isdn_divertif.h
@@ -10,6 +10,8 @@
  *
  */
 
+#ifndef _LINUX_ISDN_DIVERTIF_H
+#define _LINUX_ISDN_DIVERTIF_H
 
 /***********************************************************/
 /* magic value is also used to control version information */
@@ -45,3 +47,5 @@ typedef struct
 /*********************/
 extern int DIVERT_REG_NAME(isdn_divert_if *);
 #endif
+
+#endif /* _LINUX_ISDN_DIVERTIF_H */
-- 
cgit v1.2.3


From 6c9f2ef9b5b3a84952ba2f99472f82e3398ac3e9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 09:26:45 +0000
Subject: UAPI: Guard linux/pmu.h

Place reinclusion guards on linux/pmu.h otherwise the UAPI splitter won't
insert the #include to include the UAPI header from the kernel header.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/pmu.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pmu.h b/include/linux/pmu.h
index cafe98d96948..84e6a55a1202 100644
--- a/include/linux/pmu.h
+++ b/include/linux/pmu.h
@@ -6,6 +6,8 @@
  * Copyright (C) 1998 Paul Mackerras.
  */
 
+#ifndef _LINUX_PMU_H
+#define _LINUX_PMU_H
 
 #define PMU_DRIVER_VERSION	2
 
@@ -207,3 +209,5 @@ extern int pmu_sys_suspended;
 #endif
 
 #endif	/* __KERNEL__ */
+
+#endif /* _LINUX_PMU_H */
-- 
cgit v1.2.3


From fde28451359b476ba065a9a16fa83aa44168fe59 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 09:26:45 +0000
Subject: UAPI: Guard linux/cuda.h

Place reinclusion guards on linux/cuda.h otherwise the UAPI splitter script
won't insert a #include to make the kernel header include the UAPI header.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/cuda.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cuda.h b/include/linux/cuda.h
index 6a3e6385d3f3..9f9865ff781e 100644
--- a/include/linux/cuda.h
+++ b/include/linux/cuda.h
@@ -5,6 +5,9 @@
  * Copyright (C) 1996 Paul Mackerras.
  */
 
+#ifndef _LINUX_CUDA_H
+#define _LINUX_CUDA_H
+
 /* CUDA commands (2nd byte) */
 #define CUDA_WARM_START		0
 #define CUDA_AUTOPOLL		1
@@ -34,3 +37,5 @@ extern int cuda_request(struct adb_request *req,
 extern void cuda_poll(void);
 
 #endif	/* __KERNEL */
+
+#endif /* _LINUX_CUDA_H */
-- 
cgit v1.2.3


From b18da0c56e9ff43a007b6c8e302c62e720964151 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 13 Dec 2011 11:58:49 +0100
Subject: fuse: support ioctl on directories

Multiplexing filesystems may want to support ioctls on the underlying
files and directores (e.g. FS_IOC_{GET,SET}FLAGS).

Ioctl support on directories was missing so add it now.

Reported-by: Antonio SJ Musumeci <bile@landofbile.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dir.c        | 26 ++++++++++++++++++++++++++
 fs/fuse/file.c       |  8 ++++----
 fs/fuse/fuse_i.h     |  2 ++
 include/linux/fuse.h |  7 ++++++-
 4 files changed, 38 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 9f63e493a9b6..344577933f62 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1182,6 +1182,30 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
 	return fuse_fsync_common(file, start, end, datasync, 1);
 }
 
+static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
+
+	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
+	if (fc->minor < 18)
+		return -ENOTTY;
+
+	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
+}
+
+static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
+				   unsigned long arg)
+{
+	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
+
+	if (fc->minor < 18)
+		return -ENOTTY;
+
+	return fuse_ioctl_common(file, cmd, arg,
+				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
+}
+
 static bool update_mtime(unsigned ivalid)
 {
 	/* Always update if mtime is explicitly set  */
@@ -1596,6 +1620,8 @@ static const struct file_operations fuse_dir_operations = {
 	.open		= fuse_dir_open,
 	.release	= fuse_dir_release,
 	.fsync		= fuse_dir_fsync,
+	.unlocked_ioctl	= fuse_dir_ioctl,
+	.compat_ioctl	= fuse_dir_compat_ioctl,
 };
 
 static const struct inode_operations fuse_common_inode_operations = {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c297425cba71..4a199fd93fbd 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1926,8 +1926,8 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 }
 EXPORT_SYMBOL_GPL(fuse_do_ioctl);
 
-static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
-				   unsigned long arg, unsigned int flags)
+long fuse_ioctl_common(struct file *file, unsigned int cmd,
+		       unsigned long arg, unsigned int flags)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
@@ -1944,13 +1944,13 @@ static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
 static long fuse_file_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
-	return fuse_file_ioctl_common(file, cmd, arg, 0);
+	return fuse_ioctl_common(file, cmd, arg, 0);
 }
 
 static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
 				   unsigned long arg)
 {
-	return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
+	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
 }
 
 /*
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index cf6db0a93219..09337bcc2554 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -765,6 +765,8 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 		       size_t count, loff_t *ppos, int write);
 long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 		   unsigned int flags);
+long fuse_ioctl_common(struct file *file, unsigned int cmd,
+		       unsigned long arg, unsigned int flags);
 unsigned fuse_file_poll(struct file *file, poll_table *wait);
 int fuse_dev_release(struct inode *inode, struct file *file);
 
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 464cff526860..446c89718b9c 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -50,6 +50,9 @@
  *
  * 7.17
  *  - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
+ *
+ * 7.18
+ *  - add FUSE_IOCTL_DIR flag
  */
 
 #ifndef _LINUX_FUSE_H
@@ -81,7 +84,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 17
+#define FUSE_KERNEL_MINOR_VERSION 18
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -214,6 +217,7 @@ struct fuse_file_lock {
  * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
  * FUSE_IOCTL_RETRY: retry with new iovecs
  * FUSE_IOCTL_32BIT: 32bit ioctl
+ * FUSE_IOCTL_DIR: is a directory
  *
  * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
  */
@@ -221,6 +225,7 @@ struct fuse_file_lock {
 #define FUSE_IOCTL_UNRESTRICTED	(1 << 1)
 #define FUSE_IOCTL_RETRY	(1 << 2)
 #define FUSE_IOCTL_32BIT	(1 << 3)
+#define FUSE_IOCTL_DIR		(1 << 4)
 
 #define FUSE_IOCTL_MAX_IOV	256
 
-- 
cgit v1.2.3


From 451d0f599934fd97faf54a5d7954b518e66192cb Mon Sep 17 00:00:00 2001
From: John Muir <john@jmuir.com>
Date: Tue, 6 Dec 2011 21:50:06 +0100
Subject: FUSE: Notifying the kernel of deletion.

Allows a FUSE file-system to tell the kernel when a file or directory is
deleted. If the specified dentry has the specified inode number, the kernel will
unhash it.

The current 'fuse_notify_inval_entry' does not cause the kernel to clean up
directories that are in use properly, and as a result the users of those
directories see incorrect semantics from the file-system. The error condition
seen when 'fuse_notify_inval_entry' is used to notify of a deleted directory is
avoided when 'fuse_notify_delete' is used instead.

The following scenario demonstrates the difference:
1. User A chdirs into 'testdir' and starts reading 'testfile'.
2. User B rm -rf 'testdir'.
3. User B creates 'testdir'.
4. User C chdirs into 'testdir'.

If you run the above within the same machine on any file-system (including fuse
file-systems), there is no problem: user C is able to chdir into the new
testdir. The old testdir is removed from the dentry tree, but still open by user
A.

If operations 2 and 3 are performed via the network such that the fuse
file-system uses one of the notify functions to tell the kernel that the nodes
are gone, then the following error occurs for user C while user A holds the
original directory open:

muirj@empacher:~> ls /test/testdir
ls: cannot access /test/testdir: No such file or directory

The issue here is that the kernel still has a dentry for testdir, and so it is
requesting the attributes for the old directory, while the file-system is
responding that the directory no longer exists.

If on the other hand, if the file-system can notify the kernel that the
directory is deleted using the new 'fuse_notify_delete' function, then the above
ls will find the new directory as expected.

Signed-off-by: John Muir <john@jmuir.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dev.c        | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/fuse/dir.c        | 32 +++++++++++++++++++++++++++--
 fs/fuse/fuse_i.h     |  8 +++++++-
 include/linux/fuse.h |  9 +++++++++
 4 files changed, 102 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 2aaf3eaaf13d..5f3368ab0fa9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1378,7 +1378,59 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
 	down_read(&fc->killsb);
 	err = -ENOENT;
 	if (fc->sb)
-		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
+		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
+	up_read(&fc->killsb);
+	kfree(buf);
+	return err;
+
+err:
+	kfree(buf);
+	fuse_copy_finish(cs);
+	return err;
+}
+
+static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
+			      struct fuse_copy_state *cs)
+{
+	struct fuse_notify_delete_out outarg;
+	int err = -ENOMEM;
+	char *buf;
+	struct qstr name;
+
+	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
+	if (!buf)
+		goto err;
+
+	err = -EINVAL;
+	if (size < sizeof(outarg))
+		goto err;
+
+	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+	if (err)
+		goto err;
+
+	err = -ENAMETOOLONG;
+	if (outarg.namelen > FUSE_NAME_MAX)
+		goto err;
+
+	err = -EINVAL;
+	if (size != sizeof(outarg) + outarg.namelen + 1)
+		goto err;
+
+	name.name = buf;
+	name.len = outarg.namelen;
+	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
+	if (err)
+		goto err;
+	fuse_copy_finish(cs);
+	buf[outarg.namelen] = 0;
+	name.hash = full_name_hash(name.name, name.len);
+
+	down_read(&fc->killsb);
+	err = -ENOENT;
+	if (fc->sb)
+		err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
+					       outarg.child, &name);
 	up_read(&fc->killsb);
 	kfree(buf);
 	return err;
@@ -1597,6 +1649,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 	case FUSE_NOTIFY_RETRIEVE:
 		return fuse_notify_retrieve(fc, size, cs);
 
+	case FUSE_NOTIFY_DELETE:
+		return fuse_notify_delete(fc, size, cs);
+
 	default:
 		fuse_copy_finish(cs);
 		return -EINVAL;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 344577933f62..bef8c3011d31 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -868,7 +868,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
 }
 
 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
-			     struct qstr *name)
+			     u64 child_nodeid, struct qstr *name)
 {
 	int err = -ENOTDIR;
 	struct inode *parent;
@@ -895,8 +895,36 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
 
 	fuse_invalidate_attr(parent);
 	fuse_invalidate_entry(entry);
+
+	if (child_nodeid != 0 && entry->d_inode) {
+		mutex_lock(&entry->d_inode->i_mutex);
+		if (get_node_id(entry->d_inode) != child_nodeid) {
+			err = -ENOENT;
+			goto badentry;
+		}
+		if (d_mountpoint(entry)) {
+			err = -EBUSY;
+			goto badentry;
+		}
+		if (S_ISDIR(entry->d_inode->i_mode)) {
+			shrink_dcache_parent(entry);
+			if (!simple_empty(entry)) {
+				err = -ENOTEMPTY;
+				goto badentry;
+			}
+			entry->d_inode->i_flags |= S_DEAD;
+		}
+		dont_mount(entry);
+		clear_nlink(entry->d_inode);
+		err = 0;
+ badentry:
+		mutex_unlock(&entry->d_inode->i_mutex);
+		if (!err)
+			d_delete(entry);
+	} else {
+		err = 0;
+	}
 	dput(entry);
-	err = 0;
 
  unlock:
 	mutex_unlock(&parent->i_mutex);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 09337bcc2554..a571584a091a 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -755,9 +755,15 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
 /**
  * File-system tells the kernel to invalidate parent attributes and
  * the dentry matching parent/name.
+ *
+ * If the child_nodeid is non-zero and:
+ *    - matches the inode number for the dentry matching parent/name,
+ *    - is not a mount point
+ *    - is a file or oan empty directory
+ * then the dentry is unhashed (d_delete()).
  */
 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
-			     struct qstr *name);
+			     u64 child_nodeid, struct qstr *name);
 
 int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
 		 bool isdir);
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 446c89718b9c..8ba2c9460b28 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -53,6 +53,7 @@
  *
  * 7.18
  *  - add FUSE_IOCTL_DIR flag
+ *  - add FUSE_NOTIFY_DELETE
  */
 
 #ifndef _LINUX_FUSE_H
@@ -288,6 +289,7 @@ enum fuse_notify_code {
 	FUSE_NOTIFY_INVAL_ENTRY = 3,
 	FUSE_NOTIFY_STORE = 4,
 	FUSE_NOTIFY_RETRIEVE = 5,
+	FUSE_NOTIFY_DELETE = 6,
 	FUSE_NOTIFY_CODE_MAX,
 };
 
@@ -611,6 +613,13 @@ struct fuse_notify_inval_entry_out {
 	__u32	padding;
 };
 
+struct fuse_notify_delete_out {
+	__u64	parent;
+	__u64	child;
+	__u32	namelen;
+	__u32	padding;
+};
+
 struct fuse_notify_store_out {
 	__u64	nodeid;
 	__u64	offset;
-- 
cgit v1.2.3


From a648bd0c9f613d8f9954eccff6009ecfb26e2722 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 15:07:49 +0000
Subject: UAPI: Make linux/patchkey.h easier to parse

Make linux/patchkey.h easier to parse by making the #elif case associated with
the __KERNEL__ guard a nested #if in a #else of the __KERNEL__ guard.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/patchkey.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/patchkey.h b/include/linux/patchkey.h
index d974a6e92372..aefda0ec6e62 100644
--- a/include/linux/patchkey.h
+++ b/include/linux/patchkey.h
@@ -32,7 +32,8 @@
 #  else
 #    error "could not determine byte order"
 #  endif
-#elif defined(__BYTE_ORDER)
+#else
+#if defined(__BYTE_ORDER)
 #  if __BYTE_ORDER == __BIG_ENDIAN
 #    define _PATCHKEY(id) (0xfd00|id)
 #  elif __BYTE_ORDER == __LITTLE_ENDIAN
@@ -41,5 +42,6 @@
 #    error "could not determine byte order"
 #  endif
 #endif
+#endif
 
 #endif /* _LINUX_PATCHKEY_H */
-- 
cgit v1.2.3


From 989e986f5b1748fe3ff714954dc0d23780c43a9c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 15:07:49 +0000
Subject: UAPI: Fix AHZ multiple inclusion when __KERNEL__ is removed

Fix AHZ multiple inclusion when __KERNEL__ is removed as part of the separation
of the userspace headers from the kernel headers.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/acct.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acct.h b/include/linux/acct.h
index 3e4737fa6cce..d537aa0ec414 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -146,6 +146,9 @@ extern void acct_exit_ns(struct pid_namespace *);
  *
  */
 
+#undef ACCT_VERSION
+#undef AHZ
+
 #ifdef CONFIG_BSD_PROCESS_ACCT_V3
 #define ACCT_VERSION	3
 #define AHZ		100
-- 
cgit v1.2.3


From fdc29805bd7cae133303045fc0249d76f3827613 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 15:07:49 +0000
Subject: UAPI: Don't have a #elif clause in a __KERNEL__ guard in
 linux/soundcard.h

Don't have a #elif clause in a __KERNEL__ guard in linux/soundcard.h to make
parsing easier.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/soundcard.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/soundcard.h b/include/linux/soundcard.h
index fe204fe39f7c..dfcf86f013a9 100644
--- a/include/linux/soundcard.h
+++ b/include/linux/soundcard.h
@@ -198,7 +198,8 @@ typedef struct seq_event_rec {
 #  else
 #    error "could not determine byte order"
 #  endif
-#elif defined(__BYTE_ORDER)
+#else
+# if defined(__BYTE_ORDER)
 #  if __BYTE_ORDER == __BIG_ENDIAN
 #    define AFMT_S16_NE AFMT_S16_BE
 #  elif __BYTE_ORDER == __LITTLE_ENDIAN
@@ -206,6 +207,7 @@ typedef struct seq_event_rec {
 #  else
 #    error "could not determine byte order"
 #  endif
+# endif
 #endif
 
 /*
-- 
cgit v1.2.3


From 1632b9e2a14ce9f4e08faf6c4380431d63319bd3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 15:07:49 +0000
Subject: UAPI: Split trivial #if defined(__KERNEL__) && X conditionals

Split trivial #if defined(__KERNEL__) && X conditionals to make automated
disintegration easier.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 arch/arm/include/asm/hwcap.h       |  4 +++-
 arch/arm/include/asm/swab.h        |  7 +++++--
 arch/arm/include/asm/unistd.h      |  4 +++-
 arch/ia64/include/asm/intrinsics.h | 21 +++++++++++++--------
 arch/mips/include/asm/types.h      | 10 +++++++---
 arch/s390/include/asm/mman.h       |  4 +++-
 arch/tile/include/asm/signal.h     |  4 +++-
 include/linux/mroute6.h            |  4 +++-
 8 files changed, 40 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/hwcap.h b/arch/arm/include/asm/hwcap.h
index c93a22a8b924..917626128a1d 100644
--- a/arch/arm/include/asm/hwcap.h
+++ b/arch/arm/include/asm/hwcap.h
@@ -25,7 +25,8 @@
 #define HWCAP_IDIVT	(1 << 18)
 #define HWCAP_IDIV	(HWCAP_IDIVA | HWCAP_IDIVT)
 
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#if defined(__KERNEL__)
+#if !defined(__ASSEMBLY__)
 /*
  * This yields a mask that user programs can use to figure out what
  * instruction set this cpu supports.
@@ -33,5 +34,6 @@
 #define ELF_HWCAP	(elf_hwcap)
 extern unsigned int elf_hwcap;
 #endif
+#endif
 
 #endif
diff --git a/arch/arm/include/asm/swab.h b/arch/arm/include/asm/swab.h
index 9997ad20eff1..e82adf64d1dd 100644
--- a/arch/arm/include/asm/swab.h
+++ b/arch/arm/include/asm/swab.h
@@ -22,7 +22,8 @@
 #  define __SWAB_64_THRU_32__
 #endif
 
-#if defined(__KERNEL__) && __LINUX_ARM_ARCH__ >= 6
+#if defined(__KERNEL__)
+#if __LINUX_ARM_ARCH__ >= 6
 
 static inline __attribute_const__ __u16 __arch_swab16(__u16 x)
 {
@@ -38,8 +39,10 @@ static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
 }
 #define __arch_swab32 __arch_swab32
 
-#else
+#endif
+#endif
 
+#if !defined(__KERNEL__) || __LINUX_ARM_ARCH__ < 6
 static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
 {
 	__u32 t;
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 4a1123783806..512cd1473454 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -427,7 +427,8 @@
 /*
  * The following syscalls are obsolete and no longer available for EABI.
  */
-#if defined(__ARM_EABI__) && !defined(__KERNEL__)
+#if !defined(__KERNEL__)
+#if defined(__ARM_EABI__)
 #undef __NR_time
 #undef __NR_umount
 #undef __NR_stime
@@ -441,6 +442,7 @@
 #undef __NR_syscall
 #undef __NR_ipc
 #endif
+#endif
 
 #ifdef __KERNEL__
 
diff --git a/arch/ia64/include/asm/intrinsics.h b/arch/ia64/include/asm/intrinsics.h
index 111ed5222892..e4076b511829 100644
--- a/arch/ia64/include/asm/intrinsics.h
+++ b/arch/ia64/include/asm/intrinsics.h
@@ -201,16 +201,21 @@ extern long ia64_cmpxchg_called_with_bad_pointer (void);
 #endif
 
 #ifndef __ASSEMBLY__
-#if defined(CONFIG_PARAVIRT) && defined(__KERNEL__)
-#ifdef ASM_SUPPORTED
-# define IA64_INTRINSIC_API(name)	paravirt_ ## name
-#else
-# define IA64_INTRINSIC_API(name)	pv_cpu_ops.name
-#endif
-#define IA64_INTRINSIC_MACRO(name)	paravirt_ ## name
-#else
+
 #define IA64_INTRINSIC_API(name)	ia64_native_ ## name
 #define IA64_INTRINSIC_MACRO(name)	ia64_native_ ## name
+
+#if defined(__KERNEL__)
+#if defined(CONFIG_PARAVIRT)
+# undef IA64_INTRINSIC_API
+# undef IA64_INTRINSIC_MACRO
+# ifdef ASM_SUPPORTED
+#  define IA64_INTRINSIC_API(name)	paravirt_ ## name
+# else
+#  define IA64_INTRINSIC_API(name)	pv_cpu_ops.name
+# endif
+#define IA64_INTRINSIC_MACRO(name)	paravirt_ ## name
+#endif
 #endif
 
 /************************************************/
diff --git a/arch/mips/include/asm/types.h b/arch/mips/include/asm/types.h
index 533812b61881..9b96461bc1e7 100644
--- a/arch/mips/include/asm/types.h
+++ b/arch/mips/include/asm/types.h
@@ -15,10 +15,14 @@
  * We don't use int-l64.h for the kernel anymore but still use it for
  * userspace to avoid code changes.
  */
-#if (_MIPS_SZLONG == 64) && !defined(__KERNEL__)
-# include <asm-generic/int-l64.h>
-#else
+#ifdef __KERNEL__
 # include <asm-generic/int-ll64.h>
+#else
+# if _MIPS_SZLONG == 64
+#  include <asm-generic/int-l64.h>
+# else
+#  include <asm-generic/int-ll64.h>
+# endif
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
index 4e9c8ae0a637..d49760e63506 100644
--- a/arch/s390/include/asm/mman.h
+++ b/arch/s390/include/asm/mman.h
@@ -11,9 +11,11 @@
 
 #include <asm-generic/mman.h>
 
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__) && defined(CONFIG_64BIT)
+#if defined(__KERNEL__)
+#if !defined(__ASSEMBLY__) && defined(CONFIG_64BIT)
 int s390_mmap_check(unsigned long addr, unsigned long len);
 #define arch_mmap_check(addr,len,flags)	s390_mmap_check(addr,len)
 #endif
+#endif
 
 #endif /* __S390_MMAN_H__ */
diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h
index 1e1e616783eb..1e5e49aad548 100644
--- a/arch/tile/include/asm/signal.h
+++ b/arch/tile/include/asm/signal.h
@@ -23,7 +23,8 @@
 
 #include <asm-generic/signal.h>
 
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#if defined(__KERNEL__)
+#if !defined(__ASSEMBLY__)
 struct pt_regs;
 int restore_sigcontext(struct pt_regs *, struct sigcontext __user *);
 int setup_sigcontext(struct sigcontext __user *, struct pt_regs *);
@@ -33,5 +34,6 @@ void signal_fault(const char *type, struct pt_regs *,
 void trace_unhandled_signal(const char *type, struct pt_regs *regs,
 			    unsigned long address, int signo);
 #endif
+#endif
 
 #endif /* _ASM_TILE_SIGNAL_H */
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index a3759cb0ac10..6d8c7251eb8d 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -43,9 +43,11 @@ typedef unsigned short mifi_t;
 typedef	__u32		if_mask;
 #define NIFBITS (sizeof(if_mask) * 8)        /* bits per mask */
 
-#if !defined(__KERNEL__) && !defined(DIV_ROUND_UP)
+#if !defined(__KERNEL__)
+#if !defined(DIV_ROUND_UP)
 #define	DIV_ROUND_UP(x,y)	(((x) + ((y) - 1)) / (y))
 #endif
+#endif
 
 typedef struct if_set {
 	if_mask ifs_bits[DIV_ROUND_UP(IF_SETSIZE, NIFBITS)];
-- 
cgit v1.2.3


From 4af679cd7cbb0a0d8774b5cdb34bffcaa4e86e52 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 13 Dec 2011 10:36:20 +0100
Subject: kref: Inline all functions

These are tiny functions, there's no point in having them out-of-line.

Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-8eccvi2ur2fzgi00xdjlbf5z@git.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kref.h | 80 ++++++++++++++++++++++++++++++++++++++++---
 lib/Makefile         |  2 +-
 lib/kref.c           | 97 ----------------------------------------------------
 3 files changed, 76 insertions(+), 103 deletions(-)
 delete mode 100644 lib/kref.c

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index d4a62ab2ee5e..1cbae9f2ef77 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -16,15 +16,85 @@
 #define _KREF_H_
 
 #include <linux/types.h>
+#include <linux/slab.h>
 
 struct kref {
 	atomic_t refcount;
 };
 
-void kref_init(struct kref *kref);
-void kref_get(struct kref *kref);
-int kref_put(struct kref *kref, void (*release) (struct kref *kref));
-int kref_sub(struct kref *kref, unsigned int count,
-	     void (*release) (struct kref *kref));
+/**
+ * kref_init - initialize object.
+ * @kref: object in question.
+ */
+static inline void kref_init(struct kref *kref)
+{
+	atomic_set(&kref->refcount, 1);
+	smp_mb();
+}
+
+/**
+ * kref_get - increment refcount for object.
+ * @kref: object.
+ */
+static inline void kref_get(struct kref *kref)
+{
+	WARN_ON(!atomic_read(&kref->refcount));
+	atomic_inc(&kref->refcount);
+	smp_mb__after_atomic_inc();
+}
+
+/**
+ * kref_put - decrement refcount for object.
+ * @kref: object.
+ * @release: pointer to the function that will clean up the object when the
+ *	     last reference to the object is released.
+ *	     This pointer is required, and it is not acceptable to pass kfree
+ *	     in as this function.
+ *
+ * Decrement the refcount, and if 0, call release().
+ * Return 1 if the object was removed, otherwise return 0.  Beware, if this
+ * function returns 0, you still can not count on the kref from remaining in
+ * memory.  Only use the return value if you want to see if the kref is now
+ * gone, not present.
+ */
+static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref))
+{
+	WARN_ON(release == NULL);
+	WARN_ON(release == (void (*)(struct kref *))kfree);
+
+	if (atomic_dec_and_test(&kref->refcount)) {
+		release(kref);
+		return 1;
+	}
+	return 0;
+}
+
+
+/**
+ * kref_sub - subtract a number of refcounts for object.
+ * @kref: object.
+ * @count: Number of recounts to subtract.
+ * @release: pointer to the function that will clean up the object when the
+ *	     last reference to the object is released.
+ *	     This pointer is required, and it is not acceptable to pass kfree
+ *	     in as this function.
+ *
+ * Subtract @count from the refcount, and if 0, call release().
+ * Return 1 if the object was removed, otherwise return 0.  Beware, if this
+ * function returns 0, you still can not count on the kref from remaining in
+ * memory.  Only use the return value if you want to see if the kref is now
+ * gone, not present.
+ */
+static inline int kref_sub(struct kref *kref, unsigned int count,
+	     void (*release)(struct kref *kref))
+{
+	WARN_ON(release == NULL);
+	WARN_ON(release == (void (*)(struct kref *))kfree);
 
+	if (atomic_sub_and_test((int) count, &kref->refcount)) {
+		release(kref);
+		return 1;
+	}
+	return 0;
+}
 #endif /* _KREF_H_ */
diff --git a/lib/Makefile b/lib/Makefile
index a4da283f5dc0..6f195ff6a1a1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -17,7 +17,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 
-lib-y	+= kobject.o kref.o klist.o
+lib-y	+= kobject.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
diff --git a/lib/kref.c b/lib/kref.c
deleted file mode 100644
index 3efb882b11db..000000000000
--- a/lib/kref.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * kref.c - library routines for handling generic reference counted objects
- *
- * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
- * Copyright (C) 2004 IBM Corp.
- *
- * based on lib/kobject.c which was:
- * Copyright (C) 2002-2003 Patrick Mochel <mochel@osdl.org>
- *
- * This file is released under the GPLv2.
- *
- */
-
-#include <linux/kref.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-
-/**
- * kref_init - initialize object.
- * @kref: object in question.
- */
-void kref_init(struct kref *kref)
-{
-	atomic_set(&kref->refcount, 1);
-	smp_mb();
-}
-
-/**
- * kref_get - increment refcount for object.
- * @kref: object.
- */
-void kref_get(struct kref *kref)
-{
-	WARN_ON(!atomic_read(&kref->refcount));
-	atomic_inc(&kref->refcount);
-	smp_mb__after_atomic_inc();
-}
-
-/**
- * kref_put - decrement refcount for object.
- * @kref: object.
- * @release: pointer to the function that will clean up the object when the
- *	     last reference to the object is released.
- *	     This pointer is required, and it is not acceptable to pass kfree
- *	     in as this function.
- *
- * Decrement the refcount, and if 0, call release().
- * Return 1 if the object was removed, otherwise return 0.  Beware, if this
- * function returns 0, you still can not count on the kref from remaining in
- * memory.  Only use the return value if you want to see if the kref is now
- * gone, not present.
- */
-int kref_put(struct kref *kref, void (*release)(struct kref *kref))
-{
-	WARN_ON(release == NULL);
-	WARN_ON(release == (void (*)(struct kref *))kfree);
-
-	if (atomic_dec_and_test(&kref->refcount)) {
-		release(kref);
-		return 1;
-	}
-	return 0;
-}
-
-
-/**
- * kref_sub - subtract a number of refcounts for object.
- * @kref: object.
- * @count: Number of recounts to subtract.
- * @release: pointer to the function that will clean up the object when the
- *	     last reference to the object is released.
- *	     This pointer is required, and it is not acceptable to pass kfree
- *	     in as this function.
- *
- * Subtract @count from the refcount, and if 0, call release().
- * Return 1 if the object was removed, otherwise return 0.  Beware, if this
- * function returns 0, you still can not count on the kref from remaining in
- * memory.  Only use the return value if you want to see if the kref is now
- * gone, not present.
- */
-int kref_sub(struct kref *kref, unsigned int count,
-	     void (*release)(struct kref *kref))
-{
-	WARN_ON(release == NULL);
-	WARN_ON(release == (void (*)(struct kref *))kfree);
-
-	if (atomic_sub_and_test((int) count, &kref->refcount)) {
-		release(kref);
-		return 1;
-	}
-	return 0;
-}
-
-EXPORT_SYMBOL(kref_init);
-EXPORT_SYMBOL(kref_get);
-EXPORT_SYMBOL(kref_put);
-EXPORT_SYMBOL(kref_sub);
-- 
cgit v1.2.3


From 47dbd7d90ad80edb67822f327241edcab8f3f46f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 10 Dec 2011 11:43:43 +0100
Subject: kref: Implement kref_put in terms of kref_sub

Less lines of code is better.

Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kref.h | 28 ++++++++++------------------
 1 file changed, 10 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index 1cbae9f2ef77..fa9907a541e2 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -44,57 +44,49 @@ static inline void kref_get(struct kref *kref)
 }
 
 /**
- * kref_put - decrement refcount for object.
+ * kref_sub - subtract a number of refcounts for object.
  * @kref: object.
+ * @count: Number of recounts to subtract.
  * @release: pointer to the function that will clean up the object when the
  *	     last reference to the object is released.
  *	     This pointer is required, and it is not acceptable to pass kfree
  *	     in as this function.
  *
- * Decrement the refcount, and if 0, call release().
+ * Subtract @count from the refcount, and if 0, call release().
  * Return 1 if the object was removed, otherwise return 0.  Beware, if this
  * function returns 0, you still can not count on the kref from remaining in
  * memory.  Only use the return value if you want to see if the kref is now
  * gone, not present.
  */
-static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref))
+static inline int kref_sub(struct kref *kref, unsigned int count,
+	     void (*release)(struct kref *kref))
 {
 	WARN_ON(release == NULL);
 	WARN_ON(release == (void (*)(struct kref *))kfree);
 
-	if (atomic_dec_and_test(&kref->refcount)) {
+	if (atomic_sub_and_test((int) count, &kref->refcount)) {
 		release(kref);
 		return 1;
 	}
 	return 0;
 }
 
-
 /**
- * kref_sub - subtract a number of refcounts for object.
+ * kref_put - decrement refcount for object.
  * @kref: object.
- * @count: Number of recounts to subtract.
  * @release: pointer to the function that will clean up the object when the
  *	     last reference to the object is released.
  *	     This pointer is required, and it is not acceptable to pass kfree
  *	     in as this function.
  *
- * Subtract @count from the refcount, and if 0, call release().
+ * Decrement the refcount, and if 0, call release().
  * Return 1 if the object was removed, otherwise return 0.  Beware, if this
  * function returns 0, you still can not count on the kref from remaining in
  * memory.  Only use the return value if you want to see if the kref is now
  * gone, not present.
  */
-static inline int kref_sub(struct kref *kref, unsigned int count,
-	     void (*release)(struct kref *kref))
+static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref))
 {
-	WARN_ON(release == NULL);
-	WARN_ON(release == (void (*)(struct kref *))kfree);
-
-	if (atomic_sub_and_test((int) count, &kref->refcount)) {
-		release(kref);
-		return 1;
-	}
-	return 0;
+	return kref_sub(kref, 1, release);
 }
 #endif /* _KREF_H_ */
-- 
cgit v1.2.3


From 3c8ed88974472b928489e3943616500ce2ad0cd8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 10 Dec 2011 11:43:44 +0100
Subject: kref: Remove the memory barriers

Commit 1b0b3b9980e ("kref: fix CPU ordering with respect to krefs")
wrongly adds memory barriers to kref.

It states:

  some atomic operations are only atomic, not ordered. Thus a CPU is allowed
  to reorder memory references to an object to before the reference is
  obtained. This fixes it.

While true, it fails to show why this is a problem. I say it is not a
problem because if there is a race with kref_put() such that we could
end up referencing a free'd object without this memory barrier, we
would still have that race with the memory barrier.

The kref_put() in question could complete (and free the object) before
the atomic_inc() and we'd still be up shit creek.

The kref_init() case is even worse, if your object is published at this
time you're so wrong the memory barrier won't make a difference what
so ever. If its not published, the act of publishing should include
the needed barriers/locks to make sure all writes prior to the act of
publishing are complete such that others will only observe a complete
object.

Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Oliver Neukum <oneukum@suse.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kref.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index fa9907a541e2..d66c88a3b48c 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -29,7 +29,6 @@ struct kref {
 static inline void kref_init(struct kref *kref)
 {
 	atomic_set(&kref->refcount, 1);
-	smp_mb();
 }
 
 /**
@@ -40,7 +39,6 @@ static inline void kref_get(struct kref *kref)
 {
 	WARN_ON(!atomic_read(&kref->refcount));
 	atomic_inc(&kref->refcount);
-	smp_mb__after_atomic_inc();
 }
 
 /**
-- 
cgit v1.2.3


From 623ed84b1f9553bc962c2aca92f488aa6f27ecd1 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 13 Dec 2011 04:10:33 +0000
Subject: mlx4_core: initial header-file changes for SRIOV support

These changes will not affect module operation as yet. They
are only to get some structs and enums in place for use by
subsequent patches (making those smaller).

Added here:
* sriov state structs and inlines (mlx4_is_master/slave/mfunc)
* comm-channel and vhcr support structures
* enum values for new FW and comm-channel virtual commands
  (i.e., commands, passed via the comm channel to the PF-driver).
* prototypes for many command wrapper functions (used by the
  PF context for processing FW commands passed to it by the VFs).
* struct mlx4_eqe is moved from eq.c to mlx4.h (it will be used
  by other mlx4_core source files).

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_port.h |   6 -
 drivers/net/ethernet/mellanox/mlx4/eq.c      |  39 ---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h    | 456 ++++++++++++++++++++++++++-
 include/linux/mlx4/cmd.h                     |  29 +-
 include/linux/mlx4/device.h                  |  42 ++-
 5 files changed, 523 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.h b/drivers/net/ethernet/mellanox/mlx4/en_port.h
index 19eb244f5165..c1bb834414b5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.h
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.h
@@ -39,12 +39,6 @@
 #define SET_PORT_PROMISC_SHIFT	31
 #define SET_PORT_MC_PROMISC_SHIFT	30
 
-enum {
-	MLX4_CMD_SET_VLAN_FLTR  = 0x47,
-	MLX4_CMD_SET_MCAST_FLTR = 0x48,
-	MLX4_CMD_DUMP_ETH_STATS = 0x49,
-};
-
 enum {
 	MCAST_DIRECT_ONLY       = 0,
 	MCAST_DIRECT            = 1,
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 24ee96775996..ad9e3770b050 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -102,45 +102,6 @@ struct mlx4_eq_context {
 			       (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT)	    | \
 			       (1ull << MLX4_EVENT_TYPE_CMD))
 
-struct mlx4_eqe {
-	u8			reserved1;
-	u8			type;
-	u8			reserved2;
-	u8			subtype;
-	union {
-		u32		raw[6];
-		struct {
-			__be32	cqn;
-		} __packed comp;
-		struct {
-			u16	reserved1;
-			__be16	token;
-			u32	reserved2;
-			u8	reserved3[3];
-			u8	status;
-			__be64	out_param;
-		} __packed cmd;
-		struct {
-			__be32	qpn;
-		} __packed qp;
-		struct {
-			__be32	srqn;
-		} __packed srq;
-		struct {
-			__be32	cqn;
-			u32	reserved1;
-			u8	reserved2[3];
-			u8	syndrome;
-		} __packed cq_err;
-		struct {
-			u32	reserved1[2];
-			__be32	port;
-		} __packed port_change;
-	}			event;
-	u8			reserved3[3];
-	u8			owner;
-} __packed;
-
 static void eq_set_ci(struct mlx4_eq *eq, int req_not)
 {
 	__raw_writel((__force u32) cpu_to_be32((eq->cons_index & 0xffffff) |
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 5dfa68ffc11c..69177614666f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -46,6 +46,7 @@
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/driver.h>
 #include <linux/mlx4/doorbell.h>
+#include <linux/mlx4/cmd.h>
 
 #define DRV_NAME	"mlx4_core"
 #define DRV_VERSION	"1.0"
@@ -54,7 +55,9 @@
 enum {
 	MLX4_HCR_BASE		= 0x80680,
 	MLX4_HCR_SIZE		= 0x0001c,
-	MLX4_CLR_INT_SIZE	= 0x00008
+	MLX4_CLR_INT_SIZE	= 0x00008,
+	MLX4_SLAVE_COMM_BASE	= 0x0,
+	MLX4_COMM_PAGESIZE	= 0x1000
 };
 
 enum {
@@ -80,6 +83,94 @@ enum {
 	MLX4_NUM_CMPTS		= MLX4_CMPT_NUM_TYPE << MLX4_CMPT_SHIFT
 };
 
+enum mlx4_mr_state {
+	MLX4_MR_DISABLED = 0,
+	MLX4_MR_EN_HW,
+	MLX4_MR_EN_SW
+};
+
+#define MLX4_COMM_TIME		10000
+enum {
+	MLX4_COMM_CMD_RESET,
+	MLX4_COMM_CMD_VHCR0,
+	MLX4_COMM_CMD_VHCR1,
+	MLX4_COMM_CMD_VHCR2,
+	MLX4_COMM_CMD_VHCR_EN,
+	MLX4_COMM_CMD_VHCR_POST,
+	MLX4_COMM_CMD_FLR = 254
+};
+
+/*The flag indicates that the slave should delay the RESET cmd*/
+#define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb
+/*indicates how many retries will be done if we are in the middle of FLR*/
+#define NUM_OF_RESET_RETRIES	10
+#define SLEEP_TIME_IN_RESET	(2 * 1000)
+enum mlx4_resource {
+	RES_QP,
+	RES_CQ,
+	RES_SRQ,
+	RES_XRCD,
+	RES_MPT,
+	RES_MTT,
+	RES_MAC,
+	RES_VLAN,
+	RES_EQ,
+	RES_COUNTER,
+	MLX4_NUM_OF_RESOURCE_TYPE
+};
+
+enum mlx4_alloc_mode {
+	RES_OP_RESERVE,
+	RES_OP_RESERVE_AND_MAP,
+	RES_OP_MAP_ICM,
+};
+
+
+/*
+ *Virtual HCR structures.
+ * mlx4_vhcr is the sw representation, in machine endianess
+ *
+ * mlx4_vhcr_cmd is the formalized structure, the one that is passed
+ * to FW to go through communication channel.
+ * It is big endian, and has the same structure as the physical HCR
+ * used by command interface
+ */
+struct mlx4_vhcr {
+	u64	in_param;
+	u64	out_param;
+	u32	in_modifier;
+	u32	errno;
+	u16	op;
+	u16	token;
+	u8	op_modifier;
+	u8	e_bit;
+};
+
+struct mlx4_vhcr_cmd {
+	__be64 in_param;
+	__be32 in_modifier;
+	__be64 out_param;
+	__be16 token;
+	u16 reserved;
+	u8 status;
+	u8 flags;
+	__be16 opcode;
+};
+
+struct mlx4_cmd_info {
+	u16 opcode;
+	bool has_inbox;
+	bool has_outbox;
+	bool out_is_imm;
+	bool encode_slave_id;
+	int (*verify)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr,
+		      struct mlx4_cmd_mailbox *inbox);
+	int (*wrapper)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr,
+		       struct mlx4_cmd_mailbox *inbox,
+		       struct mlx4_cmd_mailbox *outbox,
+		       struct mlx4_cmd_info *cmd);
+};
+
 #ifdef CONFIG_MLX4_DEBUG
 extern int mlx4_debug_level;
 #else /* CONFIG_MLX4_DEBUG */
@@ -99,6 +190,9 @@ do {									\
 #define mlx4_warn(mdev, format, arg...) \
 	dev_warn(&mdev->pdev->dev, format, ##arg)
 
+#define MLX4_MAX_NUM_SLAVES	(MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF)
+#define ALL_SLAVES 0xff
+
 struct mlx4_bitmap {
 	u32			last;
 	u32			top;
@@ -130,6 +224,62 @@ struct mlx4_icm_table {
 	struct mlx4_icm	      **icm;
 };
 
+struct mlx4_eqe {
+	u8			reserved1;
+	u8			type;
+	u8			reserved2;
+	u8			subtype;
+	union {
+		u32		raw[6];
+		struct {
+			__be32	cqn;
+		} __packed comp;
+		struct {
+			u16	reserved1;
+			__be16	token;
+			u32	reserved2;
+			u8	reserved3[3];
+			u8	status;
+			__be64	out_param;
+		} __packed cmd;
+		struct {
+			__be32	qpn;
+		} __packed qp;
+		struct {
+			__be32	srqn;
+		} __packed srq;
+		struct {
+			__be32	cqn;
+			u32	reserved1;
+			u8	reserved2[3];
+			u8	syndrome;
+		} __packed cq_err;
+		struct {
+			u32	reserved1[2];
+			__be32	port;
+		} __packed port_change;
+		struct {
+			#define COMM_CHANNEL_BIT_ARRAY_SIZE	4
+			u32 reserved;
+			u32 bit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE];
+		} __packed comm_channel_arm;
+		struct {
+			u8	port;
+			u8	reserved[3];
+			__be64	mac;
+		} __packed mac_update;
+		struct {
+			u8	port;
+		} __packed sw_event;
+		struct {
+			__be32	slave_id;
+		} __packed flr_event;
+	}			event;
+	u8			slave_id;
+	u8			reserved3[2];
+	u8			owner;
+} __packed;
+
 struct mlx4_eq {
 	struct mlx4_dev	       *dev;
 	void __iomem	       *doorbell;
@@ -142,6 +292,18 @@ struct mlx4_eq {
 	struct mlx4_mtt		mtt;
 };
 
+struct mlx4_slave_eqe {
+	u8 type;
+	u8 port;
+	u32 param;
+};
+
+struct mlx4_slave_event_eq_info {
+	u32 eqn;
+	u16 token;
+	u64 event_type;
+};
+
 struct mlx4_profile {
 	int			num_qp;
 	int			rdmarc_per_qp;
@@ -155,17 +317,30 @@ struct mlx4_profile {
 struct mlx4_fw {
 	u64			clr_int_base;
 	u64			catas_offset;
+	u64			comm_base;
 	struct mlx4_icm	       *fw_icm;
 	struct mlx4_icm	       *aux_icm;
 	u32			catas_size;
 	u16			fw_pages;
 	u8			clr_int_bar;
 	u8			catas_bar;
+	u8			comm_bar;
+};
+
+struct mlx4_comm {
+	u32			slave_write;
+	u32			slave_read;
 };
 
 #define MGM_QPN_MASK       0x00FFFFFF
 #define MGM_BLCK_LB_BIT    30
 
+#define VLAN_FLTR_SIZE	128
+
+struct mlx4_vlan_fltr {
+	__be32 entry[VLAN_FLTR_SIZE];
+};
+
 struct mlx4_promisc_qp {
 	struct list_head list;
 	u32 qpn;
@@ -184,12 +359,88 @@ struct mlx4_mgm {
 	u8			gid[16];
 	__be32			qp[MLX4_QP_PER_MGM];
 };
+
+struct mlx4_slave_state {
+	u8 comm_toggle;
+	u8 last_cmd;
+	u8 init_port_mask;
+	bool active;
+	u8 function;
+	dma_addr_t vhcr_dma;
+	u16 mtu[MLX4_MAX_PORTS + 1];
+	__be32 ib_cap_mask[MLX4_MAX_PORTS + 1];
+	struct mlx4_slave_eqe eq[MLX4_MFUNC_MAX_EQES];
+	struct list_head mcast_filters[MLX4_MAX_PORTS + 1];
+	struct mlx4_vlan_fltr *vlan_filter[MLX4_MAX_PORTS + 1];
+	struct mlx4_slave_event_eq_info event_eq;
+	u16 eq_pi;
+	u16 eq_ci;
+	spinlock_t lock;
+	/*initialized via the kzalloc*/
+	u8 is_slave_going_down;
+	u32 cookie;
+};
+
+struct slave_list {
+	struct mutex mutex;
+	struct list_head res_list[MLX4_NUM_OF_RESOURCE_TYPE];
+};
+
+struct mlx4_resource_tracker {
+	spinlock_t lock;
+	/* tree for each resources */
+	struct radix_tree_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE];
+	/* num_of_slave's lists, one per slave */
+	struct slave_list *slave_list;
+};
+
+#define SLAVE_EVENT_EQ_SIZE	128
+struct mlx4_slave_event_eq {
+	u32 eqn;
+	u32 cons;
+	u32 prod;
+	struct mlx4_eqe event_eqe[SLAVE_EVENT_EQ_SIZE];
+};
+
+struct mlx4_master_qp0_state {
+	int proxy_qp0_active;
+	int qp0_active;
+	int port_active;
+};
+
+struct mlx4_mfunc_master_ctx {
+	struct mlx4_slave_state *slave_state;
+	struct mlx4_master_qp0_state qp0_state[MLX4_MAX_PORTS + 1];
+	int			init_port_ref[MLX4_MAX_PORTS + 1];
+	u16			max_mtu[MLX4_MAX_PORTS + 1];
+	int			disable_mcast_ref[MLX4_MAX_PORTS + 1];
+	struct mlx4_resource_tracker res_tracker;
+	struct workqueue_struct *comm_wq;
+	struct work_struct	comm_work;
+	struct work_struct	slave_event_work;
+	struct work_struct	slave_flr_event_work;
+	spinlock_t		slave_state_lock;
+	u32			comm_arm_bit_vector[4];
+	struct mlx4_eqe		cmd_eqe;
+	struct mlx4_slave_event_eq slave_eq;
+	struct mutex		gen_eqe_mutex[MLX4_MFUNC_MAX];
+};
+
+struct mlx4_mfunc {
+	struct mlx4_comm __iomem       *comm;
+	struct mlx4_vhcr_cmd	       *vhcr;
+	dma_addr_t			vhcr_dma;
+
+	struct mlx4_mfunc_master_ctx	master;
+};
+
 struct mlx4_cmd {
 	struct pci_pool	       *pool;
 	void __iomem	       *hcr;
 	struct mutex		hcr_mutex;
 	struct semaphore	poll_sem;
 	struct semaphore	event_sem;
+	struct semaphore	slave_sem;
 	int			max_cmds;
 	spinlock_t		context_lock;
 	int			free_head;
@@ -197,6 +448,7 @@ struct mlx4_cmd {
 	u16			token_mask;
 	u8			use_events;
 	u8			toggle;
+	u8			comm_toggle;
 };
 
 struct mlx4_uar_table {
@@ -333,6 +585,7 @@ struct mlx4_priv {
 
 	struct mlx4_fw		fw;
 	struct mlx4_cmd		cmd;
+	struct mlx4_mfunc	mfunc;
 
 	struct mlx4_bitmap	pd_bitmap;
 	struct mlx4_bitmap	xrcd_bitmap;
@@ -404,6 +657,42 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
 void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
 
+int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_SYNC_TPT_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
+		     struct mlx4_vhcr *vhcr,
+		     struct mlx4_cmd_mailbox *inbox,
+		     struct mlx4_cmd_mailbox *outbox,
+		     struct mlx4_cmd_info *cmd);
+
 void mlx4_start_catas_poll(struct mlx4_dev *dev);
 void mlx4_stop_catas_poll(struct mlx4_dev *dev);
 void mlx4_catas_init(void);
@@ -419,6 +708,101 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 		      struct mlx4_profile *request,
 		      struct mlx4_dev_cap *dev_cap,
 		      struct mlx4_init_hca_param *init_hca);
+void mlx4_master_comm_channel(struct work_struct *work);
+void mlx4_gen_slave_eqe(struct work_struct *work);
+void mlx4_master_handle_slave_flr(struct work_struct *work);
+
+int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave,
+			struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox,
+			struct mlx4_cmd_mailbox *outbox,
+			struct mlx4_cmd_info *cmd);
+int mlx4_COMM_INT_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+			    struct mlx4_vhcr *vhcr,
+			    struct mlx4_cmd_mailbox *inbox,
+			    struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_ARM_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+			 struct mlx4_vhcr *vhcr,
+			 struct mlx4_cmd_mailbox *inbox,
+			 struct mlx4_cmd_mailbox *outbox,
+			 struct mlx4_cmd_info *cmd);
+int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave,
+			struct mlx4_vhcr *vhcr,
+			struct mlx4_cmd_mailbox *inbox,
+			struct mlx4_cmd_mailbox *outbox,
+			struct mlx4_cmd_info *cmd);
+int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+			     struct mlx4_vhcr *vhcr,
+			     struct mlx4_cmd_mailbox *inbox,
+			     struct mlx4_cmd_mailbox *outbox,
+			     struct mlx4_cmd_info *cmd);
+int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
+			     struct mlx4_vhcr *vhcr,
+			     struct mlx4_cmd_mailbox *inbox,
+			     struct mlx4_cmd_mailbox *outbox,
+			     struct mlx4_cmd_info *cmd);
+int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave,
+			 struct mlx4_vhcr *vhcr,
+			 struct mlx4_cmd_mailbox *inbox,
+			 struct mlx4_cmd_mailbox *outbox,
+			 struct mlx4_cmd_info *cmd);
+
+int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe);
 
 int mlx4_cmd_init(struct mlx4_dev *dev);
 void mlx4_cmd_cleanup(struct mlx4_dev *dev);
@@ -452,12 +836,82 @@ void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table);
 void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table);
 
 int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port);
+/* resource tracker functions*/
+int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev,
+				    enum mlx4_resource resource_type,
+				    int resource_id, int *slave);
+void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave_id);
+int mlx4_init_resource_tracker(struct mlx4_dev *dev);
+
+void mlx4_free_resource_tracker(struct mlx4_dev *dev);
+
+int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave,
+			    struct mlx4_vhcr *vhcr,
+			    struct mlx4_cmd_mailbox *inbox,
+			    struct mlx4_cmd_mailbox *outbox,
+			    struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
+			    struct mlx4_vhcr *vhcr,
+			    struct mlx4_cmd_mailbox *inbox,
+			    struct mlx4_cmd_mailbox *outbox,
+			    struct mlx4_cmd_info *cmd);
 int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps);
 int mlx4_check_ext_port_caps(struct mlx4_dev *dev, u8 port);
 
+
+int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+
+int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave,
+			 struct mlx4_vhcr *vhcr,
+			 struct mlx4_cmd_mailbox *inbox,
+			 struct mlx4_cmd_mailbox *outbox,
+			 struct mlx4_cmd_info *cmd);
 int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 			  enum mlx4_protocol prot, enum mlx4_steer_type steer);
 int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 			  int block_mcast_loopback, enum mlx4_protocol prot,
 			  enum mlx4_steer_type steer);
+int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+				struct mlx4_vhcr *vhcr,
+				struct mlx4_cmd_mailbox *inbox,
+				struct mlx4_cmd_mailbox *outbox,
+				struct mlx4_cmd_info *cmd);
+int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+			       struct mlx4_vhcr *vhcr,
+			       struct mlx4_cmd_mailbox *inbox,
+			       struct mlx4_cmd_mailbox *outbox,
+			       struct mlx4_cmd_info *cmd);
+int mlx4_common_set_vlan_fltr(struct mlx4_dev *dev, int function,
+				     int port, void *buf);
+int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave, u32 in_mod,
+				struct mlx4_cmd_mailbox *outbox);
+int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
+				   struct mlx4_vhcr *vhcr,
+				   struct mlx4_cmd_mailbox *inbox,
+				   struct mlx4_cmd_mailbox *outbox,
+				struct mlx4_cmd_info *cmd);
+int mlx4_PKEY_TABLE_wrapper(struct mlx4_dev *dev, int slave,
+			    struct mlx4_vhcr *vhcr,
+			    struct mlx4_cmd_mailbox *inbox,
+			    struct mlx4_cmd_mailbox *outbox,
+			    struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
+			       struct mlx4_vhcr *vhcr,
+			       struct mlx4_cmd_mailbox *inbox,
+			       struct mlx4_cmd_mailbox *outbox,
+			       struct mlx4_cmd_info *cmd);
 #endif /* MLX4_H */
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index b56e4587208d..e8e92814c8a0 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -59,12 +59,15 @@ enum {
 	MLX4_CMD_HW_HEALTH_CHECK = 0x50,
 	MLX4_CMD_SET_PORT	 = 0xc,
 	MLX4_CMD_SET_NODE	 = 0x5a,
+	MLX4_CMD_QUERY_FUNC	 = 0x56,
 	MLX4_CMD_ACCESS_DDR	 = 0x2e,
 	MLX4_CMD_MAP_ICM	 = 0xffa,
 	MLX4_CMD_UNMAP_ICM	 = 0xff9,
 	MLX4_CMD_MAP_ICM_AUX	 = 0xffc,
 	MLX4_CMD_UNMAP_ICM_AUX	 = 0xffb,
 	MLX4_CMD_SET_ICM_SIZE	 = 0xffd,
+	/*master notify fw on finish for slave's flr*/
+	MLX4_CMD_INFORM_FLR_DONE = 0x5b,
 
 	/* TPT commands */
 	MLX4_CMD_SW2HW_MPT	 = 0xd,
@@ -119,6 +122,26 @@ enum {
 	/* miscellaneous commands */
 	MLX4_CMD_DIAG_RPRT	 = 0x30,
 	MLX4_CMD_NOP		 = 0x31,
+	MLX4_CMD_ACCESS_MEM	 = 0x2e,
+	MLX4_CMD_SET_VEP	 = 0x52,
+
+	/* Ethernet specific commands */
+	MLX4_CMD_SET_VLAN_FLTR	 = 0x47,
+	MLX4_CMD_SET_MCAST_FLTR	 = 0x48,
+	MLX4_CMD_DUMP_ETH_STATS	 = 0x49,
+
+	/* Communication channel commands */
+	MLX4_CMD_ARM_COMM_CHANNEL = 0x57,
+	MLX4_CMD_GEN_EQE	 = 0x58,
+
+	/* virtual commands */
+	MLX4_CMD_ALLOC_RES	 = 0xf00,
+	MLX4_CMD_FREE_RES	 = 0xf01,
+	MLX4_CMD_MCAST_ATTACH	 = 0xf05,
+	MLX4_CMD_UCAST_ATTACH	 = 0xf06,
+	MLX4_CMD_PROMISC         = 0xf08,
+	MLX4_CMD_QUERY_FUNC_CAP  = 0xf0a,
+	MLX4_CMD_QP_ATTACH	 = 0xf0b,
 
 	/* debug commands */
 	MLX4_CMD_QUERY_DEBUG_MSG = 0x2a,
@@ -126,6 +149,7 @@ enum {
 
 	/* statistics commands */
 	MLX4_CMD_QUERY_IF_STAT	 = 0X54,
+	MLX4_CMD_SET_IF_STAT	 = 0X55,
 };
 
 enum {
@@ -135,7 +159,8 @@ enum {
 };
 
 enum {
-	MLX4_MAILBOX_SIZE	=  4096
+	MLX4_MAILBOX_SIZE	= 4096,
+	MLX4_ACCESS_MEM_ALIGN	= 256,
 };
 
 enum {
@@ -192,4 +217,6 @@ static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_para
 struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev);
 void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox);
 
+u32 mlx4_comm_get_version(void);
+
 #endif /* MLX4_CMD_H */
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ca2c39771c38..b9466af2348f 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -47,6 +47,9 @@
 enum {
 	MLX4_FLAG_MSI_X		= 1 << 0,
 	MLX4_FLAG_OLD_PORT_CMDS	= 1 << 1,
+	MLX4_FLAG_MASTER	= 1 << 2,
+	MLX4_FLAG_SLAVE		= 1 << 3,
+	MLX4_FLAG_SRIOV		= 1 << 4,
 };
 
 enum {
@@ -57,6 +60,15 @@ enum {
 	MLX4_BOARD_ID_LEN = 64
 };
 
+enum {
+	MLX4_MAX_NUM_PF		= 16,
+	MLX4_MAX_NUM_VF		= 64,
+	MLX4_MFUNC_MAX		= 80,
+	MLX4_MFUNC_EQ_NUM	= 4,
+	MLX4_MFUNC_MAX_EQES     = 8,
+	MLX4_MFUNC_EQE_MASK     = (MLX4_MFUNC_MAX_EQES - 1)
+};
+
 enum {
 	MLX4_DEV_CAP_FLAG_RC		= 1LL <<  0,
 	MLX4_DEV_CAP_FLAG_UC		= 1LL <<  1,
@@ -117,7 +129,11 @@ enum mlx4_event {
 	MLX4_EVENT_TYPE_PORT_CHANGE	   = 0x09,
 	MLX4_EVENT_TYPE_EQ_OVERFLOW	   = 0x0f,
 	MLX4_EVENT_TYPE_ECC_DETECT	   = 0x0e,
-	MLX4_EVENT_TYPE_CMD		   = 0x0a
+	MLX4_EVENT_TYPE_CMD		   = 0x0a,
+	MLX4_EVENT_TYPE_VEP_UPDATE	   = 0x19,
+	MLX4_EVENT_TYPE_COMM_CHANNEL	   = 0x18,
+	MLX4_EVENT_TYPE_FLR_EVENT	   = 0x1c,
+	MLX4_EVENT_TYPE_NONE		   = 0xff,
 };
 
 enum {
@@ -184,6 +200,7 @@ enum mlx4_qp_region {
 };
 
 enum mlx4_port_type {
+	MLX4_PORT_TYPE_NONE	= 0,
 	MLX4_PORT_TYPE_IB	= 1,
 	MLX4_PORT_TYPE_ETH	= 2,
 	MLX4_PORT_TYPE_AUTO	= 3
@@ -216,6 +233,7 @@ static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
 
 struct mlx4_caps {
 	u64			fw_ver;
+	u32			function;
 	int			num_ports;
 	int			vl_cap[MLX4_MAX_PORTS + 1];
 	int			ib_mtu_cap[MLX4_MAX_PORTS + 1];
@@ -466,6 +484,7 @@ struct mlx4_counter {
 struct mlx4_dev {
 	struct pci_dev	       *pdev;
 	unsigned long		flags;
+	unsigned long		num_slaves;
 	struct mlx4_caps	caps;
 	struct radix_tree_root	qp_table_tree;
 	u8			rev_id;
@@ -494,8 +513,27 @@ struct mlx4_init_port_param {
 #define mlx4_foreach_ib_transport_port(port, dev)			\
 	for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)	\
 		if (((dev)->caps.port_mask & 1 << ((port) - 1)) ||	\
-		    ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+		     ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+
+static inline int mlx4_is_master(struct mlx4_dev *dev)
+{
+	return dev->flags & MLX4_FLAG_MASTER;
+}
+
+static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn)
+{
+	return (qpn < dev->caps.sqp_start + 8);
+}
 
+static inline int mlx4_is_mfunc(struct mlx4_dev *dev)
+{
+	return dev->flags & (MLX4_FLAG_SLAVE | MLX4_FLAG_MASTER);
+}
+
+static inline int mlx4_is_slave(struct mlx4_dev *dev)
+{
+	return dev->flags & MLX4_FLAG_SLAVE;
+}
 
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		   struct mlx4_buf *buf);
-- 
cgit v1.2.3


From 65dab25deb8da7dba4b6dd0145a9143be7f8369f Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 13 Dec 2011 04:10:41 +0000
Subject: mlx4: Extanding port_mask functionality

Port mask now has additional state.
Port can be set as "none". In this case neither the mlx4_en or mlx4_ib
drivers take ownership of the port.
In multifunction mode there is an option to set the vfs as single ported devices.
(in single function mode, both physical ports belong to same function)

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx4/main.c         |  2 +-
 drivers/net/ethernet/mellanox/mlx4/main.c |  4 +---
 include/linux/mlx4/device.h               | 13 ++++++-------
 3 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 77f3dbc0aaa1..6128b2940c49 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -177,7 +177,7 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
 {
 	struct mlx4_dev *dev = to_mdev(device)->dev;
 
-	return dev->caps.port_mask & (1 << (port_num - 1)) ?
+	return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
 		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 94bbc85a532d..64d03f8b23ab 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -140,10 +140,8 @@ static void mlx4_set_port_mask(struct mlx4_dev *dev)
 {
 	int i;
 
-	dev->caps.port_mask = 0;
 	for (i = 1; i <= dev->caps.num_ports; ++i)
-		if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB)
-			dev->caps.port_mask |= 1 << (i - 1);
+		dev->caps.port_mask[i] = dev->caps.port_type[i];
 }
 
 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b9466af2348f..3333018d2913 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -302,7 +302,7 @@ struct mlx4_caps {
 	int                     log_num_prios;
 	enum mlx4_port_type	port_type[MLX4_MAX_PORTS + 1];
 	u8			supported_type[MLX4_MAX_PORTS + 1];
-	u32			port_mask;
+	u32			port_mask[MLX4_MAX_PORTS + 1];
 	enum mlx4_port_type	possible_type[MLX4_MAX_PORTS + 1];
 	u32			max_counters;
 	u8			ext_port_cap[MLX4_MAX_PORTS + 1];
@@ -507,13 +507,12 @@ struct mlx4_init_port_param {
 
 #define mlx4_foreach_port(port, dev, type)				\
 	for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)	\
-		if (((type) == MLX4_PORT_TYPE_IB ? (dev)->caps.port_mask : \
-		     ~(dev)->caps.port_mask) & 1 << ((port) - 1))
+		if ((type) == (dev)->caps.port_mask[(port)])
 
-#define mlx4_foreach_ib_transport_port(port, dev)			\
-	for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)	\
-		if (((dev)->caps.port_mask & 1 << ((port) - 1)) ||	\
-		     ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+#define mlx4_foreach_ib_transport_port(port, dev)                         \
+	for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)	  \
+		if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
+			((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
 
 static inline int mlx4_is_master(struct mlx4_dev *dev)
 {
-- 
cgit v1.2.3


From f9baff509f8a05a79626defdbdf4f4aa4efd373b Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 13 Dec 2011 04:10:51 +0000
Subject: mlx4_core: Add "native" argument to mlx4_cmd and its callers (where
 needed)

For SRIOV, some Hypervisor commands can be executed directly (native = 1).
Others should go through the command wrapper flow (for tracking resource
usage, for example, or for changing some HCA configurations that slaves
need to be notified of).

This patch sets the groundwork for this capability -- adding the correct
value of "native" in each case.

Note that if SRIOV is not activated, this parameter has no effect.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx4/mad.c                 |  6 ++--
 drivers/infiniband/hw/mlx4/main.c                |  7 ++--
 drivers/net/ethernet/mellanox/mlx4/cmd.c         |  2 +-
 drivers/net/ethernet/mellanox/mlx4/cq.c          |  6 ++--
 drivers/net/ethernet/mellanox/mlx4/en_port.c     | 15 ++++----
 drivers/net/ethernet/mellanox/mlx4/en_selftest.c |  2 +-
 drivers/net/ethernet/mellanox/mlx4/eq.c          |  9 +++--
 drivers/net/ethernet/mellanox/mlx4/fw.c          | 45 ++++++++++++++----------
 drivers/net/ethernet/mellanox/mlx4/icm.c         |  5 +--
 drivers/net/ethernet/mellanox/mlx4/mcg.c         | 10 +++---
 drivers/net/ethernet/mellanox/mlx4/mr.c          |  8 +++--
 drivers/net/ethernet/mellanox/mlx4/port.c        | 12 ++++---
 drivers/net/ethernet/mellanox/mlx4/qp.c          | 11 +++---
 drivers/net/ethernet/mellanox/mlx4/sense.c       |  3 +-
 drivers/net/ethernet/mellanox/mlx4/srq.c         |  8 ++---
 include/linux/mlx4/cmd.h                         | 20 +++++++----
 16 files changed, 102 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index f36da994a85a..95c94d8f0254 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -109,7 +109,8 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
 
 	err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma,
 			   in_modifier, op_modifier,
-			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+			   MLX4_CMD_NATIVE);
 
 	if (!err)
 		memcpy(response_mad, outmailbox->buf, 256);
@@ -330,7 +331,8 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 		return IB_MAD_RESULT_FAILURE;
 
 	err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0,
-			   MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C);
+			   MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
+			   MLX4_CMD_WRAPPED);
 	if (err)
 		err = IB_MAD_RESULT_FAILURE;
 	else {
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 6128b2940c49..34f8a5d9da75 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -434,7 +434,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
 	memset(mailbox->buf, 0, 256);
 	memcpy(mailbox->buf, props->node_desc, 64);
 	mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
-		 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A);
+		 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
 
@@ -463,7 +463,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
 	}
 
 	err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B);
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 
 	mlx4_free_cmd_mailbox(dev->dev, mailbox);
 	return err;
@@ -899,7 +899,8 @@ static void update_gids_task(struct work_struct *work)
 	memcpy(gids, gw->gids, sizeof gw->gids);
 
 	err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
-		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B);
+		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+		       MLX4_CMD_NATIVE);
 	if (err)
 		printk(KERN_WARNING "set port command failed\n");
 	else {
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 78f5a1a0b8c8..b27654e5d544 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -311,7 +311,7 @@ out:
 
 int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
 	       int out_is_imm, u32 in_modifier, u8 op_modifier,
-	       u16 op, unsigned long timeout)
+	       u16 op, unsigned long timeout, int native)
 {
 	if (mlx4_priv(dev)->cmd.use_events)
 		return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm,
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 499a5168892a..ebd0eb234f14 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -118,14 +118,14 @@ static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			 int cq_num)
 {
 	return mlx4_cmd(dev, mailbox->dma, cq_num, 0, MLX4_CMD_SW2HW_CQ,
-			MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_MODIFY_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			 int cq_num, u32 opmod)
 {
 	return mlx4_cmd(dev, mailbox->dma, cq_num, opmod, MLX4_CMD_MODIFY_CQ,
-			MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -133,7 +133,7 @@ static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 {
 	return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
 			    mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ,
-			    MLX4_CMD_TIME_CLASS_A);
+			    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 }
 
 int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index 03c84cd78cde..ae120effb8a5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -45,7 +45,8 @@ int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port,
 			u64 mac, u64 clear, u8 mode)
 {
 	return mlx4_cmd(dev, (mac | (clear << 63)), port, mode,
-			MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B);
+			MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B,
+			MLX4_CMD_WRAPPED);
 }
 
 int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv)
@@ -72,7 +73,7 @@ int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv)
 		filter->entry[i] = cpu_to_be32(entry);
 	}
 	err = mlx4_cmd(dev, mailbox->dma, priv->port, 0, MLX4_CMD_SET_VLAN_FLTR,
-		       MLX4_CMD_TIME_CLASS_B);
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
 }
@@ -101,7 +102,7 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
 
 	in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
 	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B);
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
@@ -140,7 +141,7 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
 
 	in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port;
 	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B);
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
@@ -159,7 +160,8 @@ int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port)
 		return PTR_ERR(mailbox);
 	memset(mailbox->buf, 0, sizeof(*qport_context));
 	err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
-			   MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B);
+			   MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
+			   MLX4_CMD_WRAPPED);
 	if (err)
 		goto out;
 	qport_context = mailbox->buf;
@@ -204,7 +206,8 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 		return PTR_ERR(mailbox);
 	memset(mailbox->buf, 0, sizeof(*mlx4_en_stats));
 	err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0,
-			   MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B);
+			   MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B,
+			   MLX4_CMD_WRAPPED);
 	if (err)
 		goto out;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
index 9fdbcecd499d..bf2e5d3f177c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
@@ -43,7 +43,7 @@
 static int mlx4_en_test_registers(struct mlx4_en_priv *priv)
 {
 	return mlx4_cmd(priv->mdev->dev, 0, 0, 0, MLX4_CMD_HW_HEALTH_CHECK,
-			MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index ad9e3770b050..9e5863dfa60a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -255,21 +255,24 @@ static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap,
 			int eq_num)
 {
 	return mlx4_cmd(dev, event_mask, (unmap << 31) | eq_num,
-			0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B);
+			0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B,
+			MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			 int eq_num)
 {
 	return mlx4_cmd(dev, mailbox->dma, eq_num, 0, MLX4_CMD_SW2HW_EQ,
-			MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A,
+			MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			 int eq_num)
 {
 	return mlx4_cmd_box(dev, 0, mailbox->dma, eq_num, 0, MLX4_CMD_HW2SW_EQ,
-			    MLX4_CMD_TIME_CLASS_A);
+			    MLX4_CMD_TIME_CLASS_A,
+			    MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_num_eq_uar(struct mlx4_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 435ca6e49734..9659fb085e5e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -139,7 +139,7 @@ int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg)
 	MLX4_PUT(inbox, cfg->log_pg_sz_m, MOD_STAT_CFG_PG_SZ_M_OFFSET);
 
 	err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_MOD_STAT_CFG,
-			MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
@@ -229,7 +229,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	outbox = mailbox->buf;
 
 	err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP,
-			   MLX4_CMD_TIME_CLASS_A);
+			   MLX4_CMD_TIME_CLASS_A, !mlx4_is_slave(dev));
 	if (err)
 		goto out;
 
@@ -396,7 +396,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 
 		for (i = 1; i <= dev_cap->num_ports; ++i) {
 			err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 0, MLX4_CMD_QUERY_PORT,
-					   MLX4_CMD_TIME_CLASS_B);
+					   MLX4_CMD_TIME_CLASS_B,
+					   !mlx4_is_slave(dev));
 			if (err)
 				goto out;
 
@@ -519,7 +520,8 @@ int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt)
 
 			if (++nent == MLX4_MAILBOX_SIZE / 16) {
 				err = mlx4_cmd(dev, mailbox->dma, nent, 0, op,
-						MLX4_CMD_TIME_CLASS_B);
+						MLX4_CMD_TIME_CLASS_B,
+						MLX4_CMD_NATIVE);
 				if (err)
 					goto out;
 				nent = 0;
@@ -528,7 +530,8 @@ int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt)
 	}
 
 	if (nent)
-		err = mlx4_cmd(dev, mailbox->dma, nent, 0, op, MLX4_CMD_TIME_CLASS_B);
+		err = mlx4_cmd(dev, mailbox->dma, nent, 0, op,
+			       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 	if (err)
 		goto out;
 
@@ -557,13 +560,15 @@ int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm)
 
 int mlx4_UNMAP_FA(struct mlx4_dev *dev)
 {
-	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA, MLX4_CMD_TIME_CLASS_B);
+	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA,
+			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
 
 int mlx4_RUN_FW(struct mlx4_dev *dev)
 {
-	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW, MLX4_CMD_TIME_CLASS_A);
+	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW,
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 }
 
 int mlx4_QUERY_FW(struct mlx4_dev *dev)
@@ -595,7 +600,7 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev)
 	outbox = mailbox->buf;
 
 	err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FW,
-			    MLX4_CMD_TIME_CLASS_A);
+			    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 	if (err)
 		goto out;
 
@@ -711,7 +716,7 @@ int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter)
 	outbox = mailbox->buf;
 
 	err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_ADAPTER,
-			   MLX4_CMD_TIME_CLASS_A);
+			   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 	if (err)
 		goto out;
 
@@ -834,7 +839,8 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET);
 	MLX4_PUT(inbox, param->log_uar_sz,      INIT_HCA_LOG_UAR_SZ_OFFSET);
 
-	err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000);
+	err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000,
+		       MLX4_CMD_NATIVE);
 
 	if (err)
 		mlx4_err(dev, "INIT_HCA returns %d\n", err);
@@ -886,12 +892,12 @@ int mlx4_INIT_PORT(struct mlx4_dev *dev, int port)
 		MLX4_PUT(inbox, field, INIT_PORT_MAX_PKEY_OFFSET);
 
 		err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_INIT_PORT,
-			       MLX4_CMD_TIME_CLASS_A);
+			       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 
 		mlx4_free_cmd_mailbox(dev, mailbox);
 	} else
 		err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
-			       MLX4_CMD_TIME_CLASS_A);
+			       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 
 	return err;
 }
@@ -899,20 +905,22 @@ EXPORT_SYMBOL_GPL(mlx4_INIT_PORT);
 
 int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port)
 {
-	return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000);
+	return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000,
+			MLX4_CMD_WRAPPED);
 }
 EXPORT_SYMBOL_GPL(mlx4_CLOSE_PORT);
 
 int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic)
 {
-	return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, 1000);
+	return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, 1000,
+			MLX4_CMD_NATIVE);
 }
 
 int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages)
 {
 	int ret = mlx4_cmd_imm(dev, icm_size, aux_pages, 0, 0,
 			       MLX4_CMD_SET_ICM_SIZE,
-			       MLX4_CMD_TIME_CLASS_A);
+			       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 	if (ret)
 		return ret;
 
@@ -929,7 +937,7 @@ int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages)
 int mlx4_NOP(struct mlx4_dev *dev)
 {
 	/* Input modifier of 0x1f means "finish as soon as possible." */
-	return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100);
+	return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100, MLX4_CMD_NATIVE);
 }
 
 #define MLX4_WOL_SETUP_MODE (5 << 28)
@@ -938,7 +946,8 @@ int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port)
 	u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8;
 
 	return mlx4_cmd_imm(dev, 0, config, in_mod, 0x3,
-			    MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A);
+			    MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A,
+			    MLX4_CMD_NATIVE);
 }
 EXPORT_SYMBOL_GPL(mlx4_wol_read);
 
@@ -947,6 +956,6 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port)
 	u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8;
 
 	return mlx4_cmd(dev, config, in_mod, 0x1, MLX4_CMD_MOD_STAT_CFG,
-					MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 }
 EXPORT_SYMBOL_GPL(mlx4_wol_write);
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index 02393fdf44c1..a9ade1c3cad5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -213,7 +213,7 @@ static int mlx4_MAP_ICM(struct mlx4_dev *dev, struct mlx4_icm *icm, u64 virt)
 static int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count)
 {
 	return mlx4_cmd(dev, virt, page_count, 0, MLX4_CMD_UNMAP_ICM,
-			MLX4_CMD_TIME_CLASS_B);
+			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
 int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm)
@@ -223,7 +223,8 @@ int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm)
 
 int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
 {
-	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX, MLX4_CMD_TIME_CLASS_B);
+	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX,
+			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
 int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 978688c31046..4187f7bbd793 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -48,14 +48,14 @@ static int mlx4_READ_ENTRY(struct mlx4_dev *dev, int index,
 			   struct mlx4_cmd_mailbox *mailbox)
 {
 	return mlx4_cmd_box(dev, 0, mailbox->dma, index, 0, MLX4_CMD_READ_MCG,
-			    MLX4_CMD_TIME_CLASS_A);
+			    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 }
 
 static int mlx4_WRITE_ENTRY(struct mlx4_dev *dev, int index,
 			    struct mlx4_cmd_mailbox *mailbox)
 {
 	return mlx4_cmd(dev, mailbox->dma, index, 0, MLX4_CMD_WRITE_MCG,
-			MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 }
 
 static int mlx4_WRITE_PROMISC(struct mlx4_dev *dev, u8 vep_num, u8 port, u8 steer,
@@ -65,7 +65,8 @@ static int mlx4_WRITE_PROMISC(struct mlx4_dev *dev, u8 vep_num, u8 port, u8 stee
 
 	in_mod = (u32) vep_num << 24 | (u32) port << 16 | steer << 1;
 	return mlx4_cmd(dev, mailbox->dma, in_mod, 0x1,
-			MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A,
+			MLX4_CMD_NATIVE);
 }
 
 static int mlx4_GID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -75,7 +76,8 @@ static int mlx4_GID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 	int err;
 
 	err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, op_mod,
-			   MLX4_CMD_MGID_HASH, MLX4_CMD_TIME_CLASS_A);
+			   MLX4_CMD_MGID_HASH, MLX4_CMD_TIME_CLASS_A,
+			   MLX4_CMD_NATIVE);
 
 	if (!err)
 		*hash = imm;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index efa3e77355e4..057b22d64a05 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -254,14 +254,15 @@ static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
 			  int mpt_index)
 {
 	return mlx4_cmd(dev, mailbox->dma, mpt_index, 0, MLX4_CMD_SW2HW_MPT,
-			MLX4_CMD_TIME_CLASS_B);
+			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			  int mpt_index)
 {
 	return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
-			    !mailbox, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_B);
+			    !mailbox, MLX4_CMD_HW2SW_MPT,
+			    MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 }
 
 int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
@@ -663,6 +664,7 @@ EXPORT_SYMBOL_GPL(mlx4_fmr_free);
 
 int mlx4_SYNC_TPT(struct mlx4_dev *dev)
 {
-	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000);
+	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000,
+			MLX4_CMD_WRAPPED);
 }
 EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT);
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index d942aea4927b..da9f85c6da7e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -85,7 +85,7 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
 
 	in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port;
 	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B);
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
@@ -326,7 +326,7 @@ static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
 	memcpy(mailbox->buf, entries, MLX4_VLAN_TABLE_SIZE);
 	in_mod = MLX4_SET_PORT_VLAN_TABLE << 8 | port;
 	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B);
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 
@@ -462,7 +462,8 @@ int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
 	*(__be32 *) (&inbuf[20]) = cpu_to_be32(port);
 
 	err = mlx4_cmd_box(dev, inmailbox->dma, outmailbox->dma, port, 3,
-			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+			   MLX4_CMD_NATIVE);
 	if (!err)
 		*caps = *(__be32 *) (outbuf + 84);
 	mlx4_free_cmd_mailbox(dev, inmailbox);
@@ -499,7 +500,8 @@ int mlx4_check_ext_port_caps(struct mlx4_dev *dev, u8 port)
 	*(__be32 *) (&inbuf[20]) = cpu_to_be32(port);
 
 	err = mlx4_cmd_box(dev, inmailbox->dma, outmailbox->dma, port, 3,
-			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+			   MLX4_CMD_NATIVE);
 
 	packet_error = be16_to_cpu(*(__be16 *) (outbuf + 4));
 
@@ -528,7 +530,7 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
 
 	((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port];
 	err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B);
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 15f870cb2590..e721f4cd34f8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -119,7 +119,8 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 
 	if (op[cur_state][new_state] == MLX4_CMD_2RST_QP)
 		return mlx4_cmd(dev, 0, qp->qpn, 2,
-				MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A);
+				MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A,
+				MLX4_CMD_WRAPPED);
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
@@ -140,7 +141,8 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 
 	ret = mlx4_cmd(dev, mailbox->dma, qp->qpn | (!!sqd_event << 31),
 		       new_state == MLX4_QP_STATE_RST ? 2 : 0,
-		       op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C);
+		       op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C,
+		       MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return ret;
@@ -265,7 +267,7 @@ EXPORT_SYMBOL_GPL(mlx4_qp_free);
 static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn)
 {
 	return mlx4_cmd(dev, 0, base_qpn, 0, MLX4_CMD_CONF_SPECIAL_QP,
-			MLX4_CMD_TIME_CLASS_B);
+			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
 int mlx4_init_qp_table(struct mlx4_dev *dev)
@@ -342,7 +344,8 @@ int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
 		return PTR_ERR(mailbox);
 
 	err = mlx4_cmd_box(dev, 0, mailbox->dma, qp->qpn, 0,
-			   MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A);
+			   MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A,
+			   MLX4_CMD_WRAPPED);
 	if (!err)
 		memcpy(context, mailbox->buf + 8, sizeof *context);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/sense.c b/drivers/net/ethernet/mellanox/mlx4/sense.c
index e2337a7411d9..802498293528 100644
--- a/drivers/net/ethernet/mellanox/mlx4/sense.c
+++ b/drivers/net/ethernet/mellanox/mlx4/sense.c
@@ -45,7 +45,8 @@ int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
 	int err = 0;
 
 	err = mlx4_cmd_imm(dev, 0, &out_param, port, 0,
-			   MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B);
+			   MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B,
+			   MLX4_CMD_WRAPPED);
 	if (err) {
 		mlx4_err(dev, "Sense command failed for port: %d\n", port);
 		return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index 9cbf3fce0145..f4ca096db62a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -86,7 +86,7 @@ static int mlx4_SW2HW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
 			  int srq_num)
 {
 	return mlx4_cmd(dev, mailbox->dma, srq_num, 0, MLX4_CMD_SW2HW_SRQ,
-			MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -94,20 +94,20 @@ static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
 {
 	return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
 			    mailbox ? 0 : 1, MLX4_CMD_HW2SW_SRQ,
-			    MLX4_CMD_TIME_CLASS_A);
+			    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_ARM_SRQ(struct mlx4_dev *dev, int srq_num, int limit_watermark)
 {
 	return mlx4_cmd(dev, limit_watermark, srq_num, 0, MLX4_CMD_ARM_SRQ,
-			MLX4_CMD_TIME_CLASS_B);
+			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			  int srq_num)
 {
 	return mlx4_cmd_box(dev, 0, mailbox->dma, srq_num, 0, MLX4_CMD_QUERY_SRQ,
-			    MLX4_CMD_TIME_CLASS_A);
+			    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 }
 
 int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index e8e92814c8a0..ae62630a665e 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -173,6 +173,11 @@ enum {
 	MLX4_SET_PORT_GID_TABLE = 0x5,
 };
 
+enum {
+	MLX4_CMD_WRAPPED,
+	MLX4_CMD_NATIVE
+};
+
 struct mlx4_dev;
 
 struct mlx4_cmd_mailbox {
@@ -182,23 +187,24 @@ struct mlx4_cmd_mailbox {
 
 int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
 	       int out_is_imm, u32 in_modifier, u8 op_modifier,
-	       u16 op, unsigned long timeout);
+	       u16 op, unsigned long timeout, int native);
 
 /* Invoke a command with no output parameter */
 static inline int mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u32 in_modifier,
-			   u8 op_modifier, u16 op, unsigned long timeout)
+			   u8 op_modifier, u16 op, unsigned long timeout,
+			   int native)
 {
 	return __mlx4_cmd(dev, in_param, NULL, 0, in_modifier,
-			  op_modifier, op, timeout);
+			  op_modifier, op, timeout, native);
 }
 
 /* Invoke a command with an output mailbox */
 static inline int mlx4_cmd_box(struct mlx4_dev *dev, u64 in_param, u64 out_param,
 			       u32 in_modifier, u8 op_modifier, u16 op,
-			       unsigned long timeout)
+			       unsigned long timeout, int native)
 {
 	return __mlx4_cmd(dev, in_param, &out_param, 0, in_modifier,
-			  op_modifier, op, timeout);
+			  op_modifier, op, timeout, native);
 }
 
 /*
@@ -208,10 +214,10 @@ static inline int mlx4_cmd_box(struct mlx4_dev *dev, u64 in_param, u64 out_param
  */
 static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
 			       u32 in_modifier, u8 op_modifier, u16 op,
-			       unsigned long timeout)
+			       unsigned long timeout, int native)
 {
 	return __mlx4_cmd(dev, in_param, out_param, 1, in_modifier,
-			  op_modifier, op, timeout);
+			  op_modifier, op, timeout, native);
 }
 
 struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev);
-- 
cgit v1.2.3


From f5311ac109b21c9b47118655a5b6d887bcc686f8 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 13 Dec 2011 04:12:13 +0000
Subject: mlx4_core: Reduce number of PD bits to 17

When SRIOV is enabled on the chip (at FW burning time),
the HCA uses only 17 bits for the PD. The remaining 7 high-order bits
are ignored.

Change the allocator to return only 17 bits for the PD.  The MSB 7
bits will be used to encode the slave number for consistency
checking later on in the resource tracker.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h |  5 ++++-
 drivers/net/ethernet/mellanox/mlx4/pd.c   | 19 ++++++++++++++-----
 include/linux/mlx4/device.h               |  1 +
 3 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 69177614666f..51cba262bafc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -420,7 +420,7 @@ struct mlx4_mfunc_master_ctx {
 	struct work_struct	slave_event_work;
 	struct work_struct	slave_flr_event_work;
 	spinlock_t		slave_state_lock;
-	u32			comm_arm_bit_vector[4];
+	__be32			comm_arm_bit_vector[4];
 	struct mlx4_eqe		cmd_eqe;
 	struct mlx4_slave_event_eq slave_eq;
 	struct mutex		gen_eqe_mutex[MLX4_MFUNC_MAX];
@@ -914,4 +914,7 @@ int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
 			       struct mlx4_cmd_mailbox *inbox,
 			       struct mlx4_cmd_mailbox *outbox,
 			       struct mlx4_cmd_info *cmd);
+
+#define NOT_MASKED_PD_BITS 17
+
 #endif /* MLX4_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c
index 260ed259ce9b..5c9a54df17ab 100644
--- a/drivers/net/ethernet/mellanox/mlx4/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/pd.c
@@ -31,6 +31,7 @@
  * SOFTWARE.
  */
 
+#include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/io-mapping.h>
@@ -51,7 +52,8 @@ int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn)
 	*pdn = mlx4_bitmap_alloc(&priv->pd_bitmap);
 	if (*pdn == -1)
 		return -ENOMEM;
-
+	if (mlx4_is_mfunc(dev))
+		*pdn |= (dev->caps.function + 1) << NOT_MASKED_PD_BITS;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_pd_alloc);
@@ -85,7 +87,8 @@ int mlx4_init_pd_table(struct mlx4_dev *dev)
 	struct mlx4_priv *priv = mlx4_priv(dev);
 
 	return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds,
-				(1 << 24) - 1, dev->caps.reserved_pds, 0);
+				(1 << NOT_MASKED_PD_BITS) - 1,
+				 dev->caps.reserved_pds, 0);
 }
 
 void mlx4_cleanup_pd_table(struct mlx4_dev *dev)
@@ -108,13 +111,19 @@ void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev)
 
 int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar)
 {
+	int offset;
+
 	uar->index = mlx4_bitmap_alloc(&mlx4_priv(dev)->uar_table.bitmap);
 	if (uar->index == -1)
 		return -ENOMEM;
 
-	uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
+	if (mlx4_is_slave(dev))
+		offset = uar->index % ((int) pci_resource_len(dev->pdev, 2) /
+				       dev->caps.uar_page_size);
+	else
+		offset = uar->index;
+	uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + offset;
 	uar->map = NULL;
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_uar_alloc);
@@ -232,7 +241,7 @@ int mlx4_init_uar_table(struct mlx4_dev *dev)
 
 	return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap,
 				dev->caps.num_uars, dev->caps.num_uars - 1,
-				max(128, dev->caps.reserved_uars), 0);
+				dev->caps.reserved_uars, 0);
 }
 
 void mlx4_cleanup_uar_table(struct mlx4_dev *dev)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 3333018d2913..e4be34a908a7 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -248,6 +248,7 @@ struct mlx4_caps {
 	u64			trans_code[MLX4_MAX_PORTS + 1];
 	int			local_ca_ack_delay;
 	int			num_uars;
+	u32			uar_page_size;
 	int			bf_reg_size;
 	int			bf_regs_per_page;
 	int			max_sq_sg;
-- 
cgit v1.2.3


From ffe455ad04681f3fc48eef595fe526a795f809a3 Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.co.il>
Date: Tue, 13 Dec 2011 04:16:21 +0000
Subject: mlx4: Ethernet port management modifications

The physical port is now common to the PF and VFs.
The port resources and configuration is managed by the PF, VFs can
only influence the MTU of the port, it is set as max among all functions,
Each function allocates RX buffers of required size to meet it's MTU enforcement.
Port management code was moved to mlx4_core, as the mlx4_en module is
virtualization unaware

Move handling qp functionality to mlx4_get_eth_qp/mlx4_put_eth_qp
including reserve/release range and add/release unicast steering.
Let mlx4_register/unregister_mac deal only with MAC (un)registration.

Signed-off-by: Eugenia Emantayev <eugenia@mellanox.co.il>
Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c           |  37 ++
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c     |  32 +-
 drivers/net/ethernet/mellanox/mlx4/en_port.c       |  77 ---
 drivers/net/ethernet/mellanox/mlx4/en_port.h       |  37 --
 drivers/net/ethernet/mellanox/mlx4/mcg.c           |   4 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |  53 ++
 drivers/net/ethernet/mellanox/mlx4/port.c          | 606 +++++++++++++++++----
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  |  33 +-
 include/linux/mlx4/device.h                        |  12 +-
 9 files changed, 630 insertions(+), 261 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 0f2069d98274..8e6e4b20b0e2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -653,6 +653,15 @@ static struct mlx4_cmd_info cmd_info[] = {
 		.verify = NULL,
 		.wrapper = mlx4_QUERY_PORT_wrapper
 	},
+	{
+		.opcode = MLX4_CMD_SET_PORT,
+		.has_inbox = true,
+		.has_outbox = false,
+		.out_is_imm = false,
+		.encode_slave_id = false,
+		.verify = NULL,
+		.wrapper = mlx4_SET_PORT_wrapper
+	},
 	{
 		.opcode = MLX4_CMD_MAP_EQ,
 		.has_inbox = false,
@@ -1005,6 +1014,34 @@ static struct mlx4_cmd_info cmd_info[] = {
 		.verify = NULL,
 		.wrapper = mlx4_PROMISC_wrapper
 	},
+	/* Ethernet specific commands */
+	{
+		.opcode = MLX4_CMD_SET_VLAN_FLTR,
+		.has_inbox = true,
+		.has_outbox = false,
+		.out_is_imm = false,
+		.encode_slave_id = false,
+		.verify = NULL,
+		.wrapper = mlx4_SET_VLAN_FLTR_wrapper
+	},
+	{
+		.opcode = MLX4_CMD_SET_MCAST_FLTR,
+		.has_inbox = false,
+		.has_outbox = false,
+		.out_is_imm = false,
+		.encode_slave_id = false,
+		.verify = NULL,
+		.wrapper = mlx4_SET_MCAST_FLTR_wrapper
+	},
+	{
+		.opcode = MLX4_CMD_DUMP_ETH_STATS,
+		.has_inbox = false,
+		.has_outbox = true,
+		.out_is_imm = false,
+		.encode_slave_id = false,
+		.verify = NULL,
+		.wrapper = mlx4_DUMP_ETH_STATS_wrapper
+	},
 	{
 		.opcode = MLX4_CMD_INFORM_FLR_DONE,
 		.has_inbox = false,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 2083f3b5d689..1db6fea495bf 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -136,7 +136,7 @@ static void mlx4_en_do_set_mac(struct work_struct *work)
 	if (priv->port_up) {
 		/* Remove old MAC and insert the new one */
 		err = mlx4_replace_mac(mdev->dev, priv->port,
-				       priv->base_qpn, priv->mac, 0);
+				       priv->base_qpn, priv->mac);
 		if (err)
 			en_err(priv, "Failed changing HW MAC address\n");
 	} else
@@ -207,6 +207,16 @@ static void mlx4_en_do_set_multicast(struct work_struct *work)
 		goto out;
 	}
 
+	if (!netif_carrier_ok(dev)) {
+		if (!mlx4_en_QUERY_PORT(mdev, priv->port)) {
+			if (priv->port_state.link_state) {
+				priv->last_link_state = MLX4_DEV_EVENT_PORT_UP;
+				netif_carrier_on(dev);
+				en_dbg(LINK, priv, "Link Up\n");
+			}
+		}
+	}
+
 	/*
 	 * Promsicuous mode: disable all filters
 	 */
@@ -602,12 +612,12 @@ int mlx4_en_start_port(struct net_device *dev)
 		++rx_index;
 	}
 
-	/* Set port mac number */
-	en_dbg(DRV, priv, "Setting mac for port %d\n", priv->port);
-	err = mlx4_register_mac(mdev->dev, priv->port,
-				priv->mac, &priv->base_qpn, 0);
+	/* Set qp number */
+	en_dbg(DRV, priv, "Getting qp number for port %d\n", priv->port);
+	err = mlx4_get_eth_qp(mdev->dev, priv->port,
+				priv->mac, &priv->base_qpn);
 	if (err) {
-		en_err(priv, "Failed setting port mac\n");
+		en_err(priv, "Failed getting eth qp\n");
 		goto cq_err;
 	}
 	mdev->mac_removed[priv->port] = 0;
@@ -702,7 +712,7 @@ tx_err:
 
 	mlx4_en_release_rss_steer(priv);
 mac_err:
-	mlx4_unregister_mac(mdev->dev, priv->port, priv->base_qpn);
+	mlx4_put_eth_qp(mdev->dev, priv->port, priv->mac, priv->base_qpn);
 cq_err:
 	while (rx_index--)
 		mlx4_en_deactivate_cq(priv, &priv->rx_cq[rx_index]);
@@ -748,10 +758,6 @@ void mlx4_en_stop_port(struct net_device *dev)
 	/* Flush multicast filter */
 	mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG);
 
-	/* Unregister Mac address for the port */
-	mlx4_unregister_mac(mdev->dev, priv->port, priv->base_qpn);
-	mdev->mac_removed[priv->port] = 1;
-
 	/* Free TX Rings */
 	for (i = 0; i < priv->tx_ring_num; i++) {
 		mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[i]);
@@ -765,6 +771,10 @@ void mlx4_en_stop_port(struct net_device *dev)
 	/* Free RSS qps */
 	mlx4_en_release_rss_steer(priv);
 
+	/* Unregister Mac address for the port */
+	mlx4_put_eth_qp(mdev->dev, priv->port, priv->mac, priv->base_qpn);
+	mdev->mac_removed[priv->port] = 1;
+
 	/* Free RX Rings */
 	for (i = 0; i < priv->rx_ring_num; i++) {
 		mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index ae120effb8a5..331791467a22 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -41,14 +41,6 @@
 #include "mlx4_en.h"
 
 
-int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port,
-			u64 mac, u64 clear, u8 mode)
-{
-	return mlx4_cmd(dev, (mac | (clear << 63)), port, mode,
-			MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B,
-			MLX4_CMD_WRAPPED);
-}
-
 int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv)
 {
 	struct mlx4_cmd_mailbox *mailbox;
@@ -78,75 +70,6 @@ int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv)
 	return err;
 }
 
-
-int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
-			  u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
-{
-	struct mlx4_cmd_mailbox *mailbox;
-	struct mlx4_set_port_general_context *context;
-	int err;
-	u32 in_mod;
-
-	mailbox = mlx4_alloc_cmd_mailbox(dev);
-	if (IS_ERR(mailbox))
-		return PTR_ERR(mailbox);
-	context = mailbox->buf;
-	memset(context, 0, sizeof *context);
-
-	context->flags = SET_PORT_GEN_ALL_VALID;
-	context->mtu = cpu_to_be16(mtu);
-	context->pptx = (pptx * (!pfctx)) << 7;
-	context->pfctx = pfctx;
-	context->pprx = (pprx * (!pfcrx)) << 7;
-	context->pfcrx = pfcrx;
-
-	in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
-	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
-
-	mlx4_free_cmd_mailbox(dev, mailbox);
-	return err;
-}
-
-int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
-			   u8 promisc)
-{
-	struct mlx4_cmd_mailbox *mailbox;
-	struct mlx4_set_port_rqp_calc_context *context;
-	int err;
-	u32 in_mod;
-	u32 m_promisc = (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) ?
-						MCAST_DIRECT : MCAST_DEFAULT;
-
-	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER  &&
-			dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)
-		return 0;
-
-	mailbox = mlx4_alloc_cmd_mailbox(dev);
-	if (IS_ERR(mailbox))
-		return PTR_ERR(mailbox);
-	context = mailbox->buf;
-	memset(context, 0, sizeof *context);
-
-	context->base_qpn = cpu_to_be32(base_qpn);
-	context->n_mac = dev->caps.log_num_macs;
-	context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT |
-				       base_qpn);
-	context->mcast = cpu_to_be32(m_promisc << SET_PORT_MC_PROMISC_SHIFT |
-				     base_qpn);
-	context->intra_no_vlan = 0;
-	context->no_vlan = MLX4_NO_VLAN_IDX;
-	context->intra_vlan_miss = 0;
-	context->vlan_miss = MLX4_VLAN_MISS_IDX;
-
-	in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port;
-	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
-
-	mlx4_free_cmd_mailbox(dev, mailbox);
-	return err;
-}
-
 int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port)
 {
 	struct mlx4_en_query_port_context *qport_context;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.h b/drivers/net/ethernet/mellanox/mlx4/en_port.h
index c1bb834414b5..6934fd7e66ed 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.h
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.h
@@ -39,43 +39,6 @@
 #define SET_PORT_PROMISC_SHIFT	31
 #define SET_PORT_MC_PROMISC_SHIFT	30
 
-enum {
-	MCAST_DIRECT_ONLY       = 0,
-	MCAST_DIRECT            = 1,
-	MCAST_DEFAULT           = 2
-};
-
-struct mlx4_set_port_general_context {
-	u8 reserved[3];
-	u8 flags;
-	u16 reserved2;
-	__be16 mtu;
-	u8 pptx;
-	u8 pfctx;
-	u16 reserved3;
-	u8 pprx;
-	u8 pfcrx;
-	u16 reserved4;
-};
-
-struct mlx4_set_port_rqp_calc_context {
-	__be32 base_qpn;
-	u8 rererved;
-	u8 n_mac;
-	u8 n_vlan;
-	u8 n_prio;
-	u8 reserved2[3];
-	u8 mac_miss;
-	u8 intra_no_vlan;
-	u8 no_vlan;
-	u8 intra_vlan_miss;
-	u8 vlan_miss;
-	u8 reserved3[3];
-	u8 no_vlan_prio;
-	__be32 promisc;
-	__be32 mcast;
-};
-
 #define VLAN_FLTR_SIZE	128
 struct mlx4_set_vlan_fltr_mbox {
 	__be32 entry[VLAN_FLTR_SIZE];
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index b36c279bcca0..0785d9b2a265 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -913,7 +913,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 }
 EXPORT_SYMBOL_GPL(mlx4_multicast_detach);
 
-static int mlx4_unicast_attach(struct mlx4_dev *dev,
+int mlx4_unicast_attach(struct mlx4_dev *dev,
 			struct mlx4_qp *qp, u8 gid[16],
 			int block_mcast_loopback, enum mlx4_protocol prot)
 {
@@ -933,7 +933,7 @@ static int mlx4_unicast_attach(struct mlx4_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx4_unicast_attach);
 
-static int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp,
+int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp,
 			       u8 gid[16], enum mlx4_protocol prot)
 {
 	if (prot == MLX4_PROT_ETH &&
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index a38ffc997367..abf65d8af48d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -419,12 +419,23 @@ struct mlx4_comm {
 	u32			slave_read;
 };
 
+enum {
+	MLX4_MCAST_CONFIG       = 0,
+	MLX4_MCAST_DISABLE      = 1,
+	MLX4_MCAST_ENABLE       = 2,
+};
+
 #define VLAN_FLTR_SIZE	128
 
 struct mlx4_vlan_fltr {
 	__be32 entry[VLAN_FLTR_SIZE];
 };
 
+struct mlx4_mcast_entry {
+	struct list_head list;
+	u64 addr;
+};
+
 struct mlx4_promisc_qp {
 	struct list_head list;
 	u32 qpn;
@@ -615,6 +626,48 @@ struct mlx4_vlan_table {
 	int			max;
 };
 
+#define SET_PORT_GEN_ALL_VALID		0x7
+#define SET_PORT_PROMISC_SHIFT		31
+#define SET_PORT_MC_PROMISC_SHIFT	30
+
+enum {
+	MCAST_DIRECT_ONLY	= 0,
+	MCAST_DIRECT		= 1,
+	MCAST_DEFAULT		= 2
+};
+
+
+struct mlx4_set_port_general_context {
+	u8 reserved[3];
+	u8 flags;
+	u16 reserved2;
+	__be16 mtu;
+	u8 pptx;
+	u8 pfctx;
+	u16 reserved3;
+	u8 pprx;
+	u8 pfcrx;
+	u16 reserved4;
+};
+
+struct mlx4_set_port_rqp_calc_context {
+	__be32 base_qpn;
+	u8 rererved;
+	u8 n_mac;
+	u8 n_vlan;
+	u8 n_prio;
+	u8 reserved2[3];
+	u8 mac_miss;
+	u8 intra_no_vlan;
+	u8 no_vlan;
+	u8 intra_vlan_miss;
+	u8 vlan_miss;
+	u8 reserved3[3];
+	u8 no_vlan_prio;
+	__be32 promisc;
+	__be32 mcast;
+};
+
 struct mlx4_mac_entry {
 	u64 mac;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index da9f85c6da7e..00a9547773c1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -70,41 +70,12 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table)
 	table->total = 0;
 }
 
-static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
-				   __be64 *entries)
-{
-	struct mlx4_cmd_mailbox *mailbox;
-	u32 in_mod;
-	int err;
-
-	mailbox = mlx4_alloc_cmd_mailbox(dev);
-	if (IS_ERR(mailbox))
-		return PTR_ERR(mailbox);
-
-	memcpy(mailbox->buf, entries, MLX4_MAC_TABLE_SIZE);
-
-	in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port;
-	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
-
-	mlx4_free_cmd_mailbox(dev, mailbox);
-	return err;
-}
-
-static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port,
-			     u64 mac, int *qpn, u8 reserve)
+static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn)
 {
 	struct mlx4_qp qp;
 	u8 gid[16] = {0};
 	int err;
 
-	if (reserve) {
-		err = mlx4_qp_reserve_range(dev, 1, 1, qpn);
-		if (err) {
-			mlx4_err(dev, "Failed to reserve qp for mac registration\n");
-			return err;
-		}
-	}
 	qp.qpn = *qpn;
 
 	mac &= 0xffffffffffffULL;
@@ -113,16 +84,15 @@ static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port,
 	gid[5] = port;
 	gid[7] = MLX4_UC_STEER << 1;
 
-	err = mlx4_qp_attach_common(dev, &qp, gid, 0,
-				    MLX4_PROT_ETH, MLX4_UC_STEER);
-	if (err && reserve)
-		mlx4_qp_release_range(dev, *qpn, 1);
+	err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH);
+	if (err)
+		mlx4_warn(dev, "Failed Attaching Unicast\n");
 
 	return err;
 }
 
 static void mlx4_uc_steer_release(struct mlx4_dev *dev, u8 port,
-				  u64 mac, int qpn, u8 free)
+				  u64 mac, int qpn)
 {
 	struct mlx4_qp qp;
 	u8 gid[16] = {0};
@@ -134,60 +104,164 @@ static void mlx4_uc_steer_release(struct mlx4_dev *dev, u8 port,
 	gid[5] = port;
 	gid[7] = MLX4_UC_STEER << 1;
 
-	mlx4_qp_detach_common(dev, &qp, gid, MLX4_PROT_ETH, MLX4_UC_STEER);
-	if (free)
-		mlx4_qp_release_range(dev, qpn, 1);
+	mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH);
+}
+
+static int validate_index(struct mlx4_dev *dev,
+			  struct mlx4_mac_table *table, int index)
+{
+	int err = 0;
+
+	if (index < 0 || index >= table->max || !table->entries[index]) {
+		mlx4_warn(dev, "No valid Mac entry for the given index\n");
+		err = -EINVAL;
+	}
+	return err;
+}
+
+static int find_index(struct mlx4_dev *dev,
+		      struct mlx4_mac_table *table, u64 mac)
+{
+	int i;
+
+	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+		if ((mac & MLX4_MAC_MASK) ==
+		    (MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))
+			return i;
+	}
+	/* Mac not found */
+	return -EINVAL;
 }
 
-int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn, u8 wrap)
+int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn)
 {
 	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
-	struct mlx4_mac_table *table = &info->mac_table;
 	struct mlx4_mac_entry *entry;
-	int i, err = 0;
-	int free = -1;
+	int index = 0;
+	int err = 0;
 
-	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) {
-		err = mlx4_uc_steer_add(dev, port, mac, qpn, 1);
-		if (err)
-			return err;
+	mlx4_dbg(dev, "Registering MAC: 0x%llx for adding\n",
+			(unsigned long long) mac);
+	index = mlx4_register_mac(dev, port, mac);
+	if (index < 0) {
+		err = index;
+		mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
+			 (unsigned long long) mac);
+		return err;
+	}
 
-		entry = kmalloc(sizeof *entry, GFP_KERNEL);
-		if (!entry) {
-			mlx4_uc_steer_release(dev, port, mac, *qpn, 1);
-			return -ENOMEM;
-		}
+	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) {
+		*qpn = info->base_qpn + index;
+		return 0;
+	}
+
+	err = mlx4_qp_reserve_range(dev, 1, 1, qpn);
+	mlx4_dbg(dev, "Reserved qp %d\n", *qpn);
+	if (err) {
+		mlx4_err(dev, "Failed to reserve qp for mac registration\n");
+		goto qp_err;
+	}
+
+	err = mlx4_uc_steer_add(dev, port, mac, qpn);
+	if (err)
+		goto steer_err;
+
+	entry = kmalloc(sizeof *entry, GFP_KERNEL);
+	if (!entry) {
+		err = -ENOMEM;
+		goto alloc_err;
+	}
+	entry->mac = mac;
+	err = radix_tree_insert(&info->mac_tree, *qpn, entry);
+	if (err)
+		goto insert_err;
+	return 0;
+
+insert_err:
+	kfree(entry);
+
+alloc_err:
+	mlx4_uc_steer_release(dev, port, mac, *qpn);
+
+steer_err:
+	mlx4_qp_release_range(dev, *qpn, 1);
 
-		entry->mac = mac;
-		err = radix_tree_insert(&info->mac_tree, *qpn, entry);
-		if (err) {
+qp_err:
+	mlx4_unregister_mac(dev, port, mac);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_eth_qp);
+
+void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn)
+{
+	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+	struct mlx4_mac_entry *entry;
+
+	mlx4_dbg(dev, "Registering MAC: 0x%llx for deleting\n",
+		 (unsigned long long) mac);
+	mlx4_unregister_mac(dev, port, mac);
+
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) {
+		entry = radix_tree_lookup(&info->mac_tree, qpn);
+		if (entry) {
+			mlx4_dbg(dev, "Releasing qp: port %d, mac 0x%llx,"
+				 " qpn %d\n", port,
+				 (unsigned long long) mac, qpn);
+			mlx4_uc_steer_release(dev, port, entry->mac, qpn);
+			mlx4_qp_release_range(dev, qpn, 1);
+			radix_tree_delete(&info->mac_tree, qpn);
 			kfree(entry);
-			mlx4_uc_steer_release(dev, port, mac, *qpn, 1);
-			return err;
 		}
 	}
+}
+EXPORT_SYMBOL_GPL(mlx4_put_eth_qp);
+
+static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
+				   __be64 *entries)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	u32 in_mod;
+	int err;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	memcpy(mailbox->buf, entries, MLX4_MAC_TABLE_SIZE);
+
+	in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port;
 
-	mlx4_dbg(dev, "Registering MAC: 0x%llx\n", (unsigned long long) mac);
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+
+int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
+{
+	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+	struct mlx4_mac_table *table = &info->mac_table;
+	int i, err = 0;
+	int free = -1;
+
+	mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d\n",
+		 (unsigned long long) mac, port);
 
 	mutex_lock(&table->mutex);
-	for (i = 0; i < MLX4_MAX_MAC_NUM - 1; i++) {
-		if (free < 0 && !table->refs[i]) {
+	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+		if (free < 0 && !table->entries[i]) {
 			free = i;
 			continue;
 		}
 
 		if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
-			/* MAC already registered, increase references count */
-			++table->refs[i];
+			/* MAC already registered, Must not have duplicates */
+			err = -EEXIST;
 			goto out;
 		}
 	}
 
-	if (free < 0) {
-		err = -ENOMEM;
-		goto out;
-	}
-
 	mlx4_dbg(dev, "Free MAC index is %d\n", free);
 
 	if (table->total == table->max) {
@@ -197,103 +271,103 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn, u8 wrap)
 	}
 
 	/* Register new MAC */
-	table->refs[free] = 1;
 	table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID);
 
 	err = mlx4_set_port_mac_table(dev, port, table->entries);
 	if (unlikely(err)) {
-		mlx4_err(dev, "Failed adding MAC: 0x%llx\n", (unsigned long long) mac);
-		table->refs[free] = 0;
+		mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
+			 (unsigned long long) mac);
 		table->entries[free] = 0;
 		goto out;
 	}
 
-	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER))
-		*qpn = info->base_qpn + free;
+	err = free;
 	++table->total;
 out:
 	mutex_unlock(&table->mutex);
 	return err;
 }
-EXPORT_SYMBOL_GPL(mlx4_register_mac);
+EXPORT_SYMBOL_GPL(__mlx4_register_mac);
 
-static int validate_index(struct mlx4_dev *dev,
-			  struct mlx4_mac_table *table, int index)
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 {
-	int err = 0;
+	u64 out_param;
+	int err;
 
-	if (index < 0 || index >= table->max || !table->entries[index]) {
-		mlx4_warn(dev, "No valid Mac entry for the given index\n");
-		err = -EINVAL;
-	}
-	return err;
-}
+	if (mlx4_is_mfunc(dev)) {
+		set_param_l(&out_param, port);
+		err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
+				   RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		if (err)
+			return err;
 
-static int find_index(struct mlx4_dev *dev,
-		      struct mlx4_mac_table *table, u64 mac)
-{
-	int i;
-	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
-		if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))
-			return i;
+		return get_param_l(&out_param);
 	}
-	/* Mac not found */
-	return -EINVAL;
+	return __mlx4_register_mac(dev, port, mac);
 }
+EXPORT_SYMBOL_GPL(mlx4_register_mac);
+
 
-void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int qpn)
+void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 {
 	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
 	struct mlx4_mac_table *table = &info->mac_table;
-	int index = qpn - info->base_qpn;
-	struct mlx4_mac_entry *entry;
+	int index;
 
-	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) {
-		entry = radix_tree_lookup(&info->mac_tree, qpn);
-		if (entry) {
-			mlx4_uc_steer_release(dev, port, entry->mac, qpn, 1);
-			radix_tree_delete(&info->mac_tree, qpn);
-			index = find_index(dev, table, entry->mac);
-			kfree(entry);
-		}
-	}
+	index = find_index(dev, table, mac);
 
 	mutex_lock(&table->mutex);
 
 	if (validate_index(dev, table, index))
 		goto out;
 
-	/* Check whether this address has reference count */
-	if (!(--table->refs[index])) {
-		table->entries[index] = 0;
-		mlx4_set_port_mac_table(dev, port, table->entries);
-		--table->total;
-	}
+	table->entries[index] = 0;
+	mlx4_set_port_mac_table(dev, port, table->entries);
+	--table->total;
 out:
 	mutex_unlock(&table->mutex);
 }
+EXPORT_SYMBOL_GPL(__mlx4_unregister_mac);
+
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
+{
+	u64 out_param;
+	int err;
+
+	if (mlx4_is_mfunc(dev)) {
+		set_param_l(&out_param, port);
+		err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
+				   RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES,
+				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		return;
+	}
+	__mlx4_unregister_mac(dev, port, mac);
+	return;
+}
 EXPORT_SYMBOL_GPL(mlx4_unregister_mac);
 
-int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac, u8 wrap)
+int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac)
 {
 	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
 	struct mlx4_mac_table *table = &info->mac_table;
-	int index = qpn - info->base_qpn;
 	struct mlx4_mac_entry *entry;
-	int err;
+	int index = qpn - info->base_qpn;
+	int err = 0;
 
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) {
 		entry = radix_tree_lookup(&info->mac_tree, qpn);
 		if (!entry)
 			return -EINVAL;
-		index = find_index(dev, table, entry->mac);
-		mlx4_uc_steer_release(dev, port, entry->mac, qpn, 0);
+		mlx4_uc_steer_release(dev, port, entry->mac, qpn);
+		mlx4_unregister_mac(dev, port, entry->mac);
 		entry->mac = new_mac;
-		err = mlx4_uc_steer_add(dev, port, entry->mac, &qpn, 0);
-		if (err || index < 0)
-			return err;
+		mlx4_register_mac(dev, port, new_mac);
+		err = mlx4_uc_steer_add(dev, port, entry->mac, &qpn);
+		return err;
 	}
 
+	/* CX1 doesn't support multi-functions */
 	mutex_lock(&table->mutex);
 
 	err = validate_index(dev, table, index);
@@ -304,7 +378,8 @@ int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac, u8 wra
 
 	err = mlx4_set_port_mac_table(dev, port, table->entries);
 	if (unlikely(err)) {
-		mlx4_err(dev, "Failed adding MAC: 0x%llx\n", (unsigned long long) new_mac);
+		mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
+			 (unsigned long long) new_mac);
 		table->entries[index] = 0;
 	}
 out:
@@ -312,6 +387,7 @@ out:
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx4_replace_mac);
+
 static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
 				    __be32 *entries)
 {
@@ -352,7 +428,8 @@ int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx)
 }
 EXPORT_SYMBOL_GPL(mlx4_find_cached_vlan);
 
-int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
+static int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan,
+				int *index)
 {
 	struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
 	int i, err = 0;
@@ -387,7 +464,7 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
 		goto out;
 	}
 
-	/* Register new MAC */
+	/* Register new VLAN */
 	table->refs[free] = 1;
 	table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID);
 
@@ -405,9 +482,27 @@ out:
 	mutex_unlock(&table->mutex);
 	return err;
 }
+
+int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
+{
+	u64 out_param;
+	int err;
+
+	if (mlx4_is_mfunc(dev)) {
+		set_param_l(&out_param, port);
+		err = mlx4_cmd_imm(dev, vlan, &out_param, RES_VLAN,
+				   RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		if (!err)
+			*index = get_param_l(&out_param);
+
+		return err;
+	}
+	return __mlx4_register_vlan(dev, port, vlan, index);
+}
 EXPORT_SYMBOL_GPL(mlx4_register_vlan);
 
-void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
+static void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
 {
 	struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
 
@@ -432,6 +527,25 @@ void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
 out:
 	mutex_unlock(&table->mutex);
 }
+
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
+{
+	u64 in_param;
+	int err;
+
+	if (mlx4_is_mfunc(dev)) {
+		set_param_l(&in_param, port);
+		err = mlx4_cmd(dev, in_param, RES_VLAN, RES_OP_RESERVE_AND_MAP,
+			       MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+			       MLX4_CMD_WRAPPED);
+		if (!err)
+			mlx4_warn(dev, "Failed freeing vlan at index:%d\n",
+					index);
+
+		return;
+	}
+	__mlx4_unregister_vlan(dev, port, index);
+}
 EXPORT_SYMBOL_GPL(mlx4_unregister_vlan);
 
 int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
@@ -514,6 +628,139 @@ int mlx4_check_ext_port_caps(struct mlx4_dev *dev, u8 port)
 	return err;
 }
 
+static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
+				u8 op_mod, struct mlx4_cmd_mailbox *inbox)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_port_info *port_info;
+	struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master;
+	struct mlx4_slave_state *slave_st = &master->slave_state[slave];
+	struct mlx4_set_port_rqp_calc_context *qpn_context;
+	struct mlx4_set_port_general_context *gen_context;
+	int reset_qkey_viols;
+	int port;
+	int is_eth;
+	u32 in_modifier;
+	u32 promisc;
+	u16 mtu, prev_mtu;
+	int err;
+	int i;
+	__be32 agg_cap_mask;
+	__be32 slave_cap_mask;
+	__be32 new_cap_mask;
+
+	port = in_mod & 0xff;
+	in_modifier = in_mod >> 8;
+	is_eth = op_mod;
+	port_info = &priv->port[port];
+
+	/* Slaves cannot perform SET_PORT operations except changing MTU */
+	if (is_eth) {
+		if (slave != dev->caps.function &&
+		    in_modifier != MLX4_SET_PORT_GENERAL) {
+			mlx4_warn(dev, "denying SET_PORT for slave:%d\n",
+					slave);
+			return -EINVAL;
+		}
+		switch (in_modifier) {
+		case MLX4_SET_PORT_RQP_CALC:
+			qpn_context = inbox->buf;
+			qpn_context->base_qpn =
+				cpu_to_be32(port_info->base_qpn);
+			qpn_context->n_mac = 0x7;
+			promisc = be32_to_cpu(qpn_context->promisc) >>
+				SET_PORT_PROMISC_SHIFT;
+			qpn_context->promisc = cpu_to_be32(
+				promisc << SET_PORT_PROMISC_SHIFT |
+				port_info->base_qpn);
+			promisc = be32_to_cpu(qpn_context->mcast) >>
+				SET_PORT_MC_PROMISC_SHIFT;
+			qpn_context->mcast = cpu_to_be32(
+				promisc << SET_PORT_MC_PROMISC_SHIFT |
+				port_info->base_qpn);
+			break;
+		case MLX4_SET_PORT_GENERAL:
+			gen_context = inbox->buf;
+			/* Mtu is configured as the max MTU among all the
+			 * the functions on the port. */
+			mtu = be16_to_cpu(gen_context->mtu);
+			mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port]);
+			prev_mtu = slave_st->mtu[port];
+			slave_st->mtu[port] = mtu;
+			if (mtu > master->max_mtu[port])
+				master->max_mtu[port] = mtu;
+			if (mtu < prev_mtu && prev_mtu ==
+						master->max_mtu[port]) {
+				slave_st->mtu[port] = mtu;
+				master->max_mtu[port] = mtu;
+				for (i = 0; i < dev->num_slaves; i++) {
+					master->max_mtu[port] =
+					max(master->max_mtu[port],
+					    master->slave_state[i].mtu[port]);
+				}
+			}
+
+			gen_context->mtu = cpu_to_be16(master->max_mtu[port]);
+			break;
+		}
+		return mlx4_cmd(dev, inbox->dma, in_mod, op_mod,
+				MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+				MLX4_CMD_NATIVE);
+	}
+
+	/* For IB, we only consider:
+	 * - The capability mask, which is set to the aggregate of all
+	 *   slave function capabilities
+	 * - The QKey violatin counter - reset according to each request.
+	 */
+
+	if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
+		reset_qkey_viols = (*(u8 *) inbox->buf) & 0x40;
+		new_cap_mask = ((__be32 *) inbox->buf)[2];
+	} else {
+		reset_qkey_viols = ((u8 *) inbox->buf)[3] & 0x1;
+		new_cap_mask = ((__be32 *) inbox->buf)[1];
+	}
+
+	agg_cap_mask = 0;
+	slave_cap_mask =
+		priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
+	priv->mfunc.master.slave_state[slave].ib_cap_mask[port] = new_cap_mask;
+	for (i = 0; i < dev->num_slaves; i++)
+		agg_cap_mask |=
+			priv->mfunc.master.slave_state[i].ib_cap_mask[port];
+
+	/* only clear mailbox for guests.  Master may be setting
+	* MTU or PKEY table size
+	*/
+	if (slave != dev->caps.function)
+		memset(inbox->buf, 0, 256);
+	if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
+		*(u8 *) inbox->buf	   = !!reset_qkey_viols << 6;
+		((__be32 *) inbox->buf)[2] = agg_cap_mask;
+	} else {
+		((u8 *) inbox->buf)[3]     = !!reset_qkey_viols;
+		((__be32 *) inbox->buf)[1] = agg_cap_mask;
+	}
+
+	err = mlx4_cmd(dev, inbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+	if (err)
+		priv->mfunc.master.slave_state[slave].ib_cap_mask[port] =
+			slave_cap_mask;
+	return err;
+}
+
+int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd)
+{
+	return mlx4_common_set_port(dev, slave, vhcr->in_modifier,
+				    vhcr->op_modifier, inbox);
+}
+
 int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
 {
 	struct mlx4_cmd_mailbox *mailbox;
@@ -535,3 +782,122 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
 }
+
+static int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
+			  u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_set_port_general_context *context;
+	int err;
+	u32 in_mod;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	context = mailbox->buf;
+	memset(context, 0, sizeof *context);
+
+	context->flags = SET_PORT_GEN_ALL_VALID;
+	context->mtu = cpu_to_be16(mtu);
+	context->pptx = (pptx * (!pfctx)) << 7;
+	context->pfctx = pfctx;
+	context->pprx = (pprx * (!pfcrx)) << 7;
+	context->pfcrx = pfcrx;
+
+	in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B,  MLX4_CMD_WRAPPED);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_general);
+
+static int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
+			   u8 promisc)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_set_port_rqp_calc_context *context;
+	int err;
+	u32 in_mod;
+	u32 m_promisc = (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) ?
+		MCAST_DIRECT : MCAST_DEFAULT;
+
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER  &&
+	    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)
+		return 0;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	context = mailbox->buf;
+	memset(context, 0, sizeof *context);
+
+	context->base_qpn = cpu_to_be32(base_qpn);
+	context->n_mac = dev->caps.log_num_macs;
+	context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT |
+				       base_qpn);
+	context->mcast = cpu_to_be32(m_promisc << SET_PORT_MC_PROMISC_SHIFT |
+				     base_qpn);
+	context->intra_no_vlan = 0;
+	context->no_vlan = MLX4_NO_VLAN_IDX;
+	context->intra_vlan_miss = 0;
+	context->vlan_miss = MLX4_VLAN_MISS_IDX;
+
+	in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port;
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B,  MLX4_CMD_WRAPPED);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_qpn_calc);
+
+int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+				struct mlx4_vhcr *vhcr,
+				struct mlx4_cmd_mailbox *inbox,
+				struct mlx4_cmd_mailbox *outbox,
+				struct mlx4_cmd_info *cmd)
+{
+	int err = 0;
+
+	return err;
+}
+
+int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port,
+			u64 mac, u64 clear, u8 mode)
+{
+	return mlx4_cmd(dev, (mac | (clear << 63)), port, mode,
+			MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B,
+			MLX4_CMD_WRAPPED);
+}
+EXPORT_SYMBOL(mlx4_SET_MCAST_FLTR);
+
+int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+			       struct mlx4_vhcr *vhcr,
+			       struct mlx4_cmd_mailbox *inbox,
+			       struct mlx4_cmd_mailbox *outbox,
+			       struct mlx4_cmd_info *cmd)
+{
+	int err = 0;
+
+	return err;
+}
+
+int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave,
+			       u32 in_mod, struct mlx4_cmd_mailbox *outbox)
+{
+	return mlx4_cmd_box(dev, 0, outbox->dma, in_mod, 0,
+			    MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B,
+			    MLX4_CMD_NATIVE);
+}
+
+int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
+				struct mlx4_vhcr *vhcr,
+				struct mlx4_cmd_mailbox *inbox,
+				struct mlx4_cmd_mailbox *outbox,
+				struct mlx4_cmd_info *cmd)
+{
+	return mlx4_common_dump_eth_stats(dev, slave,
+					  vhcr->in_modifier, outbox);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 59fc35ee66ad..0d99f57f9c8c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -223,17 +223,6 @@ static const char *ResourceType(enum mlx4_resource rt)
 	};
 }
 
-/* dummy procedures */
-int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
-{
-	return 0;
-}
-
-void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
-{
-}
-/* end dummies */
-
 int mlx4_init_resource_tracker(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1271,6 +1260,12 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 	return err;
 }
 
+static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+			 u64 in_param, u64 *out_param)
+{
+	return 0;
+}
+
 int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
 			   struct mlx4_vhcr *vhcr,
 			   struct mlx4_cmd_mailbox *inbox,
@@ -1311,6 +1306,11 @@ int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
 				    vhcr->in_param, &vhcr->out_param);
 		break;
 
+	case RES_VLAN:
+		err = vlan_alloc_res(dev, slave, vhcr->op_modifier, alop,
+				    vhcr->in_param, &vhcr->out_param);
+		break;
+
 	default:
 		err = -EINVAL;
 		break;
@@ -1487,6 +1487,12 @@ static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 
 }
 
+static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+			    u64 in_param, u64 *out_param)
+{
+	return 0;
+}
+
 int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
 			  struct mlx4_vhcr *vhcr,
 			  struct mlx4_cmd_mailbox *inbox,
@@ -1527,6 +1533,11 @@ int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
 				   vhcr->in_param, &vhcr->out_param);
 		break;
 
+	case RES_VLAN:
+		err = vlan_free_res(dev, slave, vhcr->op_modifier, alop,
+				   vhcr->in_param, &vhcr->out_param);
+		break;
+
 	default:
 		break;
 	}
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e4be34a908a7..3ef73b05e24e 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -599,6 +599,10 @@ int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_waterm
 int mlx4_INIT_PORT(struct mlx4_dev *dev, int port);
 int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
 
+int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+			int block_mcast_loopback, enum mlx4_protocol prot);
+int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+			enum mlx4_protocol prot);
 int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 			  int block_mcast_loopback, enum mlx4_protocol protocol);
 int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
@@ -609,9 +613,11 @@ int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port);
 int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port);
 int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
 
-int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn, u8 wrap);
-void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int qpn);
-int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac, u8 wrap);
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
+int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
+int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn);
+void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn);
 
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
-- 
cgit v1.2.3


From 2b8fb2867ca2736a715a88067fd0ec2904777cbe Mon Sep 17 00:00:00 2001
From: Marcel Apfelbaum <marcela@dev.mellanox.co.il>
Date: Tue, 13 Dec 2011 04:16:56 +0000
Subject: mlx4_core: mtts resources units changed to offset

In the previous implementation mtts are managed by:
1. order     - log(mtt segments), 'mtt segment' groups several mtts together.
2. first_seg - segment location relative to mtt table.
In the current implementation:
1. order     - log(mtts) rather than segments
2. offset    - mtt index in mtt table

Note: The actual mtt allocation is made in segments but it is
      transparent to callers.

Rational: The mtt resource holders are not interested on how the allocation
          of mtt is done, but rather on how they will use it.

Signed-off-by: Marcel Apfelbaum <marcela@dev.mellanox.co.il>
Reviewed-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c            |   2 +-
 drivers/net/ethernet/mellanox/mlx4/main.c          |  11 +--
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |   3 +-
 drivers/net/ethernet/mellanox/mlx4/mr.c            | 107 +++++++++++----------
 drivers/net/ethernet/mellanox/mlx4/profile.c       |   4 +-
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  |  59 +++++-------
 include/linux/mlx4/device.h                        |   5 +-
 7 files changed, 92 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 49bb2ead805a..99415fec9fdb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -209,7 +209,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 		size = dev->caps.num_mpts;
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
 
-		size = dev->caps.num_mtt_segs * dev->caps.mtts_per_seg;
+		size = dev->caps.num_mtts;
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
 
 		size = dev->caps.num_mgms + dev->caps.num_amgms;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 8be56326b04a..19363b618295 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -112,7 +112,7 @@ module_param_named(use_prio, use_prio, bool, 0444);
 MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
 		  "(0/1, default 0)");
 
-static int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
+int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
 MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");
 
@@ -222,9 +222,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
 	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
 	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
-	dev->caps.mtts_per_seg	     = 1 << log_mtts_per_seg;
-	dev->caps.reserved_mtts	     = DIV_ROUND_UP(dev_cap->reserved_mtts,
-						    dev->caps.mtts_per_seg);
+	dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
 	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;
 	dev->caps.reserved_uars	     = dev_cap->reserved_uars;
 	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
@@ -232,7 +230,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 					dev_cap->reserved_xrcds : 0;
 	dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
 					dev_cap->max_xrcds : 0;
-	dev->caps.mtt_entry_sz	     = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
+	dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;
+
 	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
 	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
 	dev->caps.flags		     = dev_cap->flags;
@@ -569,7 +568,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 	err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
 				  init_hca->mtt_base,
 				  dev->caps.mtt_entry_sz,
-				  dev->caps.num_mtt_segs,
+				  dev->caps.num_mtts,
 				  dev->caps.reserved_mtts, 1, 0);
 	if (err) {
 		mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index abf65d8af48d..879f825c6f6a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -191,6 +191,7 @@ do {									\
 	dev_warn(&mdev->pdev->dev, format, ##arg)
 
 extern int mlx4_log_num_mgm_entry_size;
+extern int log_mtts_per_seg;
 
 #define MLX4_MAX_NUM_SLAVES	(MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF)
 #define ALL_SLAVES 0xff
@@ -240,7 +241,7 @@ struct mlx4_mpt_entry {
 	__be32 win_cnt;
 	u8	reserved1[3];
 	u8	mtt_rep;
-	__be64 mtt_seg;
+	__be64 mtt_addr;
 	__be32 mtt_sz;
 	__be32 entity_size;
 	__be32 first_byte_offset;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index f8fd0a1d73af..f7243b26bdf5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -166,18 +166,24 @@ u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
 {
 	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
 	u32 seg;
+	int seg_order;
+	u32 offset;
 
-	seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, order);
+	seg_order = max_t(int, order - log_mtts_per_seg, 0);
+
+	seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, seg_order);
 	if (seg == -1)
 		return -1;
 
-	if (mlx4_table_get_range(dev, &mr_table->mtt_table, seg,
-				 seg + (1 << order) - 1)) {
-		mlx4_buddy_free(&mr_table->mtt_buddy, seg, order);
+	offset = seg * (1 << log_mtts_per_seg);
+
+	if (mlx4_table_get_range(dev, &mr_table->mtt_table, offset,
+				 offset + (1 << order) - 1)) {
+		mlx4_buddy_free(&mr_table->mtt_buddy, seg, seg_order);
 		return -1;
 	}
 
-	return seg;
+	return offset;
 }
 
 static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
@@ -212,45 +218,49 @@ int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
 	} else
 		mtt->page_shift = page_shift;
 
-	for (mtt->order = 0, i = dev->caps.mtts_per_seg; i < npages; i <<= 1)
+	for (mtt->order = 0, i = 1; i < npages; i <<= 1)
 		++mtt->order;
 
-	mtt->first_seg = mlx4_alloc_mtt_range(dev, mtt->order);
-	if (mtt->first_seg == -1)
+	mtt->offset = mlx4_alloc_mtt_range(dev, mtt->order);
+	if (mtt->offset == -1)
 		return -ENOMEM;
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_mtt_init);
 
-void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg,
-				  int order)
+void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
 {
+	u32 first_seg;
+	int seg_order;
 	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
 
-	mlx4_buddy_free(&mr_table->mtt_buddy, first_seg, order);
+	seg_order = max_t(int, order - log_mtts_per_seg, 0);
+	first_seg = offset / (1 << log_mtts_per_seg);
+
+	mlx4_buddy_free(&mr_table->mtt_buddy, first_seg, seg_order);
 	mlx4_table_put_range(dev, &mr_table->mtt_table, first_seg,
-				     first_seg + (1 << order) - 1);
+			     first_seg + (1 << seg_order) - 1);
 }
 
-static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order)
+static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
 {
 	u64 in_param;
 	int err;
 
 	if (mlx4_is_mfunc(dev)) {
-		set_param_l(&in_param, first_seg);
+		set_param_l(&in_param, offset);
 		set_param_h(&in_param, order);
 		err = mlx4_cmd(dev, in_param, RES_MTT, RES_OP_RESERVE_AND_MAP,
 						       MLX4_CMD_FREE_RES,
 						       MLX4_CMD_TIME_CLASS_A,
 						       MLX4_CMD_WRAPPED);
 		if (err)
-			mlx4_warn(dev, "Failed to free mtt range at:%d"
-				  " order:%d\n", first_seg, order);
+			mlx4_warn(dev, "Failed to free mtt range at:"
+				  "%d order:%d\n", offset, order);
 		return;
 	}
-	__mlx4_free_mtt_range(dev, first_seg, order);
+	 __mlx4_free_mtt_range(dev, offset, order);
 }
 
 void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
@@ -258,13 +268,13 @@ void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
 	if (mtt->order < 0)
 		return;
 
-	mlx4_free_mtt_range(dev, mtt->first_seg, mtt->order);
+	mlx4_free_mtt_range(dev, mtt->offset, mtt->order);
 }
 EXPORT_SYMBOL_GPL(mlx4_mtt_cleanup);
 
 u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
 {
-	return (u64) mtt->first_seg * dev->caps.mtt_entry_sz;
+	return (u64) mtt->offset * dev->caps.mtt_entry_sz;
 }
 EXPORT_SYMBOL_GPL(mlx4_mtt_addr);
 
@@ -504,9 +514,10 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 
 	if (mr->mtt.order < 0) {
 		mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
-		mpt_entry->mtt_seg = 0;
+		mpt_entry->mtt_addr = 0;
 	} else {
-		mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt));
+		mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
+						  &mr->mtt));
 	}
 
 	if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
@@ -514,8 +525,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
 		mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
 						   MLX4_MPT_PD_FLAG_RAE);
-		mpt_entry->mtt_sz    = cpu_to_be32((1 << mr->mtt.order) *
-						   dev->caps.mtts_per_seg);
+		mpt_entry->mtt_sz    = cpu_to_be32(1 << mr->mtt.order);
 	} else {
 		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
 	}
@@ -548,18 +558,10 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 	__be64 *mtts;
 	dma_addr_t dma_handle;
 	int i;
-	int s = start_index * sizeof (u64);
-
-	/* All MTTs must fit in the same page */
-	if (start_index / (PAGE_SIZE / sizeof (u64)) !=
-	    (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64)))
-		return -EINVAL;
 
-	if (start_index & (dev->caps.mtts_per_seg - 1))
-		return -EINVAL;
+	mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->offset +
+			       start_index, &dma_handle);
 
-	mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg +
-				s / dev->caps.mtt_entry_sz, &dma_handle);
 	if (!mtts)
 		return -ENOMEM;
 
@@ -580,15 +582,25 @@ int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 {
 	int err = 0;
 	int chunk;
+	int mtts_per_page;
+	int max_mtts_first_page;
+
+	/* compute how may mtts fit in the first page */
+	mtts_per_page = PAGE_SIZE / sizeof(u64);
+	max_mtts_first_page = mtts_per_page - (mtt->offset + start_index)
+			      % mtts_per_page;
+
+	chunk = min_t(int, max_mtts_first_page, npages);
 
 	while (npages > 0) {
-		chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
 		err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list);
 		if (err)
 			return err;
 		npages      -= chunk;
 		start_index += chunk;
 		page_list   += chunk;
+
+		chunk = min_t(int, mtts_per_page, npages);
 	}
 	return err;
 }
@@ -612,18 +624,9 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		inbox = mailbox->buf;
 
 		while (npages > 0) {
-			int s = mtt->first_seg * dev->caps.mtts_per_seg +
-				start_index;
-			chunk = min_t(int, MLX4_MAILBOX_SIZE / sizeof(u64) -
-				      dev->caps.mtts_per_seg, npages);
-			if (s / (PAGE_SIZE / sizeof(u64)) !=
-			    (s + chunk - 1) / (PAGE_SIZE / sizeof(u64)))
-				chunk = PAGE_SIZE / sizeof(u64) -
-					(s % (PAGE_SIZE / sizeof(u64)));
-
-			inbox[0] = cpu_to_be64(mtt->first_seg *
-					       dev->caps.mtts_per_seg +
-					       start_index);
+			chunk = min_t(int, MLX4_MAILBOX_SIZE / sizeof(u64) - 2,
+				      npages);
+			inbox[0] = cpu_to_be64(mtt->offset + start_index);
 			inbox[1] = 0;
 			for (i = 0; i < chunk; ++i)
 				inbox[i + 2] = cpu_to_be64(page_list[i] |
@@ -690,7 +693,8 @@ int mlx4_init_mr_table(struct mlx4_dev *dev)
 		return err;
 
 	err = mlx4_buddy_init(&mr_table->mtt_buddy,
-			      ilog2(dev->caps.num_mtt_segs));
+			      ilog2(dev->caps.num_mtts /
+			      (1 << log_mtts_per_seg)));
 	if (err)
 		goto err_buddy;
 
@@ -809,7 +813,7 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
 		   int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	u64 mtt_seg;
+	u64 mtt_offset;
 	int err = -ENOMEM;
 
 	if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32)
@@ -829,11 +833,12 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
 	if (err)
 		return err;
 
-	mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz;
+	mtt_offset = fmr->mr.mtt.offset * dev->caps.mtt_entry_sz;
 
 	fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
-				    fmr->mr.mtt.first_seg,
+				    fmr->mr.mtt.offset,
 				    &fmr->dma_handle);
+
 	if (!fmr->mtts) {
 		err = -ENOMEM;
 		goto err_free;
@@ -872,7 +877,7 @@ static int mlx4_fmr_alloc_reserved(struct mlx4_dev *dev, u32 mridx,
 		return err;
 
 	fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
-				    fmr->mr.mtt.first_seg,
+				    fmr->mr.mtt.offset,
 				    &fmr->dma_handle);
 	if (!fmr->mtts) {
 		err = -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c
index 771c4605ef86..66f91ca7a7c6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/profile.c
+++ b/drivers/net/ethernet/mellanox/mlx4/profile.c
@@ -98,7 +98,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 	profile[MLX4_RES_EQ].size     = dev_cap->eqc_entry_sz;
 	profile[MLX4_RES_DMPT].size   = dev_cap->dmpt_entry_sz;
 	profile[MLX4_RES_CMPT].size   = dev_cap->cmpt_entry_sz;
-	profile[MLX4_RES_MTT].size    = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
+	profile[MLX4_RES_MTT].size    = dev_cap->mtt_entry_sz;
 	profile[MLX4_RES_MCG].size    = mlx4_get_mgm_entry_size(dev);
 
 	profile[MLX4_RES_QP].num      = request->num_qp;
@@ -210,7 +210,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 			init_hca->cmpt_base	 = profile[i].start;
 			break;
 		case MLX4_RES_MTT:
-			dev->caps.num_mtt_segs	 = profile[i].num;
+			dev->caps.num_mtts	 = profile[i].num;
 			priv->mr_table.mtt_base	 = profile[i].start;
 			init_hca->mtt_base	 = profile[i].start;
 			break;
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 0d99f57f9c8c..bdd61c35d044 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -1550,9 +1550,9 @@ static int mr_phys_mpt(struct mlx4_mpt_entry *mpt)
 	return (be32_to_cpu(mpt->flags) >> 9) & 1;
 }
 
-static int mr_get_mtt_seg(struct mlx4_mpt_entry *mpt)
+static int mr_get_mtt_addr(struct mlx4_mpt_entry *mpt)
 {
-	return (int)be64_to_cpu(mpt->mtt_seg) & 0xfffffff8;
+	return (int)be64_to_cpu(mpt->mtt_addr) & 0xfffffff8;
 }
 
 static int mr_get_mtt_size(struct mlx4_mpt_entry *mpt)
@@ -1565,12 +1565,12 @@ static int mr_get_pdn(struct mlx4_mpt_entry *mpt)
 	return be32_to_cpu(mpt->pd_flags) & 0xffffff;
 }
 
-static int qp_get_mtt_seg(struct mlx4_qp_context *qpc)
+static int qp_get_mtt_addr(struct mlx4_qp_context *qpc)
 {
 	return be32_to_cpu(qpc->mtt_base_addr_l) & 0xfffffff8;
 }
 
-static int srq_get_mtt_seg(struct mlx4_srq_context *srqc)
+static int srq_get_mtt_addr(struct mlx4_srq_context *srqc)
 {
 	return be32_to_cpu(srqc->mtt_base_addr_l) & 0xfffffff8;
 }
@@ -1614,8 +1614,8 @@ static int pdn2slave(int pdn)
 static int check_mtt_range(struct mlx4_dev *dev, int slave, int start,
 			   int size, struct res_mtt *mtt)
 {
-	int res_start = mtt->com.res_id * dev->caps.mtts_per_seg;
-	int res_size = (1 << mtt->order) * dev->caps.mtts_per_seg;
+	int res_start = mtt->com.res_id;
+	int res_size = (1 << mtt->order);
 
 	if (start < res_start || start + size > res_start + res_size)
 		return -EPERM;
@@ -1632,8 +1632,7 @@ int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave,
 	int index = vhcr->in_modifier;
 	struct res_mtt *mtt;
 	struct res_mpt *mpt;
-	int mtt_base = (mr_get_mtt_seg(inbox->buf) / dev->caps.mtt_entry_sz) *
-		dev->caps.mtts_per_seg;
+	int mtt_base = mr_get_mtt_addr(inbox->buf) / dev->caps.mtt_entry_sz;
 	int phys;
 	int id;
 
@@ -1644,8 +1643,7 @@ int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave,
 
 	phys = mr_phys_mpt(inbox->buf);
 	if (!phys) {
-		err = get_res(dev, slave, mtt_base / dev->caps.mtts_per_seg,
-			      RES_MTT, &mtt);
+		err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
 		if (err)
 			goto ex_abort;
 
@@ -1769,8 +1767,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
 	struct res_mtt *mtt;
 	struct res_qp *qp;
 	struct mlx4_qp_context *qpc = inbox->buf + 8;
-	int mtt_base = (qp_get_mtt_seg(qpc) / dev->caps.mtt_entry_sz) *
-		dev->caps.mtts_per_seg;
+	int mtt_base = qp_get_mtt_addr(qpc) / dev->caps.mtt_entry_sz;
 	int mtt_size = qp_get_mtt_size(qpc);
 	struct res_cq *rcq;
 	struct res_cq *scq;
@@ -1786,8 +1783,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
 		return err;
 	qp->local_qpn = local_qpn;
 
-	err = get_res(dev, slave, mtt_base / dev->caps.mtts_per_seg, RES_MTT,
-		      &mtt);
+	err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
 	if (err)
 		goto ex_abort;
 
@@ -1836,7 +1832,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
 		qp->srq = srq;
 	}
 	put_res(dev, slave, rcqn, RES_CQ);
-	put_res(dev, slave, mtt_base  / dev->caps.mtts_per_seg, RES_MTT);
+	put_res(dev, slave, mtt_base, RES_MTT);
 	res_end_move(dev, slave, RES_QP, qpn);
 
 	return 0;
@@ -1850,14 +1846,14 @@ ex_put_scq:
 ex_put_rcq:
 	put_res(dev, slave, rcqn, RES_CQ);
 ex_put_mtt:
-	put_res(dev, slave, mtt_base / dev->caps.mtts_per_seg, RES_MTT);
+	put_res(dev, slave, mtt_base, RES_MTT);
 ex_abort:
 	res_abort_move(dev, slave, RES_QP, qpn);
 
 	return err;
 }
 
-static int eq_get_mtt_seg(struct mlx4_eq_context *eqc)
+static int eq_get_mtt_addr(struct mlx4_eq_context *eqc)
 {
 	return be32_to_cpu(eqc->mtt_base_addr_l) & 0xfffffff8;
 }
@@ -1873,7 +1869,7 @@ static int eq_get_mtt_size(struct mlx4_eq_context *eqc)
 	return 1 << (log_eq_size + 5 - page_shift);
 }
 
-static int cq_get_mtt_seg(struct mlx4_cq_context *cqc)
+static int cq_get_mtt_addr(struct mlx4_cq_context *cqc)
 {
 	return be32_to_cpu(cqc->mtt_base_addr_l) & 0xfffffff8;
 }
@@ -1899,8 +1895,7 @@ int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
 	int eqn = vhcr->in_modifier;
 	int res_id = (slave << 8) | eqn;
 	struct mlx4_eq_context *eqc = inbox->buf;
-	int mtt_base = (eq_get_mtt_seg(eqc) / dev->caps.mtt_entry_sz) *
-		dev->caps.mtts_per_seg;
+	int mtt_base = eq_get_mtt_addr(eqc) / dev->caps.mtt_entry_sz;
 	int mtt_size = eq_get_mtt_size(eqc);
 	struct res_eq *eq;
 	struct res_mtt *mtt;
@@ -1912,8 +1907,7 @@ int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
 	if (err)
 		goto out_add;
 
-	err = get_res(dev, slave, mtt_base / dev->caps.mtts_per_seg, RES_MTT,
-		      &mtt);
+	err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
 	if (err)
 		goto out_move;
 
@@ -1986,7 +1980,8 @@ int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave,
 	/* Call the SW implementation of write_mtt:
 	 * - Prepare a dummy mtt struct
 	 * - Translate inbox contents to simple addresses in host endianess */
-	mtt.first_seg = 0;
+	mtt.offset = 0;  /* TBD this is broken but I don't handle it since
+			    we don't really use it */
 	mtt.order = 0;
 	mtt.page_shift = 0;
 	for (i = 0; i < npages; ++i)
@@ -2137,16 +2132,14 @@ int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave,
 	int err;
 	int cqn = vhcr->in_modifier;
 	struct mlx4_cq_context *cqc = inbox->buf;
-	int mtt_base = (cq_get_mtt_seg(cqc) / dev->caps.mtt_entry_sz) *
-		dev->caps.mtts_per_seg;
+	int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz;
 	struct res_cq *cq;
 	struct res_mtt *mtt;
 
 	err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_HW, &cq);
 	if (err)
 		return err;
-	err = get_res(dev, slave, mtt_base / dev->caps.mtts_per_seg, RES_MTT,
-		      &mtt);
+	err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
 	if (err)
 		goto out_move;
 	err = check_mtt_range(dev, slave, mtt_base, cq_get_mtt_size(cqc), mtt);
@@ -2228,8 +2221,7 @@ static int handle_resize(struct mlx4_dev *dev, int slave,
 	struct res_mtt *orig_mtt;
 	struct res_mtt *mtt;
 	struct mlx4_cq_context *cqc = inbox->buf;
-	int mtt_base = (cq_get_mtt_seg(cqc) / dev->caps.mtt_entry_sz) *
-		dev->caps.mtts_per_seg;
+	int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz;
 
 	err = get_res(dev, slave, cq->mtt->com.res_id, RES_MTT, &orig_mtt);
 	if (err)
@@ -2240,8 +2232,7 @@ static int handle_resize(struct mlx4_dev *dev, int slave,
 		goto ex_put;
 	}
 
-	err = get_res(dev, slave, mtt_base / dev->caps.mtts_per_seg, RES_MTT,
-		      &mtt);
+	err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
 	if (err)
 		goto ex_put;
 
@@ -2325,8 +2316,7 @@ int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
 	struct res_mtt *mtt;
 	struct res_srq *srq;
 	struct mlx4_srq_context *srqc = inbox->buf;
-	int mtt_base = (srq_get_mtt_seg(srqc) / dev->caps.mtt_entry_sz) *
-		dev->caps.mtts_per_seg;
+	int mtt_base = srq_get_mtt_addr(srqc) / dev->caps.mtt_entry_sz;
 
 	if (srqn != (be32_to_cpu(srqc->state_logsize_srqn) & 0xffffff))
 		return -EINVAL;
@@ -2334,8 +2324,7 @@ int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
 	err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_HW, &srq);
 	if (err)
 		return err;
-	err = get_res(dev, slave, mtt_base / dev->caps.mtts_per_seg,
-		      RES_MTT, &mtt);
+	err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
 	if (err)
 		goto ex_abort;
 	err = check_mtt_range(dev, slave, mtt_base, srq_get_mtt_size(srqc),
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 3ef73b05e24e..65bb466c575f 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -272,8 +272,7 @@ struct mlx4_caps {
 	int			num_comp_vectors;
 	int			comp_pool;
 	int			num_mpts;
-	int			num_mtt_segs;
-	int			mtts_per_seg;
+	int			num_mtts;
 	int			fmr_reserved_mtts;
 	int			reserved_mtts;
 	int			reserved_mrws;
@@ -323,7 +322,7 @@ struct mlx4_buf {
 };
 
 struct mlx4_mtt {
-	u32			first_seg;
+	u32			offset;
 	int			order;
 	int			page_shift;
 };
-- 
cgit v1.2.3


From ab9c17a009ee8eb8c667f22dc0be0709effceab9 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 13 Dec 2011 04:18:30 +0000
Subject: mlx4_core: Modify driver initialization flow to accommodate SRIOV for
 Ethernet

1. Added module parameters sr_iov and probe_vf for controlling enablement of
   SRIOV mode.
2. Increased default max num-qps, num-mpts and log_num_macs to accomodate
   SRIOV mode
3. Added port_type_array as a module parameter to allow driver startup with
   ports configured as desired.
   In SRIOV mode, only ETH is supported, and this array is ignored; otherwise,
   for the case where the FW supports both port types (ETH and IB), the
   port_type_array parameter is used.
   By default, the port_type_array is set to configure both ports as IB.
4. When running in sriov mode, the master needs to initialize the ICM eq table
   to hold the eq's for itself and also for all the slaves.
5. mlx4_set_port_mask() now invoked from mlx4_init_hca, instead of in mlx4_dev_cap.
6. Introduced sriov VF (slave) device startup/teardown logic (mainly procedures
   mlx4_init_slave, mlx4_slave_exit, mlx4_slave_cap, mlx4_slave_exit and flow
   modifications in __mlx4_init_one, mlx4_init_hca, and mlx4_setup_hca).
   VFs obtain their startup information from the PF (master) device via the
   comm channel.
7. In SRIOV mode (both PF and VF), MSI_X must be enabled, or the driver
   aborts loading the device.
8. Do not allow setting port type via sysfs when running in SRIOV mode.
9. mlx4_get_ownership:  Currently, only one PF is supported by the driver.
   If the HCA is burned with FW which enables more than one PF, only one
   of the PFs is allowed to run.  The first one up grabs a FW ownership
   semaphone -- all other PFs will find that semaphore taken, and the
   driver will not allow them to run.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: Liran Liss <liranl@mellanox.co.il>
Signed-off-by: Marcel Apfelbaum <marcela@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c  | 170 +++++-
 drivers/net/ethernet/mellanox/mlx4/fw.c   |  68 ++-
 drivers/net/ethernet/mellanox/mlx4/fw.h   |   2 +
 drivers/net/ethernet/mellanox/mlx4/main.c | 842 ++++++++++++++++++++++++------
 drivers/net/ethernet/mellanox/mlx4/mlx4.h |   6 +
 include/linux/mlx4/cmd.h                  |   2 +
 include/linux/mlx4/device.h               |   1 +
 7 files changed, 934 insertions(+), 157 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 8e6e4b20b0e2..c4fef839168c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -257,7 +257,7 @@ out:
 	return err;
 }
 
-static int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
+int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
 		  unsigned long timeout)
 {
 	if (mlx4_priv(dev)->cmd.use_events)
@@ -1390,6 +1390,153 @@ void mlx4_master_comm_channel(struct work_struct *work)
 		mlx4_warn(dev, "Failed to arm comm channel events\n");
 }
 
+static int sync_toggles(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int wr_toggle;
+	int rd_toggle;
+	unsigned long end;
+
+	wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31;
+	end = jiffies + msecs_to_jiffies(5000);
+
+	while (time_before(jiffies, end)) {
+		rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31;
+		if (rd_toggle == wr_toggle) {
+			priv->cmd.comm_toggle = rd_toggle;
+			return 0;
+		}
+
+		cond_resched();
+	}
+
+	/*
+	 * we could reach here if for example the previous VM using this
+	 * function misbehaved and left the channel with unsynced state. We
+	 * should fix this here and give this VM a chance to use a properly
+	 * synced channel
+	 */
+	mlx4_warn(dev, "recovering from previously mis-behaved VM\n");
+	__raw_writel((__force u32) 0, &priv->mfunc.comm->slave_read);
+	__raw_writel((__force u32) 0, &priv->mfunc.comm->slave_write);
+	priv->cmd.comm_toggle = 0;
+
+	return 0;
+}
+
+int mlx4_multi_func_init(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_slave_state *s_state;
+	int i, err, port;
+
+	priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE,
+					    &priv->mfunc.vhcr_dma,
+					    GFP_KERNEL);
+	if (!priv->mfunc.vhcr) {
+		mlx4_err(dev, "Couldn't allocate vhcr.\n");
+		return -ENOMEM;
+	}
+
+	if (mlx4_is_master(dev))
+		priv->mfunc.comm =
+		ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) +
+			priv->fw.comm_base, MLX4_COMM_PAGESIZE);
+	else
+		priv->mfunc.comm =
+		ioremap(pci_resource_start(dev->pdev, 2) +
+			MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE);
+	if (!priv->mfunc.comm) {
+		mlx4_err(dev, "Couldn't map communication vector.\n");
+		goto err_vhcr;
+	}
+
+	if (mlx4_is_master(dev)) {
+		priv->mfunc.master.slave_state =
+			kzalloc(dev->num_slaves *
+				sizeof(struct mlx4_slave_state), GFP_KERNEL);
+		if (!priv->mfunc.master.slave_state)
+			goto err_comm;
+
+		for (i = 0; i < dev->num_slaves; ++i) {
+			s_state = &priv->mfunc.master.slave_state[i];
+			s_state->last_cmd = MLX4_COMM_CMD_RESET;
+			__raw_writel((__force u32) 0,
+				     &priv->mfunc.comm[i].slave_write);
+			__raw_writel((__force u32) 0,
+				     &priv->mfunc.comm[i].slave_read);
+			mmiowb();
+			for (port = 1; port <= MLX4_MAX_PORTS; port++) {
+				s_state->vlan_filter[port] =
+					kzalloc(sizeof(struct mlx4_vlan_fltr),
+						GFP_KERNEL);
+				if (!s_state->vlan_filter[port]) {
+					if (--port)
+						kfree(s_state->vlan_filter[port]);
+					goto err_slaves;
+				}
+				INIT_LIST_HEAD(&s_state->mcast_filters[port]);
+			}
+			spin_lock_init(&s_state->lock);
+		}
+
+		memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe));
+		priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD;
+		INIT_WORK(&priv->mfunc.master.comm_work,
+			  mlx4_master_comm_channel);
+		INIT_WORK(&priv->mfunc.master.slave_event_work,
+			  mlx4_gen_slave_eqe);
+		INIT_WORK(&priv->mfunc.master.slave_flr_event_work,
+			  mlx4_master_handle_slave_flr);
+		spin_lock_init(&priv->mfunc.master.slave_state_lock);
+		priv->mfunc.master.comm_wq =
+			create_singlethread_workqueue("mlx4_comm");
+		if (!priv->mfunc.master.comm_wq)
+			goto err_slaves;
+
+		if (mlx4_init_resource_tracker(dev))
+			goto err_thread;
+
+		sema_init(&priv->cmd.slave_sem, 1);
+		err = mlx4_ARM_COMM_CHANNEL(dev);
+		if (err) {
+			mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
+				 err);
+			goto err_resource;
+		}
+
+	} else {
+		err = sync_toggles(dev);
+		if (err) {
+			mlx4_err(dev, "Couldn't sync toggles\n");
+			goto err_comm;
+		}
+
+		sema_init(&priv->cmd.slave_sem, 1);
+	}
+	return 0;
+
+err_resource:
+	mlx4_free_resource_tracker(dev);
+err_thread:
+	flush_workqueue(priv->mfunc.master.comm_wq);
+	destroy_workqueue(priv->mfunc.master.comm_wq);
+err_slaves:
+	while (--i) {
+		for (port = 1; port <= MLX4_MAX_PORTS; port++)
+			kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]);
+	}
+	kfree(priv->mfunc.master.slave_state);
+err_comm:
+	iounmap(priv->mfunc.comm);
+err_vhcr:
+	dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+					     priv->mfunc.vhcr,
+					     priv->mfunc.vhcr_dma);
+	priv->mfunc.vhcr = NULL;
+	return -ENOMEM;
+}
+
 int mlx4_cmd_init(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1425,6 +1572,27 @@ err_hcr:
 	return -ENOMEM;
 }
 
+void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int i, port;
+
+	if (mlx4_is_master(dev)) {
+		flush_workqueue(priv->mfunc.master.comm_wq);
+		destroy_workqueue(priv->mfunc.master.comm_wq);
+		for (i = 0; i < dev->num_slaves; i++) {
+			for (port = 1; port <= MLX4_MAX_PORTS; port++)
+				kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]);
+		}
+		kfree(priv->mfunc.master.slave_state);
+		iounmap(priv->mfunc.comm);
+		dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+						     priv->mfunc.vhcr,
+						     priv->mfunc.vhcr_dma);
+		priv->mfunc.vhcr = NULL;
+	}
+}
+
 void mlx4_cmd_cleanup(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 99415fec9fdb..f03b54e0aa53 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -1071,7 +1071,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 
 	/* UAR attributes */
 
-	MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET);
+	MLX4_PUT(inbox, param->uar_page_sz,	INIT_HCA_UAR_PAGE_SZ_OFFSET);
 	MLX4_PUT(inbox, param->log_uar_sz,      INIT_HCA_LOG_UAR_SZ_OFFSET);
 
 	err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000,
@@ -1084,6 +1084,72 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	return err;
 }
 
+int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+		   struct mlx4_init_hca_param *param)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	__be32 *outbox;
+	int err;
+
+#define QUERY_HCA_GLOBAL_CAPS_OFFSET	0x04
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	outbox = mailbox->buf;
+
+	err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0,
+			   MLX4_CMD_QUERY_HCA,
+			   MLX4_CMD_TIME_CLASS_B,
+			   !mlx4_is_slave(dev));
+	if (err)
+		goto out;
+
+	MLX4_GET(param->global_caps, outbox, QUERY_HCA_GLOBAL_CAPS_OFFSET);
+
+	/* QPC/EEC/CQC/EQC/RDMARC attributes */
+
+	MLX4_GET(param->qpc_base,      outbox, INIT_HCA_QPC_BASE_OFFSET);
+	MLX4_GET(param->log_num_qps,   outbox, INIT_HCA_LOG_QP_OFFSET);
+	MLX4_GET(param->srqc_base,     outbox, INIT_HCA_SRQC_BASE_OFFSET);
+	MLX4_GET(param->log_num_srqs,  outbox, INIT_HCA_LOG_SRQ_OFFSET);
+	MLX4_GET(param->cqc_base,      outbox, INIT_HCA_CQC_BASE_OFFSET);
+	MLX4_GET(param->log_num_cqs,   outbox, INIT_HCA_LOG_CQ_OFFSET);
+	MLX4_GET(param->altc_base,     outbox, INIT_HCA_ALTC_BASE_OFFSET);
+	MLX4_GET(param->auxc_base,     outbox, INIT_HCA_AUXC_BASE_OFFSET);
+	MLX4_GET(param->eqc_base,      outbox, INIT_HCA_EQC_BASE_OFFSET);
+	MLX4_GET(param->log_num_eqs,   outbox, INIT_HCA_LOG_EQ_OFFSET);
+	MLX4_GET(param->rdmarc_base,   outbox, INIT_HCA_RDMARC_BASE_OFFSET);
+	MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET);
+
+	/* multicast attributes */
+
+	MLX4_GET(param->mc_base,         outbox, INIT_HCA_MC_BASE_OFFSET);
+	MLX4_GET(param->log_mc_entry_sz, outbox,
+		 INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+	MLX4_GET(param->log_mc_hash_sz,  outbox,
+		 INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
+	MLX4_GET(param->log_mc_table_sz, outbox,
+		 INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+
+	/* TPT attributes */
+
+	MLX4_GET(param->dmpt_base,  outbox, INIT_HCA_DMPT_BASE_OFFSET);
+	MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET);
+	MLX4_GET(param->mtt_base,   outbox, INIT_HCA_MTT_BASE_OFFSET);
+	MLX4_GET(param->cmpt_base,  outbox, INIT_HCA_CMPT_BASE_OFFSET);
+
+	/* UAR attributes */
+
+	MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET);
+	MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET);
+
+out:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+
+	return err;
+}
+
 int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
 			   struct mlx4_vhcr *vhcr,
 			   struct mlx4_cmd_mailbox *inbox,
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 8f0f4cf7d2c0..3368363a8ec5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -161,6 +161,7 @@ struct mlx4_init_hca_param {
 	u8  log_mc_table_sz;
 	u8  log_mpt_sz;
 	u8  log_uar_sz;
+	u8  uar_page_sz; /* log pg sz in 4k chunks */
 };
 
 struct mlx4_init_ib_param {
@@ -197,6 +198,7 @@ int mlx4_RUN_FW(struct mlx4_dev *dev);
 int mlx4_QUERY_FW(struct mlx4_dev *dev);
 int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter);
 int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param);
+int mlx4_QUERY_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param);
 int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic);
 int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt);
 int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 19363b618295..b969bfb569e3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -40,6 +40,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/io-mapping.h>
+#include <linux/delay.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/doorbell.h>
@@ -75,6 +76,14 @@ MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
 
 #endif /* CONFIG_PCI_MSI */
 
+static int num_vfs;
+module_param(num_vfs, int, 0444);
+MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0");
+
+static int probe_vf;
+module_param(probe_vf, int, 0644);
+MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)");
+
 int mlx4_log_num_mgm_entry_size = 10;
 module_param_named(log_num_mgm_entry_size,
 			mlx4_log_num_mgm_entry_size, int, 0444);
@@ -83,21 +92,26 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
 					 " 10 gives 248.range: 9<="
 					 " log_num_mgm_entry_size <= 12");
 
+#define MLX4_VF                                        (1 << 0)
+
+#define HCA_GLOBAL_CAP_MASK            0
+#define PF_CONTEXT_BEHAVIOUR_MASK      0
+
 static char mlx4_version[] __devinitdata =
 	DRV_NAME ": Mellanox ConnectX core driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
 
 static struct mlx4_profile default_profile = {
-	.num_qp		= 1 << 17,
+	.num_qp		= 1 << 18,
 	.num_srq	= 1 << 16,
 	.rdmarc_per_qp	= 1 << 4,
 	.num_cq		= 1 << 16,
 	.num_mcg	= 1 << 13,
-	.num_mpt	= 1 << 17,
+	.num_mpt	= 1 << 19,
 	.num_mtt	= 1 << 20,
 };
 
-static int log_num_mac = 2;
+static int log_num_mac = 7;
 module_param_named(log_num_mac, log_num_mac, int, 0444);
 MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");
 
@@ -116,6 +130,23 @@ int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
 MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");
 
+static int port_type_array[2] = {1, 1};
+static int arr_argc = 2;
+module_param_array(port_type_array, int, &arr_argc, 0444);
+MODULE_PARM_DESC(port_type_array, "Array of port types: IB by default");
+
+struct mlx4_port_config {
+	struct list_head list;
+	enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
+	struct pci_dev *pdev;
+};
+
+static inline int mlx4_master_get_num_eqs(struct mlx4_dev *dev)
+{
+	return dev->caps.reserved_eqs +
+		MLX4_MFUNC_EQ_NUM * (dev->num_slaves + 1);
+}
+
 int mlx4_check_port_params(struct mlx4_dev *dev,
 			   enum mlx4_port_type *port_type)
 {
@@ -200,6 +231,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev->caps.trans_code[i]     = dev_cap->trans_code[i];
 	}
 
+	dev->caps.uar_page_size	     = PAGE_SIZE;
 	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
 	dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
 	dev->caps.bf_reg_size	     = dev_cap->bf_reg_size;
@@ -224,7 +256,9 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
 	dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
 	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;
-	dev->caps.reserved_uars	     = dev_cap->reserved_uars;
+
+	/* The first 128 UARs are used for EQ doorbells */
+	dev->caps.reserved_uars	     = max_t(int, 128, dev_cap->reserved_uars);
 	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
 	dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
 					dev_cap->reserved_xrcds : 0;
@@ -245,10 +279,36 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.log_num_prios = use_prio ? 3 : 0;
 
 	for (i = 1; i <= dev->caps.num_ports; ++i) {
-		if (dev->caps.supported_type[i] != MLX4_PORT_TYPE_ETH)
-			dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
-		else
-			dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
+		dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
+		if (dev->caps.supported_type[i]) {
+			/* if only ETH is supported - assign ETH */
+			if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
+				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
+			/* if only IB is supported,
+			 * assign IB only if SRIOV is off*/
+			else if (dev->caps.supported_type[i] ==
+				 MLX4_PORT_TYPE_IB) {
+				if (dev->flags & MLX4_FLAG_SRIOV)
+					dev->caps.port_type[i] =
+						MLX4_PORT_TYPE_NONE;
+				else
+					dev->caps.port_type[i] =
+						MLX4_PORT_TYPE_IB;
+			/* if IB and ETH are supported,
+			 * first of all check if SRIOV is on */
+			} else if (dev->flags & MLX4_FLAG_SRIOV)
+				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
+			/* if IB and ETH are supported and SRIOV is off
+			 * use module parameters */
+			else {
+				if (port_type_array[i-1])
+					dev->caps.port_type[i] =
+						MLX4_PORT_TYPE_IB;
+				else
+					dev->caps.port_type[i] =
+						MLX4_PORT_TYPE_ETH;
+			}
+		}
 		dev->caps.possible_type[i] = dev->caps.port_type[i];
 		mlx4_priv(dev)->sense.sense_allowed[i] =
 			dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO;
@@ -267,8 +327,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		}
 	}
 
-	mlx4_set_port_mask(dev);
-
 	dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters);
 
 	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
@@ -287,6 +345,149 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 
 	return 0;
 }
+/*The function checks if there are live vf, return the num of them*/
+static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_slave_state *s_state;
+	int i;
+	int ret = 0;
+
+	for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
+		s_state = &priv->mfunc.master.slave_state[i];
+		if (s_state->active && s_state->last_cmd !=
+		    MLX4_COMM_CMD_RESET) {
+			mlx4_warn(dev, "%s: slave: %d is still active\n",
+				  __func__, i);
+			ret++;
+		}
+	}
+	return ret;
+}
+
+static int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_slave_state *s_slave;
+
+	if (!mlx4_is_master(dev))
+		return 0;
+
+	s_slave = &priv->mfunc.master.slave_state[slave];
+	return !!s_slave->active;
+}
+EXPORT_SYMBOL(mlx4_is_slave_active);
+
+static int mlx4_slave_cap(struct mlx4_dev *dev)
+{
+	int			   err;
+	u32			   page_size;
+	struct mlx4_dev_cap	   dev_cap;
+	struct mlx4_func_cap	   func_cap;
+	struct mlx4_init_hca_param hca_param;
+	int			   i;
+
+	memset(&hca_param, 0, sizeof(hca_param));
+	err = mlx4_QUERY_HCA(dev, &hca_param);
+	if (err) {
+		mlx4_err(dev, "QUERY_HCA command failed, aborting.\n");
+		return err;
+	}
+
+	/*fail if the hca has an unknown capability */
+	if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) !=
+	    HCA_GLOBAL_CAP_MASK) {
+		mlx4_err(dev, "Unknown hca global capabilities\n");
+		return -ENOSYS;
+	}
+
+	mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;
+
+	memset(&dev_cap, 0, sizeof(dev_cap));
+	err = mlx4_dev_cap(dev, &dev_cap);
+	if (err) {
+		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+		return err;
+	}
+
+	page_size = ~dev->caps.page_size_cap + 1;
+	mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
+	if (page_size > PAGE_SIZE) {
+		mlx4_err(dev, "HCA minimum page size of %d bigger than "
+			 "kernel PAGE_SIZE of %ld, aborting.\n",
+			 page_size, PAGE_SIZE);
+		return -ENODEV;
+	}
+
+	/* slave gets uar page size from QUERY_HCA fw command */
+	dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);
+
+	/* TODO: relax this assumption */
+	if (dev->caps.uar_page_size != PAGE_SIZE) {
+		mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n",
+			 dev->caps.uar_page_size, PAGE_SIZE);
+		return -ENODEV;
+	}
+
+	memset(&func_cap, 0, sizeof(func_cap));
+	err = mlx4_QUERY_FUNC_CAP(dev, &func_cap);
+	if (err) {
+		mlx4_err(dev, "QUERY_FUNC_CAP command failed, aborting.\n");
+		return err;
+	}
+
+	if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
+	    PF_CONTEXT_BEHAVIOUR_MASK) {
+		mlx4_err(dev, "Unknown pf context behaviour\n");
+		return -ENOSYS;
+	}
+
+	dev->caps.function		= func_cap.function;
+	dev->caps.num_ports		= func_cap.num_ports;
+	dev->caps.num_qps		= func_cap.qp_quota;
+	dev->caps.num_srqs		= func_cap.srq_quota;
+	dev->caps.num_cqs		= func_cap.cq_quota;
+	dev->caps.num_eqs               = func_cap.max_eq;
+	dev->caps.reserved_eqs          = func_cap.reserved_eq;
+	dev->caps.num_mpts		= func_cap.mpt_quota;
+	dev->caps.num_mtts		= func_cap.mtt_quota;
+	dev->caps.num_pds               = MLX4_NUM_PDS;
+	dev->caps.num_mgms              = 0;
+	dev->caps.num_amgms             = 0;
+
+	for (i = 1; i <= dev->caps.num_ports; ++i)
+		dev->caps.port_mask[i] = dev->caps.port_type[i];
+
+	if (dev->caps.num_ports > MLX4_MAX_PORTS) {
+		mlx4_err(dev, "HCA has %d ports, but we only support %d, "
+			 "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS);
+		return -ENODEV;
+	}
+
+	if (dev->caps.uar_page_size * (dev->caps.num_uars -
+				       dev->caps.reserved_uars) >
+				       pci_resource_len(dev->pdev, 2)) {
+		mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than "
+			 "PCI resource 2 size of 0x%llx, aborting.\n",
+			 dev->caps.uar_page_size * dev->caps.num_uars,
+			 (unsigned long long) pci_resource_len(dev->pdev, 2));
+		return -ENODEV;
+	}
+
+#if 0
+	mlx4_warn(dev, "sqp_demux:%d\n", dev->caps.sqp_demux);
+	mlx4_warn(dev, "num_uars:%d reserved_uars:%d uar region:0x%x bar2:0x%llx\n",
+		  dev->caps.num_uars, dev->caps.reserved_uars,
+		  dev->caps.uar_page_size * dev->caps.num_uars,
+		  pci_resource_len(dev->pdev, 2));
+	mlx4_warn(dev, "num_eqs:%d reserved_eqs:%d\n", dev->caps.num_eqs,
+		  dev->caps.reserved_eqs);
+	mlx4_warn(dev, "num_pds:%d reserved_pds:%d slave_pd_shift:%d pd_base:%d\n",
+		  dev->caps.num_pds, dev->caps.reserved_pds,
+		  dev->caps.slave_pd_shift, dev->caps.pd_base);
+#endif
+	return 0;
+}
 
 /*
  * Change the port configuration of the device.
@@ -456,6 +657,7 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int err;
+	int num_eqs;
 
 	err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
 				  cmpt_base +
@@ -485,12 +687,14 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
 	if (err)
 		goto err_srq;
 
+	num_eqs = (mlx4_is_master(dev)) ?
+		roundup_pow_of_two(mlx4_master_get_num_eqs(dev)) :
+		dev->caps.num_eqs;
 	err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
 				  cmpt_base +
 				  ((u64) (MLX4_CMPT_TYPE_EQ *
 					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
-				  cmpt_entry_sz,
-				  dev->caps.num_eqs, dev->caps.num_eqs, 0, 0);
+				  cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
 	if (err)
 		goto err_cq;
 
@@ -514,6 +718,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	u64 aux_pages;
+	int num_eqs;
 	int err;
 
 	err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
@@ -545,10 +750,13 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 		goto err_unmap_aux;
 	}
 
+
+	num_eqs = (mlx4_is_master(dev)) ?
+		roundup_pow_of_two(mlx4_master_get_num_eqs(dev)) :
+		dev->caps.num_eqs;
 	err = mlx4_init_icm_table(dev, &priv->eq_table.table,
 				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
-				  dev->caps.num_eqs, dev->caps.num_eqs,
-				  0, 0);
+				  num_eqs, num_eqs, 0, 0);
 	if (err) {
 		mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
 		goto err_unmap_cmpt;
@@ -732,6 +940,16 @@ static void mlx4_free_icms(struct mlx4_dev *dev)
 	mlx4_free_icm(dev, priv->fw.aux_icm, 0);
 }
 
+static void mlx4_slave_exit(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	down(&priv->cmd.slave_sem);
+	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
+		mlx4_warn(dev, "Failed to close slave function.\n");
+	up(&priv->cmd.slave_sem);
+}
+
 static int map_bf_area(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -739,8 +957,10 @@ static int map_bf_area(struct mlx4_dev *dev)
 	resource_size_t bf_len;
 	int err = 0;
 
-	bf_start = pci_resource_start(dev->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT);
-	bf_len = pci_resource_len(dev->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT);
+	bf_start = pci_resource_start(dev->pdev, 2) +
+			(dev->caps.num_uars << PAGE_SHIFT);
+	bf_len = pci_resource_len(dev->pdev, 2) -
+			(dev->caps.num_uars << PAGE_SHIFT);
 	priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
 	if (!priv->bf_mapping)
 		err = -ENOMEM;
@@ -757,10 +977,81 @@ static void unmap_bf_area(struct mlx4_dev *dev)
 static void mlx4_close_hca(struct mlx4_dev *dev)
 {
 	unmap_bf_area(dev);
-	mlx4_CLOSE_HCA(dev, 0);
-	mlx4_free_icms(dev);
-	mlx4_UNMAP_FA(dev);
-	mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
+	if (mlx4_is_slave(dev))
+		mlx4_slave_exit(dev);
+	else {
+		mlx4_CLOSE_HCA(dev, 0);
+		mlx4_free_icms(dev);
+		mlx4_UNMAP_FA(dev);
+		mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
+	}
+}
+
+static int mlx4_init_slave(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	u64 dma = (u64) priv->mfunc.vhcr_dma;
+	int num_of_reset_retries = NUM_OF_RESET_RETRIES;
+	int ret_from_reset = 0;
+	u32 slave_read;
+	u32 cmd_channel_ver;
+
+	down(&priv->cmd.slave_sem);
+	priv->cmd.max_cmds = 1;
+	mlx4_warn(dev, "Sending reset\n");
+	ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
+				       MLX4_COMM_TIME);
+	/* if we are in the middle of flr the slave will try
+	 * NUM_OF_RESET_RETRIES times before leaving.*/
+	if (ret_from_reset) {
+		if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
+			msleep(SLEEP_TIME_IN_RESET);
+			while (ret_from_reset && num_of_reset_retries) {
+				mlx4_warn(dev, "slave is currently in the"
+					  "middle of FLR. retrying..."
+					  "(try num:%d)\n",
+					  (NUM_OF_RESET_RETRIES -
+					   num_of_reset_retries  + 1));
+				ret_from_reset =
+					mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET,
+						      0, MLX4_COMM_TIME);
+				num_of_reset_retries = num_of_reset_retries - 1;
+			}
+		} else
+			goto err;
+	}
+
+	/* check the driver version - the slave I/F revision
+	 * must match the master's */
+	slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
+	cmd_channel_ver = mlx4_comm_get_version();
+
+	if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
+		MLX4_COMM_GET_IF_REV(slave_read)) {
+		mlx4_err(dev, "slave driver version is not supported"
+			 " by the master\n");
+		goto err;
+	}
+
+	mlx4_warn(dev, "Sending vhcr0\n");
+	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
+						    MLX4_COMM_TIME))
+		goto err;
+	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
+						    MLX4_COMM_TIME))
+		goto err;
+	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
+						    MLX4_COMM_TIME))
+		goto err;
+	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
+		goto err;
+	up(&priv->cmd.slave_sem);
+	return 0;
+
+err:
+	mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
+	up(&priv->cmd.slave_sem);
+	return -EIO;
 }
 
 static int mlx4_init_hca(struct mlx4_dev *dev)
@@ -774,56 +1065,76 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
 	u64 icm_size;
 	int err;
 
-	err = mlx4_QUERY_FW(dev);
-	if (err) {
-		if (err == -EACCES)
-			mlx4_info(dev, "non-primary physical function, skipping.\n");
-		else
-			mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
-		return err;
-	}
+	if (!mlx4_is_slave(dev)) {
+		err = mlx4_QUERY_FW(dev);
+		if (err) {
+			if (err == -EACCES)
+				mlx4_info(dev, "non-primary physical function, skipping.\n");
+			else
+				mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
+			goto unmap_bf;
+		}
 
-	err = mlx4_load_fw(dev);
-	if (err) {
-		mlx4_err(dev, "Failed to start FW, aborting.\n");
-		return err;
-	}
+		err = mlx4_load_fw(dev);
+		if (err) {
+			mlx4_err(dev, "Failed to start FW, aborting.\n");
+			goto unmap_bf;
+		}
 
-	mlx4_cfg.log_pg_sz_m = 1;
-	mlx4_cfg.log_pg_sz = 0;
-	err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
-	if (err)
-		mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
+		mlx4_cfg.log_pg_sz_m = 1;
+		mlx4_cfg.log_pg_sz = 0;
+		err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
+		if (err)
+			mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
 
-	err = mlx4_dev_cap(dev, &dev_cap);
-	if (err) {
-		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
-		goto err_stop_fw;
-	}
+		err = mlx4_dev_cap(dev, &dev_cap);
+		if (err) {
+			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+			goto err_stop_fw;
+		}
 
-	profile = default_profile;
+		profile = default_profile;
 
-	icm_size = mlx4_make_profile(dev, &profile, &dev_cap, &init_hca);
-	if ((long long) icm_size < 0) {
-		err = icm_size;
-		goto err_stop_fw;
-	}
+		icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
+					     &init_hca);
+		if ((long long) icm_size < 0) {
+			err = icm_size;
+			goto err_stop_fw;
+		}
 
-	if (map_bf_area(dev))
-		mlx4_dbg(dev, "Failed to map blue flame area\n");
+		init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
+		init_hca.uar_page_sz = PAGE_SHIFT - 12;
 
-	init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
+		err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
+		if (err)
+			goto err_stop_fw;
 
-	err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
-	if (err)
-		goto err_stop_fw;
+		err = mlx4_INIT_HCA(dev, &init_hca);
+		if (err) {
+			mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
+			goto err_free_icm;
+		}
+	} else {
+		err = mlx4_init_slave(dev);
+		if (err) {
+			mlx4_err(dev, "Failed to initialize slave\n");
+			goto unmap_bf;
+		}
 
-	err = mlx4_INIT_HCA(dev, &init_hca);
-	if (err) {
-		mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
-		goto err_free_icm;
+		err = mlx4_slave_cap(dev);
+		if (err) {
+			mlx4_err(dev, "Failed to obtain slave caps\n");
+			goto err_close;
+		}
 	}
 
+	if (map_bf_area(dev))
+		mlx4_dbg(dev, "Failed to map blue flame area\n");
+
+	/*Only the master set the ports, all the rest got it from it.*/
+	if (!mlx4_is_slave(dev))
+		mlx4_set_port_mask(dev);
+
 	err = mlx4_QUERY_ADAPTER(dev, &adapter);
 	if (err) {
 		mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
@@ -836,16 +1147,19 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
 	return 0;
 
 err_close:
-	mlx4_CLOSE_HCA(dev, 0);
+	mlx4_close_hca(dev);
 
 err_free_icm:
-	mlx4_free_icms(dev);
+	if (!mlx4_is_slave(dev))
+		mlx4_free_icms(dev);
 
 err_stop_fw:
+	if (!mlx4_is_slave(dev)) {
+		mlx4_UNMAP_FA(dev);
+		mlx4_free_icm(dev, priv->fw.fw_icm, 0);
+	}
+unmap_bf:
 	unmap_bf_area(dev);
-	mlx4_UNMAP_FA(dev);
-	mlx4_free_icm(dev, priv->fw.fw_icm, 0);
-
 	return err;
 }
 
@@ -992,55 +1306,62 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
 		goto err_srq_table_free;
 	}
 
-	err = mlx4_init_mcg_table(dev);
-	if (err) {
-		mlx4_err(dev, "Failed to initialize "
-			 "multicast group table, aborting.\n");
-		goto err_qp_table_free;
+	if (!mlx4_is_slave(dev)) {
+		err = mlx4_init_mcg_table(dev);
+		if (err) {
+			mlx4_err(dev, "Failed to initialize "
+				 "multicast group table, aborting.\n");
+			goto err_qp_table_free;
+		}
 	}
 
 	err = mlx4_init_counters_table(dev);
 	if (err && err != -ENOENT) {
 		mlx4_err(dev, "Failed to initialize counters table, aborting.\n");
-		goto err_counters_table_free;
+		goto err_mcg_table_free;
 	}
 
-	for (port = 1; port <= dev->caps.num_ports; port++) {
-		enum mlx4_port_type port_type = 0;
-		mlx4_SENSE_PORT(dev, port, &port_type);
-		if (port_type)
-			dev->caps.port_type[port] = port_type;
-		ib_port_default_caps = 0;
-		err = mlx4_get_port_ib_caps(dev, port, &ib_port_default_caps);
-		if (err)
-			mlx4_warn(dev, "failed to get port %d default "
-				  "ib capabilities (%d). Continuing with "
-				  "caps = 0\n", port, err);
-		dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
-
-		err = mlx4_check_ext_port_caps(dev, port);
-		if (err)
-			mlx4_warn(dev, "failed to get port %d extended "
-				  "port capabilities support info (%d)."
-				  " Assuming not supported\n", port, err);
+	if (!mlx4_is_slave(dev)) {
+		for (port = 1; port <= dev->caps.num_ports; port++) {
+			if (!mlx4_is_mfunc(dev)) {
+				enum mlx4_port_type port_type = 0;
+				mlx4_SENSE_PORT(dev, port, &port_type);
+				if (port_type)
+					dev->caps.port_type[port] = port_type;
+			}
+			ib_port_default_caps = 0;
+			err = mlx4_get_port_ib_caps(dev, port,
+						    &ib_port_default_caps);
+			if (err)
+				mlx4_warn(dev, "failed to get port %d default "
+					  "ib capabilities (%d). Continuing "
+					  "with caps = 0\n", port, err);
+			dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
+
+			err = mlx4_check_ext_port_caps(dev, port);
+			if (err)
+				mlx4_warn(dev, "failed to get port %d extended "
+					  "port capabilities support info (%d)."
+					  " Assuming not supported\n",
+					  port, err);
 
-		err = mlx4_SET_PORT(dev, port);
-		if (err) {
-			mlx4_err(dev, "Failed to set port %d, aborting\n",
-				port);
-			goto err_mcg_table_free;
+			err = mlx4_SET_PORT(dev, port);
+			if (err) {
+				mlx4_err(dev, "Failed to set port %d, aborting\n",
+					port);
+				goto err_counters_table_free;
+			}
 		}
 	}
-	mlx4_set_port_mask(dev);
 
 	return 0;
 
-err_mcg_table_free:
-	mlx4_cleanup_mcg_table(dev);
-
 err_counters_table_free:
 	mlx4_cleanup_counters_table(dev);
 
+err_mcg_table_free:
+	mlx4_cleanup_mcg_table(dev);
+
 err_qp_table_free:
 	mlx4_cleanup_qp_table(dev);
 
@@ -1087,8 +1408,16 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
 	int i;
 
 	if (msi_x) {
-		nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
-			     nreq);
+		/* In multifunction mode each function gets 2 msi-X vectors
+		 * one for data path completions anf the other for asynch events
+		 * or command completions */
+		if (mlx4_is_mfunc(dev)) {
+			nreq = 2;
+		} else {
+			nreq = min_t(int, dev->caps.num_eqs -
+				     dev->caps.reserved_eqs, nreq);
+		}
+
 		entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
 		if (!entries)
 			goto no_msi;
@@ -1144,16 +1473,24 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
 
 	info->dev = dev;
 	info->port = port;
-	mlx4_init_mac_table(dev, &info->mac_table);
-	mlx4_init_vlan_table(dev, &info->vlan_table);
-	info->base_qpn = dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
+	if (!mlx4_is_slave(dev)) {
+		INIT_RADIX_TREE(&info->mac_tree, GFP_KERNEL);
+		mlx4_init_mac_table(dev, &info->mac_table);
+		mlx4_init_vlan_table(dev, &info->vlan_table);
+		info->base_qpn =
+			dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
 			(port - 1) * (1 << log_num_mac);
+	}
 
 	sprintf(info->dev_name, "mlx4_port%d", port);
 	info->port_attr.attr.name = info->dev_name;
-	info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
+	if (mlx4_is_mfunc(dev))
+		info->port_attr.attr.mode = S_IRUGO;
+	else {
+		info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
+		info->port_attr.store     = set_port_type;
+	}
 	info->port_attr.show      = show_port_type;
-	info->port_attr.store     = set_port_type;
 	sysfs_attr_init(&info->port_attr.attr);
 
 	err = device_create_file(&dev->pdev->dev, &info->port_attr);
@@ -1226,6 +1563,46 @@ static void mlx4_clear_steering(struct mlx4_dev *dev)
 	kfree(priv->steer);
 }
 
+static int extended_func_num(struct pci_dev *pdev)
+{
+	return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
+}
+
+#define MLX4_OWNER_BASE	0x8069c
+#define MLX4_OWNER_SIZE	4
+
+static int mlx4_get_ownership(struct mlx4_dev *dev)
+{
+	void __iomem *owner;
+	u32 ret;
+
+	owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
+			MLX4_OWNER_SIZE);
+	if (!owner) {
+		mlx4_err(dev, "Failed to obtain ownership bit\n");
+		return -ENOMEM;
+	}
+
+	ret = readl(owner);
+	iounmap(owner);
+	return (int) !!ret;
+}
+
+static void mlx4_free_ownership(struct mlx4_dev *dev)
+{
+	void __iomem *owner;
+
+	owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
+			MLX4_OWNER_SIZE);
+	if (!owner) {
+		mlx4_err(dev, "Failed to obtain ownership bit\n");
+		return;
+	}
+	writel(0, owner);
+	msleep(1000);
+	iounmap(owner);
+}
+
 static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct mlx4_priv *priv;
@@ -1241,13 +1618,20 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 			"aborting.\n");
 		return err;
 	}
-
+	if (num_vfs > MLX4_MAX_NUM_VF) {
+		printk(KERN_ERR "There are more VF's (%d) than allowed(%d)\n",
+		       num_vfs, MLX4_MAX_NUM_VF);
+		return -EINVAL;
+	}
 	/*
-	 * Check for BARs.  We expect 0: 1MB
+	 * Check for BARs.
 	 */
-	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
-	    pci_resource_len(pdev, 0) != 1 << 20) {
-		dev_err(&pdev->dev, "Missing DCS, aborting.\n");
+	if (((id == NULL) || !(id->driver_data & MLX4_VF)) &&
+	    !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+		dev_err(&pdev->dev, "Missing DCS, aborting."
+			"(id == 0X%p, id->driver_data: 0x%lx,"
+			" pci_resource_flags(pdev, 0):0x%lx)\n", id,
+			id ? id->driver_data : 0, pci_resource_flags(pdev, 0));
 		err = -ENODEV;
 		goto err_disable_pdev;
 	}
@@ -1311,42 +1695,132 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	mutex_init(&priv->bf_mutex);
 
 	dev->rev_id = pdev->revision;
+	/* Detect if this device is a virtual function */
+	if (id && id->driver_data & MLX4_VF) {
+		/* When acting as pf, we normally skip vfs unless explicitly
+		 * requested to probe them. */
+		if (num_vfs && extended_func_num(pdev) > probe_vf) {
+			mlx4_warn(dev, "Skipping virtual function:%d\n",
+						extended_func_num(pdev));
+			err = -ENODEV;
+			goto err_free_dev;
+		}
+		mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
+		dev->flags |= MLX4_FLAG_SLAVE;
+	} else {
+		/* We reset the device and enable SRIOV only for physical
+		 * devices.  Try to claim ownership on the device;
+		 * if already taken, skip -- do not allow multiple PFs */
+		err = mlx4_get_ownership(dev);
+		if (err) {
+			if (err < 0)
+				goto err_free_dev;
+			else {
+				mlx4_warn(dev, "Multiple PFs not yet supported."
+					  " Skipping PF.\n");
+				err = -EINVAL;
+				goto err_free_dev;
+			}
+		}
 
-	/*
-	 * Now reset the HCA before we touch the PCI capabilities or
-	 * attempt a firmware command, since a boot ROM may have left
-	 * the HCA in an undefined state.
-	 */
-	err = mlx4_reset(dev);
-	if (err) {
-		mlx4_err(dev, "Failed to reset HCA, aborting.\n");
-		goto err_free_dev;
+		if (num_vfs) {
+			mlx4_warn(dev, "Enabling sriov with:%d vfs\n", num_vfs);
+			err = pci_enable_sriov(pdev, num_vfs);
+			if (err) {
+				mlx4_err(dev, "Failed to enable sriov,"
+					 "continuing without sriov enabled"
+					 " (err = %d).\n", err);
+				num_vfs = 0;
+				err = 0;
+			} else {
+				mlx4_warn(dev, "Running in master mode\n");
+				dev->flags |= MLX4_FLAG_SRIOV |
+					      MLX4_FLAG_MASTER;
+				dev->num_vfs = num_vfs;
+			}
+		}
+
+		/*
+		 * Now reset the HCA before we touch the PCI capabilities or
+		 * attempt a firmware command, since a boot ROM may have left
+		 * the HCA in an undefined state.
+		 */
+		err = mlx4_reset(dev);
+		if (err) {
+			mlx4_err(dev, "Failed to reset HCA, aborting.\n");
+			goto err_rel_own;
+		}
 	}
 
+slave_start:
 	if (mlx4_cmd_init(dev)) {
 		mlx4_err(dev, "Failed to init command interface, aborting.\n");
-		goto err_free_dev;
+		goto err_sriov;
+	}
+
+	/* In slave functions, the communication channel must be initialized
+	 * before posting commands. Also, init num_slaves before calling
+	 * mlx4_init_hca */
+	if (mlx4_is_mfunc(dev)) {
+		if (mlx4_is_master(dev))
+			dev->num_slaves = MLX4_MAX_NUM_SLAVES;
+		else {
+			dev->num_slaves = 0;
+			if (mlx4_multi_func_init(dev)) {
+				mlx4_err(dev, "Failed to init slave mfunc"
+					 " interface, aborting.\n");
+				goto err_cmd;
+			}
+		}
 	}
 
 	err = mlx4_init_hca(dev);
-	if (err)
-		goto err_cmd;
+	if (err) {
+		if (err == -EACCES) {
+			/* Not primary Physical function
+			 * Running in slave mode */
+			mlx4_cmd_cleanup(dev);
+			dev->flags |= MLX4_FLAG_SLAVE;
+			dev->flags &= ~MLX4_FLAG_MASTER;
+			goto slave_start;
+		} else
+			goto err_mfunc;
+	}
+
+	/* In master functions, the communication channel must be initialized
+	 * after obtaining its address from fw */
+	if (mlx4_is_master(dev)) {
+		if (mlx4_multi_func_init(dev)) {
+			mlx4_err(dev, "Failed to init master mfunc"
+				 "interface, aborting.\n");
+			goto err_close;
+		}
+	}
 
 	err = mlx4_alloc_eq_table(dev);
 	if (err)
-		goto err_close;
+		goto err_master_mfunc;
 
 	priv->msix_ctl.pool_bm = 0;
 	spin_lock_init(&priv->msix_ctl.pool_lock);
 
 	mlx4_enable_msi_x(dev);
-
-	err = mlx4_init_steering(dev);
-	if (err)
+	if ((mlx4_is_mfunc(dev)) &&
+	    !(dev->flags & MLX4_FLAG_MSI_X)) {
+		mlx4_err(dev, "INTx is not supported in multi-function mode."
+			 " aborting.\n");
 		goto err_free_eq;
+	}
+
+	if (!mlx4_is_slave(dev)) {
+		err = mlx4_init_steering(dev);
+		if (err)
+			goto err_free_eq;
+	}
 
 	err = mlx4_setup_hca(dev);
-	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X)) {
+	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
+	    !mlx4_is_mfunc(dev)) {
 		dev->flags &= ~MLX4_FLAG_MSI_X;
 		pci_disable_msix(pdev);
 		err = mlx4_setup_hca(dev);
@@ -1389,20 +1863,37 @@ err_port:
 	mlx4_cleanup_uar_table(dev);
 
 err_steer:
-	mlx4_clear_steering(dev);
+	if (!mlx4_is_slave(dev))
+		mlx4_clear_steering(dev);
 
 err_free_eq:
 	mlx4_free_eq_table(dev);
 
+err_master_mfunc:
+	if (mlx4_is_master(dev))
+		mlx4_multi_func_cleanup(dev);
+
 err_close:
 	if (dev->flags & MLX4_FLAG_MSI_X)
 		pci_disable_msix(pdev);
 
 	mlx4_close_hca(dev);
 
+err_mfunc:
+	if (mlx4_is_slave(dev))
+		mlx4_multi_func_cleanup(dev);
+
 err_cmd:
 	mlx4_cmd_cleanup(dev);
 
+err_sriov:
+	if (num_vfs && (dev->flags & MLX4_FLAG_SRIOV))
+		pci_disable_sriov(pdev);
+
+err_rel_own:
+	if (!mlx4_is_slave(dev))
+		mlx4_free_ownership(dev);
+
 err_free_dev:
 	kfree(priv);
 
@@ -1430,6 +1921,12 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 	int p;
 
 	if (dev) {
+		/* in SRIOV it is not allowed to unload the pf's
+		 * driver while there are alive vf's */
+		if (mlx4_is_master(dev)) {
+			if (mlx4_how_many_lives_vf(dev))
+				printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n");
+		}
 		mlx4_stop_sense(dev);
 		mlx4_unregister_device(dev);
 
@@ -1449,17 +1946,31 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 		mlx4_cleanup_xrcd_table(dev);
 		mlx4_cleanup_pd_table(dev);
 
+		if (mlx4_is_master(dev))
+			mlx4_free_resource_tracker(dev);
+
 		iounmap(priv->kar);
 		mlx4_uar_free(dev, &priv->driver_uar);
 		mlx4_cleanup_uar_table(dev);
-		mlx4_clear_steering(dev);
+		if (!mlx4_is_slave(dev))
+			mlx4_clear_steering(dev);
 		mlx4_free_eq_table(dev);
+		if (mlx4_is_master(dev))
+			mlx4_multi_func_cleanup(dev);
 		mlx4_close_hca(dev);
+		if (mlx4_is_slave(dev))
+			mlx4_multi_func_cleanup(dev);
 		mlx4_cmd_cleanup(dev);
 
 		if (dev->flags & MLX4_FLAG_MSI_X)
 			pci_disable_msix(pdev);
+		if (num_vfs && (dev->flags & MLX4_FLAG_SRIOV)) {
+			mlx4_warn(dev, "Disabling sriov\n");
+			pci_disable_sriov(pdev);
+		}
 
+		if (!mlx4_is_slave(dev))
+			mlx4_free_ownership(dev);
 		kfree(priv);
 		pci_release_regions(pdev);
 		pci_disable_device(pdev);
@@ -1474,33 +1985,48 @@ int mlx4_restart_one(struct pci_dev *pdev)
 }
 
 static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
-	{ PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */
-	{ PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */
-	{ PCI_VDEVICE(MELLANOX, 0x6354) }, /* MT25408 "Hermon" QDR */
-	{ PCI_VDEVICE(MELLANOX, 0x6732) }, /* MT25408 "Hermon" DDR PCIe gen2 */
-	{ PCI_VDEVICE(MELLANOX, 0x673c) }, /* MT25408 "Hermon" QDR PCIe gen2 */
-	{ PCI_VDEVICE(MELLANOX, 0x6368) }, /* MT25408 "Hermon" EN 10GigE */
-	{ PCI_VDEVICE(MELLANOX, 0x6750) }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
-	{ PCI_VDEVICE(MELLANOX, 0x6372) }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */
-	{ PCI_VDEVICE(MELLANOX, 0x675a) }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
-	{ PCI_VDEVICE(MELLANOX, 0x6764) }, /* MT26468 ConnectX EN 10GigE PCIe gen2*/
-	{ PCI_VDEVICE(MELLANOX, 0x6746) }, /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
-	{ PCI_VDEVICE(MELLANOX, 0x676e) }, /* MT26478 ConnectX2 40GigE PCIe gen2 */
-	{ PCI_VDEVICE(MELLANOX, 0x1002) }, /* MT25400 Family [ConnectX-2 Virtual Function] */
-	{ PCI_VDEVICE(MELLANOX, 0x1003) }, /* MT27500 Family [ConnectX-3] */
-	{ PCI_VDEVICE(MELLANOX, 0x1004) }, /* MT27500 Family [ConnectX-3 Virtual Function] */
-	{ PCI_VDEVICE(MELLANOX, 0x1005) }, /* MT27510 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x1006) }, /* MT27511 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x1007) }, /* MT27520 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x1008) }, /* MT27521 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x1009) }, /* MT27530 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x100a) }, /* MT27531 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x100b) }, /* MT27540 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x100c) }, /* MT27541 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x100d) }, /* MT27550 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x100e) }, /* MT27551 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x100f) }, /* MT27560 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x1010) }, /* MT27561 Family */
+	/* MT25408 "Hermon" SDR */
+	{ PCI_VDEVICE(MELLANOX, 0x6340), 0 },
+	/* MT25408 "Hermon" DDR */
+	{ PCI_VDEVICE(MELLANOX, 0x634a), 0 },
+	/* MT25408 "Hermon" QDR */
+	{ PCI_VDEVICE(MELLANOX, 0x6354), 0 },
+	/* MT25408 "Hermon" DDR PCIe gen2 */
+	{ PCI_VDEVICE(MELLANOX, 0x6732), 0 },
+	/* MT25408 "Hermon" QDR PCIe gen2 */
+	{ PCI_VDEVICE(MELLANOX, 0x673c), 0 },
+	/* MT25408 "Hermon" EN 10GigE */
+	{ PCI_VDEVICE(MELLANOX, 0x6368), 0 },
+	/* MT25408 "Hermon" EN 10GigE PCIe gen2 */
+	{ PCI_VDEVICE(MELLANOX, 0x6750), 0 },
+	/* MT25458 ConnectX EN 10GBASE-T 10GigE */
+	{ PCI_VDEVICE(MELLANOX, 0x6372), 0 },
+	/* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
+	{ PCI_VDEVICE(MELLANOX, 0x675a), 0 },
+	/* MT26468 ConnectX EN 10GigE PCIe gen2*/
+	{ PCI_VDEVICE(MELLANOX, 0x6764), 0 },
+	/* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
+	{ PCI_VDEVICE(MELLANOX, 0x6746), 0 },
+	/* MT26478 ConnectX2 40GigE PCIe gen2 */
+	{ PCI_VDEVICE(MELLANOX, 0x676e), 0 },
+	/* MT25400 Family [ConnectX-2 Virtual Function] */
+	{ PCI_VDEVICE(MELLANOX, 0x1002), MLX4_VF },
+	/* MT27500 Family [ConnectX-3] */
+	{ PCI_VDEVICE(MELLANOX, 0x1003), 0 },
+	/* MT27500 Family [ConnectX-3 Virtual Function] */
+	{ PCI_VDEVICE(MELLANOX, 0x1004), MLX4_VF },
+	{ PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
 	{ 0, }
 };
 
@@ -1529,6 +2055,12 @@ static int __init mlx4_verify_params(void)
 		return -1;
 	}
 
+	/* Check if module param for ports type has legal combination */
+	if (port_type_array[0] == false && port_type_array[1] == true) {
+		printk(KERN_WARNING "Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
+		port_type_array[0] = true;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 879f825c6f6a..3921dbf01da1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -49,6 +49,7 @@
 #include <linux/mlx4/cmd.h>
 
 #define DRV_NAME	"mlx4_core"
+#define PFX		DRV_NAME ": "
 #define DRV_VERSION	"1.0"
 #define DRV_RELDATE	"July 14, 2011"
 
@@ -957,10 +958,15 @@ int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe);
 
 int mlx4_cmd_init(struct mlx4_dev *dev);
 void mlx4_cmd_cleanup(struct mlx4_dev *dev);
+int mlx4_multi_func_init(struct mlx4_dev *dev);
+void mlx4_multi_func_cleanup(struct mlx4_dev *dev);
 void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
 int mlx4_cmd_use_events(struct mlx4_dev *dev);
 void mlx4_cmd_use_polling(struct mlx4_dev *dev);
 
+int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
+		  unsigned long timeout);
+
 void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
 void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type);
 
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index ae62630a665e..9958ff2cad3c 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -225,4 +225,6 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbo
 
 u32 mlx4_comm_get_version(void);
 
+#define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8)
+
 #endif /* MLX4_CMD_H */
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 65bb466c575f..5f784ff6a36e 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -489,6 +489,7 @@ struct mlx4_dev {
 	struct radix_tree_root	qp_table_tree;
 	u8			rev_id;
 	char			board_id[MLX4_BOARD_ID_LEN];
+	int			num_vfs;
 };
 
 struct mlx4_init_port_param {
-- 
cgit v1.2.3


From 8cb25e14fe80d0fac42412364df573eb3e8e83cc Mon Sep 17 00:00:00 2001
From: Helmut Schaa <helmut.schaa@googlemail.com>
Date: Thu, 8 Dec 2011 13:11:54 +0100
Subject: ieee80211: Introduce ieee80211_is_first_frag

Signed-off-by: Helmut Schaa <helmut.schaa@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 17f2a768e2ad..210e2c325534 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -544,6 +544,15 @@ static inline int ieee80211_is_qos_nullfunc(__le16 fc)
 	       cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC);
 }
 
+/**
+ * ieee80211_is_first_frag - check if IEEE80211_SCTL_FRAG is not set
+ * @seq_ctrl: frame sequence control bytes in little-endian byteorder
+ */
+static inline int ieee80211_is_first_frag(__le16 seq_ctrl)
+{
+	return (seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG)) == 0;
+}
+
 struct ieee80211s_hdr {
 	u8 flags;
 	u8 ttl;
-- 
cgit v1.2.3


From 8a5ac6ecd56756ee72588627aa23ab6cf9b790db Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Thu, 8 Dec 2011 18:02:21 +0100
Subject: ssb: extract FEM info from SPROM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/pci.c            | 23 +++++++++++++++++++++++
 include/linux/ssb/ssb.h      |  9 +++++++++
 include/linux/ssb/ssb_regs.h | 17 +++++++++++++++++
 3 files changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c
index 34c3bab90b9a..973223f5de8e 100644
--- a/drivers/ssb/pci.c
+++ b/drivers/ssb/pci.c
@@ -607,6 +607,29 @@ static void sprom_extract_r8(struct ssb_sprom *out, const u16 *in)
 	memcpy(&out->antenna_gain.ghz5, &out->antenna_gain.ghz24,
 	       sizeof(out->antenna_gain.ghz5));
 
+	/* Extract FEM info */
+	SPEX(fem.ghz2.tssipos, SSB_SPROM8_FEM2G,
+		SSB_SROM8_FEM_TSSIPOS, SSB_SROM8_FEM_TSSIPOS_SHIFT);
+	SPEX(fem.ghz2.extpa_gain, SSB_SPROM8_FEM2G,
+		SSB_SROM8_FEM_EXTPA_GAIN, SSB_SROM8_FEM_EXTPA_GAIN_SHIFT);
+	SPEX(fem.ghz2.pdet_range, SSB_SPROM8_FEM2G,
+		SSB_SROM8_FEM_PDET_RANGE, SSB_SROM8_FEM_PDET_RANGE_SHIFT);
+	SPEX(fem.ghz2.tr_iso, SSB_SPROM8_FEM2G,
+		SSB_SROM8_FEM_TR_ISO, SSB_SROM8_FEM_TR_ISO_SHIFT);
+	SPEX(fem.ghz2.antswlut, SSB_SPROM8_FEM2G,
+		SSB_SROM8_FEM_ANTSWLUT, SSB_SROM8_FEM_ANTSWLUT_SHIFT);
+
+	SPEX(fem.ghz5.tssipos, SSB_SPROM8_FEM5G,
+		SSB_SROM8_FEM_TSSIPOS, SSB_SROM8_FEM_TSSIPOS_SHIFT);
+	SPEX(fem.ghz5.extpa_gain, SSB_SPROM8_FEM5G,
+		SSB_SROM8_FEM_EXTPA_GAIN, SSB_SROM8_FEM_EXTPA_GAIN_SHIFT);
+	SPEX(fem.ghz5.pdet_range, SSB_SPROM8_FEM5G,
+		SSB_SROM8_FEM_PDET_RANGE, SSB_SROM8_FEM_PDET_RANGE_SHIFT);
+	SPEX(fem.ghz5.tr_iso, SSB_SPROM8_FEM5G,
+		SSB_SROM8_FEM_TR_ISO, SSB_SROM8_FEM_TR_ISO_SHIFT);
+	SPEX(fem.ghz5.antswlut, SSB_SPROM8_FEM5G,
+		SSB_SROM8_FEM_ANTSWLUT, SSB_SROM8_FEM_ANTSWLUT_SHIFT);
+
 	sprom_extract_r458(out, in);
 
 	/* TODO - get remaining rev 8 stuff needed */
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 061e560251b4..dcf35b0f303a 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -94,6 +94,15 @@ struct ssb_sprom {
 		} ghz5;		/* 5GHz band */
 	} antenna_gain;
 
+	struct {
+		struct {
+			u8 tssipos, extpa_gain, pdet_range, tr_iso, antswlut;
+		} ghz2;
+		struct {
+			u8 tssipos, extpa_gain, pdet_range, tr_iso, antswlut;
+		} ghz5;
+	} fem;
+
 	/* TODO - add any parameters needed from rev 2, 3, 4, 5 or 8 SPROMs */
 };
 
diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h
index 98941203a27f..c814ae6eeb22 100644
--- a/include/linux/ssb/ssb_regs.h
+++ b/include/linux/ssb/ssb_regs.h
@@ -432,6 +432,23 @@
 #define  SSB_SPROM8_RXPO2G		0x00FF	/* 2GHz RX power offset */
 #define  SSB_SPROM8_RXPO5G		0xFF00	/* 5GHz RX power offset */
 #define  SSB_SPROM8_RXPO5G_SHIFT	8
+#define SSB_SPROM8_FEM2G		0x00AE
+#define SSB_SPROM8_FEM5G		0x00B0
+#define  SSB_SROM8_FEM_TSSIPOS		0x0001
+#define  SSB_SROM8_FEM_TSSIPOS_SHIFT	0
+#define  SSB_SROM8_FEM_EXTPA_GAIN	0x0006
+#define  SSB_SROM8_FEM_EXTPA_GAIN_SHIFT	1
+#define  SSB_SROM8_FEM_PDET_RANGE	0x00F8
+#define  SSB_SROM8_FEM_PDET_RANGE_SHIFT	3
+#define  SSB_SROM8_FEM_TR_ISO		0x0700
+#define  SSB_SROM8_FEM_TR_ISO_SHIFT	8
+#define  SSB_SROM8_FEM_ANTSWLUT		0xF800
+#define  SSB_SROM8_FEM_ANTSWLUT_SHIFT	11
+#define SSB_SPROM8_THERMAL		0x00B2
+#define SSB_SPROM8_MPWR_RAWTS		0x00B4
+#define SSB_SPROM8_TS_SLP_OPT_CORRX	0x00B6
+#define SSB_SPROM8_FOC_HWIQ_IQSWP	0x00B8
+#define SSB_SPROM8_PHYCAL_TEMPDELTA	0x00BA
 #define SSB_SPROM8_MAXP_BG		0x00C0  /* Max Power 2GHz in path 1 */
 #define  SSB_SPROM8_MAXP_BG_MASK	0x00FF  /* Mask for Max Power 2GHz */
 #define  SSB_SPROM8_ITSSI_BG		0xFF00	/* Mask for path 1 itssi_bg */
-- 
cgit v1.2.3


From aee5ed563d56c713d2a51d6f16e08b83fd9665d5 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Thu, 8 Dec 2011 18:02:22 +0100
Subject: bcma: extract FEM info from SPROM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/sprom.c                        | 22 ++++++++++++++++++++++
 include/linux/bcma/bcma_driver_chipcommon.h |  1 +
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/bcma/sprom.c b/drivers/bcma/sprom.c
index d7292390d236..b6c474bbd572 100644
--- a/drivers/bcma/sprom.c
+++ b/drivers/bcma/sprom.c
@@ -142,6 +142,28 @@ static void bcma_sprom_extract_r8(struct bcma_bus *bus, const u16 *sprom)
 	bus->sprom.boardflags2_hi = sprom[SPOFF(SSB_SPROM8_BFL2HI)];
 
 	bus->sprom.country_code = sprom[SPOFF(SSB_SPROM8_CCODE)];
+
+	bus->sprom.fem.ghz2.tssipos = (sprom[SPOFF(SSB_SPROM8_FEM2G)] &
+		SSB_SROM8_FEM_TSSIPOS) >> SSB_SROM8_FEM_TSSIPOS_SHIFT;
+	bus->sprom.fem.ghz2.extpa_gain = (sprom[SPOFF(SSB_SPROM8_FEM2G)] &
+		SSB_SROM8_FEM_EXTPA_GAIN) >> SSB_SROM8_FEM_EXTPA_GAIN_SHIFT;
+	bus->sprom.fem.ghz2.pdet_range = (sprom[SPOFF(SSB_SPROM8_FEM2G)] &
+		SSB_SROM8_FEM_PDET_RANGE) >> SSB_SROM8_FEM_PDET_RANGE_SHIFT;
+	bus->sprom.fem.ghz2.tr_iso = (sprom[SPOFF(SSB_SPROM8_FEM2G)] &
+		SSB_SROM8_FEM_TR_ISO) >> SSB_SROM8_FEM_TR_ISO_SHIFT;
+	bus->sprom.fem.ghz2.antswlut = (sprom[SPOFF(SSB_SPROM8_FEM2G)] &
+		SSB_SROM8_FEM_ANTSWLUT) >> SSB_SROM8_FEM_ANTSWLUT_SHIFT;
+
+	bus->sprom.fem.ghz5.tssipos = (sprom[SPOFF(SSB_SPROM8_FEM5G)] &
+		SSB_SROM8_FEM_TSSIPOS) >> SSB_SROM8_FEM_TSSIPOS_SHIFT;
+	bus->sprom.fem.ghz5.extpa_gain = (sprom[SPOFF(SSB_SPROM8_FEM5G)] &
+		SSB_SROM8_FEM_EXTPA_GAIN) >> SSB_SROM8_FEM_EXTPA_GAIN_SHIFT;
+	bus->sprom.fem.ghz5.pdet_range = (sprom[SPOFF(SSB_SPROM8_FEM5G)] &
+		SSB_SROM8_FEM_PDET_RANGE) >> SSB_SROM8_FEM_PDET_RANGE_SHIFT;
+	bus->sprom.fem.ghz5.tr_iso = (sprom[SPOFF(SSB_SPROM8_FEM5G)] &
+		SSB_SROM8_FEM_TR_ISO) >> SSB_SROM8_FEM_TR_ISO_SHIFT;
+	bus->sprom.fem.ghz5.antswlut = (sprom[SPOFF(SSB_SPROM8_FEM5G)] &
+		SSB_SROM8_FEM_ANTSWLUT) >> SSB_SROM8_FEM_ANTSWLUT_SHIFT;
 }
 
 int bcma_sprom_get(struct bcma_bus *bus)
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 1526d965ed06..a33086a7530b 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -203,6 +203,7 @@
 #define BCMA_CC_PMU_CTL			0x0600 /* PMU control */
 #define  BCMA_CC_PMU_CTL_ILP_DIV	0xFFFF0000 /* ILP div mask */
 #define  BCMA_CC_PMU_CTL_ILP_DIV_SHIFT	16
+#define  BCMA_CC_PMU_CTL_PLL_UPD	0x00000400
 #define  BCMA_CC_PMU_CTL_NOILPONW	0x00000200 /* No ILP on wait */
 #define  BCMA_CC_PMU_CTL_HTREQEN	0x00000100 /* HT req enable */
 #define  BCMA_CC_PMU_CTL_ALPREQEN	0x00000080 /* ALP req enable */
-- 
cgit v1.2.3


From 9d08f10d355afd500310738ff09b4d921a447102 Mon Sep 17 00:00:00 2001
From: Arend van Spriel <arend@broadcom.com>
Date: Thu, 8 Dec 2011 15:06:41 -0800
Subject: bcma: add set/mask macros for 16-bit register access

The BCMA header only had definitions for 32-bit register access. Used
those as a template for the 16-bit flavour. Also changed them to inline
functions to be on the safe side. As offset parameter is used twice there
would be a problem when used like this: bcma_set32(core, offset++, val);

Reviewed-by: Pieter-Paul Giesberts <pieterpg@broadcom.com>
Reviewed-by: Alwin Beukers <alwin@broadcom.com>
Signed-off-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Franky Lin <frankyl@broadcom.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma.h | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 4d4b59de9467..de6057f16987 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -254,12 +254,32 @@ void bcma_awrite32(struct bcma_device *core, u16 offset, u32 value)
 	core->bus->ops->awrite32(core, offset, value);
 }
 
-#define bcma_mask32(cc, offset, mask) \
-	bcma_write32(cc, offset, bcma_read32(cc, offset) & (mask))
-#define bcma_set32(cc, offset, set) \
-	bcma_write32(cc, offset, bcma_read32(cc, offset) | (set))
-#define bcma_maskset32(cc, offset, mask, set) \
-	bcma_write32(cc, offset, (bcma_read32(cc, offset) & (mask)) | (set))
+static inline void bcma_mask32(struct bcma_device *cc, u16 offset, u32 mask)
+{
+	bcma_write32(cc, offset, bcma_read32(cc, offset) & mask);
+}
+static inline void bcma_set32(struct bcma_device *cc, u16 offset, u32 set)
+{
+	bcma_write32(cc, offset, bcma_read32(cc, offset) | set);
+}
+static inline void bcma_maskset32(struct bcma_device *cc,
+				  u16 offset, u32 mask, u32 set)
+{
+	bcma_write32(cc, offset, (bcma_read32(cc, offset) & mask) | set);
+}
+static inline void bcma_mask16(struct bcma_device *cc, u16 offset, u16 mask)
+{
+	bcma_write16(cc, offset, bcma_read16(cc, offset) & mask);
+}
+static inline void bcma_set16(struct bcma_device *cc, u16 offset, u16 set)
+{
+	bcma_write16(cc, offset, bcma_read16(cc, offset) | set);
+}
+static inline void bcma_maskset16(struct bcma_device *cc,
+				  u16 offset, u16 mask, u16 set)
+{
+	bcma_write16(cc, offset, (bcma_read16(cc, offset) & mask) | set);
+}
 
 extern bool bcma_core_is_enabled(struct bcma_device *core);
 extern void bcma_core_disable(struct bcma_device *core, u32 flags);
-- 
cgit v1.2.3


From 084455524f0d46dd210b4397898aff73579b97e8 Mon Sep 17 00:00:00 2001
From: Arend van Spriel <arend@broadcom.com>
Date: Thu, 8 Dec 2011 15:06:42 -0800
Subject: bcma: use static keyword for inline function declaration in bcma.h

Just scratching an itch here, but it makes more sense to use the
static keyword if you think about how the compiler treats inline
functions.

Reviewed-by: Pieter-Paul Giesberts <pieterpg@broadcom.com>
Reviewed-by: Alwin Beukers <alwin@broadcom.com>
Signed-off-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Franky Lin <frankyl@broadcom.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma.h | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index de6057f16987..f4b8346b1a33 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -205,50 +205,51 @@ struct bcma_bus {
 	struct ssb_sprom sprom;
 };
 
-extern inline u32 bcma_read8(struct bcma_device *core, u16 offset)
+static inline u32 bcma_read8(struct bcma_device *core, u16 offset)
 {
 	return core->bus->ops->read8(core, offset);
 }
-extern inline u32 bcma_read16(struct bcma_device *core, u16 offset)
+static inline u32 bcma_read16(struct bcma_device *core, u16 offset)
 {
 	return core->bus->ops->read16(core, offset);
 }
-extern inline u32 bcma_read32(struct bcma_device *core, u16 offset)
+static inline u32 bcma_read32(struct bcma_device *core, u16 offset)
 {
 	return core->bus->ops->read32(core, offset);
 }
-extern inline
+static inline
 void bcma_write8(struct bcma_device *core, u16 offset, u32 value)
 {
 	core->bus->ops->write8(core, offset, value);
 }
-extern inline
+static inline
 void bcma_write16(struct bcma_device *core, u16 offset, u32 value)
 {
 	core->bus->ops->write16(core, offset, value);
 }
-extern inline
+static inline
 void bcma_write32(struct bcma_device *core, u16 offset, u32 value)
 {
 	core->bus->ops->write32(core, offset, value);
 }
 #ifdef CONFIG_BCMA_BLOCKIO
-extern inline void bcma_block_read(struct bcma_device *core, void *buffer,
+static inline void bcma_block_read(struct bcma_device *core, void *buffer,
 				   size_t count, u16 offset, u8 reg_width)
 {
 	core->bus->ops->block_read(core, buffer, count, offset, reg_width);
 }
-extern inline void bcma_block_write(struct bcma_device *core, const void *buffer,
-				    size_t count, u16 offset, u8 reg_width)
+static inline void bcma_block_write(struct bcma_device *core,
+				    const void *buffer, size_t count,
+				    u16 offset, u8 reg_width)
 {
 	core->bus->ops->block_write(core, buffer, count, offset, reg_width);
 }
 #endif
-extern inline u32 bcma_aread32(struct bcma_device *core, u16 offset)
+static inline u32 bcma_aread32(struct bcma_device *core, u16 offset)
 {
 	return core->bus->ops->aread32(core, offset);
 }
-extern inline
+static inline
 void bcma_awrite32(struct bcma_device *core, u16 offset, u32 value)
 {
 	core->bus->ops->awrite32(core, offset, value);
-- 
cgit v1.2.3


From 5c3ddec73d01a1fae9409c197078cb02c42238c3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 13 Dec 2011 16:44:22 -0500
Subject: net: Remove unused neighbour layer ops.

It's simpler to just keep these things out until there is a real user
of them, so we can see what the needs actually are, rather than keep
these things around as useless overhead.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 -
 include/net/neighbour.h   |  1 -
 net/core/neighbour.c      | 10 ----------
 3 files changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 603730804da5..6b9d4edb7c26 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -974,7 +974,6 @@ struct net_device_ops {
 	int			(*ndo_set_features)(struct net_device *dev,
 						    netdev_features_t features);
 	int			(*ndo_neigh_construct)(struct neighbour *n);
-	void			(*ndo_neigh_destroy)(struct neighbour *n);
 };
 
 /*
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index e31f0a86f9b7..6814c4d61c1c 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -43,7 +43,6 @@ struct neigh_parms {
 #endif
 	struct net_device *dev;
 	struct neigh_parms *next;
-	int	(*neigh_setup)(struct neighbour *);
 	void	(*neigh_cleanup)(struct neighbour *);
 	struct neigh_table *tbl;
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4af151e1bf5d..d57a40a2598c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -497,13 +497,6 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 		}
 	}
 
-	/* Device specific setup. */
-	if (n->parms->neigh_setup &&
-	    (error = n->parms->neigh_setup(n)) < 0) {
-		rc = ERR_PTR(error);
-		goto out_neigh_release;
-	}
-
 	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
 
 	write_lock_bh(&tbl->lock);
@@ -717,9 +710,6 @@ void neigh_destroy(struct neighbour *neigh)
 	skb_queue_purge(&neigh->arp_queue);
 	neigh->arp_queue_len_bytes = 0;
 
-	if (dev->netdev_ops->ndo_neigh_destroy)
-		dev->netdev_ops->ndo_neigh_destroy(neigh);
-
 	dev_put(dev);
 	neigh_parms_put(neigh->parms);
 
-- 
cgit v1.2.3


From 1ba64edef6051d2ec79bb2fbd3a0c8f0df00ab55 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:37 +0100
Subject: block, sx8: kill blk_insert_request()

The only user left for blk_insert_request() is sx8 and it can be
trivially switched to use blk_execute_rq_nowait() - special requests
aren't included in io stat and sx8 doesn't use block layer tagging.
Switch sx8 and kill blk_insert_requeset().

This patch doesn't introduce any functional difference.

Only compile tested.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Jeff Garzik <jgarzik@pobox.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 48 ------------------------------------------------
 drivers/block/sx8.c    | 12 ++++++++----
 include/linux/blkdev.h |  1 -
 3 files changed, 8 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index ea70e6c80cd3..435af2378614 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1010,54 +1010,6 @@ static void add_acct_request(struct request_queue *q, struct request *rq,
 	__elv_add_request(q, rq, where);
 }
 
-/**
- * blk_insert_request - insert a special request into a request queue
- * @q:		request queue where request should be inserted
- * @rq:		request to be inserted
- * @at_head:	insert request at head or tail of queue
- * @data:	private data
- *
- * Description:
- *    Many block devices need to execute commands asynchronously, so they don't
- *    block the whole kernel from preemption during request execution.  This is
- *    accomplished normally by inserting aritficial requests tagged as
- *    REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
- *    be scheduled for actual execution by the request queue.
- *
- *    We have the option of inserting the head or the tail of the queue.
- *    Typically we use the tail for new ioctls and so forth.  We use the head
- *    of the queue for things like a QUEUE_FULL message from a device, or a
- *    host that is unable to accept a particular command.
- */
-void blk_insert_request(struct request_queue *q, struct request *rq,
-			int at_head, void *data)
-{
-	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
-	unsigned long flags;
-
-	/*
-	 * tell I/O scheduler that this isn't a regular read/write (ie it
-	 * must not attempt merges on this) and that it acts as a soft
-	 * barrier
-	 */
-	rq->cmd_type = REQ_TYPE_SPECIAL;
-
-	rq->special = data;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-
-	/*
-	 * If command is tagged, release the tag
-	 */
-	if (blk_rq_tagged(rq))
-		blk_queue_end_tag(q, rq);
-
-	add_acct_request(q, rq, where);
-	__blk_run_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-}
-EXPORT_SYMBOL(blk_insert_request);
-
 static void part_round_stats_single(int cpu, struct hd_struct *part,
 				    unsigned long now)
 {
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index b70f0fca9a42..e7472f567c9d 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -619,8 +619,10 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
 	       host->state == HST_DEV_SCAN);
 	spin_unlock_irq(&host->lock);
 
-	DPRINTK("blk_insert_request, tag == %u\n", idx);
-	blk_insert_request(host->oob_q, crq->rq, 1, crq);
+	DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
+	crq->rq->cmd_type = REQ_TYPE_SPECIAL;
+	crq->rq->special = crq;
+	blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
 
 	return 0;
 
@@ -658,8 +660,10 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
 	BUG_ON(rc < 0);
 	crq->msg_bucket = (u32) rc;
 
-	DPRINTK("blk_insert_request, tag == %u\n", idx);
-	blk_insert_request(host->oob_q, crq->rq, 1, crq);
+	DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
+	crq->rq->cmd_type = REQ_TYPE_SPECIAL;
+	crq->rq->special = crq;
+	blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
 
 	return 0;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c7a6d3b5bc7b..8a6b51b13a1c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -660,7 +660,6 @@ extern void __blk_put_request(struct request_queue *, struct request *);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern struct request *blk_make_request(struct request_queue *, struct bio *,
 					gfp_t);
-extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern void blk_add_request_payload(struct request *rq, struct page *page,
 		unsigned int len);
-- 
cgit v1.2.3


From 34f6055c80285e4efb3f602a9119db75239744dc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:37 +0100
Subject: block: add blk_queue_dead()

There are a number of QUEUE_FLAG_DEAD tests.  Add blk_queue_dead()
macro and use it.

This patch doesn't introduce any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 6 +++---
 block/blk-exec.c       | 2 +-
 block/blk-sysfs.c      | 4 ++--
 block/blk-throttle.c   | 4 ++--
 block/blk.h            | 2 +-
 include/linux/blkdev.h | 1 +
 6 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 435af2378614..b5ed4f4a8d96 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -608,7 +608,7 @@ EXPORT_SYMBOL(blk_init_allocated_queue_node);
 
 int blk_get_queue(struct request_queue *q)
 {
-	if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+	if (likely(!blk_queue_dead(q))) {
 		kobject_get(&q->kobj);
 		return 0;
 	}
@@ -755,7 +755,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
 	int may_queue;
 
-	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+	if (unlikely(blk_queue_dead(q)))
 		return NULL;
 
 	may_queue = elv_may_queue(q, rw_flags);
@@ -875,7 +875,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 		struct io_context *ioc;
 		struct request_list *rl = &q->rq;
 
-		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+		if (unlikely(blk_queue_dead(q)))
 			return NULL;
 
 		prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
diff --git a/block/blk-exec.c b/block/blk-exec.c
index a1ebceb332f9..60532852b3ab 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -50,7 +50,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 {
 	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
 
-	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+	if (unlikely(blk_queue_dead(q))) {
 		rq->errors = -ENXIO;
 		if (rq->end_io)
 			rq->end_io(rq, rq->errors);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e7f9f657f105..f0b2ca8f66d0 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -425,7 +425,7 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 	if (!entry->show)
 		return -EIO;
 	mutex_lock(&q->sysfs_lock);
-	if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
+	if (blk_queue_dead(q)) {
 		mutex_unlock(&q->sysfs_lock);
 		return -ENOENT;
 	}
@@ -447,7 +447,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
 
 	q = container_of(kobj, struct request_queue, kobj);
 	mutex_lock(&q->sysfs_lock);
-	if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
+	if (blk_queue_dead(q)) {
 		mutex_unlock(&q->sysfs_lock);
 		return -ENOENT;
 	}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 4553245d9317..5eed6a76721d 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -310,7 +310,7 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
 	struct request_queue *q = td->queue;
 
 	/* no throttling for dead queue */
-	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+	if (unlikely(blk_queue_dead(q)))
 		return NULL;
 
 	rcu_read_lock();
@@ -335,7 +335,7 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
 	spin_lock_irq(q->queue_lock);
 
 	/* Make sure @q is still alive */
-	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+	if (unlikely(blk_queue_dead(q))) {
 		kfree(tg);
 		return NULL;
 	}
diff --git a/block/blk.h b/block/blk.h
index 3f6551b3c92d..e38691dbb329 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -85,7 +85,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 			q->flush_queue_delayed = 1;
 			return NULL;
 		}
-		if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
+		if (unlikely(blk_queue_dead(q)) ||
 		    !q->elevator->ops->elevator_dispatch_fn(q, 0))
 			return NULL;
 	}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8a6b51b13a1c..783f97c14d0a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -481,6 +481,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
+#define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q)	\
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
-- 
cgit v1.2.3


From a73f730d013ff2788389fd0c46ad3e5510f124e6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:37 +0100
Subject: block, cfq: move cfqd->cic_index to q->id

cfq allocates per-queue id using ida and uses it to index cic radix
tree from io_context.  Move it to q->id and allocate on queue init and
free on queue release.  This simplifies cfq a bit and will allow for
further improvements of io context life-cycle management.

This patch doesn't introduce any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 24 +++++++++++++++--------
 block/blk-sysfs.c      |  2 ++
 block/blk.h            |  3 +++
 block/cfq-iosched.c    | 52 +++++---------------------------------------------
 include/linux/blkdev.h |  6 ++++++
 5 files changed, 32 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 30add45a87ef..af7301581172 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
+DEFINE_IDA(blk_queue_ida);
+
 /*
  * For the allocated request tables
  */
@@ -474,6 +476,10 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q)
 		return NULL;
 
+	q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
+	if (q->id < 0)
+		goto fail_q;
+
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
@@ -481,15 +487,11 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->backing_dev_info.name = "block";
 
 	err = bdi_init(&q->backing_dev_info);
-	if (err) {
-		kmem_cache_free(blk_requestq_cachep, q);
-		return NULL;
-	}
+	if (err)
+		goto fail_id;
 
-	if (blk_throtl_init(q)) {
-		kmem_cache_free(blk_requestq_cachep, q);
-		return NULL;
-	}
+	if (blk_throtl_init(q))
+		goto fail_id;
 
 	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
 		    laptop_mode_timer_fn, (unsigned long) q);
@@ -512,6 +514,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->queue_lock = &q->__queue_lock;
 
 	return q;
+
+fail_id:
+	ida_simple_remove(&blk_queue_ida, q->id);
+fail_q:
+	kmem_cache_free(blk_requestq_cachep, q);
+	return NULL;
 }
 EXPORT_SYMBOL(blk_alloc_queue_node);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index f0b2ca8f66d0..5b4b4ab5e785 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -494,6 +494,8 @@ static void blk_release_queue(struct kobject *kobj)
 	blk_trace_shutdown(q);
 
 	bdi_destroy(&q->backing_dev_info);
+
+	ida_simple_remove(&blk_queue_ida, q->id);
 	kmem_cache_free(blk_requestq_cachep, q);
 }
 
diff --git a/block/blk.h b/block/blk.h
index e38691dbb329..aae4d88fc523 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -1,6 +1,8 @@
 #ifndef BLK_INTERNAL_H
 #define BLK_INTERNAL_H
 
+#include <linux/idr.h>
+
 /* Amount of time in which a process may batch requests */
 #define BLK_BATCH_TIME	(HZ/50UL)
 
@@ -9,6 +11,7 @@
 
 extern struct kmem_cache *blk_requestq_cachep;
 extern struct kobj_type blk_queue_ktype;
+extern struct ida blk_queue_ida;
 
 void init_request_from_bio(struct request *req, struct bio *bio);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 16ace89613bc..ec3f5e8ba564 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -65,9 +65,6 @@ static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
 static struct completion *ioc_gone;
 static DEFINE_SPINLOCK(ioc_gone_lock);
 
-static DEFINE_SPINLOCK(cic_index_lock);
-static DEFINE_IDA(cic_index_ida);
-
 #define CFQ_PRIO_LISTS		IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
 #define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
@@ -290,7 +287,6 @@ struct cfq_data {
 	unsigned int cfq_group_idle;
 	unsigned int cfq_latency;
 
-	unsigned int cic_index;
 	struct list_head cic_list;
 
 	/*
@@ -484,7 +480,7 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
 
 static inline void *cfqd_dead_key(struct cfq_data *cfqd)
 {
-	return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
+	return (void *)(cfqd->queue->id << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
 }
 
 static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
@@ -3105,7 +3101,7 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
 	BUG_ON(rcu_dereference_check(ioc->ioc_data,
 		lockdep_is_held(&ioc->lock)) == cic);
 
-	radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
+	radix_tree_delete(&ioc->radix_root, cfqd->queue->id);
 	hlist_del_rcu(&cic->cic_list);
 	spin_unlock_irqrestore(&ioc->lock, flags);
 
@@ -3133,7 +3129,7 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 	}
 
 	do {
-		cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index);
+		cic = radix_tree_lookup(&ioc->radix_root, cfqd->queue->id);
 		rcu_read_unlock();
 		if (!cic)
 			break;
@@ -3169,8 +3165,7 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
 		cic->key = cfqd;
 
 		spin_lock_irqsave(&ioc->lock, flags);
-		ret = radix_tree_insert(&ioc->radix_root,
-						cfqd->cic_index, cic);
+		ret = radix_tree_insert(&ioc->radix_root, cfqd->queue->id, cic);
 		if (!ret)
 			hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
 		spin_unlock_irqrestore(&ioc->lock, flags);
@@ -3944,10 +3939,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
 	cfq_shutdown_timer_wq(cfqd);
 
-	spin_lock(&cic_index_lock);
-	ida_remove(&cic_index_ida, cfqd->cic_index);
-	spin_unlock(&cic_index_lock);
-
 	/*
 	 * Wait for cfqg->blkg->key accessors to exit their grace periods.
 	 * Do this wait only if there are other unlinked groups out
@@ -3969,24 +3960,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
 	kfree(cfqd);
 }
 
-static int cfq_alloc_cic_index(void)
-{
-	int index, error;
-
-	do {
-		if (!ida_pre_get(&cic_index_ida, GFP_KERNEL))
-			return -ENOMEM;
-
-		spin_lock(&cic_index_lock);
-		error = ida_get_new(&cic_index_ida, &index);
-		spin_unlock(&cic_index_lock);
-		if (error && error != -EAGAIN)
-			return error;
-	} while (error);
-
-	return index;
-}
-
 static void *cfq_init_queue(struct request_queue *q)
 {
 	struct cfq_data *cfqd;
@@ -3994,23 +3967,9 @@ static void *cfq_init_queue(struct request_queue *q)
 	struct cfq_group *cfqg;
 	struct cfq_rb_root *st;
 
-	i = cfq_alloc_cic_index();
-	if (i < 0)
-		return NULL;
-
 	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
-	if (!cfqd) {
-		spin_lock(&cic_index_lock);
-		ida_remove(&cic_index_ida, i);
-		spin_unlock(&cic_index_lock);
+	if (!cfqd)
 		return NULL;
-	}
-
-	/*
-	 * Don't need take queue_lock in the routine, since we are
-	 * initializing the ioscheduler, and nobody is using cfqd
-	 */
-	cfqd->cic_index = i;
 
 	/* Init root service tree */
 	cfqd->grp_service_tree = CFQ_RB_ROOT;
@@ -4294,7 +4253,6 @@ static void __exit cfq_exit(void)
 	 */
 	if (elv_ioc_count_read(cfq_ioc_count))
 		wait_for_completion(&all_gone);
-	ida_destroy(&cic_index_ida);
 	cfq_slab_kill();
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 783f97c14d0a..8c8dbc4738ea 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -310,6 +310,12 @@ struct request_queue {
 	 */
 	unsigned long		queue_flags;
 
+	/*
+	 * ida allocated id for this queue.  Used to index queues from
+	 * ioctx.
+	 */
+	int			id;
+
 	/*
 	 * queue needs bounce pages for pages above this limit
 	 */
-- 
cgit v1.2.3


From 42ec57a8f68311bbbf4ff96a5d33c8a2e90b9d05 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:37 +0100
Subject: block: misc ioc cleanups

* int return from put_io_context() wasn't used by anybody.  Make it
  return void like other put functions and docbook-fy the function
  comment.

* Reorder dummy declarations for !CONFIG_BLOCK case a bit.

* Make alloc_ioc_context() use __GFP_ZERO allocation, take init out of
  if block and drop 0'ing.

* Docbook-fy current_io_context() comment.

This patch doesn't introduce any functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-ioc.c           | 72 +++++++++++++++++++++++------------------------
 include/linux/iocontext.h | 12 ++------
 2 files changed, 39 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 6f9bbd978653..8bebf06bac76 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -27,26 +27,28 @@ static void cfq_dtor(struct io_context *ioc)
 	}
 }
 
-/*
- * IO Context helper functions. put_io_context() returns 1 if there are no
- * more users of this io context, 0 otherwise.
+/**
+ * put_io_context - put a reference of io_context
+ * @ioc: io_context to put
+ *
+ * Decrement reference count of @ioc and release it if the count reaches
+ * zero.
  */
-int put_io_context(struct io_context *ioc)
+void put_io_context(struct io_context *ioc)
 {
 	if (ioc == NULL)
-		return 1;
+		return;
 
-	BUG_ON(atomic_long_read(&ioc->refcount) == 0);
+	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
 
-	if (atomic_long_dec_and_test(&ioc->refcount)) {
-		rcu_read_lock();
-		cfq_dtor(ioc);
-		rcu_read_unlock();
+	if (!atomic_long_dec_and_test(&ioc->refcount))
+		return;
 
-		kmem_cache_free(iocontext_cachep, ioc);
-		return 1;
-	}
-	return 0;
+	rcu_read_lock();
+	cfq_dtor(ioc);
+	rcu_read_unlock();
+
+	kmem_cache_free(iocontext_cachep, ioc);
 }
 EXPORT_SYMBOL(put_io_context);
 
@@ -84,33 +86,31 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
 {
 	struct io_context *ioc;
 
-	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
-	if (ioc) {
-		atomic_long_set(&ioc->refcount, 1);
-		atomic_set(&ioc->nr_tasks, 1);
-		spin_lock_init(&ioc->lock);
-		ioc->ioprio_changed = 0;
-		ioc->ioprio = 0;
-		ioc->last_waited = 0; /* doesn't matter... */
-		ioc->nr_batch_requests = 0; /* because this is 0 */
-		INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
-		INIT_HLIST_HEAD(&ioc->cic_list);
-		ioc->ioc_data = NULL;
-#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
-		ioc->cgroup_changed = 0;
-#endif
-	}
+	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
+				    node);
+	if (unlikely(!ioc))
+		return NULL;
+
+	/* initialize */
+	atomic_long_set(&ioc->refcount, 1);
+	atomic_set(&ioc->nr_tasks, 1);
+	spin_lock_init(&ioc->lock);
+	INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
+	INIT_HLIST_HEAD(&ioc->cic_list);
 
 	return ioc;
 }
 
-/*
- * If the current task has no IO context then create one and initialise it.
- * Otherwise, return its existing IO context.
+/**
+ * current_io_context - get io_context of %current
+ * @gfp_flags: allocation flags, used if allocation is necessary
+ * @node: allocation node, used if allocation is necessary
  *
- * This returned IO context doesn't have a specifically elevated refcount,
- * but since the current task itself holds a reference, the context can be
- * used in general code, so long as it stays within `current` context.
+ * Return io_context of %current.  If it doesn't exist, it is created with
+ * @gfp_flags and @node.  The returned io_context does NOT have its
+ * reference count incremented.  Because io_context is exited only on task
+ * exit, %current can be sure that the returned io_context is valid and
+ * alive as long as it is executing.
  */
 struct io_context *current_io_context(gfp_t gfp_flags, int node)
 {
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 5037a0ad2312..8a6ecb66346f 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -76,20 +76,14 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
 
 struct task_struct;
 #ifdef CONFIG_BLOCK
-int put_io_context(struct io_context *ioc);
+void put_io_context(struct io_context *ioc);
 void exit_io_context(struct task_struct *task);
 struct io_context *get_io_context(gfp_t gfp_flags, int node);
 struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
 #else
-static inline void exit_io_context(struct task_struct *task)
-{
-}
-
 struct io_context;
-static inline int put_io_context(struct io_context *ioc)
-{
-	return 1;
-}
+static inline void put_io_context(struct io_context *ioc) { }
+static inline void exit_io_context(struct task_struct *task) { }
 #endif
 
 #endif
-- 
cgit v1.2.3


From 6e736be7f282fff705db7c34a15313281b372a76 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:38 +0100
Subject: block: make ioc get/put interface more conventional and fix race on
 alloction

Ignoring copy_io() during fork, io_context can be allocated from two
places - current_io_context() and set_task_ioprio().  The former is
always called from local task while the latter can be called from
different task.  The synchornization between them are peculiar and
dubious.

* current_io_context() doesn't grab task_lock() and assumes that if it
  saw %NULL ->io_context, it would stay that way until allocation and
  assignment is complete.  It has smp_wmb() between alloc/init and
  assignment.

* set_task_ioprio() grabs task_lock() for assignment and does
  smp_read_barrier_depends() between "ioc = task->io_context" and "if
  (ioc)".  Unfortunately, this doesn't achieve anything - the latter
  is not a dependent load of the former.  ie, if ioc itself were being
  dereferenced "ioc->xxx", it would mean something (not sure what tho)
  but as the code currently stands, the dependent read barrier is
  noop.

As only one of the the two test-assignment sequences is task_lock()
protected, the task_lock() can't do much about race between the two.
Nothing prevents current_io_context() and set_task_ioprio() allocating
its own ioc for the same task and overwriting the other's.

Also, set_task_ioprio() can race with exiting task and create a new
ioc after exit_io_context() is finished.

ioc get/put doesn't have any reason to be complex.  The only hot path
is accessing the existing ioc of %current, which is simple to achieve
given that ->io_context is never destroyed as long as the task is
alive.  All other paths can happily go through task_lock() like all
other task sub structures without impacting anything.

This patch updates ioc get/put so that it becomes more conventional.

* alloc_io_context() is replaced with get_task_io_context().  This is
  the only interface which can acquire access to ioc of another task.
  On return, the caller has an explicit reference to the object which
  should be put using put_io_context() afterwards.

* The functionality of current_io_context() remains the same but when
  creating a new ioc, it shares the code path with
  get_task_io_context() and always goes through task_lock().

* get_io_context() now means incrementing ref on an ioc which the
  caller already has access to (be that an explicit refcnt or implicit
  %current one).

* PF_EXITING inhibits creation of new io_context and once
  exit_io_context() is finished, it's guaranteed that both ioc
  acquisition functions return %NULL.

* All users are updated.  Most are trivial but
  smp_read_barrier_depends() removal from cfq_get_io_context() needs a
  bit of explanation.  I suppose the original intention was to ensure
  ioc->ioprio is visible when set_task_ioprio() allocates new
  io_context and installs it; however, this wouldn't have worked
  because set_task_ioprio() doesn't have wmb between init and install.
  There are other problems with this which will be fixed in another
  patch.

* While at it, use NUMA_NO_NODE instead of -1 for wildcard node
  specification.

-v2: Vivek spotted contamination from debug patch.  Removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c        |  9 +++--
 block/blk-ioc.c           | 99 +++++++++++++++++++++++++++++++----------------
 block/blk.h               |  1 +
 block/cfq-iosched.c       | 18 ++++-----
 fs/ioprio.c               | 21 ++--------
 include/linux/iocontext.h |  4 +-
 kernel/fork.c             |  8 ++--
 7 files changed, 91 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 8f630cec906e..4b001dcd85b0 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1645,11 +1645,12 @@ static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
 	struct io_context *ioc;
 
-	task_lock(tsk);
-	ioc = tsk->io_context;
-	if (ioc)
+	/* we don't lose anything even if ioc allocation fails */
+	ioc = get_task_io_context(tsk, GFP_ATOMIC, NUMA_NO_NODE);
+	if (ioc) {
 		ioc->cgroup_changed = 1;
-	task_unlock(tsk);
+		put_io_context(ioc);
+	}
 }
 
 void blkio_policy_register(struct blkio_policy_type *blkiop)
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 8bebf06bac76..b13ed96776c2 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -16,6 +16,19 @@
  */
 static struct kmem_cache *iocontext_cachep;
 
+/**
+ * get_io_context - increment reference count to io_context
+ * @ioc: io_context to get
+ *
+ * Increment reference count to @ioc.
+ */
+void get_io_context(struct io_context *ioc)
+{
+	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
+	atomic_long_inc(&ioc->refcount);
+}
+EXPORT_SYMBOL(get_io_context);
+
 static void cfq_dtor(struct io_context *ioc)
 {
 	if (!hlist_empty(&ioc->cic_list)) {
@@ -71,6 +84,9 @@ void exit_io_context(struct task_struct *task)
 {
 	struct io_context *ioc;
 
+	/* PF_EXITING prevents new io_context from being attached to @task */
+	WARN_ON_ONCE(!(current->flags & PF_EXITING));
+
 	task_lock(task);
 	ioc = task->io_context;
 	task->io_context = NULL;
@@ -82,7 +98,9 @@ void exit_io_context(struct task_struct *task)
 	put_io_context(ioc);
 }
 
-struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
+static struct io_context *create_task_io_context(struct task_struct *task,
+						 gfp_t gfp_flags, int node,
+						 bool take_ref)
 {
 	struct io_context *ioc;
 
@@ -98,6 +116,20 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
 	INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
 	INIT_HLIST_HEAD(&ioc->cic_list);
 
+	/* try to install, somebody might already have beaten us to it */
+	task_lock(task);
+
+	if (!task->io_context && !(task->flags & PF_EXITING)) {
+		task->io_context = ioc;
+	} else {
+		kmem_cache_free(iocontext_cachep, ioc);
+		ioc = task->io_context;
+	}
+
+	if (ioc && take_ref)
+		get_io_context(ioc);
+
+	task_unlock(task);
 	return ioc;
 }
 
@@ -114,46 +146,47 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
  */
 struct io_context *current_io_context(gfp_t gfp_flags, int node)
 {
-	struct task_struct *tsk = current;
-	struct io_context *ret;
-
-	ret = tsk->io_context;
-	if (likely(ret))
-		return ret;
-
-	ret = alloc_io_context(gfp_flags, node);
-	if (ret) {
-		/* make sure set_task_ioprio() sees the settings above */
-		smp_wmb();
-		tsk->io_context = ret;
-	}
+	might_sleep_if(gfp_flags & __GFP_WAIT);
 
-	return ret;
+	if (current->io_context)
+		return current->io_context;
+
+	return create_task_io_context(current, gfp_flags, node, false);
 }
+EXPORT_SYMBOL(current_io_context);
 
-/*
- * If the current task has no IO context then create one and initialise it.
- * If it does have a context, take a ref on it.
+/**
+ * get_task_io_context - get io_context of a task
+ * @task: task of interest
+ * @gfp_flags: allocation flags, used if allocation is necessary
+ * @node: allocation node, used if allocation is necessary
+ *
+ * Return io_context of @task.  If it doesn't exist, it is created with
+ * @gfp_flags and @node.  The returned io_context has its reference count
+ * incremented.
  *
- * This is always called in the context of the task which submitted the I/O.
+ * This function always goes through task_lock() and it's better to use
+ * current_io_context() + get_io_context() for %current.
  */
-struct io_context *get_io_context(gfp_t gfp_flags, int node)
+struct io_context *get_task_io_context(struct task_struct *task,
+				       gfp_t gfp_flags, int node)
 {
-	struct io_context *ioc = NULL;
-
-	/*
-	 * Check for unlikely race with exiting task. ioc ref count is
-	 * zero when ioc is being detached.
-	 */
-	do {
-		ioc = current_io_context(gfp_flags, node);
-		if (unlikely(!ioc))
-			break;
-	} while (!atomic_long_inc_not_zero(&ioc->refcount));
+	struct io_context *ioc;
 
-	return ioc;
+	might_sleep_if(gfp_flags & __GFP_WAIT);
+
+	task_lock(task);
+	ioc = task->io_context;
+	if (likely(ioc)) {
+		get_io_context(ioc);
+		task_unlock(task);
+		return ioc;
+	}
+	task_unlock(task);
+
+	return create_task_io_context(task, gfp_flags, node, true);
 }
-EXPORT_SYMBOL(get_io_context);
+EXPORT_SYMBOL(get_task_io_context);
 
 static int __init blk_ioc_init(void)
 {
diff --git a/block/blk.h b/block/blk.h
index aae4d88fc523..fc3c41b2fd24 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -122,6 +122,7 @@ static inline int blk_should_fake_timeout(struct request_queue *q)
 }
 #endif
 
+void get_io_context(struct io_context *ioc);
 struct io_context *current_io_context(gfp_t gfp_flags, int node);
 
 int ll_back_merge_fn(struct request_queue *q, struct request *req,
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ec3f5e8ba564..d42d89ccce1b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -14,6 +14,7 @@
 #include <linux/rbtree.h>
 #include <linux/ioprio.h>
 #include <linux/blktrace_api.h>
+#include "blk.h"
 #include "cfq.h"
 
 /*
@@ -3194,13 +3195,13 @@ static struct cfq_io_context *
 cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
 	struct io_context *ioc = NULL;
-	struct cfq_io_context *cic;
+	struct cfq_io_context *cic = NULL;
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
-	ioc = get_io_context(gfp_mask, cfqd->queue->node);
+	ioc = current_io_context(gfp_mask, cfqd->queue->node);
 	if (!ioc)
-		return NULL;
+		goto err;
 
 	cic = cfq_cic_lookup(cfqd, ioc);
 	if (cic)
@@ -3211,10 +3212,10 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 		goto err;
 
 	if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
-		goto err_free;
-
+		goto err;
 out:
-	smp_read_barrier_depends();
+	get_io_context(ioc);
+
 	if (unlikely(ioc->ioprio_changed))
 		cfq_ioc_set_ioprio(ioc);
 
@@ -3223,10 +3224,9 @@ out:
 		cfq_ioc_set_cgroup(ioc);
 #endif
 	return cic;
-err_free:
-	cfq_cic_free(cic);
 err:
-	put_io_context(ioc);
+	if (cic)
+		cfq_cic_free(cic);
 	return NULL;
 }
 
diff --git a/fs/ioprio.c b/fs/ioprio.c
index f79dab83e17b..998ec239d1ea 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -48,28 +48,13 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
 	if (err)
 		return err;
 
-	task_lock(task);
-	do {
-		ioc = task->io_context;
-		/* see wmb() in current_io_context() */
-		smp_read_barrier_depends();
-		if (ioc)
-			break;
-
-		ioc = alloc_io_context(GFP_ATOMIC, -1);
-		if (!ioc) {
-			err = -ENOMEM;
-			break;
-		}
-		task->io_context = ioc;
-	} while (1);
-
-	if (!err) {
+	ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
+	if (ioc) {
 		ioc->ioprio = ioprio;
 		ioc->ioprio_changed = 1;
+		put_io_context(ioc);
 	}
 
-	task_unlock(task);
 	return err;
 }
 EXPORT_SYMBOL_GPL(set_task_ioprio);
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 8a6ecb66346f..28bb621ef5a2 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -78,8 +78,8 @@ struct task_struct;
 #ifdef CONFIG_BLOCK
 void put_io_context(struct io_context *ioc);
 void exit_io_context(struct task_struct *task);
-struct io_context *get_io_context(gfp_t gfp_flags, int node);
-struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
+struct io_context *get_task_io_context(struct task_struct *task,
+				       gfp_t gfp_flags, int node);
 #else
 struct io_context;
 static inline void put_io_context(struct io_context *ioc) { }
diff --git a/kernel/fork.c b/kernel/fork.c
index da4a6a10d088..5bcfc739bb7c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -870,6 +870,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 {
 #ifdef CONFIG_BLOCK
 	struct io_context *ioc = current->io_context;
+	struct io_context *new_ioc;
 
 	if (!ioc)
 		return 0;
@@ -881,11 +882,12 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 		if (unlikely(!tsk->io_context))
 			return -ENOMEM;
 	} else if (ioprio_valid(ioc->ioprio)) {
-		tsk->io_context = alloc_io_context(GFP_KERNEL, -1);
-		if (unlikely(!tsk->io_context))
+		new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
+		if (unlikely(!new_ioc))
 			return -ENOMEM;
 
-		tsk->io_context->ioprio = ioc->ioprio;
+		new_ioc->ioprio = ioc->ioprio;
+		put_io_context(new_ioc);
 	}
 #endif
 	return 0;
-- 
cgit v1.2.3


From 09ac46c429464c919d04bb737b27edd84d944f02 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:38 +0100
Subject: block: misc updates to blk_get_queue()

* blk_get_queue() is peculiar in that it returns 0 on success and 1 on
  failure instead of 0 / -errno or boolean.  Update it such that it
  returns %true on success and %false on failure.

* Make sure the caller checks for the return value.

* Separate out __blk_get_queue() which doesn't check whether @q is
  dead and put it in blk.h.  This will be used later.

This patch doesn't introduce any functional changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c         | 8 ++++----
 block/blk.h              | 5 +++++
 block/bsg.c              | 4 +---
 block/genhd.c            | 2 +-
 drivers/scsi/scsi_scan.c | 2 +-
 include/linux/blkdev.h   | 2 +-
 6 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index af7301581172..fd4749391e17 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -626,14 +626,14 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
 }
 EXPORT_SYMBOL(blk_init_allocated_queue_node);
 
-int blk_get_queue(struct request_queue *q)
+bool blk_get_queue(struct request_queue *q)
 {
 	if (likely(!blk_queue_dead(q))) {
-		kobject_get(&q->kobj);
-		return 0;
+		__blk_get_queue(q);
+		return true;
 	}
 
-	return 1;
+	return false;
 }
 EXPORT_SYMBOL(blk_get_queue);
 
diff --git a/block/blk.h b/block/blk.h
index fc3c41b2fd24..8d421156fefb 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -13,6 +13,11 @@ extern struct kmem_cache *blk_requestq_cachep;
 extern struct kobj_type blk_queue_ktype;
 extern struct ida blk_queue_ida;
 
+static inline void __blk_get_queue(struct request_queue *q)
+{
+	kobject_get(&q->kobj);
+}
+
 void init_request_from_bio(struct request *req, struct bio *bio);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 			struct bio *bio);
diff --git a/block/bsg.c b/block/bsg.c
index 702f1316bb8f..167d586cece6 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -769,12 +769,10 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
 					 struct file *file)
 {
 	struct bsg_device *bd;
-	int ret;
 #ifdef BSG_DEBUG
 	unsigned char buf[32];
 #endif
-	ret = blk_get_queue(rq);
-	if (ret)
+	if (!blk_get_queue(rq))
 		return ERR_PTR(-ENXIO);
 
 	bd = bsg_alloc_device();
diff --git a/block/genhd.c b/block/genhd.c
index 02e9fca80825..c958169d24f0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -615,7 +615,7 @@ void add_disk(struct gendisk *disk)
 	 * Take an extra ref on queue which will be put on disk_release()
 	 * so that it sticks around as long as @disk is there.
 	 */
-	WARN_ON_ONCE(blk_get_queue(disk->queue));
+	WARN_ON_ONCE(!blk_get_queue(disk->queue));
 
 	retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
 				   "bdi");
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index b3c6d957fbd8..89da43f73c00 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -297,7 +297,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 		kfree(sdev);
 		goto out;
 	}
-	blk_get_queue(sdev->request_queue);
+	WARN_ON_ONCE(!blk_get_queue(sdev->request_queue));
 	sdev->request_queue->queuedata = sdev;
 	scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8c8dbc4738ea..d1b6f4ed1f96 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -865,7 +865,7 @@ extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatte
 extern void blk_dump_rq_flags(struct request *, char *);
 extern long nr_blockdev_pages(void);
 
-int blk_get_queue(struct request_queue *);
+bool __must_check blk_get_queue(struct request_queue *);
 struct request_queue *blk_alloc_queue(gfp_t);
 struct request_queue *blk_alloc_queue_node(gfp_t, int);
 extern void blk_put_queue(struct request_queue *);
-- 
cgit v1.2.3


From 283287a52e3c3f7f8f9da747f4b8c5202740d776 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:38 +0100
Subject: block, cfq: misc updates to cfq_io_context

Make the following changes to prepare for ioc/cic management cleanup.

* Add cic->q so that ioc can determine the associated queue without
  querying cfq.  This will eventually replace ->key.

* Factor out cfq_release_cic() from cic_free_func().  This function
  assumes that the caller handled locking.

* Rename __cfq_exit_single_io_context() to cfq_exit_cic() and make it
  take only @cic.

* Restructure cfq_cic_link() for future updates.

This patch doesn't introduce any functional changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c       | 58 ++++++++++++++++++++++++++---------------------
 include/linux/iocontext.h |  1 +
 2 files changed, 33 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d42d89ccce1b..a612ca65f371 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2709,21 +2709,26 @@ static void cfq_cic_free(struct cfq_io_context *cic)
 	call_rcu(&cic->rcu_head, cfq_cic_free_rcu);
 }
 
-static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
+static void cfq_release_cic(struct cfq_io_context *cic)
 {
-	unsigned long flags;
+	struct io_context *ioc = cic->ioc;
 	unsigned long dead_key = (unsigned long) cic->key;
 
 	BUG_ON(!(dead_key & CIC_DEAD_KEY));
-
-	spin_lock_irqsave(&ioc->lock, flags);
 	radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT);
 	hlist_del_rcu(&cic->cic_list);
-	spin_unlock_irqrestore(&ioc->lock, flags);
-
 	cfq_cic_free(cic);
 }
 
+static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioc->lock, flags);
+	cfq_release_cic(cic);
+	spin_unlock_irqrestore(&ioc->lock, flags);
+}
+
 /*
  * Must be called with rcu_read_lock() held or preemption otherwise disabled.
  * Only two callers of this - ->dtor() which is called with the rcu_read_lock(),
@@ -2773,9 +2778,9 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	cfq_put_queue(cfqq);
 }
 
-static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
-					 struct cfq_io_context *cic)
+static void cfq_exit_cic(struct cfq_io_context *cic)
 {
+	struct cfq_data *cfqd = cic_to_cfqd(cic);
 	struct io_context *ioc = cic->ioc;
 
 	list_del_init(&cic->queue_list);
@@ -2823,7 +2828,7 @@ static void cfq_exit_single_io_context(struct io_context *ioc,
 		 */
 		smp_read_barrier_depends();
 		if (cic->key == cfqd)
-			__cfq_exit_single_io_context(cfqd, cic);
+			cfq_exit_cic(cic);
 
 		spin_unlock_irqrestore(q->queue_lock, flags);
 	}
@@ -3161,28 +3166,29 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
 	int ret;
 
 	ret = radix_tree_preload(gfp_mask);
-	if (!ret) {
-		cic->ioc = ioc;
-		cic->key = cfqd;
+	if (ret)
+		goto out;
 
-		spin_lock_irqsave(&ioc->lock, flags);
-		ret = radix_tree_insert(&ioc->radix_root, cfqd->queue->id, cic);
-		if (!ret)
-			hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
-		spin_unlock_irqrestore(&ioc->lock, flags);
+	cic->ioc = ioc;
+	cic->key = cfqd;
+	cic->q = cfqd->queue;
+
+	spin_lock_irqsave(&ioc->lock, flags);
+	ret = radix_tree_insert(&ioc->radix_root, cfqd->queue->id, cic);
+	if (!ret)
+		hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
+	spin_unlock_irqrestore(&ioc->lock, flags);
 
-		radix_tree_preload_end();
+	radix_tree_preload_end();
 
-		if (!ret) {
-			spin_lock_irqsave(cfqd->queue->queue_lock, flags);
-			list_add(&cic->queue_list, &cfqd->cic_list);
-			spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
-		}
+	if (!ret) {
+		spin_lock_irqsave(cfqd->queue->queue_lock, flags);
+		list_add(&cic->queue_list, &cfqd->cic_list);
+		spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
 	}
-
+out:
 	if (ret)
 		printk(KERN_ERR "cfq: cic link failed!\n");
-
 	return ret;
 }
 
@@ -3922,7 +3928,7 @@ static void cfq_exit_queue(struct elevator_queue *e)
 							struct cfq_io_context,
 							queue_list);
 
-		__cfq_exit_single_io_context(cfqd, cic);
+		cfq_exit_cic(cic);
 	}
 
 	cfq_put_async_queues(cfqd);
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 28bb621ef5a2..079aea8fd8a8 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -15,6 +15,7 @@ struct cfq_ttime {
 
 struct cfq_io_context {
 	void *key;
+	struct request_queue *q;
 
 	struct cfq_queue *cfqq[2];
 
-- 
cgit v1.2.3


From dc86900e0a8f665122de6faadd27fb4c6d2b3e4d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:38 +0100
Subject: block, cfq: move ioc ioprio/cgroup changed handling to cic

ioprio/cgroup change was handled by marking the changed state in ioc
and, on the following access to the ioc, performing RCU-protected
iteration through all cic's grabbing the matching queue_lock.

This patch moves the changed state to each cic.  When ioprio or cgroup
changes, the respective bit is set on all cic's of the ioc and when
each of those cic (not ioc) is accessed, change is applied for that
specific ioc-queue pair.

This also fixes the following two race conditions between setting and
clearing of changed states.

* Missing barrier between assign/load of ioprio and ioprio_changed
  allowed applying old ioprio.

* Change requests could happen between application of change and
  clearing of changed variables.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c        |  2 +-
 block/blk-ioc.c           | 45 +++++++++++++++++++++++++++++++++++++++++++++
 block/cfq-iosched.c       | 28 +++++++++-------------------
 fs/ioprio.c               |  3 +--
 include/linux/iocontext.h | 14 +++++++++-----
 5 files changed, 65 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 4b001dcd85b0..dc00835aab6a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1648,7 +1648,7 @@ static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	/* we don't lose anything even if ioc allocation fails */
 	ioc = get_task_io_context(tsk, GFP_ATOMIC, NUMA_NO_NODE);
 	if (ioc) {
-		ioc->cgroup_changed = 1;
+		ioc_cgroup_changed(ioc);
 		put_io_context(ioc);
 	}
 }
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index b13ed96776c2..6f59fbad93d9 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -188,6 +188,51 @@ struct io_context *get_task_io_context(struct task_struct *task,
 }
 EXPORT_SYMBOL(get_task_io_context);
 
+void ioc_set_changed(struct io_context *ioc, int which)
+{
+	struct cfq_io_context *cic;
+	struct hlist_node *n;
+
+	hlist_for_each_entry(cic, n, &ioc->cic_list, cic_list)
+		set_bit(which, &cic->changed);
+}
+
+/**
+ * ioc_ioprio_changed - notify ioprio change
+ * @ioc: io_context of interest
+ * @ioprio: new ioprio
+ *
+ * @ioc's ioprio has changed to @ioprio.  Set %CIC_IOPRIO_CHANGED for all
+ * cic's.  iosched is responsible for checking the bit and applying it on
+ * request issue path.
+ */
+void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioc->lock, flags);
+	ioc->ioprio = ioprio;
+	ioc_set_changed(ioc, CIC_IOPRIO_CHANGED);
+	spin_unlock_irqrestore(&ioc->lock, flags);
+}
+
+/**
+ * ioc_cgroup_changed - notify cgroup change
+ * @ioc: io_context of interest
+ *
+ * @ioc's cgroup has changed.  Set %CIC_CGROUP_CHANGED for all cic's.
+ * iosched is responsible for checking the bit and applying it on request
+ * issue path.
+ */
+void ioc_cgroup_changed(struct io_context *ioc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioc->lock, flags);
+	ioc_set_changed(ioc, CIC_CGROUP_CHANGED);
+	spin_unlock_irqrestore(&ioc->lock, flags);
+}
+
 static int __init blk_ioc_init(void)
 {
 	iocontext_cachep = kmem_cache_create("blkdev_ioc",
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a612ca65f371..51aece2eea7c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2904,7 +2904,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 	cfq_clear_cfqq_prio_changed(cfqq);
 }
 
-static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
+static void changed_ioprio(struct cfq_io_context *cic)
 {
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
 	struct cfq_queue *cfqq;
@@ -2933,12 +2933,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
 	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
 }
 
-static void cfq_ioc_set_ioprio(struct io_context *ioc)
-{
-	call_for_each_cic(ioc, changed_ioprio);
-	ioc->ioprio_changed = 0;
-}
-
 static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 			  pid_t pid, bool is_sync)
 {
@@ -2960,7 +2954,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 }
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
+static void changed_cgroup(struct cfq_io_context *cic)
 {
 	struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
@@ -2986,12 +2980,6 @@ static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
 
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
-
-static void cfq_ioc_set_cgroup(struct io_context *ioc)
-{
-	call_for_each_cic(ioc, changed_cgroup);
-	ioc->cgroup_changed = 0;
-}
 #endif  /* CONFIG_CFQ_GROUP_IOSCHED */
 
 static struct cfq_queue *
@@ -3222,13 +3210,15 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 out:
 	get_io_context(ioc);
 
-	if (unlikely(ioc->ioprio_changed))
-		cfq_ioc_set_ioprio(ioc);
-
+	if (unlikely(cic->changed)) {
+		if (test_and_clear_bit(CIC_IOPRIO_CHANGED, &cic->changed))
+			changed_ioprio(cic);
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-	if (unlikely(ioc->cgroup_changed))
-		cfq_ioc_set_cgroup(ioc);
+		if (test_and_clear_bit(CIC_CGROUP_CHANGED, &cic->changed))
+			changed_cgroup(cic);
 #endif
+	}
+
 	return cic;
 err:
 	if (cic)
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 998ec239d1ea..0f1b9515213b 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -50,8 +50,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
 
 	ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
 	if (ioc) {
-		ioc->ioprio = ioprio;
-		ioc->ioprio_changed = 1;
+		ioc_ioprio_changed(ioc, ioprio);
 		put_io_context(ioc);
 	}
 
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 079aea8fd8a8..2c2b6da96b3c 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -13,6 +13,11 @@ struct cfq_ttime {
 	unsigned long ttime_mean;
 };
 
+enum {
+	CIC_IOPRIO_CHANGED,
+	CIC_CGROUP_CHANGED,
+};
+
 struct cfq_io_context {
 	void *key;
 	struct request_queue *q;
@@ -26,6 +31,8 @@ struct cfq_io_context {
 	struct list_head queue_list;
 	struct hlist_node cic_list;
 
+	unsigned long changed;
+
 	void (*dtor)(struct io_context *); /* destructor */
 	void (*exit)(struct io_context *); /* called on task exit */
 
@@ -44,11 +51,6 @@ struct io_context {
 	spinlock_t lock;
 
 	unsigned short ioprio;
-	unsigned short ioprio_changed;
-
-#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
-	unsigned short cgroup_changed;
-#endif
 
 	/*
 	 * For request batching
@@ -81,6 +83,8 @@ void put_io_context(struct io_context *ioc);
 void exit_io_context(struct task_struct *task);
 struct io_context *get_task_io_context(struct task_struct *task,
 				       gfp_t gfp_flags, int node);
+void ioc_ioprio_changed(struct io_context *ioc, int ioprio);
+void ioc_cgroup_changed(struct io_context *ioc);
 #else
 struct io_context;
 static inline void put_io_context(struct io_context *ioc) { }
-- 
cgit v1.2.3


From b2efa05265d62bc29f3a64400fad4b44340eedb8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:39 +0100
Subject: block, cfq: unlink cfq_io_context's immediately

cic is association between io_context and request_queue.  A cic is
linked from both ioc and q and should be destroyed when either one
goes away.  As ioc and q both have their own locks, locking becomes a
bit complex - both orders work for removal from one but not from the
other.

Currently, cfq tries to circumvent this locking order issue with RCU.
ioc->lock nests inside queue_lock but the radix tree and cic's are
also protected by RCU allowing either side to walk their lists without
grabbing lock.

This rather unconventional use of RCU quickly devolves into extremely
fragile convolution.  e.g. The following is from cfqd going away too
soon after ioc and q exits raced.

 general protection fault: 0000 [#1] PREEMPT SMP
 CPU 2
 Modules linked in:
 [   88.503444]
 Pid: 599, comm: hexdump Not tainted 3.1.0-rc10-work+ #158 Bochs Bochs
 RIP: 0010:[<ffffffff81397628>]  [<ffffffff81397628>] cfq_exit_single_io_context+0x58/0xf0
 ...
 Call Trace:
  [<ffffffff81395a4a>] call_for_each_cic+0x5a/0x90
  [<ffffffff81395ab5>] cfq_exit_io_context+0x15/0x20
  [<ffffffff81389130>] exit_io_context+0x100/0x140
  [<ffffffff81098a29>] do_exit+0x579/0x850
  [<ffffffff81098d5b>] do_group_exit+0x5b/0xd0
  [<ffffffff81098de7>] sys_exit_group+0x17/0x20
  [<ffffffff81b02f2b>] system_call_fastpath+0x16/0x1b

The only real hot path here is cic lookup during request
initialization and avoiding extra locking requires very confined use
of RCU.  This patch makes cic removal from both ioc and request_queue
perform double-locking and unlink immediately.

* From q side, the change is almost trivial as ioc->lock nests inside
  queue_lock.  It just needs to grab each ioc->lock as it walks
  cic_list and unlink it.

* From ioc side, it's a bit more difficult because of inversed lock
  order.  ioc needs its lock to walk its cic_list but can't grab the
  matching queue_lock and needs to perform unlock-relock dancing.

  Unlinking is now wholly done from put_io_context() and fast path is
  optimized by using the queue_lock the caller already holds, which is
  by far the most common case.  If the ioc accessed multiple devices,
  it tries with trylock.  In unlikely cases of fast path failure, it
  falls back to full double-locking dance from workqueue.

Double-locking isn't the prettiest thing in the world but it's *far*
simpler and more understandable than RCU trick without adding any
meaningful overhead.

This still leaves a lot of now unnecessary RCU logics.  Future patches
will trim them.

-v2: Vivek pointed out that cic->q was being dereferenced after
     cic->release() was called.  Updated to use local variable @this_q
     instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c        |   2 +-
 block/blk-ioc.c           | 166 ++++++++++++++++++++++++++++++++++++++--------
 block/cfq-iosched.c       |  44 +++---------
 fs/ioprio.c               |   2 +-
 include/linux/blkdev.h    |   3 +
 include/linux/iocontext.h |  12 ++--
 kernel/fork.c             |   2 +-
 7 files changed, 159 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index dc00835aab6a..278869358049 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1649,7 +1649,7 @@ static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	ioc = get_task_io_context(tsk, GFP_ATOMIC, NUMA_NO_NODE);
 	if (ioc) {
 		ioc_cgroup_changed(ioc);
-		put_io_context(ioc);
+		put_io_context(ioc, NULL);
 	}
 }
 
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 6f59fbad93d9..fb23965595da 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -29,55 +29,164 @@ void get_io_context(struct io_context *ioc)
 }
 EXPORT_SYMBOL(get_io_context);
 
-static void cfq_dtor(struct io_context *ioc)
+/*
+ * Releasing ioc may nest into another put_io_context() leading to nested
+ * fast path release.  As the ioc's can't be the same, this is okay but
+ * makes lockdep whine.  Keep track of nesting and use it as subclass.
+ */
+#ifdef CONFIG_LOCKDEP
+#define ioc_release_depth(q)		((q) ? (q)->ioc_release_depth : 0)
+#define ioc_release_depth_inc(q)	(q)->ioc_release_depth++
+#define ioc_release_depth_dec(q)	(q)->ioc_release_depth--
+#else
+#define ioc_release_depth(q)		0
+#define ioc_release_depth_inc(q)	do { } while (0)
+#define ioc_release_depth_dec(q)	do { } while (0)
+#endif
+
+/*
+ * Slow path for ioc release in put_io_context().  Performs double-lock
+ * dancing to unlink all cic's and then frees ioc.
+ */
+static void ioc_release_fn(struct work_struct *work)
 {
-	if (!hlist_empty(&ioc->cic_list)) {
-		struct cfq_io_context *cic;
+	struct io_context *ioc = container_of(work, struct io_context,
+					      release_work);
+	struct request_queue *last_q = NULL;
+
+	spin_lock_irq(&ioc->lock);
+
+	while (!hlist_empty(&ioc->cic_list)) {
+		struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
+							 struct cfq_io_context,
+							 cic_list);
+		struct request_queue *this_q = cic->q;
+
+		if (this_q != last_q) {
+			/*
+			 * Need to switch to @this_q.  Once we release
+			 * @ioc->lock, it can go away along with @cic.
+			 * Hold on to it.
+			 */
+			__blk_get_queue(this_q);
+
+			/*
+			 * blk_put_queue() might sleep thanks to kobject
+			 * idiocy.  Always release both locks, put and
+			 * restart.
+			 */
+			if (last_q) {
+				spin_unlock(last_q->queue_lock);
+				spin_unlock_irq(&ioc->lock);
+				blk_put_queue(last_q);
+			} else {
+				spin_unlock_irq(&ioc->lock);
+			}
+
+			last_q = this_q;
+			spin_lock_irq(this_q->queue_lock);
+			spin_lock(&ioc->lock);
+			continue;
+		}
+		ioc_release_depth_inc(this_q);
+		cic->exit(cic);
+		cic->release(cic);
+		ioc_release_depth_dec(this_q);
+	}
 
-		cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context,
-								cic_list);
-		cic->dtor(ioc);
+	if (last_q) {
+		spin_unlock(last_q->queue_lock);
+		spin_unlock_irq(&ioc->lock);
+		blk_put_queue(last_q);
+	} else {
+		spin_unlock_irq(&ioc->lock);
 	}
+
+	kmem_cache_free(iocontext_cachep, ioc);
 }
 
 /**
  * put_io_context - put a reference of io_context
  * @ioc: io_context to put
+ * @locked_q: request_queue the caller is holding queue_lock of (hint)
  *
  * Decrement reference count of @ioc and release it if the count reaches
- * zero.
+ * zero.  If the caller is holding queue_lock of a queue, it can indicate
+ * that with @locked_q.  This is an optimization hint and the caller is
+ * allowed to pass in %NULL even when it's holding a queue_lock.
  */
-void put_io_context(struct io_context *ioc)
+void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
 {
+	struct request_queue *last_q = locked_q;
+	unsigned long flags;
+
 	if (ioc == NULL)
 		return;
 
 	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
+	if (locked_q)
+		lockdep_assert_held(locked_q->queue_lock);
 
 	if (!atomic_long_dec_and_test(&ioc->refcount))
 		return;
 
-	rcu_read_lock();
-	cfq_dtor(ioc);
-	rcu_read_unlock();
-
-	kmem_cache_free(iocontext_cachep, ioc);
-}
-EXPORT_SYMBOL(put_io_context);
+	/*
+	 * Destroy @ioc.  This is a bit messy because cic's are chained
+	 * from both ioc and queue, and ioc->lock nests inside queue_lock.
+	 * The inner ioc->lock should be held to walk our cic_list and then
+	 * for each cic the outer matching queue_lock should be grabbed.
+	 * ie. We need to do reverse-order double lock dancing.
+	 *
+	 * Another twist is that we are often called with one of the
+	 * matching queue_locks held as indicated by @locked_q, which
+	 * prevents performing double-lock dance for other queues.
+	 *
+	 * So, we do it in two stages.  The fast path uses the queue_lock
+	 * the caller is holding and, if other queues need to be accessed,
+	 * uses trylock to avoid introducing locking dependency.  This can
+	 * handle most cases, especially if @ioc was performing IO on only
+	 * single device.
+	 *
+	 * If trylock doesn't cut it, we defer to @ioc->release_work which
+	 * can do all the double-locking dancing.
+	 */
+	spin_lock_irqsave_nested(&ioc->lock, flags,
+				 ioc_release_depth(locked_q));
+
+	while (!hlist_empty(&ioc->cic_list)) {
+		struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
+							 struct cfq_io_context,
+							 cic_list);
+		struct request_queue *this_q = cic->q;
+
+		if (this_q != last_q) {
+			if (last_q && last_q != locked_q)
+				spin_unlock(last_q->queue_lock);
+			last_q = NULL;
+
+			if (!spin_trylock(this_q->queue_lock))
+				break;
+			last_q = this_q;
+			continue;
+		}
+		ioc_release_depth_inc(this_q);
+		cic->exit(cic);
+		cic->release(cic);
+		ioc_release_depth_dec(this_q);
+	}
 
-static void cfq_exit(struct io_context *ioc)
-{
-	rcu_read_lock();
+	if (last_q && last_q != locked_q)
+		spin_unlock(last_q->queue_lock);
 
-	if (!hlist_empty(&ioc->cic_list)) {
-		struct cfq_io_context *cic;
+	spin_unlock_irqrestore(&ioc->lock, flags);
 
-		cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context,
-								cic_list);
-		cic->exit(ioc);
-	}
-	rcu_read_unlock();
+	/* if no cic's left, we're done; otherwise, kick release_work */
+	if (hlist_empty(&ioc->cic_list))
+		kmem_cache_free(iocontext_cachep, ioc);
+	else
+		schedule_work(&ioc->release_work);
 }
+EXPORT_SYMBOL(put_io_context);
 
 /* Called by the exiting task */
 void exit_io_context(struct task_struct *task)
@@ -92,10 +201,8 @@ void exit_io_context(struct task_struct *task)
 	task->io_context = NULL;
 	task_unlock(task);
 
-	if (atomic_dec_and_test(&ioc->nr_tasks))
-		cfq_exit(ioc);
-
-	put_io_context(ioc);
+	atomic_dec(&ioc->nr_tasks);
+	put_io_context(ioc, NULL);
 }
 
 static struct io_context *create_task_io_context(struct task_struct *task,
@@ -115,6 +222,7 @@ static struct io_context *create_task_io_context(struct task_struct *task,
 	spin_lock_init(&ioc->lock);
 	INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
 	INIT_HLIST_HEAD(&ioc->cic_list);
+	INIT_WORK(&ioc->release_work, ioc_release_fn);
 
 	/* try to install, somebody might already have beaten us to it */
 	task_lock(task);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e617b088c59b..6cc606560402 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1778,7 +1778,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		cfqd->active_queue = NULL;
 
 	if (cfqd->active_cic) {
-		put_io_context(cfqd->active_cic->ioc);
+		put_io_context(cfqd->active_cic->ioc, cfqd->queue);
 		cfqd->active_cic = NULL;
 	}
 }
@@ -2812,38 +2812,6 @@ static void cfq_exit_cic(struct cfq_io_context *cic)
 	}
 }
 
-static void cfq_exit_single_io_context(struct io_context *ioc,
-				       struct cfq_io_context *cic)
-{
-	struct cfq_data *cfqd = cic_to_cfqd(cic);
-
-	if (cfqd) {
-		struct request_queue *q = cfqd->queue;
-		unsigned long flags;
-
-		spin_lock_irqsave(q->queue_lock, flags);
-
-		/*
-		 * Ensure we get a fresh copy of the ->key to prevent
-		 * race between exiting task and queue
-		 */
-		smp_read_barrier_depends();
-		if (cic->key == cfqd)
-			cfq_exit_cic(cic);
-
-		spin_unlock_irqrestore(q->queue_lock, flags);
-	}
-}
-
-/*
- * The process that ioc belongs to has exited, we need to clean up
- * and put the internal structures we have that belongs to that process.
- */
-static void cfq_exit_io_context(struct io_context *ioc)
-{
-	call_for_each_cic(ioc, cfq_exit_single_io_context);
-}
-
 static struct cfq_io_context *
 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
@@ -2855,8 +2823,8 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 		cic->ttime.last_end_request = jiffies;
 		INIT_LIST_HEAD(&cic->queue_list);
 		INIT_HLIST_NODE(&cic->cic_list);
-		cic->dtor = cfq_free_io_context;
-		cic->exit = cfq_exit_io_context;
+		cic->exit = cfq_exit_cic;
+		cic->release = cfq_release_cic;
 		elv_ioc_count_inc(cfq_ioc_count);
 	}
 
@@ -3726,7 +3694,7 @@ static void cfq_put_request(struct request *rq)
 		BUG_ON(!cfqq->allocated[rw]);
 		cfqq->allocated[rw]--;
 
-		put_io_context(RQ_CIC(rq)->ioc);
+		put_io_context(RQ_CIC(rq)->ioc, cfqq->cfqd->queue);
 
 		rq->elevator_private[0] = NULL;
 		rq->elevator_private[1] = NULL;
@@ -3937,8 +3905,12 @@ static void cfq_exit_queue(struct elevator_queue *e)
 		struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
 							struct cfq_io_context,
 							queue_list);
+		struct io_context *ioc = cic->ioc;
 
+		spin_lock(&ioc->lock);
 		cfq_exit_cic(cic);
+		cfq_release_cic(cic);
+		spin_unlock(&ioc->lock);
 	}
 
 	cfq_put_async_queues(cfqd);
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 0f1b9515213b..f84b380d65e5 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
 	ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
 	if (ioc) {
 		ioc_ioprio_changed(ioc, ioprio);
-		put_io_context(ioc);
+		put_io_context(ioc, NULL);
 	}
 
 	return err;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d1b6f4ed1f96..65c2f8c70089 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -393,6 +393,9 @@ struct request_queue {
 	/* Throttle data */
 	struct throtl_data *td;
 #endif
+#ifdef CONFIG_LOCKDEP
+	int			ioc_release_depth;
+#endif
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 2c2b6da96b3c..01e863128780 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -3,6 +3,7 @@
 
 #include <linux/radix-tree.h>
 #include <linux/rcupdate.h>
+#include <linux/workqueue.h>
 
 struct cfq_queue;
 struct cfq_ttime {
@@ -33,8 +34,8 @@ struct cfq_io_context {
 
 	unsigned long changed;
 
-	void (*dtor)(struct io_context *); /* destructor */
-	void (*exit)(struct io_context *); /* called on task exit */
+	void (*exit)(struct cfq_io_context *);
+	void (*release)(struct cfq_io_context *);
 
 	struct rcu_head rcu_head;
 };
@@ -61,6 +62,8 @@ struct io_context {
 	struct radix_tree_root radix_root;
 	struct hlist_head cic_list;
 	void __rcu *ioc_data;
+
+	struct work_struct release_work;
 };
 
 static inline struct io_context *ioc_task_link(struct io_context *ioc)
@@ -79,7 +82,7 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
 
 struct task_struct;
 #ifdef CONFIG_BLOCK
-void put_io_context(struct io_context *ioc);
+void put_io_context(struct io_context *ioc, struct request_queue *locked_q);
 void exit_io_context(struct task_struct *task);
 struct io_context *get_task_io_context(struct task_struct *task,
 				       gfp_t gfp_flags, int node);
@@ -87,7 +90,8 @@ void ioc_ioprio_changed(struct io_context *ioc, int ioprio);
 void ioc_cgroup_changed(struct io_context *ioc);
 #else
 struct io_context;
-static inline void put_io_context(struct io_context *ioc) { }
+static inline void put_io_context(struct io_context *ioc,
+				  struct request_queue *locked_q) { }
 static inline void exit_io_context(struct task_struct *task) { }
 #endif
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 5bcfc739bb7c..2753449f2038 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -887,7 +887,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 			return -ENOMEM;
 
 		new_ioc->ioprio = ioc->ioprio;
-		put_io_context(new_ioc);
+		put_io_context(new_ioc, NULL);
 	}
 #endif
 	return 0;
-- 
cgit v1.2.3


From b9a1920837bc53430d339380e393a6e4c372939f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:39 +0100
Subject: block, cfq: remove delayed unlink

Now that all cic's are immediately unlinked from both ioc and queue,
lazy dropping from lookup path and trimming on elevator unregister are
unnecessary.  Kill them and remove now unused elevator_ops->trim().

This also leaves call_for_each_cic() without any user.  Removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c      | 92 ++++++------------------------------------------
 block/elevator.c         | 16 ---------
 include/linux/elevator.h |  1 -
 3 files changed, 10 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6cc606560402..ff44435fad50 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2669,24 +2669,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	cfq_put_cfqg(cfqg);
 }
 
-/*
- * Call func for each cic attached to this ioc.
- */
-static void
-call_for_each_cic(struct io_context *ioc,
-		  void (*func)(struct io_context *, struct cfq_io_context *))
-{
-	struct cfq_io_context *cic;
-	struct hlist_node *n;
-
-	rcu_read_lock();
-
-	hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
-		func(ioc, cic);
-
-	rcu_read_unlock();
-}
-
 static void cfq_cic_free_rcu(struct rcu_head *head)
 {
 	struct cfq_io_context *cic;
@@ -2727,31 +2709,6 @@ static void cfq_release_cic(struct cfq_io_context *cic)
 	cfq_cic_free(cic);
 }
 
-static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioc->lock, flags);
-	cfq_release_cic(cic);
-	spin_unlock_irqrestore(&ioc->lock, flags);
-}
-
-/*
- * Must be called with rcu_read_lock() held or preemption otherwise disabled.
- * Only two callers of this - ->dtor() which is called with the rcu_read_lock(),
- * and ->trim() which is called with the task lock held
- */
-static void cfq_free_io_context(struct io_context *ioc)
-{
-	/*
-	 * ioc->refcount is zero here, or we are called from elv_unregister(),
-	 * so no more cic's are allowed to be linked into this ioc.  So it
-	 * should be ok to iterate over the known list, we will see all cic's
-	 * since no new ones are added.
-	 */
-	call_for_each_cic(ioc, cic_free_func);
-}
-
 static void cfq_put_cooperator(struct cfq_queue *cfqq)
 {
 	struct cfq_queue *__cfqq, *next;
@@ -3037,30 +2994,6 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
 	return cfqq;
 }
 
-/*
- * We drop cfq io contexts lazily, so we may find a dead one.
- */
-static void
-cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
-		  struct cfq_io_context *cic)
-{
-	unsigned long flags;
-
-	WARN_ON(!list_empty(&cic->queue_list));
-	BUG_ON(cic->key != cfqd_dead_key(cfqd));
-
-	spin_lock_irqsave(&ioc->lock, flags);
-
-	BUG_ON(rcu_dereference_check(ioc->ioc_data,
-		lockdep_is_held(&ioc->lock)) == cic);
-
-	radix_tree_delete(&ioc->radix_root, cfqd->queue->id);
-	hlist_del_rcu(&cic->cic_list);
-	spin_unlock_irqrestore(&ioc->lock, flags);
-
-	cfq_cic_free(cic);
-}
-
 /**
  * cfq_cic_lookup - lookup cfq_io_context
  * @cfqd: the associated cfq_data
@@ -3078,26 +3011,22 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 	if (unlikely(!ioc))
 		return NULL;
 
-	rcu_read_lock();
-
 	/*
-	 * we maintain a last-hit cache, to avoid browsing over the tree
+	 * cic's are indexed from @ioc using radix tree and hint pointer,
+	 * both of which are protected with RCU.  All removals are done
+	 * holding both q and ioc locks, and we're holding q lock - if we
+	 * find a cic which points to us, it's guaranteed to be valid.
 	 */
+	rcu_read_lock();
 	cic = rcu_dereference(ioc->ioc_data);
 	if (cic && cic->key == cfqd)
 		goto out;
 
-	do {
-		cic = radix_tree_lookup(&ioc->radix_root, cfqd->queue->id);
-		if (!cic)
-			break;
-		if (likely(cic->key == cfqd)) {
-			/* hint assignment itself can race safely */
-			rcu_assign_pointer(ioc->ioc_data, cic);
-			break;
-		}
-		cfq_drop_dead_cic(cfqd, ioc, cic);
-	} while (1);
+	cic = radix_tree_lookup(&ioc->radix_root, cfqd->queue->id);
+	if (cic && cic->key == cfqd)
+		rcu_assign_pointer(ioc->ioc_data, cic);	/* allowed to race */
+	else
+		cic = NULL;
 out:
 	rcu_read_unlock();
 	return cic;
@@ -4182,7 +4111,6 @@ static struct elevator_type iosched_cfq = {
 		.elevator_may_queue_fn =	cfq_may_queue,
 		.elevator_init_fn =		cfq_init_queue,
 		.elevator_exit_fn =		cfq_exit_queue,
-		.trim =				cfq_free_io_context,
 	},
 	.elevator_attrs =	cfq_attrs,
 	.elevator_name =	"cfq",
diff --git a/block/elevator.c b/block/elevator.c
index 66343d6917d0..6a343e8f8319 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -913,22 +913,6 @@ EXPORT_SYMBOL_GPL(elv_register);
 
 void elv_unregister(struct elevator_type *e)
 {
-	struct task_struct *g, *p;
-
-	/*
-	 * Iterate every thread in the process to remove the io contexts.
-	 */
-	if (e->ops.trim) {
-		read_lock(&tasklist_lock);
-		do_each_thread(g, p) {
-			task_lock(p);
-			if (p->io_context)
-				e->ops.trim(p->io_context);
-			task_unlock(p);
-		} while_each_thread(g, p);
-		read_unlock(&tasklist_lock);
-	}
-
 	spin_lock(&elv_list_lock);
 	list_del_init(&e->list);
 	spin_unlock(&elv_list_lock);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 1d0f7a2ff73b..581dd1bd3d3e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -63,7 +63,6 @@ struct elevator_ops
 
 	elevator_init_fn *elevator_init_fn;
 	elevator_exit_fn *elevator_exit_fn;
-	void (*trim)(struct io_context *);
 };
 
 #define ELV_NAME_MAX	(16)
-- 
cgit v1.2.3


From b50b636bce6293fa858cc7ff6c3ffe4920d90006 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:39 +0100
Subject: block, cfq: kill ioc_gone

Now that cic's are immediately unlinked under both locks, there's no
need to count and drain cic's before module unload.  RCU callback
completion is waited with rcu_barrier().

While at it, remove residual RCU operations on cic_list.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c      | 43 +++++--------------------------------------
 include/linux/elevator.h | 17 -----------------
 2 files changed, 5 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ff44435fad50..ae7791a8ded9 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -62,10 +62,6 @@ static const int cfq_hist_divisor = 4;
 static struct kmem_cache *cfq_pool;
 static struct kmem_cache *cfq_ioc_pool;
 
-static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
-static struct completion *ioc_gone;
-static DEFINE_SPINLOCK(ioc_gone_lock);
-
 #define CFQ_PRIO_LISTS		IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
 #define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
@@ -2671,26 +2667,8 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 
 static void cfq_cic_free_rcu(struct rcu_head *head)
 {
-	struct cfq_io_context *cic;
-
-	cic = container_of(head, struct cfq_io_context, rcu_head);
-
-	kmem_cache_free(cfq_ioc_pool, cic);
-	elv_ioc_count_dec(cfq_ioc_count);
-
-	if (ioc_gone) {
-		/*
-		 * CFQ scheduler is exiting, grab exit lock and check
-		 * the pending io context count. If it hits zero,
-		 * complete ioc_gone and set it back to NULL
-		 */
-		spin_lock(&ioc_gone_lock);
-		if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) {
-			complete(ioc_gone);
-			ioc_gone = NULL;
-		}
-		spin_unlock(&ioc_gone_lock);
-	}
+	kmem_cache_free(cfq_ioc_pool,
+			container_of(head, struct cfq_io_context, rcu_head));
 }
 
 static void cfq_cic_free(struct cfq_io_context *cic)
@@ -2705,7 +2683,7 @@ static void cfq_release_cic(struct cfq_io_context *cic)
 
 	BUG_ON(!(dead_key & CIC_DEAD_KEY));
 	radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT);
-	hlist_del_rcu(&cic->cic_list);
+	hlist_del(&cic->cic_list);
 	cfq_cic_free(cic);
 }
 
@@ -2782,7 +2760,6 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 		INIT_HLIST_NODE(&cic->cic_list);
 		cic->exit = cfq_exit_cic;
 		cic->release = cfq_release_cic;
-		elv_ioc_count_inc(cfq_ioc_count);
 	}
 
 	return cic;
@@ -3072,7 +3049,7 @@ static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 
 	ret = radix_tree_insert(&ioc->radix_root, q->id, cic);
 	if (likely(!ret)) {
-		hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
+		hlist_add_head(&cic->cic_list, &ioc->cic_list);
 		list_add(&cic->queue_list, &cfqd->cic_list);
 		cic = NULL;
 	} else if (ret == -EEXIST) {
@@ -4156,19 +4133,9 @@ static int __init cfq_init(void)
 
 static void __exit cfq_exit(void)
 {
-	DECLARE_COMPLETION_ONSTACK(all_gone);
 	blkio_policy_unregister(&blkio_policy_cfq);
 	elv_unregister(&iosched_cfq);
-	ioc_gone = &all_gone;
-	/* ioc_gone's update must be visible before reading ioc_count */
-	smp_wmb();
-
-	/*
-	 * this also protects us from entering cfq_slab_kill() with
-	 * pending RCU callbacks
-	 */
-	if (elv_ioc_count_read(cfq_ioc_count))
-		wait_for_completion(&all_gone);
+	rcu_barrier();	/* make sure all cic RCU frees are complete */
 	cfq_slab_kill();
 }
 
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 581dd1bd3d3e..02604c89ddde 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -196,22 +196,5 @@ enum {
 	INIT_LIST_HEAD(&(rq)->csd.list);	\
 	} while (0)
 
-/*
- * io context count accounting
- */
-#define elv_ioc_count_mod(name, __val) this_cpu_add(name, __val)
-#define elv_ioc_count_inc(name)	this_cpu_inc(name)
-#define elv_ioc_count_dec(name)	this_cpu_dec(name)
-
-#define elv_ioc_count_read(name)				\
-({								\
-	unsigned long __val = 0;				\
-	int __cpu;						\
-	smp_wmb();						\
-	for_each_possible_cpu(__cpu)				\
-		__val += per_cpu(name, __cpu);			\
-	__val;							\
-})
-
 #endif /* CONFIG_BLOCK */
 #endif
-- 
cgit v1.2.3


From 1238033c79e92e5c315af12e45396f1a78c73dec Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:40 +0100
Subject: block, cfq: kill cic->key

Now that lazy paths are removed, cfqd_dead_key() is meaningless and
cic->q can be used whereever cic->key is used.  Kill cic->key.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c       | 26 +++++---------------------
 include/linux/iocontext.h |  1 -
 2 files changed, 5 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ae7791a8ded9..3b07ce168780 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -472,22 +472,9 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
 	cic->cfqq[is_sync] = cfqq;
 }
 
-#define CIC_DEAD_KEY	1ul
-#define CIC_DEAD_INDEX_SHIFT	1
-
-static inline void *cfqd_dead_key(struct cfq_data *cfqd)
-{
-	return (void *)(cfqd->queue->id << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
-}
-
 static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
 {
-	struct cfq_data *cfqd = cic->key;
-
-	if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY))
-		return NULL;
-
-	return cfqd;
+	return cic->q->elevator->elevator_data;
 }
 
 /*
@@ -2679,10 +2666,8 @@ static void cfq_cic_free(struct cfq_io_context *cic)
 static void cfq_release_cic(struct cfq_io_context *cic)
 {
 	struct io_context *ioc = cic->ioc;
-	unsigned long dead_key = (unsigned long) cic->key;
 
-	BUG_ON(!(dead_key & CIC_DEAD_KEY));
-	radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT);
+	radix_tree_delete(&ioc->radix_root, cic->q->id);
 	hlist_del(&cic->cic_list);
 	cfq_cic_free(cic);
 }
@@ -2726,7 +2711,6 @@ static void cfq_exit_cic(struct cfq_io_context *cic)
 	struct io_context *ioc = cic->ioc;
 
 	list_del_init(&cic->queue_list);
-	cic->key = cfqd_dead_key(cfqd);
 
 	/*
 	 * Both setting lookup hint to and clearing it from @cic are done
@@ -2982,6 +2966,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
 static struct cfq_io_context *
 cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 {
+	struct request_queue *q = cfqd->queue;
 	struct cfq_io_context *cic;
 
 	lockdep_assert_held(cfqd->queue->queue_lock);
@@ -2996,11 +2981,11 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 	 */
 	rcu_read_lock();
 	cic = rcu_dereference(ioc->ioc_data);
-	if (cic && cic->key == cfqd)
+	if (cic && cic->q == q)
 		goto out;
 
 	cic = radix_tree_lookup(&ioc->radix_root, cfqd->queue->id);
-	if (cic && cic->key == cfqd)
+	if (cic && cic->q == q)
 		rcu_assign_pointer(ioc->ioc_data, cic);	/* allowed to race */
 	else
 		cic = NULL;
@@ -3040,7 +3025,6 @@ static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 		goto out;
 
 	cic->ioc = ioc;
-	cic->key = cfqd;
 	cic->q = cfqd->queue;
 
 	/* lock both q and ioc and try to link @cic */
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 01e863128780..b2b75a54f252 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -20,7 +20,6 @@ enum {
 };
 
 struct cfq_io_context {
-	void *key;
 	struct request_queue *q;
 
 	struct cfq_queue *cfqq[2];
-- 
cgit v1.2.3


From 22f746e235a5cbee2a6ca9887b1be2aa7d31fe71 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:41 +0100
Subject: block: remove elevator_queue->ops

elevator_queue->ops points to the same ops struct ->elevator_type.ops
is pointing to.  The only effect of caching it in elevator_queue is
shorter notation - it doesn't save any indirect derefence.

Relocate elevator_type->list which used only during module init/exit
to the end of the structure, rename elevator_queue->elevator_type to
->type, and replace elevator_queue->ops with elevator_queue->type.ops.

This doesn't introduce any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk.h              | 10 +++----
 block/elevator.c         | 74 +++++++++++++++++++++++-------------------------
 include/linux/elevator.h |  5 ++--
 3 files changed, 43 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk.h b/block/blk.h
index 5bca2668e1bf..4943770e0792 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -94,7 +94,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 			return NULL;
 		}
 		if (unlikely(blk_queue_dead(q)) ||
-		    !q->elevator->ops->elevator_dispatch_fn(q, 0))
+		    !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
 			return NULL;
 	}
 }
@@ -103,16 +103,16 @@ static inline void elv_activate_rq(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_activate_req_fn)
-		e->ops->elevator_activate_req_fn(q, rq);
+	if (e->type->ops.elevator_activate_req_fn)
+		e->type->ops.elevator_activate_req_fn(q, rq);
 }
 
 static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_deactivate_req_fn)
-		e->ops->elevator_deactivate_req_fn(q, rq);
+	if (e->type->ops.elevator_deactivate_req_fn)
+		e->type->ops.elevator_deactivate_req_fn(q, rq);
 }
 
 #ifdef CONFIG_FAIL_IO_TIMEOUT
diff --git a/block/elevator.c b/block/elevator.c
index a16c2d1713e5..31ffe76aed3d 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -61,8 +61,8 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
 	struct request_queue *q = rq->q;
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_allow_merge_fn)
-		return e->ops->elevator_allow_merge_fn(q, rq, bio);
+	if (e->type->ops.elevator_allow_merge_fn)
+		return e->type->ops.elevator_allow_merge_fn(q, rq, bio);
 
 	return 1;
 }
@@ -171,7 +171,7 @@ static struct elevator_type *elevator_get(const char *name)
 static int elevator_init_queue(struct request_queue *q,
 			       struct elevator_queue *eq)
 {
-	eq->elevator_data = eq->ops->elevator_init_fn(q);
+	eq->elevator_data = eq->type->ops.elevator_init_fn(q);
 	if (eq->elevator_data)
 		return 0;
 	return -ENOMEM;
@@ -203,8 +203,7 @@ static struct elevator_queue *elevator_alloc(struct request_queue *q,
 	if (unlikely(!eq))
 		goto err;
 
-	eq->ops = &e->ops;
-	eq->elevator_type = e;
+	eq->type = e;
 	kobject_init(&eq->kobj, &elv_ktype);
 	mutex_init(&eq->sysfs_lock);
 
@@ -228,7 +227,7 @@ static void elevator_release(struct kobject *kobj)
 	struct elevator_queue *e;
 
 	e = container_of(kobj, struct elevator_queue, kobj);
-	elevator_put(e->elevator_type);
+	elevator_put(e->type);
 	kfree(e->hash);
 	kfree(e);
 }
@@ -288,9 +287,8 @@ EXPORT_SYMBOL(elevator_init);
 void elevator_exit(struct elevator_queue *e)
 {
 	mutex_lock(&e->sysfs_lock);
-	if (e->ops->elevator_exit_fn)
-		e->ops->elevator_exit_fn(e);
-	e->ops = NULL;
+	if (e->type->ops.elevator_exit_fn)
+		e->type->ops.elevator_exit_fn(e);
 	mutex_unlock(&e->sysfs_lock);
 
 	kobject_put(&e->kobj);
@@ -500,8 +498,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 		return ELEVATOR_BACK_MERGE;
 	}
 
-	if (e->ops->elevator_merge_fn)
-		return e->ops->elevator_merge_fn(q, req, bio);
+	if (e->type->ops.elevator_merge_fn)
+		return e->type->ops.elevator_merge_fn(q, req, bio);
 
 	return ELEVATOR_NO_MERGE;
 }
@@ -544,8 +542,8 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_merged_fn)
-		e->ops->elevator_merged_fn(q, rq, type);
+	if (e->type->ops.elevator_merged_fn)
+		e->type->ops.elevator_merged_fn(q, rq, type);
 
 	if (type == ELEVATOR_BACK_MERGE)
 		elv_rqhash_reposition(q, rq);
@@ -559,8 +557,8 @@ void elv_merge_requests(struct request_queue *q, struct request *rq,
 	struct elevator_queue *e = q->elevator;
 	const int next_sorted = next->cmd_flags & REQ_SORTED;
 
-	if (next_sorted && e->ops->elevator_merge_req_fn)
-		e->ops->elevator_merge_req_fn(q, rq, next);
+	if (next_sorted && e->type->ops.elevator_merge_req_fn)
+		e->type->ops.elevator_merge_req_fn(q, rq, next);
 
 	elv_rqhash_reposition(q, rq);
 
@@ -577,8 +575,8 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_bio_merged_fn)
-		e->ops->elevator_bio_merged_fn(q, rq, bio);
+	if (e->type->ops.elevator_bio_merged_fn)
+		e->type->ops.elevator_bio_merged_fn(q, rq, bio);
 }
 
 void elv_requeue_request(struct request_queue *q, struct request *rq)
@@ -604,12 +602,12 @@ void elv_drain_elevator(struct request_queue *q)
 
 	lockdep_assert_held(q->queue_lock);
 
-	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
+	while (q->elevator->type->ops.elevator_dispatch_fn(q, 1))
 		;
 	if (q->nr_sorted && printed++ < 10) {
 		printk(KERN_ERR "%s: forced dispatching is broken "
 		       "(nr_sorted=%u), please report this\n",
-		       q->elevator->elevator_type->elevator_name, q->nr_sorted);
+		       q->elevator->type->elevator_name, q->nr_sorted);
 	}
 }
 
@@ -698,7 +696,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 		 * rq cannot be accessed after calling
 		 * elevator_add_req_fn.
 		 */
-		q->elevator->ops->elevator_add_req_fn(q, rq);
+		q->elevator->type->ops.elevator_add_req_fn(q, rq);
 		break;
 
 	case ELEVATOR_INSERT_FLUSH:
@@ -727,8 +725,8 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_latter_req_fn)
-		return e->ops->elevator_latter_req_fn(q, rq);
+	if (e->type->ops.elevator_latter_req_fn)
+		return e->type->ops.elevator_latter_req_fn(q, rq);
 	return NULL;
 }
 
@@ -736,8 +734,8 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_former_req_fn)
-		return e->ops->elevator_former_req_fn(q, rq);
+	if (e->type->ops.elevator_former_req_fn)
+		return e->type->ops.elevator_former_req_fn(q, rq);
 	return NULL;
 }
 
@@ -745,8 +743,8 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_set_req_fn)
-		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
+	if (e->type->ops.elevator_set_req_fn)
+		return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask);
 
 	rq->elevator_private[0] = NULL;
 	return 0;
@@ -756,16 +754,16 @@ void elv_put_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_put_req_fn)
-		e->ops->elevator_put_req_fn(rq);
+	if (e->type->ops.elevator_put_req_fn)
+		e->type->ops.elevator_put_req_fn(rq);
 }
 
 int elv_may_queue(struct request_queue *q, int rw)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_may_queue_fn)
-		return e->ops->elevator_may_queue_fn(q, rw);
+	if (e->type->ops.elevator_may_queue_fn)
+		return e->type->ops.elevator_may_queue_fn(q, rw);
 
 	return ELV_MQUEUE_MAY;
 }
@@ -800,8 +798,8 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 	if (blk_account_rq(rq)) {
 		q->in_flight[rq_is_sync(rq)]--;
 		if ((rq->cmd_flags & REQ_SORTED) &&
-		    e->ops->elevator_completed_req_fn)
-			e->ops->elevator_completed_req_fn(q, rq);
+		    e->type->ops.elevator_completed_req_fn)
+			e->type->ops.elevator_completed_req_fn(q, rq);
 	}
 }
 
@@ -819,7 +817,7 @@ elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 
 	e = container_of(kobj, struct elevator_queue, kobj);
 	mutex_lock(&e->sysfs_lock);
-	error = e->ops ? entry->show(e, page) : -ENOENT;
+	error = e->type ? entry->show(e, page) : -ENOENT;
 	mutex_unlock(&e->sysfs_lock);
 	return error;
 }
@@ -837,7 +835,7 @@ elv_attr_store(struct kobject *kobj, struct attribute *attr,
 
 	e = container_of(kobj, struct elevator_queue, kobj);
 	mutex_lock(&e->sysfs_lock);
-	error = e->ops ? entry->store(e, page, length) : -ENOENT;
+	error = e->type ? entry->store(e, page, length) : -ENOENT;
 	mutex_unlock(&e->sysfs_lock);
 	return error;
 }
@@ -858,7 +856,7 @@ int __elv_register_queue(struct request_queue *q, struct elevator_queue *e)
 
 	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
 	if (!error) {
-		struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
+		struct elv_fs_entry *attr = e->type->elevator_attrs;
 		if (attr) {
 			while (attr->attr.name) {
 				if (sysfs_create_file(&e->kobj, &attr->attr))
@@ -959,7 +957,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	elevator_exit(old_elevator);
 	elv_quiesce_end(q);
 
-	blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
+	blk_add_trace_msg(q, "elv switch: %s", e->type->elevator_name);
 
 	return 0;
 
@@ -993,7 +991,7 @@ int elevator_change(struct request_queue *q, const char *name)
 		return -EINVAL;
 	}
 
-	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
+	if (!strcmp(elevator_name, q->elevator->type->elevator_name)) {
 		elevator_put(e);
 		return 0;
 	}
@@ -1028,7 +1026,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name)
 	if (!q->elevator || !blk_queue_stackable(q))
 		return sprintf(name, "none\n");
 
-	elv = e->elevator_type;
+	elv = e->type;
 
 	spin_lock(&elv_list_lock);
 	list_for_each_entry(__e, &elv_list, list) {
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 02604c89ddde..04958ef53e62 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -78,11 +78,11 @@ struct elv_fs_entry {
  */
 struct elevator_type
 {
-	struct list_head list;
 	struct elevator_ops ops;
 	struct elv_fs_entry *elevator_attrs;
 	char elevator_name[ELV_NAME_MAX];
 	struct module *elevator_owner;
+	struct list_head list;
 };
 
 /*
@@ -90,10 +90,9 @@ struct elevator_type
  */
 struct elevator_queue
 {
-	struct elevator_ops *ops;
+	struct elevator_type *type;
 	void *elevator_data;
 	struct kobject kobj;
-	struct elevator_type *elevator_type;
 	struct mutex sysfs_lock;
 	struct hlist_head *hash;
 	unsigned int registered:1;
-- 
cgit v1.2.3


From c58698073218f2c8f2fc5982fa3938c2d3803b9f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:41 +0100
Subject: block, cfq: reorganize cfq_io_context into generic and cfq specific
 parts

Currently io_context and cfq logics are mixed without clear boundary.
Most of io_context is independent from cfq but cfq_io_context handling
logic is dispersed between generic ioc code and cfq.

cfq_io_context represents association between an io_context and a
request_queue, which is a concept useful outside of cfq, but it also
contains fields which are useful only to cfq.

This patch takes out generic part and put it into io_cq (io
context-queue) and the rest into cfq_io_cq (cic moniker remains the
same) which contains io_cq.  The following changes are made together.

* cfq_ttime and cfq_io_cq now live in cfq-iosched.c.

* All related fields, functions and constants are renamed accordingly.

* ioc->ioc_data is now "struct io_cq *" instead of "void *" and
  renamed to icq_hint.

This prepares for io_context API cleanup.  Documentation is currently
sparse.  It will be added later.

Changes in this patch are mechanical and don't cause functional
change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-ioc.c           |  58 ++++++-----
 block/cfq-iosched.c       | 248 +++++++++++++++++++++++++---------------------
 include/linux/iocontext.h |  43 +++-----
 3 files changed, 175 insertions(+), 174 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index e23c797b4685..dc5e69d335a0 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -46,7 +46,7 @@ EXPORT_SYMBOL(get_io_context);
 
 /*
  * Slow path for ioc release in put_io_context().  Performs double-lock
- * dancing to unlink all cic's and then frees ioc.
+ * dancing to unlink all icq's and then frees ioc.
  */
 static void ioc_release_fn(struct work_struct *work)
 {
@@ -56,11 +56,10 @@ static void ioc_release_fn(struct work_struct *work)
 
 	spin_lock_irq(&ioc->lock);
 
-	while (!hlist_empty(&ioc->cic_list)) {
-		struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
-							 struct cfq_io_context,
-							 cic_list);
-		struct request_queue *this_q = cic->q;
+	while (!hlist_empty(&ioc->icq_list)) {
+		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
+						struct io_cq, ioc_node);
+		struct request_queue *this_q = icq->q;
 
 		if (this_q != last_q) {
 			/*
@@ -89,8 +88,8 @@ static void ioc_release_fn(struct work_struct *work)
 			continue;
 		}
 		ioc_release_depth_inc(this_q);
-		cic->exit(cic);
-		cic->release(cic);
+		icq->exit(icq);
+		icq->release(icq);
 		ioc_release_depth_dec(this_q);
 	}
 
@@ -131,10 +130,10 @@ void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
 		return;
 
 	/*
-	 * Destroy @ioc.  This is a bit messy because cic's are chained
+	 * Destroy @ioc.  This is a bit messy because icq's are chained
 	 * from both ioc and queue, and ioc->lock nests inside queue_lock.
-	 * The inner ioc->lock should be held to walk our cic_list and then
-	 * for each cic the outer matching queue_lock should be grabbed.
+	 * The inner ioc->lock should be held to walk our icq_list and then
+	 * for each icq the outer matching queue_lock should be grabbed.
 	 * ie. We need to do reverse-order double lock dancing.
 	 *
 	 * Another twist is that we are often called with one of the
@@ -153,11 +152,10 @@ void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
 	spin_lock_irqsave_nested(&ioc->lock, flags,
 				 ioc_release_depth(locked_q));
 
-	while (!hlist_empty(&ioc->cic_list)) {
-		struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
-							 struct cfq_io_context,
-							 cic_list);
-		struct request_queue *this_q = cic->q;
+	while (!hlist_empty(&ioc->icq_list)) {
+		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
+						struct io_cq, ioc_node);
+		struct request_queue *this_q = icq->q;
 
 		if (this_q != last_q) {
 			if (last_q && last_q != locked_q)
@@ -170,8 +168,8 @@ void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
 			continue;
 		}
 		ioc_release_depth_inc(this_q);
-		cic->exit(cic);
-		cic->release(cic);
+		icq->exit(icq);
+		icq->release(icq);
 		ioc_release_depth_dec(this_q);
 	}
 
@@ -180,8 +178,8 @@ void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
 
 	spin_unlock_irqrestore(&ioc->lock, flags);
 
-	/* if no cic's left, we're done; otherwise, kick release_work */
-	if (hlist_empty(&ioc->cic_list))
+	/* if no icq is left, we're done; otherwise, kick release_work */
+	if (hlist_empty(&ioc->icq_list))
 		kmem_cache_free(iocontext_cachep, ioc);
 	else
 		schedule_work(&ioc->release_work);
@@ -219,8 +217,8 @@ void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
 	atomic_long_set(&ioc->refcount, 1);
 	atomic_set(&ioc->nr_tasks, 1);
 	spin_lock_init(&ioc->lock);
-	INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
-	INIT_HLIST_HEAD(&ioc->cic_list);
+	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
+	INIT_HLIST_HEAD(&ioc->icq_list);
 	INIT_WORK(&ioc->release_work, ioc_release_fn);
 
 	/* try to install, somebody might already have beaten us to it */
@@ -270,11 +268,11 @@ EXPORT_SYMBOL(get_task_io_context);
 
 void ioc_set_changed(struct io_context *ioc, int which)
 {
-	struct cfq_io_context *cic;
+	struct io_cq *icq;
 	struct hlist_node *n;
 
-	hlist_for_each_entry(cic, n, &ioc->cic_list, cic_list)
-		set_bit(which, &cic->changed);
+	hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node)
+		set_bit(which, &icq->changed);
 }
 
 /**
@@ -282,8 +280,8 @@ void ioc_set_changed(struct io_context *ioc, int which)
  * @ioc: io_context of interest
  * @ioprio: new ioprio
  *
- * @ioc's ioprio has changed to @ioprio.  Set %CIC_IOPRIO_CHANGED for all
- * cic's.  iosched is responsible for checking the bit and applying it on
+ * @ioc's ioprio has changed to @ioprio.  Set %ICQ_IOPRIO_CHANGED for all
+ * icq's.  iosched is responsible for checking the bit and applying it on
  * request issue path.
  */
 void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
@@ -292,7 +290,7 @@ void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
 
 	spin_lock_irqsave(&ioc->lock, flags);
 	ioc->ioprio = ioprio;
-	ioc_set_changed(ioc, CIC_IOPRIO_CHANGED);
+	ioc_set_changed(ioc, ICQ_IOPRIO_CHANGED);
 	spin_unlock_irqrestore(&ioc->lock, flags);
 }
 
@@ -300,7 +298,7 @@ void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
  * ioc_cgroup_changed - notify cgroup change
  * @ioc: io_context of interest
  *
- * @ioc's cgroup has changed.  Set %CIC_CGROUP_CHANGED for all cic's.
+ * @ioc's cgroup has changed.  Set %ICQ_CGROUP_CHANGED for all icq's.
  * iosched is responsible for checking the bit and applying it on request
  * issue path.
  */
@@ -309,7 +307,7 @@ void ioc_cgroup_changed(struct io_context *ioc)
 	unsigned long flags;
 
 	spin_lock_irqsave(&ioc->lock, flags);
-	ioc_set_changed(ioc, CIC_CGROUP_CHANGED);
+	ioc_set_changed(ioc, ICQ_CGROUP_CHANGED);
 	spin_unlock_irqrestore(&ioc->lock, flags);
 }
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 5f7e4d161404..d2f16fcdec7f 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -54,13 +54,12 @@ static const int cfq_hist_divisor = 4;
 #define CFQQ_SECT_THR_NONROT	(sector_t)(2 * 32)
 #define CFQQ_SEEKY(cfqq)	(hweight32(cfqq->seek_history) > 32/8)
 
-#define RQ_CIC(rq)		\
-	((struct cfq_io_context *) (rq)->elevator_private[0])
+#define RQ_CIC(rq)		icq_to_cic((rq)->elevator_private[0])
 #define RQ_CFQQ(rq)		(struct cfq_queue *) ((rq)->elevator_private[1])
 #define RQ_CFQG(rq)		(struct cfq_group *) ((rq)->elevator_private[2])
 
 static struct kmem_cache *cfq_pool;
-static struct kmem_cache *cfq_ioc_pool;
+static struct kmem_cache *cfq_icq_pool;
 
 #define CFQ_PRIO_LISTS		IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
@@ -69,6 +68,14 @@ static struct kmem_cache *cfq_ioc_pool;
 #define sample_valid(samples)	((samples) > 80)
 #define rb_entry_cfqg(node)	rb_entry((node), struct cfq_group, rb_node)
 
+struct cfq_ttime {
+	unsigned long last_end_request;
+
+	unsigned long ttime_total;
+	unsigned long ttime_samples;
+	unsigned long ttime_mean;
+};
+
 /*
  * Most of our rbtree usage is for sorting with min extraction, so
  * if we cache the leftmost node we don't have to walk down the tree
@@ -210,6 +217,12 @@ struct cfq_group {
 	struct cfq_ttime ttime;
 };
 
+struct cfq_io_cq {
+	struct io_cq		icq;		/* must be the first member */
+	struct cfq_queue	*cfqq[2];
+	struct cfq_ttime	ttime;
+};
+
 /*
  * Per block device queue structure
  */
@@ -261,7 +274,7 @@ struct cfq_data {
 	struct work_struct unplug_work;
 
 	struct cfq_queue *active_queue;
-	struct cfq_io_context *active_cic;
+	struct cfq_io_cq *active_cic;
 
 	/*
 	 * async queue for each priority case
@@ -284,7 +297,7 @@ struct cfq_data {
 	unsigned int cfq_group_idle;
 	unsigned int cfq_latency;
 
-	struct list_head cic_list;
+	struct list_head icq_list;
 
 	/*
 	 * Fallback dummy cfqq for extreme OOM conditions
@@ -457,24 +470,28 @@ static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
 static void cfq_dispatch_insert(struct request_queue *, struct request *);
 static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
 				       struct io_context *, gfp_t);
-static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
-						struct io_context *);
+static struct cfq_io_cq *cfq_cic_lookup(struct cfq_data *, struct io_context *);
 
-static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
-					    bool is_sync)
+static inline struct cfq_io_cq *icq_to_cic(struct io_cq *icq)
+{
+	/* cic->icq is the first member, %NULL will convert to %NULL */
+	return container_of(icq, struct cfq_io_cq, icq);
+}
+
+static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_cq *cic, bool is_sync)
 {
 	return cic->cfqq[is_sync];
 }
 
-static inline void cic_set_cfqq(struct cfq_io_context *cic,
-				struct cfq_queue *cfqq, bool is_sync)
+static inline void cic_set_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq,
+				bool is_sync)
 {
 	cic->cfqq[is_sync] = cfqq;
 }
 
-static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
+static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic)
 {
-	return cic->q->elevator->elevator_data;
+	return cic->icq.q->elevator->elevator_data;
 }
 
 /*
@@ -1541,7 +1558,7 @@ static struct request *
 cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
 {
 	struct task_struct *tsk = current;
-	struct cfq_io_context *cic;
+	struct cfq_io_cq *cic;
 	struct cfq_queue *cfqq;
 
 	cic = cfq_cic_lookup(cfqd, tsk->io_context);
@@ -1655,7 +1672,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
 			   struct bio *bio)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_io_context *cic;
+	struct cfq_io_cq *cic;
 	struct cfq_queue *cfqq;
 
 	/*
@@ -1671,7 +1688,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
 	 * and %current are guaranteed to be equal.  Avoid lookup which
 	 * requires queue_lock by using @rq's cic.
 	 */
-	if (current->io_context == RQ_CIC(rq)->ioc) {
+	if (current->io_context == RQ_CIC(rq)->icq.ioc) {
 		cic = RQ_CIC(rq);
 	} else {
 		cic = cfq_cic_lookup(cfqd, current->io_context);
@@ -1761,7 +1778,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		cfqd->active_queue = NULL;
 
 	if (cfqd->active_cic) {
-		put_io_context(cfqd->active_cic->ioc, cfqd->queue);
+		put_io_context(cfqd->active_cic->icq.ioc, cfqd->queue);
 		cfqd->active_cic = NULL;
 	}
 }
@@ -1981,7 +1998,7 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 {
 	struct cfq_queue *cfqq = cfqd->active_queue;
-	struct cfq_io_context *cic;
+	struct cfq_io_cq *cic;
 	unsigned long sl, group_idle = 0;
 
 	/*
@@ -2016,7 +2033,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	 * task has exited, don't wait
 	 */
 	cic = cfqd->active_cic;
-	if (!cic || !atomic_read(&cic->ioc->nr_tasks))
+	if (!cic || !atomic_read(&cic->icq.ioc->nr_tasks))
 		return;
 
 	/*
@@ -2567,9 +2584,9 @@ static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	cfq_dispatch_insert(cfqd->queue, rq);
 
 	if (!cfqd->active_cic) {
-		struct cfq_io_context *cic = RQ_CIC(rq);
+		struct cfq_io_cq *cic = RQ_CIC(rq);
 
-		atomic_long_inc(&cic->ioc->refcount);
+		atomic_long_inc(&cic->icq.ioc->refcount);
 		cfqd->active_cic = cic;
 	}
 
@@ -2652,24 +2669,24 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	cfq_put_cfqg(cfqg);
 }
 
-static void cfq_cic_free_rcu(struct rcu_head *head)
+static void cfq_icq_free_rcu(struct rcu_head *head)
 {
-	kmem_cache_free(cfq_ioc_pool,
-			container_of(head, struct cfq_io_context, rcu_head));
+	kmem_cache_free(cfq_icq_pool,
+			icq_to_cic(container_of(head, struct io_cq, rcu_head)));
 }
 
-static void cfq_cic_free(struct cfq_io_context *cic)
+static void cfq_icq_free(struct io_cq *icq)
 {
-	call_rcu(&cic->rcu_head, cfq_cic_free_rcu);
+	call_rcu(&icq->rcu_head, cfq_icq_free_rcu);
 }
 
-static void cfq_release_cic(struct cfq_io_context *cic)
+static void cfq_release_icq(struct io_cq *icq)
 {
-	struct io_context *ioc = cic->ioc;
+	struct io_context *ioc = icq->ioc;
 
-	radix_tree_delete(&ioc->radix_root, cic->q->id);
-	hlist_del(&cic->cic_list);
-	cfq_cic_free(cic);
+	radix_tree_delete(&ioc->icq_tree, icq->q->id);
+	hlist_del(&icq->ioc_node);
+	cfq_icq_free(icq);
 }
 
 static void cfq_put_cooperator(struct cfq_queue *cfqq)
@@ -2705,20 +2722,21 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	cfq_put_queue(cfqq);
 }
 
-static void cfq_exit_cic(struct cfq_io_context *cic)
+static void cfq_exit_icq(struct io_cq *icq)
 {
+	struct cfq_io_cq *cic = icq_to_cic(icq);
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
-	struct io_context *ioc = cic->ioc;
+	struct io_context *ioc = icq->ioc;
 
-	list_del_init(&cic->queue_list);
+	list_del_init(&icq->q_node);
 
 	/*
-	 * Both setting lookup hint to and clearing it from @cic are done
-	 * under queue_lock.  If it's not pointing to @cic now, it never
+	 * Both setting lookup hint to and clearing it from @icq are done
+	 * under queue_lock.  If it's not pointing to @icq now, it never
 	 * will.  Hint assignment itself can race safely.
 	 */
-	if (rcu_dereference_raw(ioc->ioc_data) == cic)
-		rcu_assign_pointer(ioc->ioc_data, NULL);
+	if (rcu_dereference_raw(ioc->icq_hint) == icq)
+		rcu_assign_pointer(ioc->icq_hint, NULL);
 
 	if (cic->cfqq[BLK_RW_ASYNC]) {
 		cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
@@ -2731,19 +2749,18 @@ static void cfq_exit_cic(struct cfq_io_context *cic)
 	}
 }
 
-static struct cfq_io_context *
-cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
+static struct cfq_io_cq *cfq_alloc_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
-	struct cfq_io_context *cic;
+	struct cfq_io_cq *cic;
 
-	cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
+	cic = kmem_cache_alloc_node(cfq_icq_pool, gfp_mask | __GFP_ZERO,
 							cfqd->queue->node);
 	if (cic) {
 		cic->ttime.last_end_request = jiffies;
-		INIT_LIST_HEAD(&cic->queue_list);
-		INIT_HLIST_NODE(&cic->cic_list);
-		cic->exit = cfq_exit_cic;
-		cic->release = cfq_release_cic;
+		INIT_LIST_HEAD(&cic->icq.q_node);
+		INIT_HLIST_NODE(&cic->icq.ioc_node);
+		cic->icq.exit = cfq_exit_icq;
+		cic->icq.release = cfq_release_icq;
 	}
 
 	return cic;
@@ -2791,7 +2808,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 	cfq_clear_cfqq_prio_changed(cfqq);
 }
 
-static void changed_ioprio(struct cfq_io_context *cic)
+static void changed_ioprio(struct cfq_io_cq *cic)
 {
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
 	struct cfq_queue *cfqq;
@@ -2802,7 +2819,7 @@ static void changed_ioprio(struct cfq_io_context *cic)
 	cfqq = cic->cfqq[BLK_RW_ASYNC];
 	if (cfqq) {
 		struct cfq_queue *new_cfqq;
-		new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
+		new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->icq.ioc,
 						GFP_ATOMIC);
 		if (new_cfqq) {
 			cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
@@ -2836,7 +2853,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 }
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-static void changed_cgroup(struct cfq_io_context *cic)
+static void changed_cgroup(struct cfq_io_cq *cic)
 {
 	struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
@@ -2864,7 +2881,7 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
 		     struct io_context *ioc, gfp_t gfp_mask)
 {
 	struct cfq_queue *cfqq, *new_cfqq = NULL;
-	struct cfq_io_context *cic;
+	struct cfq_io_cq *cic;
 	struct cfq_group *cfqg;
 
 retry:
@@ -2956,56 +2973,57 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
 }
 
 /**
- * cfq_cic_lookup - lookup cfq_io_context
+ * cfq_cic_lookup - lookup cfq_io_cq
  * @cfqd: the associated cfq_data
  * @ioc: the associated io_context
  *
- * Look up cfq_io_context associated with @cfqd - @ioc pair.  Must be
- * called with queue_lock held.
+ * Look up cfq_io_cq associated with @cfqd - @ioc pair.  Must be called
+ * with queue_lock held.
  */
-static struct cfq_io_context *
+static struct cfq_io_cq *
 cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 {
 	struct request_queue *q = cfqd->queue;
-	struct cfq_io_context *cic;
+	struct io_cq *icq;
 
 	lockdep_assert_held(cfqd->queue->queue_lock);
 	if (unlikely(!ioc))
 		return NULL;
 
 	/*
-	 * cic's are indexed from @ioc using radix tree and hint pointer,
+	 * icq's are indexed from @ioc using radix tree and hint pointer,
 	 * both of which are protected with RCU.  All removals are done
 	 * holding both q and ioc locks, and we're holding q lock - if we
-	 * find a cic which points to us, it's guaranteed to be valid.
+	 * find a icq which points to us, it's guaranteed to be valid.
 	 */
 	rcu_read_lock();
-	cic = rcu_dereference(ioc->ioc_data);
-	if (cic && cic->q == q)
+	icq = rcu_dereference(ioc->icq_hint);
+	if (icq && icq->q == q)
 		goto out;
 
-	cic = radix_tree_lookup(&ioc->radix_root, cfqd->queue->id);
-	if (cic && cic->q == q)
-		rcu_assign_pointer(ioc->ioc_data, cic);	/* allowed to race */
+	icq = radix_tree_lookup(&ioc->icq_tree, cfqd->queue->id);
+	if (icq && icq->q == q)
+		rcu_assign_pointer(ioc->icq_hint, icq);	/* allowed to race */
 	else
-		cic = NULL;
+		icq = NULL;
 out:
 	rcu_read_unlock();
-	return cic;
+	return icq_to_cic(icq);
 }
 
 /**
- * cfq_create_cic - create and link a cfq_io_context
+ * cfq_create_cic - create and link a cfq_io_cq
  * @cfqd: cfqd of interest
  * @gfp_mask: allocation mask
  *
- * Make sure cfq_io_context linking %current->io_context and @cfqd exists.
- * If ioc and/or cic doesn't exist, they will be created using @gfp_mask.
+ * Make sure cfq_io_cq linking %current->io_context and @cfqd exists.  If
+ * ioc and/or cic doesn't exist, they will be created using @gfp_mask.
  */
 static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
 	struct request_queue *q = cfqd->queue;
-	struct cfq_io_context *cic = NULL;
+	struct io_cq *icq = NULL;
+	struct cfq_io_cq *cic;
 	struct io_context *ioc;
 	int ret = -ENOMEM;
 
@@ -3016,26 +3034,27 @@ static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 	if (!ioc)
 		goto out;
 
-	cic = cfq_alloc_io_context(cfqd, gfp_mask);
+	cic = cfq_alloc_cic(cfqd, gfp_mask);
 	if (!cic)
 		goto out;
+	icq = &cic->icq;
 
 	ret = radix_tree_preload(gfp_mask);
 	if (ret)
 		goto out;
 
-	cic->ioc = ioc;
-	cic->q = cfqd->queue;
+	icq->ioc = ioc;
+	icq->q = cfqd->queue;
 
-	/* lock both q and ioc and try to link @cic */
+	/* lock both q and ioc and try to link @icq */
 	spin_lock_irq(q->queue_lock);
 	spin_lock(&ioc->lock);
 
-	ret = radix_tree_insert(&ioc->radix_root, q->id, cic);
+	ret = radix_tree_insert(&ioc->icq_tree, q->id, icq);
 	if (likely(!ret)) {
-		hlist_add_head(&cic->cic_list, &ioc->cic_list);
-		list_add(&cic->queue_list, &cfqd->cic_list);
-		cic = NULL;
+		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
+		list_add(&icq->q_node, &cfqd->icq_list);
+		icq = NULL;
 	} else if (ret == -EEXIST) {
 		/* someone else already did it */
 		ret = 0;
@@ -3047,29 +3066,28 @@ static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 	radix_tree_preload_end();
 out:
 	if (ret)
-		printk(KERN_ERR "cfq: cic link failed!\n");
-	if (cic)
-		cfq_cic_free(cic);
+		printk(KERN_ERR "cfq: icq link failed!\n");
+	if (icq)
+		cfq_icq_free(icq);
 	return ret;
 }
 
 /**
- * cfq_get_io_context - acquire cfq_io_context and bump refcnt on io_context
+ * cfq_get_cic - acquire cfq_io_cq and bump refcnt on io_context
  * @cfqd: cfqd to setup cic for
  * @gfp_mask: allocation mask
  *
- * Return cfq_io_context associating @cfqd and %current->io_context and
+ * Return cfq_io_cq associating @cfqd and %current->io_context and
  * bump refcnt on io_context.  If ioc or cic doesn't exist, they're created
  * using @gfp_mask.
  *
  * Must be called under queue_lock which may be released and re-acquired.
  * This function also may sleep depending on @gfp_mask.
  */
-static struct cfq_io_context *
-cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
+static struct cfq_io_cq *cfq_get_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
 	struct request_queue *q = cfqd->queue;
-	struct cfq_io_context *cic = NULL;
+	struct cfq_io_cq *cic = NULL;
 	struct io_context *ioc;
 	int err;
 
@@ -3095,11 +3113,11 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 	/* bump @ioc's refcnt and handle changed notifications */
 	get_io_context(ioc);
 
-	if (unlikely(cic->changed)) {
-		if (test_and_clear_bit(CIC_IOPRIO_CHANGED, &cic->changed))
+	if (unlikely(cic->icq.changed)) {
+		if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed))
 			changed_ioprio(cic);
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-		if (test_and_clear_bit(CIC_CGROUP_CHANGED, &cic->changed))
+		if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed))
 			changed_cgroup(cic);
 #endif
 	}
@@ -3120,7 +3138,7 @@ __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
 
 static void
 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-	struct cfq_io_context *cic)
+			struct cfq_io_cq *cic)
 {
 	if (cfq_cfqq_sync(cfqq)) {
 		__cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
@@ -3158,7 +3176,7 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
  */
 static void
 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-		       struct cfq_io_context *cic)
+		       struct cfq_io_cq *cic)
 {
 	int old_idle, enable_idle;
 
@@ -3175,8 +3193,9 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 
 	if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE))
 		enable_idle = 0;
-	else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
-	    (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
+	else if (!atomic_read(&cic->icq.ioc->nr_tasks) ||
+		 !cfqd->cfq_slice_idle ||
+		 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
 		enable_idle = 0;
 	else if (sample_valid(cic->ttime.ttime_samples)) {
 		if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
@@ -3308,7 +3327,7 @@ static void
 cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		struct request *rq)
 {
-	struct cfq_io_context *cic = RQ_CIC(rq);
+	struct cfq_io_cq *cic = RQ_CIC(rq);
 
 	cfqd->rq_queued++;
 	if (rq->cmd_flags & REQ_PRIO)
@@ -3361,7 +3380,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
 	struct cfq_queue *cfqq = RQ_CFQQ(rq);
 
 	cfq_log_cfqq(cfqd, cfqq, "insert_request");
-	cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
+	cfq_init_prio_data(cfqq, RQ_CIC(rq)->icq.ioc);
 
 	rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
 	list_add_tail(&rq->queuelist, &cfqq->fifo);
@@ -3411,7 +3430,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
 
 static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	struct cfq_io_context *cic = cfqd->active_cic;
+	struct cfq_io_cq *cic = cfqd->active_cic;
 
 	/* If the queue already has requests, don't wait */
 	if (!RB_EMPTY_ROOT(&cfqq->sort_list))
@@ -3548,7 +3567,7 @@ static int cfq_may_queue(struct request_queue *q, int rw)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct task_struct *tsk = current;
-	struct cfq_io_context *cic;
+	struct cfq_io_cq *cic;
 	struct cfq_queue *cfqq;
 
 	/*
@@ -3563,7 +3582,7 @@ static int cfq_may_queue(struct request_queue *q, int rw)
 
 	cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
 	if (cfqq) {
-		cfq_init_prio_data(cfqq, cic->ioc);
+		cfq_init_prio_data(cfqq, cic->icq.ioc);
 
 		return __cfq_may_queue(cfqq);
 	}
@@ -3584,7 +3603,7 @@ static void cfq_put_request(struct request *rq)
 		BUG_ON(!cfqq->allocated[rw]);
 		cfqq->allocated[rw]--;
 
-		put_io_context(RQ_CIC(rq)->ioc, cfqq->cfqd->queue);
+		put_io_context(RQ_CIC(rq)->icq.ioc, cfqq->cfqd->queue);
 
 		rq->elevator_private[0] = NULL;
 		rq->elevator_private[1] = NULL;
@@ -3598,7 +3617,7 @@ static void cfq_put_request(struct request *rq)
 }
 
 static struct cfq_queue *
-cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic,
+cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_cq *cic,
 		struct cfq_queue *cfqq)
 {
 	cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq);
@@ -3613,7 +3632,7 @@ cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic,
  * was the last process referring to said cfqq.
  */
 static struct cfq_queue *
-split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq)
+split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq)
 {
 	if (cfqq_process_refs(cfqq) == 1) {
 		cfqq->pid = current->pid;
@@ -3636,7 +3655,7 @@ static int
 cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_io_context *cic;
+	struct cfq_io_cq *cic;
 	const int rw = rq_data_dir(rq);
 	const bool is_sync = rq_is_sync(rq);
 	struct cfq_queue *cfqq;
@@ -3644,14 +3663,14 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
 	spin_lock_irq(q->queue_lock);
-	cic = cfq_get_io_context(cfqd, gfp_mask);
+	cic = cfq_get_cic(cfqd, gfp_mask);
 	if (!cic)
 		goto queue_fail;
 
 new_queue:
 	cfqq = cic_to_cfqq(cic, is_sync);
 	if (!cfqq || cfqq == &cfqd->oom_cfqq) {
-		cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
+		cfqq = cfq_get_queue(cfqd, is_sync, cic->icq.ioc, gfp_mask);
 		cic_set_cfqq(cic, cfqq, is_sync);
 	} else {
 		/*
@@ -3677,7 +3696,7 @@ new_queue:
 	cfqq->allocated[rw]++;
 
 	cfqq->ref++;
-	rq->elevator_private[0] = cic;
+	rq->elevator_private[0] = &cic->icq;
 	rq->elevator_private[1] = cfqq;
 	rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg);
 	spin_unlock_irq(q->queue_lock);
@@ -3791,15 +3810,14 @@ static void cfq_exit_queue(struct elevator_queue *e)
 	if (cfqd->active_queue)
 		__cfq_slice_expired(cfqd, cfqd->active_queue, 0);
 
-	while (!list_empty(&cfqd->cic_list)) {
-		struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
-							struct cfq_io_context,
-							queue_list);
-		struct io_context *ioc = cic->ioc;
+	while (!list_empty(&cfqd->icq_list)) {
+		struct io_cq *icq = list_entry(cfqd->icq_list.next,
+					       struct io_cq, q_node);
+		struct io_context *ioc = icq->ioc;
 
 		spin_lock(&ioc->lock);
-		cfq_exit_cic(cic);
-		cfq_release_cic(cic);
+		cfq_exit_icq(icq);
+		cfq_release_icq(icq);
 		spin_unlock(&ioc->lock);
 	}
 
@@ -3904,7 +3922,7 @@ static void *cfq_init_queue(struct request_queue *q)
 	cfqd->oom_cfqq.ref++;
 	cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
 
-	INIT_LIST_HEAD(&cfqd->cic_list);
+	INIT_LIST_HEAD(&cfqd->icq_list);
 
 	cfqd->queue = q;
 
@@ -3942,8 +3960,8 @@ static void cfq_slab_kill(void)
 	 */
 	if (cfq_pool)
 		kmem_cache_destroy(cfq_pool);
-	if (cfq_ioc_pool)
-		kmem_cache_destroy(cfq_ioc_pool);
+	if (cfq_icq_pool)
+		kmem_cache_destroy(cfq_icq_pool);
 }
 
 static int __init cfq_slab_setup(void)
@@ -3952,8 +3970,8 @@ static int __init cfq_slab_setup(void)
 	if (!cfq_pool)
 		goto fail;
 
-	cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0);
-	if (!cfq_ioc_pool)
+	cfq_icq_pool = KMEM_CACHE(cfq_io_cq, 0);
+	if (!cfq_icq_pool)
 		goto fail;
 
 	return 0;
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index b2b75a54f252..d15ca6591f96 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -5,38 +5,23 @@
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
 
-struct cfq_queue;
-struct cfq_ttime {
-	unsigned long last_end_request;
-
-	unsigned long ttime_total;
-	unsigned long ttime_samples;
-	unsigned long ttime_mean;
-};
-
 enum {
-	CIC_IOPRIO_CHANGED,
-	CIC_CGROUP_CHANGED,
+	ICQ_IOPRIO_CHANGED,
+	ICQ_CGROUP_CHANGED,
 };
 
-struct cfq_io_context {
-	struct request_queue *q;
-
-	struct cfq_queue *cfqq[2];
-
-	struct io_context *ioc;
-
-	struct cfq_ttime ttime;
-
-	struct list_head queue_list;
-	struct hlist_node cic_list;
+struct io_cq {
+	struct request_queue	*q;
+	struct io_context	*ioc;
 
-	unsigned long changed;
+	struct list_head	q_node;
+	struct hlist_node	ioc_node;
 
-	void (*exit)(struct cfq_io_context *);
-	void (*release)(struct cfq_io_context *);
+	unsigned long		changed;
+	struct rcu_head		rcu_head;
 
-	struct rcu_head rcu_head;
+	void (*exit)(struct io_cq *);
+	void (*release)(struct io_cq *);
 };
 
 /*
@@ -58,9 +43,9 @@ struct io_context {
 	int nr_batch_requests;     /* Number of requests left in the batch */
 	unsigned long last_waited; /* Time last woken after wait for request */
 
-	struct radix_tree_root radix_root;
-	struct hlist_head cic_list;
-	void __rcu *ioc_data;
+	struct radix_tree_root	icq_tree;
+	struct io_cq __rcu	*icq_hint;
+	struct hlist_head	icq_list;
 
 	struct work_struct release_work;
 };
-- 
cgit v1.2.3


From a612fddf0d8090f2877305c9168b6c1a34fb5d90 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:41 +0100
Subject: block, cfq: move cfqd->icq_list to request_queue and add
 request->elv.icq

Most of icq management is about to be moved out of cfq into blk-ioc.
This patch prepares for it.

* Move cfqd->icq_list to request_queue->icq_list

* Make request explicitly point to icq instead of through elevator
  private data.  ->elevator_private[3] is replaced with sub struct elv
  which contains icq pointer and priv[2].  cfq is updated accordingly.

* Meaningless clearing of ->elevator_private[0] removed from
  elv_set_request().  At that point in code, the field was guaranteed
  to be %NULL anyway.

This patch doesn't introduce any functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       |  1 +
 block/cfq-iosched.c    | 28 +++++++++++-----------------
 block/elevator.c       |  2 --
 include/linux/blkdev.h | 10 ++++++++--
 4 files changed, 20 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 6804fdf27eff..3c26c7f48703 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -497,6 +497,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 		    laptop_mode_timer_fn, (unsigned long) q);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
 	INIT_LIST_HEAD(&q->timeout_list);
+	INIT_LIST_HEAD(&q->icq_list);
 	INIT_LIST_HEAD(&q->flush_queue[0]);
 	INIT_LIST_HEAD(&q->flush_queue[1]);
 	INIT_LIST_HEAD(&q->flush_data_in_flight);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d2f16fcdec7f..9bc5ecc1b336 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -54,9 +54,9 @@ static const int cfq_hist_divisor = 4;
 #define CFQQ_SECT_THR_NONROT	(sector_t)(2 * 32)
 #define CFQQ_SEEKY(cfqq)	(hweight32(cfqq->seek_history) > 32/8)
 
-#define RQ_CIC(rq)		icq_to_cic((rq)->elevator_private[0])
-#define RQ_CFQQ(rq)		(struct cfq_queue *) ((rq)->elevator_private[1])
-#define RQ_CFQG(rq)		(struct cfq_group *) ((rq)->elevator_private[2])
+#define RQ_CIC(rq)		icq_to_cic((rq)->elv.icq)
+#define RQ_CFQQ(rq)		(struct cfq_queue *) ((rq)->elv.priv[0])
+#define RQ_CFQG(rq)		(struct cfq_group *) ((rq)->elv.priv[1])
 
 static struct kmem_cache *cfq_pool;
 static struct kmem_cache *cfq_icq_pool;
@@ -297,8 +297,6 @@ struct cfq_data {
 	unsigned int cfq_group_idle;
 	unsigned int cfq_latency;
 
-	struct list_head icq_list;
-
 	/*
 	 * Fallback dummy cfqq for extreme OOM conditions
 	 */
@@ -3053,7 +3051,7 @@ static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 	ret = radix_tree_insert(&ioc->icq_tree, q->id, icq);
 	if (likely(!ret)) {
 		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
-		list_add(&icq->q_node, &cfqd->icq_list);
+		list_add(&icq->q_node, &q->icq_list);
 		icq = NULL;
 	} else if (ret == -EEXIST) {
 		/* someone else already did it */
@@ -3605,12 +3603,10 @@ static void cfq_put_request(struct request *rq)
 
 		put_io_context(RQ_CIC(rq)->icq.ioc, cfqq->cfqd->queue);
 
-		rq->elevator_private[0] = NULL;
-		rq->elevator_private[1] = NULL;
-
 		/* Put down rq reference on cfqg */
 		cfq_put_cfqg(RQ_CFQG(rq));
-		rq->elevator_private[2] = NULL;
+		rq->elv.priv[0] = NULL;
+		rq->elv.priv[1] = NULL;
 
 		cfq_put_queue(cfqq);
 	}
@@ -3696,9 +3692,9 @@ new_queue:
 	cfqq->allocated[rw]++;
 
 	cfqq->ref++;
-	rq->elevator_private[0] = &cic->icq;
-	rq->elevator_private[1] = cfqq;
-	rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg);
+	rq->elv.icq = &cic->icq;
+	rq->elv.priv[0] = cfqq;
+	rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg);
 	spin_unlock_irq(q->queue_lock);
 	return 0;
 
@@ -3810,8 +3806,8 @@ static void cfq_exit_queue(struct elevator_queue *e)
 	if (cfqd->active_queue)
 		__cfq_slice_expired(cfqd, cfqd->active_queue, 0);
 
-	while (!list_empty(&cfqd->icq_list)) {
-		struct io_cq *icq = list_entry(cfqd->icq_list.next,
+	while (!list_empty(&q->icq_list)) {
+		struct io_cq *icq = list_entry(q->icq_list.next,
 					       struct io_cq, q_node);
 		struct io_context *ioc = icq->ioc;
 
@@ -3922,8 +3918,6 @@ static void *cfq_init_queue(struct request_queue *q)
 	cfqd->oom_cfqq.ref++;
 	cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
 
-	INIT_LIST_HEAD(&cfqd->icq_list);
-
 	cfqd->queue = q;
 
 	init_timer(&cfqd->idle_slice_timer);
diff --git a/block/elevator.c b/block/elevator.c
index 31ffe76aed3d..c5c6214829cb 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -745,8 +745,6 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 
 	if (e->type->ops.elevator_set_req_fn)
 		return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask);
-
-	rq->elevator_private[0] = NULL;
 	return 0;
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 65c2f8c70089..8bca04873f53 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -111,10 +111,14 @@ struct request {
 	 * Three pointers are available for the IO schedulers, if they need
 	 * more they have to dynamically allocate it.  Flush requests are
 	 * never put on the IO scheduler. So let the flush fields share
-	 * space with the three elevator_private pointers.
+	 * space with the elevator data.
 	 */
 	union {
-		void *elevator_private[3];
+		struct {
+			struct io_cq		*icq;
+			void			*priv[2];
+		} elv;
+
 		struct {
 			unsigned int		seq;
 			struct list_head	list;
@@ -357,6 +361,8 @@ struct request_queue {
 	struct timer_list	timeout;
 	struct list_head	timeout_list;
 
+	struct list_head	icq_list;
+
 	struct queue_limits	limits;
 
 	/*
-- 
cgit v1.2.3


From 3d3c2379feb177a5fd55bb0ed76776dc9d4f3243 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:42 +0100
Subject: block, cfq: move icq cache management to block core

Let elevators set ->icq_size and ->icq_align in elevator_type and
elv_register() and elv_unregister() respectively create and destroy
kmem_cache for icq.

* elv_register() now can return failure.  All callers updated.

* icq caches are automatically named "ELVNAME_io_cq".

* cfq_slab_setup/kill() are collapsed into cfq_init/exit().

* While at it, minor indentation change for iosched_cfq.elevator_name
  for consistency.

This will help moving icq management to block core.  This doesn't
introduce any functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c      | 48 +++++++++++++++---------------------------------
 block/deadline-iosched.c |  4 +---
 block/elevator.c         | 37 +++++++++++++++++++++++++++++++++++--
 block/noop-iosched.c     |  4 +---
 include/linux/elevator.h | 11 ++++++++++-
 5 files changed, 62 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 048fa699adf9..06e59abcb57f 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3914,34 +3914,6 @@ static void *cfq_init_queue(struct request_queue *q)
 	return cfqd;
 }
 
-static void cfq_slab_kill(void)
-{
-	/*
-	 * Caller already ensured that pending RCU callbacks are completed,
-	 * so we should have no busy allocations at this point.
-	 */
-	if (cfq_pool)
-		kmem_cache_destroy(cfq_pool);
-	if (cfq_icq_pool)
-		kmem_cache_destroy(cfq_icq_pool);
-}
-
-static int __init cfq_slab_setup(void)
-{
-	cfq_pool = KMEM_CACHE(cfq_queue, 0);
-	if (!cfq_pool)
-		goto fail;
-
-	cfq_icq_pool = KMEM_CACHE(cfq_io_cq, 0);
-	if (!cfq_icq_pool)
-		goto fail;
-
-	return 0;
-fail:
-	cfq_slab_kill();
-	return -ENOMEM;
-}
-
 /*
  * sysfs parts below -->
  */
@@ -4053,8 +4025,10 @@ static struct elevator_type iosched_cfq = {
 		.elevator_init_fn =		cfq_init_queue,
 		.elevator_exit_fn =		cfq_exit_queue,
 	},
+	.icq_size	=	sizeof(struct cfq_io_cq),
+	.icq_align	=	__alignof__(struct cfq_io_cq),
 	.elevator_attrs =	cfq_attrs,
-	.elevator_name =	"cfq",
+	.elevator_name	=	"cfq",
 	.elevator_owner =	THIS_MODULE,
 };
 
@@ -4072,6 +4046,8 @@ static struct blkio_policy_type blkio_policy_cfq;
 
 static int __init cfq_init(void)
 {
+	int ret;
+
 	/*
 	 * could be 0 on HZ < 1000 setups
 	 */
@@ -4086,10 +4062,17 @@ static int __init cfq_init(void)
 #else
 		cfq_group_idle = 0;
 #endif
-	if (cfq_slab_setup())
+	cfq_pool = KMEM_CACHE(cfq_queue, 0);
+	if (!cfq_pool)
 		return -ENOMEM;
 
-	elv_register(&iosched_cfq);
+	ret = elv_register(&iosched_cfq);
+	if (ret) {
+		kmem_cache_destroy(cfq_pool);
+		return ret;
+	}
+	cfq_icq_pool = iosched_cfq.icq_cache;
+
 	blkio_policy_register(&blkio_policy_cfq);
 
 	return 0;
@@ -4099,8 +4082,7 @@ static void __exit cfq_exit(void)
 {
 	blkio_policy_unregister(&blkio_policy_cfq);
 	elv_unregister(&iosched_cfq);
-	rcu_barrier();	/* make sure all cic RCU frees are complete */
-	cfq_slab_kill();
+	kmem_cache_destroy(cfq_pool);
 }
 
 module_init(cfq_init);
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index c644137d9cd6..7bf12d793fcd 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -448,9 +448,7 @@ static struct elevator_type iosched_deadline = {
 
 static int __init deadline_init(void)
 {
-	elv_register(&iosched_deadline);
-
-	return 0;
+	return elv_register(&iosched_deadline);
 }
 
 static void __exit deadline_exit(void)
diff --git a/block/elevator.c b/block/elevator.c
index c5c6214829cb..cca049fb45c8 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -886,15 +886,36 @@ void elv_unregister_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(elv_unregister_queue);
 
-void elv_register(struct elevator_type *e)
+int elv_register(struct elevator_type *e)
 {
 	char *def = "";
 
+	/* create icq_cache if requested */
+	if (e->icq_size) {
+		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
+		    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
+			return -EINVAL;
+
+		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
+			 "%s_io_cq", e->elevator_name);
+		e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
+						 e->icq_align, 0, NULL);
+		if (!e->icq_cache)
+			return -ENOMEM;
+	}
+
+	/* register, don't allow duplicate names */
 	spin_lock(&elv_list_lock);
-	BUG_ON(elevator_find(e->elevator_name));
+	if (elevator_find(e->elevator_name)) {
+		spin_unlock(&elv_list_lock);
+		if (e->icq_cache)
+			kmem_cache_destroy(e->icq_cache);
+		return -EBUSY;
+	}
 	list_add_tail(&e->list, &elv_list);
 	spin_unlock(&elv_list_lock);
 
+	/* print pretty message */
 	if (!strcmp(e->elevator_name, chosen_elevator) ||
 			(!*chosen_elevator &&
 			 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
@@ -902,14 +923,26 @@ void elv_register(struct elevator_type *e)
 
 	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
 								def);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(elv_register);
 
 void elv_unregister(struct elevator_type *e)
 {
+	/* unregister */
 	spin_lock(&elv_list_lock);
 	list_del_init(&e->list);
 	spin_unlock(&elv_list_lock);
+
+	/*
+	 * Destroy icq_cache if it exists.  icq's are RCU managed.  Make
+	 * sure all RCU operations are complete before proceeding.
+	 */
+	if (e->icq_cache) {
+		rcu_barrier();
+		kmem_cache_destroy(e->icq_cache);
+		e->icq_cache = NULL;
+	}
 }
 EXPORT_SYMBOL_GPL(elv_unregister);
 
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index 06389e9ef96d..413a0b1d788c 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -94,9 +94,7 @@ static struct elevator_type elevator_noop = {
 
 static int __init noop_init(void)
 {
-	elv_register(&elevator_noop);
-
-	return 0;
+	return elv_register(&elevator_noop);
 }
 
 static void __exit noop_exit(void)
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 04958ef53e62..d3d3e28cbfd4 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -78,10 +78,19 @@ struct elv_fs_entry {
  */
 struct elevator_type
 {
+	/* managed by elevator core */
+	struct kmem_cache *icq_cache;
+
+	/* fields provided by elevator implementation */
 	struct elevator_ops ops;
+	size_t icq_size;
+	size_t icq_align;
 	struct elv_fs_entry *elevator_attrs;
 	char elevator_name[ELV_NAME_MAX];
 	struct module *elevator_owner;
+
+	/* managed by elevator core */
+	char icq_cache_name[ELV_NAME_MAX + 5];	/* elvname + "_io_cq" */
 	struct list_head list;
 };
 
@@ -127,7 +136,7 @@ extern void elv_drain_elevator(struct request_queue *);
 /*
  * io scheduler registration
  */
-extern void elv_register(struct elevator_type *);
+extern int elv_register(struct elevator_type *);
 extern void elv_unregister(struct elevator_type *);
 
 /*
-- 
cgit v1.2.3


From 7e5a8794492e43e9eebb68a98a23be055888ccd0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:42 +0100
Subject: block, cfq: move io_cq exit/release to blk-ioc.c

With kmem_cache managed by blk-ioc, io_cq exit/release can be moved to
blk-ioc too.  The odd ->io_cq->exit/release() callbacks are replaced
with elevator_ops->elevator_exit_icq_fn() with unlinking from both ioc
and q, and freeing automatically handled by blk-ioc.  The elevator
operation only need to perform exit operation specific to the elevator
- in cfq's case, exiting the cfqq's.

Also, clearing of io_cq's on q detach is moved to block core and
automatically performed on elevator switch and q release.

Because the q io_cq points to might be freed before RCU callback for
the io_cq runs, blk-ioc code should remember to which cache the io_cq
needs to be freed when the io_cq is released.  New field
io_cq->__rcu_icq_cache is added for this purpose.  As both the new
field and rcu_head are used only after io_cq is released and the
q/ioc_node fields aren't, they are put into unions.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-ioc.c           | 76 ++++++++++++++++++++++++++++++++++++++++++-----
 block/blk-sysfs.c         |  6 +++-
 block/blk.h               |  1 +
 block/cfq-iosched.c       | 47 ++---------------------------
 block/elevator.c          |  3 +-
 include/linux/elevator.h  |  5 ++++
 include/linux/iocontext.h | 20 +++++++++----
 7 files changed, 97 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 87ecc98b8ade..0910a5584d38 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -44,6 +44,51 @@ EXPORT_SYMBOL(get_io_context);
 #define ioc_release_depth_dec(q)	do { } while (0)
 #endif
 
+static void icq_free_icq_rcu(struct rcu_head *head)
+{
+	struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);
+
+	kmem_cache_free(icq->__rcu_icq_cache, icq);
+}
+
+/*
+ * Exit and free an icq.  Called with both ioc and q locked.
+ */
+static void ioc_exit_icq(struct io_cq *icq)
+{
+	struct io_context *ioc = icq->ioc;
+	struct request_queue *q = icq->q;
+	struct elevator_type *et = q->elevator->type;
+
+	lockdep_assert_held(&ioc->lock);
+	lockdep_assert_held(q->queue_lock);
+
+	radix_tree_delete(&ioc->icq_tree, icq->q->id);
+	hlist_del_init(&icq->ioc_node);
+	list_del_init(&icq->q_node);
+
+	/*
+	 * Both setting lookup hint to and clearing it from @icq are done
+	 * under queue_lock.  If it's not pointing to @icq now, it never
+	 * will.  Hint assignment itself can race safely.
+	 */
+	if (rcu_dereference_raw(ioc->icq_hint) == icq)
+		rcu_assign_pointer(ioc->icq_hint, NULL);
+
+	if (et->ops.elevator_exit_icq_fn) {
+		ioc_release_depth_inc(q);
+		et->ops.elevator_exit_icq_fn(icq);
+		ioc_release_depth_dec(q);
+	}
+
+	/*
+	 * @icq->q might have gone away by the time RCU callback runs
+	 * making it impossible to determine icq_cache.  Record it in @icq.
+	 */
+	icq->__rcu_icq_cache = et->icq_cache;
+	call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
+}
+
 /*
  * Slow path for ioc release in put_io_context().  Performs double-lock
  * dancing to unlink all icq's and then frees ioc.
@@ -87,10 +132,7 @@ static void ioc_release_fn(struct work_struct *work)
 			spin_lock(&ioc->lock);
 			continue;
 		}
-		ioc_release_depth_inc(this_q);
-		icq->exit(icq);
-		icq->release(icq);
-		ioc_release_depth_dec(this_q);
+		ioc_exit_icq(icq);
 	}
 
 	if (last_q) {
@@ -167,10 +209,7 @@ void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
 			last_q = this_q;
 			continue;
 		}
-		ioc_release_depth_inc(this_q);
-		icq->exit(icq);
-		icq->release(icq);
-		ioc_release_depth_dec(this_q);
+		ioc_exit_icq(icq);
 	}
 
 	if (last_q && last_q != locked_q)
@@ -203,6 +242,27 @@ void exit_io_context(struct task_struct *task)
 	put_io_context(ioc, NULL);
 }
 
+/**
+ * ioc_clear_queue - break any ioc association with the specified queue
+ * @q: request_queue being cleared
+ *
+ * Walk @q->icq_list and exit all io_cq's.  Must be called with @q locked.
+ */
+void ioc_clear_queue(struct request_queue *q)
+{
+	lockdep_assert_held(q->queue_lock);
+
+	while (!list_empty(&q->icq_list)) {
+		struct io_cq *icq = list_entry(q->icq_list.next,
+					       struct io_cq, q_node);
+		struct io_context *ioc = icq->ioc;
+
+		spin_lock(&ioc->lock);
+		ioc_exit_icq(icq);
+		spin_unlock(&ioc->lock);
+	}
+}
+
 void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
 				int node)
 {
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 5b4b4ab5e785..cf150011d808 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -479,8 +479,12 @@ static void blk_release_queue(struct kobject *kobj)
 
 	blk_sync_queue(q);
 
-	if (q->elevator)
+	if (q->elevator) {
+		spin_lock_irq(q->queue_lock);
+		ioc_clear_queue(q);
+		spin_unlock_irq(q->queue_lock);
 		elevator_exit(q->elevator);
+	}
 
 	blk_throtl_exit(q);
 
diff --git a/block/blk.h b/block/blk.h
index 3c510a4b5054..ed4d9bf2ab16 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -200,6 +200,7 @@ static inline int blk_do_io_stat(struct request *rq)
  */
 void get_io_context(struct io_context *ioc);
 struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q);
+void ioc_clear_queue(struct request_queue *q);
 
 void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_mask,
 				int node);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 06e59abcb57f..f6d315551496 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2674,26 +2674,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	cfq_put_cfqg(cfqg);
 }
 
-static void cfq_icq_free_rcu(struct rcu_head *head)
-{
-	kmem_cache_free(cfq_icq_pool,
-			icq_to_cic(container_of(head, struct io_cq, rcu_head)));
-}
-
-static void cfq_icq_free(struct io_cq *icq)
-{
-	call_rcu(&icq->rcu_head, cfq_icq_free_rcu);
-}
-
-static void cfq_release_icq(struct io_cq *icq)
-{
-	struct io_context *ioc = icq->ioc;
-
-	radix_tree_delete(&ioc->icq_tree, icq->q->id);
-	hlist_del(&icq->ioc_node);
-	cfq_icq_free(icq);
-}
-
 static void cfq_put_cooperator(struct cfq_queue *cfqq)
 {
 	struct cfq_queue *__cfqq, *next;
@@ -2731,17 +2711,6 @@ static void cfq_exit_icq(struct io_cq *icq)
 {
 	struct cfq_io_cq *cic = icq_to_cic(icq);
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
-	struct io_context *ioc = icq->ioc;
-
-	list_del_init(&icq->q_node);
-
-	/*
-	 * Both setting lookup hint to and clearing it from @icq are done
-	 * under queue_lock.  If it's not pointing to @icq now, it never
-	 * will.  Hint assignment itself can race safely.
-	 */
-	if (rcu_dereference_raw(ioc->icq_hint) == icq)
-		rcu_assign_pointer(ioc->icq_hint, NULL);
 
 	if (cic->cfqq[BLK_RW_ASYNC]) {
 		cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
@@ -2764,8 +2733,6 @@ static struct cfq_io_cq *cfq_alloc_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 		cic->ttime.last_end_request = jiffies;
 		INIT_LIST_HEAD(&cic->icq.q_node);
 		INIT_HLIST_NODE(&cic->icq.ioc_node);
-		cic->icq.exit = cfq_exit_icq;
-		cic->icq.release = cfq_release_icq;
 	}
 
 	return cic;
@@ -3034,7 +3001,7 @@ out:
 	if (ret)
 		printk(KERN_ERR "cfq: icq link failed!\n");
 	if (icq)
-		cfq_icq_free(icq);
+		kmem_cache_free(cfq_icq_pool, icq);
 	return ret;
 }
 
@@ -3774,17 +3741,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
 	if (cfqd->active_queue)
 		__cfq_slice_expired(cfqd, cfqd->active_queue, 0);
 
-	while (!list_empty(&q->icq_list)) {
-		struct io_cq *icq = list_entry(q->icq_list.next,
-					       struct io_cq, q_node);
-		struct io_context *ioc = icq->ioc;
-
-		spin_lock(&ioc->lock);
-		cfq_exit_icq(icq);
-		cfq_release_icq(icq);
-		spin_unlock(&ioc->lock);
-	}
-
 	cfq_put_async_queues(cfqd);
 	cfq_release_cfq_groups(cfqd);
 
@@ -4019,6 +3975,7 @@ static struct elevator_type iosched_cfq = {
 		.elevator_completed_req_fn =	cfq_completed_request,
 		.elevator_former_req_fn =	elv_rb_former_request,
 		.elevator_latter_req_fn =	elv_rb_latter_request,
+		.elevator_exit_icq_fn =		cfq_exit_icq,
 		.elevator_set_req_fn =		cfq_set_request,
 		.elevator_put_req_fn =		cfq_put_request,
 		.elevator_may_queue_fn =	cfq_may_queue,
diff --git a/block/elevator.c b/block/elevator.c
index cca049fb45c8..91e18f8af9be 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -979,8 +979,9 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 			goto fail_register;
 	}
 
-	/* done, replace the old one with new one and turn off BYPASS */
+	/* done, clear io_cq's, switch elevators and turn off BYPASS */
 	spin_lock_irq(q->queue_lock);
+	ioc_clear_queue(q);
 	old_elevator = q->elevator;
 	q->elevator = e;
 	spin_unlock_irq(q->queue_lock);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index d3d3e28cbfd4..06e4dd568717 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -5,6 +5,8 @@
 
 #ifdef CONFIG_BLOCK
 
+struct io_cq;
+
 typedef int (elevator_merge_fn) (struct request_queue *, struct request **,
 				 struct bio *);
 
@@ -24,6 +26,7 @@ typedef struct request *(elevator_request_list_fn) (struct request_queue *, stru
 typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
 typedef int (elevator_may_queue_fn) (struct request_queue *, int);
 
+typedef void (elevator_exit_icq_fn) (struct io_cq *);
 typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
 typedef void (elevator_put_req_fn) (struct request *);
 typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
@@ -56,6 +59,8 @@ struct elevator_ops
 	elevator_request_list_fn *elevator_former_req_fn;
 	elevator_request_list_fn *elevator_latter_req_fn;
 
+	elevator_exit_icq_fn *elevator_exit_icq_fn;
+
 	elevator_set_req_fn *elevator_set_req_fn;
 	elevator_put_req_fn *elevator_put_req_fn;
 
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index d15ca6591f96..ac390a34c0e7 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -14,14 +14,22 @@ struct io_cq {
 	struct request_queue	*q;
 	struct io_context	*ioc;
 
-	struct list_head	q_node;
-	struct hlist_node	ioc_node;
+	/*
+	 * q_node and ioc_node link io_cq through icq_list of q and ioc
+	 * respectively.  Both fields are unused once ioc_exit_icq() is
+	 * called and shared with __rcu_icq_cache and __rcu_head which are
+	 * used for RCU free of io_cq.
+	 */
+	union {
+		struct list_head	q_node;
+		struct kmem_cache	*__rcu_icq_cache;
+	};
+	union {
+		struct hlist_node	ioc_node;
+		struct rcu_head		__rcu_head;
+	};
 
 	unsigned long		changed;
-	struct rcu_head		rcu_head;
-
-	void (*exit)(struct io_cq *);
-	void (*release)(struct io_cq *);
 };
 
 /*
-- 
cgit v1.2.3


From 9b84cacd013996f244d85b3d873287c2a8f88658 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:42 +0100
Subject: block, cfq: restructure io_cq creation path for io_context interface
 cleanup

Add elevator_ops->elevator_init_icq_fn() and restructure
cfq_create_cic() and rename it to ioc_create_icq().

The new function expects its caller to pass in io_context, uses
elevator_type->icq_cache, handles generic init, calls the new elevator
operation for elevator specific initialization, and returns pointer to
created or looked up icq.  This leaves cfq_icq_pool variable without
any user.  Removed.

This prepares for io_context interface cleanup and doesn't introduce
any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c      | 94 +++++++++++++++++++++---------------------------
 include/linux/elevator.h |  2 ++
 2 files changed, 43 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index f6d315551496..11f49d036845 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -59,7 +59,6 @@ static const int cfq_hist_divisor = 4;
 #define RQ_CFQG(rq)		(struct cfq_group *) ((rq)->elv.priv[1])
 
 static struct kmem_cache *cfq_pool;
-static struct kmem_cache *cfq_icq_pool;
 
 #define CFQ_PRIO_LISTS		IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
@@ -2707,6 +2706,13 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	cfq_put_queue(cfqq);
 }
 
+static void cfq_init_icq(struct io_cq *icq)
+{
+	struct cfq_io_cq *cic = icq_to_cic(icq);
+
+	cic->ttime.last_end_request = jiffies;
+}
+
 static void cfq_exit_icq(struct io_cq *icq)
 {
 	struct cfq_io_cq *cic = icq_to_cic(icq);
@@ -2723,21 +2729,6 @@ static void cfq_exit_icq(struct io_cq *icq)
 	}
 }
 
-static struct cfq_io_cq *cfq_alloc_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
-{
-	struct cfq_io_cq *cic;
-
-	cic = kmem_cache_alloc_node(cfq_icq_pool, gfp_mask | __GFP_ZERO,
-							cfqd->queue->node);
-	if (cic) {
-		cic->ttime.last_end_request = jiffies;
-		INIT_LIST_HEAD(&cic->icq.q_node);
-		INIT_HLIST_NODE(&cic->icq.ioc_node);
-	}
-
-	return cic;
-}
-
 static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 {
 	struct task_struct *tsk = current;
@@ -2945,64 +2936,62 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
 }
 
 /**
- * cfq_create_cic - create and link a cfq_io_cq
- * @cfqd: cfqd of interest
+ * ioc_create_icq - create and link io_cq
+ * @q: request_queue of interest
  * @gfp_mask: allocation mask
  *
- * Make sure cfq_io_cq linking %current->io_context and @cfqd exists.  If
- * ioc and/or cic doesn't exist, they will be created using @gfp_mask.
+ * Make sure io_cq linking %current->io_context and @q exists.  If either
+ * io_context and/or icq don't exist, they will be created using @gfp_mask.
+ *
+ * The caller is responsible for ensuring @ioc won't go away and @q is
+ * alive and will stay alive until this function returns.
  */
-static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
+static struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
 {
-	struct request_queue *q = cfqd->queue;
-	struct io_cq *icq = NULL;
-	struct cfq_io_cq *cic;
+	struct elevator_type *et = q->elevator->type;
 	struct io_context *ioc;
-	int ret = -ENOMEM;
-
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	struct io_cq *icq;
 
 	/* allocate stuff */
 	ioc = create_io_context(current, gfp_mask, q->node);
 	if (!ioc)
-		goto out;
+		return NULL;
 
-	cic = cfq_alloc_cic(cfqd, gfp_mask);
-	if (!cic)
-		goto out;
-	icq = &cic->icq;
+	icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
+				    q->node);
+	if (!icq)
+		return NULL;
 
-	ret = radix_tree_preload(gfp_mask);
-	if (ret)
-		goto out;
+	if (radix_tree_preload(gfp_mask) < 0) {
+		kmem_cache_free(et->icq_cache, icq);
+		return NULL;
+	}
 
 	icq->ioc = ioc;
-	icq->q = cfqd->queue;
+	icq->q = q;
+	INIT_LIST_HEAD(&icq->q_node);
+	INIT_HLIST_NODE(&icq->ioc_node);
 
 	/* lock both q and ioc and try to link @icq */
 	spin_lock_irq(q->queue_lock);
 	spin_lock(&ioc->lock);
 
-	ret = radix_tree_insert(&ioc->icq_tree, q->id, icq);
-	if (likely(!ret)) {
+	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
 		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
 		list_add(&icq->q_node, &q->icq_list);
-		icq = NULL;
-	} else if (ret == -EEXIST) {
-		/* someone else already did it */
-		ret = 0;
+		if (et->ops.elevator_init_icq_fn)
+			et->ops.elevator_init_icq_fn(icq);
+	} else {
+		kmem_cache_free(et->icq_cache, icq);
+		icq = ioc_lookup_icq(ioc, q);
+		if (!icq)
+			printk(KERN_ERR "cfq: icq link failed!\n");
 	}
 
 	spin_unlock(&ioc->lock);
 	spin_unlock_irq(q->queue_lock);
-
 	radix_tree_preload_end();
-out:
-	if (ret)
-		printk(KERN_ERR "cfq: icq link failed!\n");
-	if (icq)
-		kmem_cache_free(cfq_icq_pool, icq);
-	return ret;
+	return icq;
 }
 
 /**
@@ -3022,7 +3011,6 @@ static struct cfq_io_cq *cfq_get_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 	struct request_queue *q = cfqd->queue;
 	struct cfq_io_cq *cic = NULL;
 	struct io_context *ioc;
-	int err;
 
 	lockdep_assert_held(q->queue_lock);
 
@@ -3037,9 +3025,9 @@ static struct cfq_io_cq *cfq_get_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 
 		/* slow path - unlock, create missing ones and retry */
 		spin_unlock_irq(q->queue_lock);
-		err = cfq_create_cic(cfqd, gfp_mask);
+		cic = icq_to_cic(ioc_create_icq(q, gfp_mask));
 		spin_lock_irq(q->queue_lock);
-		if (err)
+		if (!cic)
 			return NULL;
 	}
 
@@ -3975,6 +3963,7 @@ static struct elevator_type iosched_cfq = {
 		.elevator_completed_req_fn =	cfq_completed_request,
 		.elevator_former_req_fn =	elv_rb_former_request,
 		.elevator_latter_req_fn =	elv_rb_latter_request,
+		.elevator_init_icq_fn =		cfq_init_icq,
 		.elevator_exit_icq_fn =		cfq_exit_icq,
 		.elevator_set_req_fn =		cfq_set_request,
 		.elevator_put_req_fn =		cfq_put_request,
@@ -4028,7 +4017,6 @@ static int __init cfq_init(void)
 		kmem_cache_destroy(cfq_pool);
 		return ret;
 	}
-	cfq_icq_pool = iosched_cfq.icq_cache;
 
 	blkio_policy_register(&blkio_policy_cfq);
 
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 06e4dd568717..c8f1e67a8ebe 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -26,6 +26,7 @@ typedef struct request *(elevator_request_list_fn) (struct request_queue *, stru
 typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
 typedef int (elevator_may_queue_fn) (struct request_queue *, int);
 
+typedef void (elevator_init_icq_fn) (struct io_cq *);
 typedef void (elevator_exit_icq_fn) (struct io_cq *);
 typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
 typedef void (elevator_put_req_fn) (struct request *);
@@ -59,6 +60,7 @@ struct elevator_ops
 	elevator_request_list_fn *elevator_former_req_fn;
 	elevator_request_list_fn *elevator_latter_req_fn;
 
+	elevator_init_icq_fn *elevator_init_icq_fn;
 	elevator_exit_icq_fn *elevator_exit_icq_fn;
 
 	elevator_set_req_fn *elevator_set_req_fn;
-- 
cgit v1.2.3


From f1f8cc94651738b418ba54c039df536303b91704 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:42 +0100
Subject: block, cfq: move icq creation and rq->elv.icq association to block
 core

Now block layer knows everything necessary to create and associate
icq's with requests.  Move ioc_create_icq() to blk-ioc.c and update
get_request() such that, if elevator_type->icq_size is set, requests
are automatically associated with their matching icq's before
elv_set_request().  io_context reference is also managed by block core
on request alloc/free.

* Only ioprio/cgroup changed handling remains from cfq_get_cic().
  Collapsed into cfq_set_request().

* This removes queue kicking on icq allocation failure (for now).  As
  icq allocation failure is rare and the only effect of queue kicking
  achieved was possibily accelerating queue processing, this change
  shouldn't be noticeable.

  There is a larger underlying problem.  Unlike request allocation,
  icq allocation is not guaranteed to succeed eventually after
  retries.  The number of icq is unbound and thus mempool can't be the
  solution either.  This effectively adds allocation dependency on
  memory free path and thus possibility of deadlock.

  This usually wouldn't happen because icq allocation is not a hot
  path and, even when the condition triggers, it's highly unlikely
  that none of the writeback workers already has icq.

  However, this is still possible especially if elevator is being
  switched under high memory pressure, so we better get it fixed.
  Probably the only solution is just bypassing elevator and appending
  to dispatch queue on any elevator allocation failure.

* Comment added to explain how icq's are managed and synchronized.

This completes cleanup of io_context interface.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c          |  46 +++++++++++++---
 block/blk-ioc.c           |  60 ++++++++++++++++++++-
 block/blk.h               |   1 +
 block/cfq-iosched.c       | 135 ++++------------------------------------------
 include/linux/elevator.h  |   8 +--
 include/linux/iocontext.h |  59 ++++++++++++++++++++
 6 files changed, 173 insertions(+), 136 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 3c26c7f48703..8fbdac7010bb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -640,13 +640,18 @@ EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
-	if (rq->cmd_flags & REQ_ELVPRIV)
+	if (rq->cmd_flags & REQ_ELVPRIV) {
 		elv_put_request(q, rq);
+		if (rq->elv.icq)
+			put_io_context(rq->elv.icq->ioc, q);
+	}
+
 	mempool_free(rq, q->rq.rq_pool);
 }
 
 static struct request *
-blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, struct io_cq *icq,
+		  unsigned int flags, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -657,10 +662,15 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
 
 	rq->cmd_flags = flags | REQ_ALLOCED;
 
-	if ((flags & REQ_ELVPRIV) &&
-	    unlikely(elv_set_request(q, rq, gfp_mask))) {
-		mempool_free(rq, q->rq.rq_pool);
-		return NULL;
+	if (flags & REQ_ELVPRIV) {
+		rq->elv.icq = icq;
+		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
+			mempool_free(rq, q->rq.rq_pool);
+			return NULL;
+		}
+		/* @rq->elv.icq holds on to io_context until @rq is freed */
+		if (icq)
+			get_io_context(icq->ioc);
 	}
 
 	return rq;
@@ -772,11 +782,14 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
+	struct elevator_type *et;
 	struct io_context *ioc;
+	struct io_cq *icq = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
 	bool retried = false;
 	int may_queue;
 retry:
+	et = q->elevator->type;
 	ioc = current->io_context;
 
 	if (unlikely(blk_queue_dead(q)))
@@ -837,17 +850,36 @@ retry:
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 
+	/*
+	 * Decide whether the new request will be managed by elevator.  If
+	 * so, mark @rw_flags and increment elvpriv.  Non-zero elvpriv will
+	 * prevent the current elevator from being destroyed until the new
+	 * request is freed.  This guarantees icq's won't be destroyed and
+	 * makes creating new ones safe.
+	 *
+	 * Also, lookup icq while holding queue_lock.  If it doesn't exist,
+	 * it will be created after releasing queue_lock.
+	 */
 	if (blk_rq_should_init_elevator(bio) &&
 	    !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
 		rw_flags |= REQ_ELVPRIV;
 		rl->elvpriv++;
+		if (et->icq_cache && ioc)
+			icq = ioc_lookup_icq(ioc, q);
 	}
 
 	if (blk_queue_io_stat(q))
 		rw_flags |= REQ_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw_flags, gfp_mask);
+	/* create icq if missing */
+	if (unlikely(et->icq_cache && !icq))
+		icq = ioc_create_icq(q, gfp_mask);
+
+	/* rqs are guaranteed to have icq on elv_set_request() if requested */
+	if (likely(!et->icq_cache || icq))
+		rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+
 	if (unlikely(!rq)) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 0910a5584d38..c04d16b02225 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -289,7 +289,6 @@ void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
 		kmem_cache_free(iocontext_cachep, ioc);
 	task_unlock(task);
 }
-EXPORT_SYMBOL(create_io_context_slowpath);
 
 /**
  * get_task_io_context - get io_context of a task
@@ -362,6 +361,65 @@ out:
 }
 EXPORT_SYMBOL(ioc_lookup_icq);
 
+/**
+ * ioc_create_icq - create and link io_cq
+ * @q: request_queue of interest
+ * @gfp_mask: allocation mask
+ *
+ * Make sure io_cq linking %current->io_context and @q exists.  If either
+ * io_context and/or icq don't exist, they will be created using @gfp_mask.
+ *
+ * The caller is responsible for ensuring @ioc won't go away and @q is
+ * alive and will stay alive until this function returns.
+ */
+struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
+{
+	struct elevator_type *et = q->elevator->type;
+	struct io_context *ioc;
+	struct io_cq *icq;
+
+	/* allocate stuff */
+	ioc = create_io_context(current, gfp_mask, q->node);
+	if (!ioc)
+		return NULL;
+
+	icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
+				    q->node);
+	if (!icq)
+		return NULL;
+
+	if (radix_tree_preload(gfp_mask) < 0) {
+		kmem_cache_free(et->icq_cache, icq);
+		return NULL;
+	}
+
+	icq->ioc = ioc;
+	icq->q = q;
+	INIT_LIST_HEAD(&icq->q_node);
+	INIT_HLIST_NODE(&icq->ioc_node);
+
+	/* lock both q and ioc and try to link @icq */
+	spin_lock_irq(q->queue_lock);
+	spin_lock(&ioc->lock);
+
+	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
+		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
+		list_add(&icq->q_node, &q->icq_list);
+		if (et->ops.elevator_init_icq_fn)
+			et->ops.elevator_init_icq_fn(icq);
+	} else {
+		kmem_cache_free(et->icq_cache, icq);
+		icq = ioc_lookup_icq(ioc, q);
+		if (!icq)
+			printk(KERN_ERR "cfq: icq link failed!\n");
+	}
+
+	spin_unlock(&ioc->lock);
+	spin_unlock_irq(q->queue_lock);
+	radix_tree_preload_end();
+	return icq;
+}
+
 void ioc_set_changed(struct io_context *ioc, int which)
 {
 	struct io_cq *icq;
diff --git a/block/blk.h b/block/blk.h
index ed4d9bf2ab16..7efd772336de 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -200,6 +200,7 @@ static inline int blk_do_io_stat(struct request *rq)
  */
 void get_io_context(struct io_context *ioc);
 struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q);
+struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask);
 void ioc_clear_queue(struct request_queue *q);
 
 void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_mask,
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 11f49d036845..f3b44c394e6d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2935,117 +2935,6 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
 	return cfqq;
 }
 
-/**
- * ioc_create_icq - create and link io_cq
- * @q: request_queue of interest
- * @gfp_mask: allocation mask
- *
- * Make sure io_cq linking %current->io_context and @q exists.  If either
- * io_context and/or icq don't exist, they will be created using @gfp_mask.
- *
- * The caller is responsible for ensuring @ioc won't go away and @q is
- * alive and will stay alive until this function returns.
- */
-static struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
-{
-	struct elevator_type *et = q->elevator->type;
-	struct io_context *ioc;
-	struct io_cq *icq;
-
-	/* allocate stuff */
-	ioc = create_io_context(current, gfp_mask, q->node);
-	if (!ioc)
-		return NULL;
-
-	icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
-				    q->node);
-	if (!icq)
-		return NULL;
-
-	if (radix_tree_preload(gfp_mask) < 0) {
-		kmem_cache_free(et->icq_cache, icq);
-		return NULL;
-	}
-
-	icq->ioc = ioc;
-	icq->q = q;
-	INIT_LIST_HEAD(&icq->q_node);
-	INIT_HLIST_NODE(&icq->ioc_node);
-
-	/* lock both q and ioc and try to link @icq */
-	spin_lock_irq(q->queue_lock);
-	spin_lock(&ioc->lock);
-
-	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
-		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
-		list_add(&icq->q_node, &q->icq_list);
-		if (et->ops.elevator_init_icq_fn)
-			et->ops.elevator_init_icq_fn(icq);
-	} else {
-		kmem_cache_free(et->icq_cache, icq);
-		icq = ioc_lookup_icq(ioc, q);
-		if (!icq)
-			printk(KERN_ERR "cfq: icq link failed!\n");
-	}
-
-	spin_unlock(&ioc->lock);
-	spin_unlock_irq(q->queue_lock);
-	radix_tree_preload_end();
-	return icq;
-}
-
-/**
- * cfq_get_cic - acquire cfq_io_cq and bump refcnt on io_context
- * @cfqd: cfqd to setup cic for
- * @gfp_mask: allocation mask
- *
- * Return cfq_io_cq associating @cfqd and %current->io_context and
- * bump refcnt on io_context.  If ioc or cic doesn't exist, they're created
- * using @gfp_mask.
- *
- * Must be called under queue_lock which may be released and re-acquired.
- * This function also may sleep depending on @gfp_mask.
- */
-static struct cfq_io_cq *cfq_get_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
-{
-	struct request_queue *q = cfqd->queue;
-	struct cfq_io_cq *cic = NULL;
-	struct io_context *ioc;
-
-	lockdep_assert_held(q->queue_lock);
-
-	while (true) {
-		/* fast path */
-		ioc = current->io_context;
-		if (likely(ioc)) {
-			cic = cfq_cic_lookup(cfqd, ioc);
-			if (likely(cic))
-				break;
-		}
-
-		/* slow path - unlock, create missing ones and retry */
-		spin_unlock_irq(q->queue_lock);
-		cic = icq_to_cic(ioc_create_icq(q, gfp_mask));
-		spin_lock_irq(q->queue_lock);
-		if (!cic)
-			return NULL;
-	}
-
-	/* bump @ioc's refcnt and handle changed notifications */
-	get_io_context(ioc);
-
-	if (unlikely(cic->icq.changed)) {
-		if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed))
-			changed_ioprio(cic);
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-		if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed))
-			changed_cgroup(cic);
-#endif
-	}
-
-	return cic;
-}
-
 static void
 __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
 {
@@ -3524,8 +3413,6 @@ static void cfq_put_request(struct request *rq)
 		BUG_ON(!cfqq->allocated[rw]);
 		cfqq->allocated[rw]--;
 
-		put_io_context(RQ_CIC(rq)->icq.ioc, cfqq->cfqd->queue);
-
 		/* Put down rq reference on cfqg */
 		cfq_put_cfqg(RQ_CFQG(rq));
 		rq->elv.priv[0] = NULL;
@@ -3574,7 +3461,7 @@ static int
 cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_io_cq *cic;
+	struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
 	const int rw = rq_data_dir(rq);
 	const bool is_sync = rq_is_sync(rq);
 	struct cfq_queue *cfqq;
@@ -3582,9 +3469,16 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
 	spin_lock_irq(q->queue_lock);
-	cic = cfq_get_cic(cfqd, gfp_mask);
-	if (!cic)
-		goto queue_fail;
+
+	/* handle changed notifications */
+	if (unlikely(cic->icq.changed)) {
+		if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed))
+			changed_ioprio(cic);
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+		if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed))
+			changed_cgroup(cic);
+#endif
+	}
 
 new_queue:
 	cfqq = cic_to_cfqq(cic, is_sync);
@@ -3615,17 +3509,10 @@ new_queue:
 	cfqq->allocated[rw]++;
 
 	cfqq->ref++;
-	rq->elv.icq = &cic->icq;
 	rq->elv.priv[0] = cfqq;
 	rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg);
 	spin_unlock_irq(q->queue_lock);
 	return 0;
-
-queue_fail:
-	cfq_schedule_dispatch(cfqd);
-	spin_unlock_irq(q->queue_lock);
-	cfq_log(cfqd, "set_request fail");
-	return 1;
 }
 
 static void cfq_kick_queue(struct work_struct *work)
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index c8f1e67a8ebe..c24f3d7fbf1e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -60,8 +60,8 @@ struct elevator_ops
 	elevator_request_list_fn *elevator_former_req_fn;
 	elevator_request_list_fn *elevator_latter_req_fn;
 
-	elevator_init_icq_fn *elevator_init_icq_fn;
-	elevator_exit_icq_fn *elevator_exit_icq_fn;
+	elevator_init_icq_fn *elevator_init_icq_fn;	/* see iocontext.h */
+	elevator_exit_icq_fn *elevator_exit_icq_fn;	/* ditto */
 
 	elevator_set_req_fn *elevator_set_req_fn;
 	elevator_put_req_fn *elevator_put_req_fn;
@@ -90,8 +90,8 @@ struct elevator_type
 
 	/* fields provided by elevator implementation */
 	struct elevator_ops ops;
-	size_t icq_size;
-	size_t icq_align;
+	size_t icq_size;	/* see iocontext.h */
+	size_t icq_align;	/* ditto */
 	struct elv_fs_entry *elevator_attrs;
 	char elevator_name[ELV_NAME_MAX];
 	struct module *elevator_owner;
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index ac390a34c0e7..7e1371c4bccf 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -10,6 +10,65 @@ enum {
 	ICQ_CGROUP_CHANGED,
 };
 
+/*
+ * An io_cq (icq) is association between an io_context (ioc) and a
+ * request_queue (q).  This is used by elevators which need to track
+ * information per ioc - q pair.
+ *
+ * Elevator can request use of icq by setting elevator_type->icq_size and
+ * ->icq_align.  Both size and align must be larger than that of struct
+ * io_cq and elevator can use the tail area for private information.  The
+ * recommended way to do this is defining a struct which contains io_cq as
+ * the first member followed by private members and using its size and
+ * align.  For example,
+ *
+ *	struct snail_io_cq {
+ *		struct io_cq	icq;
+ *		int		poke_snail;
+ *		int		feed_snail;
+ *	};
+ *
+ *	struct elevator_type snail_elv_type {
+ *		.ops =		{ ... },
+ *		.icq_size =	sizeof(struct snail_io_cq),
+ *		.icq_align =	__alignof__(struct snail_io_cq),
+ *		...
+ *	};
+ *
+ * If icq_size is set, block core will manage icq's.  All requests will
+ * have its ->elv.icq field set before elevator_ops->elevator_set_req_fn()
+ * is called and be holding a reference to the associated io_context.
+ *
+ * Whenever a new icq is created, elevator_ops->elevator_init_icq_fn() is
+ * called and, on destruction, ->elevator_exit_icq_fn().  Both functions
+ * are called with both the associated io_context and queue locks held.
+ *
+ * Elevator is allowed to lookup icq using ioc_lookup_icq() while holding
+ * queue lock but the returned icq is valid only until the queue lock is
+ * released.  Elevators can not and should not try to create or destroy
+ * icq's.
+ *
+ * As icq's are linked from both ioc and q, the locking rules are a bit
+ * complex.
+ *
+ * - ioc lock nests inside q lock.
+ *
+ * - ioc->icq_list and icq->ioc_node are protected by ioc lock.
+ *   q->icq_list and icq->q_node by q lock.
+ *
+ * - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq
+ *   itself is protected by q lock.  However, both the indexes and icq
+ *   itself are also RCU managed and lookup can be performed holding only
+ *   the q lock.
+ *
+ * - icq's are not reference counted.  They are destroyed when either the
+ *   ioc or q goes away.  Each request with icq set holds an extra
+ *   reference to ioc to ensure it stays until the request is completed.
+ *
+ * - Linking and unlinking icq's are performed while holding both ioc and q
+ *   locks.  Due to the lock ordering, q exit is simple but ioc exit
+ *   requires reverse-order double lock dance.
+ */
 struct io_cq {
 	struct request_queue	*q;
 	struct io_context	*ioc;
-- 
cgit v1.2.3


From 63a9332b232bdab0df6ef18a9f39e8d58a82bda4 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 7 Dec 2011 21:48:07 +0100
Subject: ARM: Orion: Get address map from plat-orion instead of via
 platform_data

Use an getter function in plat-orion/addr-map.c to get the address map
structure, rather than pass it to drivers in the platform_data
structures. When the drivers are built for none orion platforms, a
dummy function is provided instead which returns NULL.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Michael Walle <michael@walle.cc>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
---
 arch/arm/mach-dove/pcie.c                  |  3 +--
 arch/arm/mach-kirkwood/mpp.c               |  1 -
 arch/arm/mach-kirkwood/pcie.c              |  3 +--
 arch/arm/mach-mv78xx0/mpp.c                |  1 -
 arch/arm/mach-mv78xx0/pcie.c               |  3 +--
 arch/arm/mach-orion5x/mpp.c                |  1 -
 arch/arm/mach-orion5x/pci.c                |  2 +-
 arch/arm/plat-orion/addr-map.c             |  7 +++++++
 arch/arm/plat-orion/include/plat/pcie.h    |  3 +--
 arch/arm/plat-orion/pcie.c                 |  6 +++---
 drivers/ata/sata_mv.c                      | 19 +++++++++++--------
 drivers/dma/mv_xor.c                       | 11 ++++++-----
 drivers/mmc/host/mvsdio.c                  | 13 ++++++++-----
 drivers/net/ethernet/marvell/mv643xx_eth.c | 10 ++++++----
 drivers/usb/host/ehci-orion.c              | 10 ++++++----
 include/linux/mbus.h                       | 13 ++++++++++++-
 sound/soc/kirkwood/kirkwood-dma.c          | 15 +++++++++------
 17 files changed, 73 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-dove/pcie.c b/arch/arm/mach-dove/pcie.c
index 42e7d22a00f5..6c11a4df7178 100644
--- a/arch/arm/mach-dove/pcie.c
+++ b/arch/arm/mach-dove/pcie.c
@@ -10,7 +10,6 @@
 
 #include <linux/kernel.h>
 #include <linux/pci.h>
-#include <linux/mbus.h>
 #include <video/vga.h>
 #include <asm/mach/pci.h>
 #include <asm/mach/arch.h>
@@ -51,7 +50,7 @@ static int __init dove_pcie_setup(int nr, struct pci_sys_data *sys)
 	 */
 	orion_pcie_set_local_bus_nr(pp->base, sys->busnr);
 
-	orion_pcie_setup(pp->base, &orion_mbus_dram_info);
+	orion_pcie_setup(pp->base);
 
 	/*
 	 * IORESOURCE_IO
diff --git a/arch/arm/mach-kirkwood/mpp.c b/arch/arm/mach-kirkwood/mpp.c
index cc431fa22ccb..0c6ad63f10c7 100644
--- a/arch/arm/mach-kirkwood/mpp.c
+++ b/arch/arm/mach-kirkwood/mpp.c
@@ -10,7 +10,6 @@
 #include <linux/gpio.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/mbus.h>
 #include <linux/io.h>
 #include <mach/hardware.h>
 #include <plat/mpp.h>
diff --git a/arch/arm/mach-kirkwood/pcie.c b/arch/arm/mach-kirkwood/pcie.c
index 8def894f3296..fb451bfe478b 100644
--- a/arch/arm/mach-kirkwood/pcie.c
+++ b/arch/arm/mach-kirkwood/pcie.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/slab.h>
-#include <linux/mbus.h>
 #include <video/vga.h>
 #include <asm/irq.h>
 #include <asm/mach/pci.h>
@@ -209,7 +208,7 @@ static int __init kirkwood_pcie_setup(int nr, struct pci_sys_data *sys)
 	 */
 	orion_pcie_set_local_bus_nr(pp->base, sys->busnr);
 
-	orion_pcie_setup(pp->base, &orion_mbus_dram_info);
+	orion_pcie_setup(pp->base);
 
 	return 1;
 }
diff --git a/arch/arm/mach-mv78xx0/mpp.c b/arch/arm/mach-mv78xx0/mpp.c
index cf4e494d44bf..df50342179e2 100644
--- a/arch/arm/mach-mv78xx0/mpp.c
+++ b/arch/arm/mach-mv78xx0/mpp.c
@@ -10,7 +10,6 @@
 #include <linux/gpio.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/mbus.h>
 #include <linux/io.h>
 #include <plat/mpp.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-mv78xx0/pcie.c b/arch/arm/mach-mv78xx0/pcie.c
index 9208667c478a..12fcb108b0e1 100644
--- a/arch/arm/mach-mv78xx0/pcie.c
+++ b/arch/arm/mach-mv78xx0/pcie.c
@@ -10,7 +10,6 @@
 
 #include <linux/kernel.h>
 #include <linux/pci.h>
-#include <linux/mbus.h>
 #include <video/vga.h>
 #include <asm/irq.h>
 #include <asm/mach/pci.h>
@@ -154,7 +153,7 @@ static int __init mv78xx0_pcie_setup(int nr, struct pci_sys_data *sys)
 	 * Generic PCIe unit setup.
 	 */
 	orion_pcie_set_local_bus_nr(pp->base, sys->busnr);
-	orion_pcie_setup(pp->base, &orion_mbus_dram_info);
+	orion_pcie_setup(pp->base);
 
 	sys->resource[0] = &pp->res[0];
 	sys->resource[1] = &pp->res[1];
diff --git a/arch/arm/mach-orion5x/mpp.c b/arch/arm/mach-orion5x/mpp.c
index b6ddd7a5db6a..5b70026f478c 100644
--- a/arch/arm/mach-orion5x/mpp.c
+++ b/arch/arm/mach-orion5x/mpp.c
@@ -10,7 +10,6 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/mbus.h>
 #include <linux/io.h>
 #include <mach/hardware.h>
 #include <plat/mpp.h>
diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c
index ddfa01acdaaa..a494c470e3e4 100644
--- a/arch/arm/mach-orion5x/pci.c
+++ b/arch/arm/mach-orion5x/pci.c
@@ -146,7 +146,7 @@ static int __init pcie_setup(struct pci_sys_data *sys)
 	/*
 	 * Generic PCIe unit setup.
 	 */
-	orion_pcie_setup(PCIE_BASE, &orion_mbus_dram_info);
+	orion_pcie_setup(PCIE_BASE);
 
 	/*
 	 * Check whether to apply Orion-1/Orion-NAS PCIe config
diff --git a/arch/arm/plat-orion/addr-map.c b/arch/arm/plat-orion/addr-map.c
index c27ad88bc400..367ca89ac403 100644
--- a/arch/arm/plat-orion/addr-map.c
+++ b/arch/arm/plat-orion/addr-map.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/mbus.h>
 #include <linux/io.h>
@@ -16,6 +17,12 @@
 
 struct mbus_dram_target_info orion_mbus_dram_info;
 
+const struct mbus_dram_target_info *mv_mbus_dram_info(void)
+{
+	return &orion_mbus_dram_info;
+}
+EXPORT_SYMBOL_GPL(mv_mbus_dram_info);
+
 /*
  * DDR target is the same on all Orion platforms.
  */
diff --git a/arch/arm/plat-orion/include/plat/pcie.h b/arch/arm/plat-orion/include/plat/pcie.h
index cc99163e73fd..fe5b9e862747 100644
--- a/arch/arm/plat-orion/include/plat/pcie.h
+++ b/arch/arm/plat-orion/include/plat/pcie.h
@@ -20,8 +20,7 @@ int orion_pcie_x4_mode(void __iomem *base);
 int orion_pcie_get_local_bus_nr(void __iomem *base);
 void orion_pcie_set_local_bus_nr(void __iomem *base, int nr);
 void orion_pcie_reset(void __iomem *base);
-void orion_pcie_setup(void __iomem *base,
-		      struct mbus_dram_target_info *dram);
+void orion_pcie_setup(void __iomem *base);
 int orion_pcie_rd_conf(void __iomem *base, struct pci_bus *bus,
 		       u32 devfn, int where, int size, u32 *val);
 int orion_pcie_rd_conf_tlp(void __iomem *base, struct pci_bus *bus,
diff --git a/arch/arm/plat-orion/pcie.c b/arch/arm/plat-orion/pcie.c
index af2d733c50b5..86dbb5bdb172 100644
--- a/arch/arm/plat-orion/pcie.c
+++ b/arch/arm/plat-orion/pcie.c
@@ -13,6 +13,7 @@
 #include <linux/mbus.h>
 #include <asm/mach/pci.h>
 #include <plat/pcie.h>
+#include <plat/addr-map.h>
 #include <linux/delay.h>
 
 /*
@@ -175,8 +176,7 @@ static void __init orion_pcie_setup_wins(void __iomem *base,
 	writel(((size - 1) & 0xffff0000) | 1, base + PCIE_BAR_CTRL_OFF(1));
 }
 
-void __init orion_pcie_setup(void __iomem *base,
-			     struct mbus_dram_target_info *dram)
+void __init orion_pcie_setup(void __iomem *base)
 {
 	u16 cmd;
 	u32 mask;
@@ -184,7 +184,7 @@ void __init orion_pcie_setup(void __iomem *base,
 	/*
 	 * Point PCIe unit MBUS decode windows to DRAM space.
 	 */
-	orion_pcie_setup_wins(base, dram);
+	orion_pcie_setup_wins(base, &orion_mbus_dram_info);
 
 	/*
 	 * Master + slave enable.
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 0b8b8b488ee8..38950ea8398a 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -3988,7 +3988,7 @@ static int mv_create_dma_pools(struct mv_host_priv *hpriv, struct device *dev)
 }
 
 static void mv_conf_mbus_windows(struct mv_host_priv *hpriv,
-				 struct mbus_dram_target_info *dram)
+				 const struct mbus_dram_target_info *dram)
 {
 	int i;
 
@@ -3998,7 +3998,7 @@ static void mv_conf_mbus_windows(struct mv_host_priv *hpriv,
 	}
 
 	for (i = 0; i < dram->num_cs; i++) {
-		struct mbus_dram_window *cs = dram->cs + i;
+		const struct mbus_dram_window *cs = dram->cs + i;
 
 		writel(((cs->size - 1) & 0xffff0000) |
 			(cs->mbus_attr << 8) |
@@ -4019,6 +4019,7 @@ static void mv_conf_mbus_windows(struct mv_host_priv *hpriv,
 static int mv_platform_probe(struct platform_device *pdev)
 {
 	const struct mv_sata_platform_data *mv_platform_data;
+	const struct mbus_dram_target_info *dram;
 	const struct ata_port_info *ppi[] =
 	    { &mv_port_info[chip_soc], NULL };
 	struct ata_host *host;
@@ -4072,8 +4073,9 @@ static int mv_platform_probe(struct platform_device *pdev)
 	/*
 	 * (Re-)program MBUS remapping windows if we are asked to.
 	 */
-	if (mv_platform_data->dram != NULL)
-		mv_conf_mbus_windows(hpriv, mv_platform_data->dram);
+	dram = mv_mbus_dram_info();
+	if (dram)
+		mv_conf_mbus_windows(hpriv, dram);
 
 	rc = mv_create_dma_pools(hpriv, &pdev->dev);
 	if (rc)
@@ -4141,17 +4143,18 @@ static int mv_platform_suspend(struct platform_device *pdev, pm_message_t state)
 static int mv_platform_resume(struct platform_device *pdev)
 {
 	struct ata_host *host = platform_get_drvdata(pdev);
+	const struct mbus_dram_target_info *dram;
 	int ret;
 
 	if (host) {
 		struct mv_host_priv *hpriv = host->private_data;
-		const struct mv_sata_platform_data *mv_platform_data = \
-			pdev->dev.platform_data;
+
 		/*
 		 * (Re-)program MBUS remapping windows if we are asked to.
 		 */
-		if (mv_platform_data->dram != NULL)
-			mv_conf_mbus_windows(hpriv, mv_platform_data->dram);
+		dram = mv_mbus_dram_info();
+		if (dram)
+			mv_conf_mbus_windows(hpriv, dram);
 
 		/* initialize adapter */
 		ret = mv_init_host(host);
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 9a353c2216d0..e779b434af45 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1250,7 +1250,7 @@ static int __devinit mv_xor_probe(struct platform_device *pdev)
 
 static void
 mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
-			 struct mbus_dram_target_info *dram)
+			 const struct mbus_dram_target_info *dram)
 {
 	void __iomem *base = msp->xor_base;
 	u32 win_enable = 0;
@@ -1264,7 +1264,7 @@ mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
 	}
 
 	for (i = 0; i < dram->num_cs; i++) {
-		struct mbus_dram_window *cs = dram->cs + i;
+		const struct mbus_dram_window *cs = dram->cs + i;
 
 		writel((cs->base & 0xffff0000) |
 		       (cs->mbus_attr << 8) |
@@ -1290,7 +1290,7 @@ static struct platform_driver mv_xor_driver = {
 
 static int mv_xor_shared_probe(struct platform_device *pdev)
 {
-	struct mv_xor_platform_shared_data *msd = pdev->dev.platform_data;
+	const struct mbus_dram_target_info *dram;
 	struct mv_xor_shared_private *msp;
 	struct resource *res;
 
@@ -1323,8 +1323,9 @@ static int mv_xor_shared_probe(struct platform_device *pdev)
 	/*
 	 * (Re-)program MBUS remapping windows if we are asked to.
 	 */
-	if (msd != NULL && msd->dram != NULL)
-		mv_xor_conf_mbus_windows(msp, msd->dram);
+	dram = mv_mbus_dram_info();
+	if (dram)
+		mv_xor_conf_mbus_windows(msp, dram);
 
 	return 0;
 }
diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index 211a4959c293..eeb8cd125b0c 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -679,8 +679,9 @@ static const struct mmc_host_ops mvsd_ops = {
 	.enable_sdio_irq	= mvsd_enable_sdio_irq,
 };
 
-static void __init mv_conf_mbus_windows(struct mvsd_host *host,
-					struct mbus_dram_target_info *dram)
+static void __init
+mv_conf_mbus_windows(struct mvsd_host *host,
+		     const struct mbus_dram_target_info *dram)
 {
 	void __iomem *iobase = host->base;
 	int i;
@@ -691,7 +692,7 @@ static void __init mv_conf_mbus_windows(struct mvsd_host *host,
 	}
 
 	for (i = 0; i < dram->num_cs; i++) {
-		struct mbus_dram_window *cs = dram->cs + i;
+		const struct mbus_dram_window *cs = dram->cs + i;
 		writel(((cs->size - 1) & 0xffff0000) |
 		       (cs->mbus_attr << 8) |
 		       (dram->mbus_dram_target_id << 4) | 1,
@@ -705,6 +706,7 @@ static int __init mvsd_probe(struct platform_device *pdev)
 	struct mmc_host *mmc = NULL;
 	struct mvsd_host *host = NULL;
 	const struct mvsdio_platform_data *mvsd_data;
+	const struct mbus_dram_target_info *dram;
 	struct resource *r;
 	int ret, irq;
 
@@ -755,8 +757,9 @@ static int __init mvsd_probe(struct platform_device *pdev)
 	}
 
 	/* (Re-)program MBUS remapping windows if we are asked to. */
-	if (mvsd_data->dram != NULL)
-		mv_conf_mbus_windows(host, mvsd_data->dram);
+	dram = mv_mbus_dram_info();
+	if (dram)
+		mv_conf_mbus_windows(host, dram);
 
 	mvsd_power_down(host);
 
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 194a03113802..e93be7954a19 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -2509,7 +2509,7 @@ static void mv643xx_eth_netpoll(struct net_device *dev)
 /* platform glue ************************************************************/
 static void
 mv643xx_eth_conf_mbus_windows(struct mv643xx_eth_shared_private *msp,
-			      struct mbus_dram_target_info *dram)
+			      const struct mbus_dram_target_info *dram)
 {
 	void __iomem *base = msp->base;
 	u32 win_enable;
@@ -2527,7 +2527,7 @@ mv643xx_eth_conf_mbus_windows(struct mv643xx_eth_shared_private *msp,
 	win_protect = 0;
 
 	for (i = 0; i < dram->num_cs; i++) {
-		struct mbus_dram_window *cs = dram->cs + i;
+		const struct mbus_dram_window *cs = dram->cs + i;
 
 		writel((cs->base & 0xffff0000) |
 			(cs->mbus_attr << 8) |
@@ -2577,6 +2577,7 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev)
 	static int mv643xx_eth_version_printed;
 	struct mv643xx_eth_shared_platform_data *pd = pdev->dev.platform_data;
 	struct mv643xx_eth_shared_private *msp;
+	const struct mbus_dram_target_info *dram;
 	struct resource *res;
 	int ret;
 
@@ -2641,8 +2642,9 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev)
 	/*
 	 * (Re-)program MBUS remapping windows if we are asked to.
 	 */
-	if (pd != NULL && pd->dram != NULL)
-		mv643xx_eth_conf_mbus_windows(msp, pd->dram);
+	dram = mv_mbus_dram_info();
+	if (dram)
+		mv643xx_eth_conf_mbus_windows(msp, dram);
 
 	/*
 	 * Detect hardware parameters.
diff --git a/drivers/usb/host/ehci-orion.c b/drivers/usb/host/ehci-orion.c
index a68a2a5c4b83..6c6a5a3b4ea7 100644
--- a/drivers/usb/host/ehci-orion.c
+++ b/drivers/usb/host/ehci-orion.c
@@ -172,7 +172,7 @@ static const struct hc_driver ehci_orion_hc_driver = {
 
 static void __init
 ehci_orion_conf_mbus_windows(struct usb_hcd *hcd,
-				struct mbus_dram_target_info *dram)
+			     const struct mbus_dram_target_info *dram)
 {
 	int i;
 
@@ -182,7 +182,7 @@ ehci_orion_conf_mbus_windows(struct usb_hcd *hcd,
 	}
 
 	for (i = 0; i < dram->num_cs; i++) {
-		struct mbus_dram_window *cs = dram->cs + i;
+		const struct mbus_dram_window *cs = dram->cs + i;
 
 		wrl(USB_WINDOW_CTRL(i), ((cs->size - 1) & 0xffff0000) |
 					(cs->mbus_attr << 8) |
@@ -194,6 +194,7 @@ ehci_orion_conf_mbus_windows(struct usb_hcd *hcd,
 static int __devinit ehci_orion_drv_probe(struct platform_device *pdev)
 {
 	struct orion_ehci_data *pd = pdev->dev.platform_data;
+	const struct mbus_dram_target_info *dram;
 	struct resource *res;
 	struct usb_hcd *hcd;
 	struct ehci_hcd *ehci;
@@ -259,8 +260,9 @@ static int __devinit ehci_orion_drv_probe(struct platform_device *pdev)
 	/*
 	 * (Re-)program MBUS remapping windows if we are asked to.
 	 */
-	if (pd != NULL && pd->dram != NULL)
-		ehci_orion_conf_mbus_windows(hcd, pd->dram);
+	dram = mv_mbus_dram_info();
+	if (dram)
+		ehci_orion_conf_mbus_windows(hcd, dram);
 
 	/*
 	 * setup Orion USB controller.
diff --git a/include/linux/mbus.h b/include/linux/mbus.h
index c11ff2932549..efa1a6d7aca8 100644
--- a/include/linux/mbus.h
+++ b/include/linux/mbus.h
@@ -32,5 +32,16 @@ struct mbus_dram_target_info
 	} cs[4];
 };
 
-
+/*
+ * The Marvell mbus is to be found only on SOCs from the Orion family
+ * at the moment.  Provide a dummy stub for other architectures.
+ */
+#ifdef CONFIG_PLAT_ORION
+extern const struct mbus_dram_target_info *mv_mbus_dram_info(void);
+#else
+static inline const struct mbus_dram_target_info *mv_mbus_dram_info(void)
+{
+	return NULL;
+}
+#endif
 #endif
diff --git a/sound/soc/kirkwood/kirkwood-dma.c b/sound/soc/kirkwood/kirkwood-dma.c
index cd33de1c5b7a..df12e0993f5a 100644
--- a/sound/soc/kirkwood/kirkwood-dma.c
+++ b/sound/soc/kirkwood/kirkwood-dma.c
@@ -94,9 +94,10 @@ static irqreturn_t kirkwood_dma_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static void kirkwood_dma_conf_mbus_windows(void __iomem *base, int win,
-					unsigned long dma,
-					struct mbus_dram_target_info *dram)
+static void
+kirkwood_dma_conf_mbus_windows(void __iomem *base, int win,
+			       unsigned long dma,
+			       const struct mbus_dram_target_info *dram)
 {
 	int i;
 
@@ -106,7 +107,7 @@ static void kirkwood_dma_conf_mbus_windows(void __iomem *base, int win,
 
 	/* try to find matching cs for current dma address */
 	for (i = 0; i < dram->num_cs; i++) {
-		struct mbus_dram_window *cs = dram->cs + i;
+		const struct mbus_dram_window *cs = dram->cs + i;
 		if ((cs->base & 0xffff0000) < (dma & 0xffff0000)) {
 			writel(cs->base & 0xffff0000,
 				base + KIRKWOOD_AUDIO_WIN_BASE_REG(win));
@@ -127,6 +128,7 @@ static int kirkwood_dma_open(struct snd_pcm_substream *substream)
 	struct snd_soc_dai *cpu_dai = soc_runtime->cpu_dai;
 	struct kirkwood_dma_data *priv;
 	struct kirkwood_dma_priv *prdata = snd_soc_platform_get_drvdata(platform);
+	const struct mbus_dram_target_info *dram;
 	unsigned long addr;
 
 	priv = snd_soc_dai_get_dma_data(cpu_dai, substream);
@@ -175,15 +177,16 @@ static int kirkwood_dma_open(struct snd_pcm_substream *substream)
 		writel((unsigned long)-1, priv->io + KIRKWOOD_ERR_MASK);
 	}
 
+	dram = mv_mbus_dram_info();
 	addr = virt_to_phys(substream->dma_buffer.area);
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		prdata->play_stream = substream;
 		kirkwood_dma_conf_mbus_windows(priv->io,
-			KIRKWOOD_PLAYBACK_WIN, addr, priv->dram);
+			KIRKWOOD_PLAYBACK_WIN, addr, dram);
 	} else {
 		prdata->rec_stream = substream;
 		kirkwood_dma_conf_mbus_windows(priv->io,
-			KIRKWOOD_RECORD_WIN, addr, priv->dram);
+			KIRKWOOD_RECORD_WIN, addr, dram);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From db33f4de9952af112b0d4f2436ce931ae632aba0 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 7 Dec 2011 21:48:08 +0100
Subject: ARM: Orion: Remove address map info from all platform data structures

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Michael Walle <michael@walle.cc>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
---
 arch/arm/mach-dove/common.c                   | 15 ++++------
 arch/arm/mach-kirkwood/common.c               | 16 ++++------
 arch/arm/mach-mv78xx0/common.c                | 21 +++++--------
 arch/arm/mach-orion5x/common.c                | 15 ++++------
 arch/arm/plat-orion/common.c                  | 43 ++++++---------------------
 arch/arm/plat-orion/include/plat/audio.h      |  3 --
 arch/arm/plat-orion/include/plat/common.h     | 17 +++--------
 arch/arm/plat-orion/include/plat/ehci-orion.h |  1 -
 arch/arm/plat-orion/include/plat/mv_xor.h     |  6 ----
 arch/arm/plat-orion/include/plat/mvsdio.h     |  1 -
 include/linux/ata_platform.h                  |  3 --
 11 files changed, 36 insertions(+), 105 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c
index 29c1881c8acb..29ff0d076f0a 100644
--- a/arch/arm/mach-dove/common.c
+++ b/arch/arm/mach-dove/common.c
@@ -14,7 +14,6 @@
 #include <linux/platform_device.h>
 #include <linux/pci.h>
 #include <linux/clk.h>
-#include <linux/mbus.h>
 #include <linux/ata_platform.h>
 #include <linux/gpio.h>
 #include <asm/page.h>
@@ -72,8 +71,7 @@ void __init dove_map_io(void)
  ****************************************************************************/
 void __init dove_ehci0_init(void)
 {
-	orion_ehci_init(&orion_mbus_dram_info,
-			DOVE_USB0_PHYS_BASE, IRQ_DOVE_USB0);
+	orion_ehci_init(DOVE_USB0_PHYS_BASE, IRQ_DOVE_USB0);
 }
 
 /*****************************************************************************
@@ -81,8 +79,7 @@ void __init dove_ehci0_init(void)
  ****************************************************************************/
 void __init dove_ehci1_init(void)
 {
-	orion_ehci_1_init(&orion_mbus_dram_info,
-			  DOVE_USB1_PHYS_BASE, IRQ_DOVE_USB1);
+	orion_ehci_1_init(DOVE_USB1_PHYS_BASE, IRQ_DOVE_USB1);
 }
 
 /*****************************************************************************
@@ -90,7 +87,7 @@ void __init dove_ehci1_init(void)
  ****************************************************************************/
 void __init dove_ge00_init(struct mv643xx_eth_platform_data *eth_data)
 {
-	orion_ge00_init(eth_data, &orion_mbus_dram_info,
+	orion_ge00_init(eth_data,
 			DOVE_GE00_PHYS_BASE, IRQ_DOVE_GE00_SUM,
 			0, get_tclk());
 }
@@ -108,8 +105,7 @@ void __init dove_rtc_init(void)
  ****************************************************************************/
 void __init dove_sata_init(struct mv_sata_platform_data *sata_data)
 {
-	orion_sata_init(sata_data, &orion_mbus_dram_info,
-			DOVE_SATA_PHYS_BASE, IRQ_DOVE_SATA);
+	orion_sata_init(sata_data, DOVE_SATA_PHYS_BASE, IRQ_DOVE_SATA);
 
 }
 
@@ -199,8 +195,7 @@ struct sys_timer dove_timer = {
  ****************************************************************************/
 void __init dove_xor0_init(void)
 {
-	orion_xor0_init(&orion_mbus_dram_info,
-			DOVE_XOR0_PHYS_BASE, DOVE_XOR0_HIGH_PHYS_BASE,
+	orion_xor0_init(DOVE_XOR0_PHYS_BASE, DOVE_XOR0_HIGH_PHYS_BASE,
 			IRQ_DOVE_XOR_00, IRQ_DOVE_XOR_01);
 }
 
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index dd63fac9ed02..10566d4052d2 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -12,7 +12,6 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/serial_8250.h>
-#include <linux/mbus.h>
 #include <linux/ata_platform.h>
 #include <linux/mtd/nand.h>
 #include <linux/dma-mapping.h>
@@ -74,8 +73,7 @@ unsigned int kirkwood_clk_ctrl = CGC_DUNIT | CGC_RESERVED;
 void __init kirkwood_ehci_init(void)
 {
 	kirkwood_clk_ctrl |= CGC_USB0;
-	orion_ehci_init(&orion_mbus_dram_info,
-			USB_PHYS_BASE, IRQ_KIRKWOOD_USB);
+	orion_ehci_init(USB_PHYS_BASE, IRQ_KIRKWOOD_USB);
 }
 
 
@@ -86,7 +84,7 @@ void __init kirkwood_ge00_init(struct mv643xx_eth_platform_data *eth_data)
 {
 	kirkwood_clk_ctrl |= CGC_GE0;
 
-	orion_ge00_init(eth_data, &orion_mbus_dram_info,
+	orion_ge00_init(eth_data,
 			GE00_PHYS_BASE, IRQ_KIRKWOOD_GE00_SUM,
 			IRQ_KIRKWOOD_GE00_ERR, kirkwood_tclk);
 }
@@ -100,7 +98,7 @@ void __init kirkwood_ge01_init(struct mv643xx_eth_platform_data *eth_data)
 
 	kirkwood_clk_ctrl |= CGC_GE1;
 
-	orion_ge01_init(eth_data, &orion_mbus_dram_info,
+	orion_ge01_init(eth_data,
 			GE01_PHYS_BASE, IRQ_KIRKWOOD_GE01_SUM,
 			IRQ_KIRKWOOD_GE01_ERR, kirkwood_tclk);
 }
@@ -179,8 +177,7 @@ void __init kirkwood_sata_init(struct mv_sata_platform_data *sata_data)
 	if (sata_data->n_ports > 1)
 		kirkwood_clk_ctrl |= CGC_SATA1;
 
-	orion_sata_init(sata_data, &orion_mbus_dram_info,
-			SATA_PHYS_BASE, IRQ_KIRKWOOD_SATA);
+	orion_sata_init(sata_data, SATA_PHYS_BASE, IRQ_KIRKWOOD_SATA);
 }
 
 
@@ -222,7 +219,6 @@ void __init kirkwood_sdio_init(struct mvsdio_platform_data *mvsdio_data)
 		mvsdio_data->clock = 100000000;
 	else
 		mvsdio_data->clock = 200000000;
-	mvsdio_data->dram = &orion_mbus_dram_info;
 	kirkwood_clk_ctrl |= CGC_SDIO;
 	kirkwood_sdio.dev.platform_data = mvsdio_data;
 	platform_device_register(&kirkwood_sdio);
@@ -286,8 +282,7 @@ static void __init kirkwood_xor0_init(void)
 {
 	kirkwood_clk_ctrl |= CGC_XOR0;
 
-	orion_xor0_init(&orion_mbus_dram_info,
-			XOR0_PHYS_BASE, XOR0_HIGH_PHYS_BASE,
+	orion_xor0_init(XOR0_PHYS_BASE, XOR0_HIGH_PHYS_BASE,
 			IRQ_KIRKWOOD_XOR_00, IRQ_KIRKWOOD_XOR_01);
 }
 
@@ -365,7 +360,6 @@ static struct resource kirkwood_i2s_resources[] = {
 };
 
 static struct kirkwood_asoc_platform_data kirkwood_i2s_data = {
-	.dram        = &orion_mbus_dram_info,
 	.burst       = 128,
 };
 
diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c
index be0c232ea0d8..534ce204e8b9 100644
--- a/arch/arm/mach-mv78xx0/common.c
+++ b/arch/arm/mach-mv78xx0/common.c
@@ -12,7 +12,6 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/serial_8250.h>
-#include <linux/mbus.h>
 #include <linux/ata_platform.h>
 #include <linux/ethtool.h>
 #include <asm/mach/map.h>
@@ -170,8 +169,7 @@ void __init mv78xx0_map_io(void)
  ****************************************************************************/
 void __init mv78xx0_ehci0_init(void)
 {
-	orion_ehci_init(&orion_mbus_dram_info,
-			USB0_PHYS_BASE, IRQ_MV78XX0_USB_0);
+	orion_ehci_init(USB0_PHYS_BASE, IRQ_MV78XX0_USB_0);
 }
 
 
@@ -180,8 +178,7 @@ void __init mv78xx0_ehci0_init(void)
  ****************************************************************************/
 void __init mv78xx0_ehci1_init(void)
 {
-	orion_ehci_1_init(&orion_mbus_dram_info,
-			  USB1_PHYS_BASE, IRQ_MV78XX0_USB_1);
+	orion_ehci_1_init(USB1_PHYS_BASE, IRQ_MV78XX0_USB_1);
 }
 
 
@@ -190,8 +187,7 @@ void __init mv78xx0_ehci1_init(void)
  ****************************************************************************/
 void __init mv78xx0_ehci2_init(void)
 {
-	orion_ehci_2_init(&orion_mbus_dram_info,
-			  USB2_PHYS_BASE, IRQ_MV78XX0_USB_2);
+	orion_ehci_2_init(USB2_PHYS_BASE, IRQ_MV78XX0_USB_2);
 }
 
 
@@ -200,7 +196,7 @@ void __init mv78xx0_ehci2_init(void)
  ****************************************************************************/
 void __init mv78xx0_ge00_init(struct mv643xx_eth_platform_data *eth_data)
 {
-	orion_ge00_init(eth_data, &orion_mbus_dram_info,
+	orion_ge00_init(eth_data,
 			GE00_PHYS_BASE, IRQ_MV78XX0_GE00_SUM,
 			IRQ_MV78XX0_GE_ERR, get_tclk());
 }
@@ -211,7 +207,7 @@ void __init mv78xx0_ge00_init(struct mv643xx_eth_platform_data *eth_data)
  ****************************************************************************/
 void __init mv78xx0_ge01_init(struct mv643xx_eth_platform_data *eth_data)
 {
-	orion_ge01_init(eth_data, &orion_mbus_dram_info,
+	orion_ge01_init(eth_data,
 			GE01_PHYS_BASE, IRQ_MV78XX0_GE01_SUM,
 			NO_IRQ, get_tclk());
 }
@@ -235,7 +231,7 @@ void __init mv78xx0_ge10_init(struct mv643xx_eth_platform_data *eth_data)
 		eth_data->duplex = DUPLEX_FULL;
 	}
 
-	orion_ge10_init(eth_data, &orion_mbus_dram_info,
+	orion_ge10_init(eth_data,
 			GE10_PHYS_BASE, IRQ_MV78XX0_GE10_SUM,
 			NO_IRQ, get_tclk());
 }
@@ -259,7 +255,7 @@ void __init mv78xx0_ge11_init(struct mv643xx_eth_platform_data *eth_data)
 		eth_data->duplex = DUPLEX_FULL;
 	}
 
-	orion_ge11_init(eth_data, &orion_mbus_dram_info,
+	orion_ge11_init(eth_data,
 			GE11_PHYS_BASE, IRQ_MV78XX0_GE11_SUM,
 			NO_IRQ, get_tclk());
 }
@@ -278,8 +274,7 @@ void __init mv78xx0_i2c_init(void)
  ****************************************************************************/
 void __init mv78xx0_sata_init(struct mv_sata_platform_data *sata_data)
 {
-	orion_sata_init(sata_data, &orion_mbus_dram_info,
-			SATA_PHYS_BASE, IRQ_MV78XX0_SATA);
+	orion_sata_init(sata_data, SATA_PHYS_BASE, IRQ_MV78XX0_SATA);
 }
 
 
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index acac255f9a1b..79ef693d2c03 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -15,7 +15,6 @@
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/serial_8250.h>
-#include <linux/mbus.h>
 #include <linux/mv643xx_i2c.h>
 #include <linux/ata_platform.h>
 #include <net/dsa.h>
@@ -72,8 +71,7 @@ void __init orion5x_map_io(void)
  ****************************************************************************/
 void __init orion5x_ehci0_init(void)
 {
-	orion_ehci_init(&orion_mbus_dram_info,
-			ORION5X_USB0_PHYS_BASE, IRQ_ORION5X_USB0_CTRL);
+	orion_ehci_init(ORION5X_USB0_PHYS_BASE, IRQ_ORION5X_USB0_CTRL);
 }
 
 
@@ -82,8 +80,7 @@ void __init orion5x_ehci0_init(void)
  ****************************************************************************/
 void __init orion5x_ehci1_init(void)
 {
-	orion_ehci_1_init(&orion_mbus_dram_info,
-			  ORION5X_USB1_PHYS_BASE, IRQ_ORION5X_USB1_CTRL);
+	orion_ehci_1_init(ORION5X_USB1_PHYS_BASE, IRQ_ORION5X_USB1_CTRL);
 }
 
 
@@ -92,7 +89,7 @@ void __init orion5x_ehci1_init(void)
  ****************************************************************************/
 void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data)
 {
-	orion_ge00_init(eth_data, &orion_mbus_dram_info,
+	orion_ge00_init(eth_data,
 			ORION5X_ETH_PHYS_BASE, IRQ_ORION5X_ETH_SUM,
 			IRQ_ORION5X_ETH_ERR, orion5x_tclk);
 }
@@ -122,8 +119,7 @@ void __init orion5x_i2c_init(void)
  ****************************************************************************/
 void __init orion5x_sata_init(struct mv_sata_platform_data *sata_data)
 {
-	orion_sata_init(sata_data, &orion_mbus_dram_info,
-			ORION5X_SATA_PHYS_BASE, IRQ_ORION5X_SATA);
+	orion_sata_init(sata_data, ORION5X_SATA_PHYS_BASE, IRQ_ORION5X_SATA);
 }
 
 
@@ -159,8 +155,7 @@ void __init orion5x_uart1_init(void)
  ****************************************************************************/
 void __init orion5x_xor_init(void)
 {
-	orion_xor0_init(&orion_mbus_dram_info,
-			ORION5X_XOR_PHYS_BASE,
+	orion_xor0_init(ORION5X_XOR_PHYS_BASE,
 			ORION5X_XOR_PHYS_BASE + 0x200,
 			IRQ_ORION5X_XOR0, IRQ_ORION5X_XOR1);
 }
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 9e5451b3c8e3..e5a2fde29b19 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -13,7 +13,6 @@
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/serial_8250.h>
-#include <linux/mbus.h>
 #include <linux/ata_platform.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/mv643xx_i2c.h>
@@ -203,13 +202,12 @@ void __init orion_rtc_init(unsigned long mapbase,
  ****************************************************************************/
 static __init void ge_complete(
 	struct mv643xx_eth_shared_platform_data *orion_ge_shared_data,
-	struct mbus_dram_target_info *mbus_dram_info, int tclk,
+	int tclk,
 	struct resource *orion_ge_resource, unsigned long irq,
 	struct platform_device *orion_ge_shared,
 	struct mv643xx_eth_platform_data *eth_data,
 	struct platform_device *orion_ge)
 {
-	orion_ge_shared_data->dram = mbus_dram_info;
 	orion_ge_shared_data->t_clk = tclk;
 	orion_ge_resource->start = irq;
 	orion_ge_resource->end = irq;
@@ -259,7 +257,6 @@ static struct platform_device orion_ge00 = {
 };
 
 void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data,
-			    struct mbus_dram_target_info *mbus_dram_info,
 			    unsigned long mapbase,
 			    unsigned long irq,
 			    unsigned long irq_err,
@@ -267,7 +264,7 @@ void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data,
 {
 	fill_resources(&orion_ge00_shared, orion_ge00_shared_resources,
 		       mapbase + 0x2000, SZ_16K - 1, irq_err);
-	ge_complete(&orion_ge00_shared_data, mbus_dram_info, tclk,
+	ge_complete(&orion_ge00_shared_data, tclk,
 		    orion_ge00_resources, irq, &orion_ge00_shared,
 		    eth_data, &orion_ge00);
 }
@@ -313,7 +310,6 @@ static struct platform_device orion_ge01 = {
 };
 
 void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data,
-			    struct mbus_dram_target_info *mbus_dram_info,
 			    unsigned long mapbase,
 			    unsigned long irq,
 			    unsigned long irq_err,
@@ -321,7 +317,7 @@ void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data,
 {
 	fill_resources(&orion_ge01_shared, orion_ge01_shared_resources,
 		       mapbase + 0x2000, SZ_16K - 1, irq_err);
-	ge_complete(&orion_ge01_shared_data, mbus_dram_info, tclk,
+	ge_complete(&orion_ge01_shared_data, tclk,
 		    orion_ge01_resources, irq, &orion_ge01_shared,
 		    eth_data, &orion_ge01);
 }
@@ -367,7 +363,6 @@ static struct platform_device orion_ge10 = {
 };
 
 void __init orion_ge10_init(struct mv643xx_eth_platform_data *eth_data,
-			    struct mbus_dram_target_info *mbus_dram_info,
 			    unsigned long mapbase,
 			    unsigned long irq,
 			    unsigned long irq_err,
@@ -375,7 +370,7 @@ void __init orion_ge10_init(struct mv643xx_eth_platform_data *eth_data,
 {
 	fill_resources(&orion_ge10_shared, orion_ge10_shared_resources,
 		       mapbase + 0x2000, SZ_16K - 1, irq_err);
-	ge_complete(&orion_ge10_shared_data, mbus_dram_info, tclk,
+	ge_complete(&orion_ge10_shared_data, tclk,
 		    orion_ge10_resources, irq, &orion_ge10_shared,
 		    eth_data, &orion_ge10);
 }
@@ -421,7 +416,6 @@ static struct platform_device orion_ge11 = {
 };
 
 void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data,
-			    struct mbus_dram_target_info *mbus_dram_info,
 			    unsigned long mapbase,
 			    unsigned long irq,
 			    unsigned long irq_err,
@@ -429,7 +423,7 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data,
 {
 	fill_resources(&orion_ge11_shared, orion_ge11_shared_resources,
 		       mapbase + 0x2000, SZ_16K - 1, irq_err);
-	ge_complete(&orion_ge11_shared_data, mbus_dram_info, tclk,
+	ge_complete(&orion_ge11_shared_data, tclk,
 		    orion_ge11_resources, irq, &orion_ge11_shared,
 		    eth_data, &orion_ge11);
 }
@@ -592,8 +586,6 @@ void __init orion_wdt_init(unsigned long tclk)
 /*****************************************************************************
  * XOR
  ****************************************************************************/
-static struct mv_xor_platform_shared_data orion_xor_shared_data;
-
 static u64 orion_xor_dmamask = DMA_BIT_MASK(32);
 
 void __init orion_xor_init_channels(
@@ -632,9 +624,6 @@ static struct resource orion_xor0_shared_resources[] = {
 static struct platform_device orion_xor0_shared = {
 	.name		= MV_XOR_SHARED_NAME,
 	.id		= 0,
-	.dev		= {
-		.platform_data = &orion_xor_shared_data,
-	},
 	.num_resources	= ARRAY_SIZE(orion_xor0_shared_resources),
 	.resource	= orion_xor0_shared_resources,
 };
@@ -687,14 +676,11 @@ static struct platform_device orion_xor01_channel = {
 	},
 };
 
-void __init orion_xor0_init(struct mbus_dram_target_info *mbus_dram_info,
-			    unsigned long mapbase_low,
+void __init orion_xor0_init(unsigned long mapbase_low,
 			    unsigned long mapbase_high,
 			    unsigned long irq_0,
 			    unsigned long irq_1)
 {
-	orion_xor_shared_data.dram = mbus_dram_info;
-
 	orion_xor0_shared_resources[0].start = mapbase_low;
 	orion_xor0_shared_resources[0].end = mapbase_low + 0xff;
 	orion_xor0_shared_resources[1].start = mapbase_high;
@@ -727,9 +713,6 @@ static struct resource orion_xor1_shared_resources[] = {
 static struct platform_device orion_xor1_shared = {
 	.name		= MV_XOR_SHARED_NAME,
 	.id		= 1,
-	.dev		= {
-		.platform_data = &orion_xor_shared_data,
-	},
 	.num_resources	= ARRAY_SIZE(orion_xor1_shared_resources),
 	.resource	= orion_xor1_shared_resources,
 };
@@ -828,11 +811,9 @@ static struct platform_device orion_ehci = {
 	},
 };
 
-void __init orion_ehci_init(struct mbus_dram_target_info *mbus_dram_info,
-			    unsigned long mapbase,
+void __init orion_ehci_init(unsigned long mapbase,
 			    unsigned long irq)
 {
-	orion_ehci_data.dram = mbus_dram_info;
 	fill_resources(&orion_ehci, orion_ehci_resources, mapbase, SZ_4K - 1,
 		       irq);
 
@@ -854,11 +835,9 @@ static struct platform_device orion_ehci_1 = {
 	},
 };
 
-void __init orion_ehci_1_init(struct mbus_dram_target_info *mbus_dram_info,
-			      unsigned long mapbase,
+void __init orion_ehci_1_init(unsigned long mapbase,
 			      unsigned long irq)
 {
-	orion_ehci_data.dram = mbus_dram_info;
 	fill_resources(&orion_ehci_1, orion_ehci_1_resources,
 		       mapbase, SZ_4K - 1, irq);
 
@@ -880,11 +859,9 @@ static struct platform_device orion_ehci_2 = {
 	},
 };
 
-void __init orion_ehci_2_init(struct mbus_dram_target_info *mbus_dram_info,
-			      unsigned long mapbase,
+void __init orion_ehci_2_init(unsigned long mapbase,
 			      unsigned long irq)
 {
-	orion_ehci_data.dram = mbus_dram_info;
 	fill_resources(&orion_ehci_2, orion_ehci_2_resources,
 		       mapbase, SZ_4K - 1, irq);
 
@@ -911,11 +888,9 @@ static struct platform_device orion_sata = {
 };
 
 void __init orion_sata_init(struct mv_sata_platform_data *sata_data,
-			    struct mbus_dram_target_info *mbus_dram_info,
 			    unsigned long mapbase,
 			    unsigned long irq)
 {
-	sata_data->dram = mbus_dram_info;
 	orion_sata.dev.platform_data = sata_data;
 	fill_resources(&orion_sata, orion_sata_resources,
 		       mapbase, 0x5000 - 1, irq);
diff --git a/arch/arm/plat-orion/include/plat/audio.h b/arch/arm/plat-orion/include/plat/audio.h
index 9cf1f781329b..885f8abd927b 100644
--- a/arch/arm/plat-orion/include/plat/audio.h
+++ b/arch/arm/plat-orion/include/plat/audio.h
@@ -1,11 +1,8 @@
 #ifndef __PLAT_AUDIO_H
 #define __PLAT_AUDIO_H
 
-#include <linux/mbus.h>
-
 struct kirkwood_asoc_platform_data {
 	u32 tclk;
-	struct mbus_dram_target_info *dram;
 	int burst;
 };
 #endif
diff --git a/arch/arm/plat-orion/include/plat/common.h b/arch/arm/plat-orion/include/plat/common.h
index a63c357e2ab1..0fe08d77e835 100644
--- a/arch/arm/plat-orion/include/plat/common.h
+++ b/arch/arm/plat-orion/include/plat/common.h
@@ -37,28 +37,24 @@ void __init orion_rtc_init(unsigned long mapbase,
 			   unsigned long irq);
 
 void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data,
-			    struct mbus_dram_target_info *mbus_dram_info,
 			    unsigned long mapbase,
 			    unsigned long irq,
 			    unsigned long irq_err,
 			    int tclk);
 
 void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data,
-			    struct mbus_dram_target_info *mbus_dram_info,
 			    unsigned long mapbase,
 			    unsigned long irq,
 			    unsigned long irq_err,
 			    int tclk);
 
 void __init orion_ge10_init(struct mv643xx_eth_platform_data *eth_data,
-			    struct mbus_dram_target_info *mbus_dram_info,
 			    unsigned long mapbase,
 			    unsigned long irq,
 			    unsigned long irq_err,
 			    int tclk);
 
 void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data,
-			    struct mbus_dram_target_info *mbus_dram_info,
 			    unsigned long mapbase,
 			    unsigned long irq,
 			    unsigned long irq_err,
@@ -82,8 +78,7 @@ void __init orion_spi_1_init(unsigned long mapbase,
 
 void __init orion_wdt_init(unsigned long tclk);
 
-void __init orion_xor0_init(struct mbus_dram_target_info *mbus_dram_info,
-			    unsigned long mapbase_low,
+void __init orion_xor0_init(unsigned long mapbase_low,
 			    unsigned long mapbase_high,
 			    unsigned long irq_0,
 			    unsigned long irq_1);
@@ -93,20 +88,16 @@ void __init orion_xor1_init(unsigned long mapbase_low,
 			    unsigned long irq_0,
 			    unsigned long irq_1);
 
-void __init orion_ehci_init(struct mbus_dram_target_info *mbus_dram_info,
-			    unsigned long mapbase,
+void __init orion_ehci_init(unsigned long mapbase,
 			    unsigned long irq);
 
-void __init orion_ehci_1_init(struct mbus_dram_target_info *mbus_dram_info,
-			      unsigned long mapbase,
+void __init orion_ehci_1_init(unsigned long mapbase,
 			      unsigned long irq);
 
-void __init orion_ehci_2_init(struct mbus_dram_target_info *mbus_dram_info,
-			      unsigned long mapbase,
+void __init orion_ehci_2_init(unsigned long mapbase,
 			      unsigned long irq);
 
 void __init orion_sata_init(struct mv_sata_platform_data *sata_data,
-			    struct mbus_dram_target_info *mbus_dram_info,
 			    unsigned long mapbase,
 			    unsigned long irq);
 
diff --git a/arch/arm/plat-orion/include/plat/ehci-orion.h b/arch/arm/plat-orion/include/plat/ehci-orion.h
index 4ec668e77460..6fc78e430420 100644
--- a/arch/arm/plat-orion/include/plat/ehci-orion.h
+++ b/arch/arm/plat-orion/include/plat/ehci-orion.h
@@ -19,7 +19,6 @@ enum orion_ehci_phy_ver {
 };
 
 struct orion_ehci_data {
-	struct mbus_dram_target_info	*dram;
 	enum orion_ehci_phy_ver phy_version;
 };
 
diff --git a/arch/arm/plat-orion/include/plat/mv_xor.h b/arch/arm/plat-orion/include/plat/mv_xor.h
index bd5f3bdb4ae3..2ba1f7d76eef 100644
--- a/arch/arm/plat-orion/include/plat/mv_xor.h
+++ b/arch/arm/plat-orion/include/plat/mv_xor.h
@@ -13,12 +13,6 @@
 #define MV_XOR_SHARED_NAME	"mv_xor_shared"
 #define MV_XOR_NAME		"mv_xor"
 
-struct mbus_dram_target_info;
-
-struct mv_xor_platform_shared_data {
-	struct mbus_dram_target_info	*dram;
-};
-
 struct mv_xor_platform_data {
 	struct platform_device		*shared;
 	int				hw_id;
diff --git a/arch/arm/plat-orion/include/plat/mvsdio.h b/arch/arm/plat-orion/include/plat/mvsdio.h
index 14ca88676002..1190efedcb94 100644
--- a/arch/arm/plat-orion/include/plat/mvsdio.h
+++ b/arch/arm/plat-orion/include/plat/mvsdio.h
@@ -12,7 +12,6 @@
 #include <linux/mbus.h>
 
 struct mvsdio_platform_data {
-	struct mbus_dram_target_info *dram;
 	unsigned int clock;
 	int gpio_card_detect;
 	int gpio_write_protect;
diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h
index 9a26c83a2c9e..b856a2a590d9 100644
--- a/include/linux/ata_platform.h
+++ b/include/linux/ata_platform.h
@@ -27,10 +27,7 @@ extern int __devexit __pata_platform_remove(struct device *dev);
 /*
  * Marvell SATA private data
  */
-struct mbus_dram_target_info;
-
 struct mv_sata_platform_data {
-	struct mbus_dram_target_info	*dram;
 	int	n_ports; /* number of sata ports */
 };
 
-- 
cgit v1.2.3


From 175d6146738b3d04e1adcaa4a971a3b2b0dbd8af Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 28 Nov 2011 14:36:36 +0100
Subject: iommu/amd: Add invalid_ppr callback

This callback can be used to change the PRI response code
sent to a device when a PPR fault fails.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu_v2.c | 57 ++++++++++++++++++++++++++++++++++++++++++--
 include/linux/amd-iommu.h    | 34 +++++++++++++++++++++++---
 2 files changed, 86 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index abdb8396f89a..fe812e2a0474 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -62,6 +62,7 @@ struct device_state {
 	struct iommu_domain *domain;
 	int pasid_levels;
 	int max_pasids;
+	amd_iommu_invalid_ppr_cb inv_ppr_cb;
 	spinlock_t lock;
 	wait_queue_head_t wq;
 };
@@ -505,10 +506,31 @@ static void do_fault(struct work_struct *work)
 	npages = get_user_pages(fault->state->task, fault->state->mm,
 				fault->address, 1, write, 0, &page, NULL);
 
-	if (npages == 1)
+	if (npages == 1) {
 		put_page(page);
-	else
+	} else if (fault->dev_state->inv_ppr_cb) {
+		int status;
+
+		status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev,
+						      fault->pasid,
+						      fault->address,
+						      fault->flags);
+		switch (status) {
+		case AMD_IOMMU_INV_PRI_RSP_SUCCESS:
+			set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS);
+			break;
+		case AMD_IOMMU_INV_PRI_RSP_INVALID:
+			set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
+			break;
+		case AMD_IOMMU_INV_PRI_RSP_FAIL:
+			set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE);
+			break;
+		default:
+			BUG();
+		}
+	} else {
 		set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
+	}
 
 	finish_pri_tag(fault->dev_state, fault->state, fault->tag);
 
@@ -828,6 +850,37 @@ void amd_iommu_free_device(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL(amd_iommu_free_device);
 
+int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
+				 amd_iommu_invalid_ppr_cb cb)
+{
+	struct device_state *dev_state;
+	unsigned long flags;
+	u16 devid;
+	int ret;
+
+	if (!amd_iommu_v2_supported())
+		return -ENODEV;
+
+	devid = device_id(pdev);
+
+	spin_lock_irqsave(&state_lock, flags);
+
+	ret = -EINVAL;
+	dev_state = state_table[devid];
+	if (dev_state == NULL)
+		goto out_unlock;
+
+	dev_state->inv_ppr_cb = cb;
+
+	ret = 0;
+
+out_unlock:
+	spin_unlock_irqrestore(&state_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);
+
 static int __init amd_iommu_v2_init(void)
 {
 	size_t state_table_size;
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 23e21e15dfab..06688c42167d 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -28,9 +28,6 @@ struct task_struct;
 struct pci_dev;
 
 extern int amd_iommu_detect(void);
-extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
-				struct task_struct *task);
-extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
 
 
 /**
@@ -91,6 +88,37 @@ extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
  */
 extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
 
+/**
+ * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed
+ *				    PRI requests
+ * @pdev: The PCI device the call-back should be registered for
+ * @cb: The call-back function
+ *
+ * The IOMMUv2 driver invokes this call-back when it is unable to
+ * successfully handle a PRI request. The device driver can then decide
+ * which PRI response the device should see. Possible return values for
+ * the call-back are:
+ *
+ * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device
+ * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device
+ * - AMD_IOMMU_INV_PRI_RSP_FAIL    - Send Failure back to the device,
+ *				     the device is required to disable
+ *				     PRI when it receives this response
+ *
+ * The function returns 0 on success or negative value on error.
+ */
+#define AMD_IOMMU_INV_PRI_RSP_SUCCESS	0
+#define AMD_IOMMU_INV_PRI_RSP_INVALID	1
+#define AMD_IOMMU_INV_PRI_RSP_FAIL	2
+
+typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev,
+					int pasid,
+					unsigned long address,
+					u16);
+
+extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
+					amd_iommu_invalid_ppr_cb cb);
+
 #else
 
 static inline int amd_iommu_detect(void) { return -ENODEV; }
-- 
cgit v1.2.3


From 84c99db879314d58e0064f02b481f668f45d0070 Mon Sep 17 00:00:00 2001
From: Ashish Jangam <ashish.jangam@kpitcummins.com>
Date: Mon, 12 Dec 2011 20:06:56 +0530
Subject: MFD: DA9052/53 MFD core module

The DA9052/53 is a highly integrated PMIC subsystem with supply domain
flexibility to support wide range of high performance application.

It provides voltage regulators, GPIO controller, Touch Screen, RTC, Battery
control and other functionality.

This patch is functionally tested on Samsung SMDKV6410.

Signed-off-by: David Dajun Chen <dchen@diasemi.com>
Signed-off-by: Ashish Jangam <ashish.jangam@kpitcummins.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/mfd/Kconfig               |  16 +
 drivers/mfd/Makefile              |   4 +
 drivers/mfd/da9052-core.c         | 690 +++++++++++++++++++++++++++++++++++
 drivers/mfd/da9052-i2c.c          | 140 +++++++
 include/linux/mfd/da9052/da9052.h | 129 +++++++
 include/linux/mfd/da9052/pdata.h  |  40 ++
 include/linux/mfd/da9052/reg.h    | 749 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 1768 insertions(+)
 create mode 100644 drivers/mfd/da9052-core.c
 create mode 100644 drivers/mfd/da9052-i2c.c
 create mode 100644 include/linux/mfd/da9052/da9052.h
 create mode 100644 include/linux/mfd/da9052/pdata.h
 create mode 100644 include/linux/mfd/da9052/reg.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index f1391c21ef26..baced42c8572 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -328,6 +328,22 @@ config PMIC_DA903X
 	  individual components like LCD backlight, voltage regulators,
 	  LEDs and battery-charger under the corresponding menus.
 
+config PMIC_DA9052
+	bool
+	select MFD_CORE
+
+config MFD_DA9052_I2C
+	bool "Support Dialog Semiconductor DA9052/53 PMIC variants with I2C"
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	select PMIC_DA9052
+	depends on I2C=y
+	help
+	  Support for the Dialog Semiconductor DA9052 PMIC
+	  when controlled using I2C. This driver provides common support
+	  for accessing the device, additional drivers must be enabled in
+	  order to use the functionality of the device.
+
 config PMIC_ADP5520
 	bool "Analog Devices ADP5520/01 MFD PMIC Core Support"
 	depends on I2C=y
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index b2292eb75242..484f209f41e4 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -67,6 +67,10 @@ endif
 obj-$(CONFIG_UCB1400_CORE)	+= ucb1400_core.o
 
 obj-$(CONFIG_PMIC_DA903X)	+= da903x.o
+
+obj-$(CONFIG_PMIC_DA9052)	+= da9052-core.o
+obj-$(CONFIG_MFD_DA9052_I2C)	+= da9052-i2c.o
+
 max8925-objs			:= max8925-core.o max8925-i2c.o
 obj-$(CONFIG_MFD_MAX8925)	+= max8925.o
 obj-$(CONFIG_MFD_MAX8997)	+= max8997.o max8997-irq.o
diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c
new file mode 100644
index 000000000000..a7c115ca56c8
--- /dev/null
+++ b/drivers/mfd/da9052-core.c
@@ -0,0 +1,690 @@
+/*
+ * Device access for Dialog DA9052 PMICs.
+ *
+ * Copyright(c) 2011 Dialog Semiconductor Ltd.
+ *
+ * Author: David Dajun Chen <dchen@diasemi.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/mutex.h>
+#include <linux/mfd/core.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include <linux/mfd/da9052/da9052.h>
+#include <linux/mfd/da9052/pdata.h>
+#include <linux/mfd/da9052/reg.h>
+
+#define DA9052_NUM_IRQ_REGS		4
+#define DA9052_IRQ_MASK_POS_1		0x01
+#define DA9052_IRQ_MASK_POS_2		0x02
+#define DA9052_IRQ_MASK_POS_3		0x04
+#define DA9052_IRQ_MASK_POS_4		0x08
+#define DA9052_IRQ_MASK_POS_5		0x10
+#define DA9052_IRQ_MASK_POS_6		0x20
+#define DA9052_IRQ_MASK_POS_7		0x40
+#define DA9052_IRQ_MASK_POS_8		0x80
+
+static bool da9052_reg_readable(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case DA9052_PAGE0_CON_REG:
+	case DA9052_STATUS_A_REG:
+	case DA9052_STATUS_B_REG:
+	case DA9052_STATUS_C_REG:
+	case DA9052_STATUS_D_REG:
+	case DA9052_EVENT_A_REG:
+	case DA9052_EVENT_B_REG:
+	case DA9052_EVENT_C_REG:
+	case DA9052_EVENT_D_REG:
+	case DA9052_FAULTLOG_REG:
+	case DA9052_IRQ_MASK_A_REG:
+	case DA9052_IRQ_MASK_B_REG:
+	case DA9052_IRQ_MASK_C_REG:
+	case DA9052_IRQ_MASK_D_REG:
+	case DA9052_CONTROL_A_REG:
+	case DA9052_CONTROL_B_REG:
+	case DA9052_CONTROL_C_REG:
+	case DA9052_CONTROL_D_REG:
+	case DA9052_PDDIS_REG:
+	case DA9052_INTERFACE_REG:
+	case DA9052_RESET_REG:
+	case DA9052_GPIO_0_1_REG:
+	case DA9052_GPIO_2_3_REG:
+	case DA9052_GPIO_4_5_REG:
+	case DA9052_GPIO_6_7_REG:
+	case DA9052_GPIO_14_15_REG:
+	case DA9052_ID_0_1_REG:
+	case DA9052_ID_2_3_REG:
+	case DA9052_ID_4_5_REG:
+	case DA9052_ID_6_7_REG:
+	case DA9052_ID_8_9_REG:
+	case DA9052_ID_10_11_REG:
+	case DA9052_ID_12_13_REG:
+	case DA9052_ID_14_15_REG:
+	case DA9052_ID_16_17_REG:
+	case DA9052_ID_18_19_REG:
+	case DA9052_ID_20_21_REG:
+	case DA9052_SEQ_STATUS_REG:
+	case DA9052_SEQ_A_REG:
+	case DA9052_SEQ_B_REG:
+	case DA9052_SEQ_TIMER_REG:
+	case DA9052_BUCKA_REG:
+	case DA9052_BUCKB_REG:
+	case DA9052_BUCKCORE_REG:
+	case DA9052_BUCKPRO_REG:
+	case DA9052_BUCKMEM_REG:
+	case DA9052_BUCKPERI_REG:
+	case DA9052_LDO1_REG:
+	case DA9052_LDO2_REG:
+	case DA9052_LDO3_REG:
+	case DA9052_LDO4_REG:
+	case DA9052_LDO5_REG:
+	case DA9052_LDO6_REG:
+	case DA9052_LDO7_REG:
+	case DA9052_LDO8_REG:
+	case DA9052_LDO9_REG:
+	case DA9052_LDO10_REG:
+	case DA9052_SUPPLY_REG:
+	case DA9052_PULLDOWN_REG:
+	case DA9052_CHGBUCK_REG:
+	case DA9052_WAITCONT_REG:
+	case DA9052_ISET_REG:
+	case DA9052_BATCHG_REG:
+	case DA9052_CHG_CONT_REG:
+	case DA9052_INPUT_CONT_REG:
+	case DA9052_CHG_TIME_REG:
+	case DA9052_BBAT_CONT_REG:
+	case DA9052_BOOST_REG:
+	case DA9052_LED_CONT_REG:
+	case DA9052_LEDMIN123_REG:
+	case DA9052_LED1_CONF_REG:
+	case DA9052_LED2_CONF_REG:
+	case DA9052_LED3_CONF_REG:
+	case DA9052_LED1CONT_REG:
+	case DA9052_LED2CONT_REG:
+	case DA9052_LED3CONT_REG:
+	case DA9052_LED_CONT_4_REG:
+	case DA9052_LED_CONT_5_REG:
+	case DA9052_ADC_MAN_REG:
+	case DA9052_ADC_CONT_REG:
+	case DA9052_ADC_RES_L_REG:
+	case DA9052_ADC_RES_H_REG:
+	case DA9052_VDD_RES_REG:
+	case DA9052_VDD_MON_REG:
+	case DA9052_ICHG_AV_REG:
+	case DA9052_ICHG_THD_REG:
+	case DA9052_ICHG_END_REG:
+	case DA9052_TBAT_RES_REG:
+	case DA9052_TBAT_HIGHP_REG:
+	case DA9052_TBAT_HIGHN_REG:
+	case DA9052_TBAT_LOW_REG:
+	case DA9052_T_OFFSET_REG:
+	case DA9052_ADCIN4_RES_REG:
+	case DA9052_AUTO4_HIGH_REG:
+	case DA9052_AUTO4_LOW_REG:
+	case DA9052_ADCIN5_RES_REG:
+	case DA9052_AUTO5_HIGH_REG:
+	case DA9052_AUTO5_LOW_REG:
+	case DA9052_ADCIN6_RES_REG:
+	case DA9052_AUTO6_HIGH_REG:
+	case DA9052_AUTO6_LOW_REG:
+	case DA9052_TJUNC_RES_REG:
+	case DA9052_TSI_CONT_A_REG:
+	case DA9052_TSI_CONT_B_REG:
+	case DA9052_TSI_X_MSB_REG:
+	case DA9052_TSI_Y_MSB_REG:
+	case DA9052_TSI_LSB_REG:
+	case DA9052_TSI_Z_MSB_REG:
+	case DA9052_COUNT_S_REG:
+	case DA9052_COUNT_MI_REG:
+	case DA9052_COUNT_H_REG:
+	case DA9052_COUNT_D_REG:
+	case DA9052_COUNT_MO_REG:
+	case DA9052_COUNT_Y_REG:
+	case DA9052_ALARM_MI_REG:
+	case DA9052_ALARM_H_REG:
+	case DA9052_ALARM_D_REG:
+	case DA9052_ALARM_MO_REG:
+	case DA9052_ALARM_Y_REG:
+	case DA9052_SECOND_A_REG:
+	case DA9052_SECOND_B_REG:
+	case DA9052_SECOND_C_REG:
+	case DA9052_SECOND_D_REG:
+	case DA9052_PAGE1_CON_REG:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool da9052_reg_writeable(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case DA9052_PAGE0_CON_REG:
+	case DA9052_IRQ_MASK_A_REG:
+	case DA9052_IRQ_MASK_B_REG:
+	case DA9052_IRQ_MASK_C_REG:
+	case DA9052_IRQ_MASK_D_REG:
+	case DA9052_CONTROL_A_REG:
+	case DA9052_CONTROL_B_REG:
+	case DA9052_CONTROL_C_REG:
+	case DA9052_CONTROL_D_REG:
+	case DA9052_PDDIS_REG:
+	case DA9052_RESET_REG:
+	case DA9052_GPIO_0_1_REG:
+	case DA9052_GPIO_2_3_REG:
+	case DA9052_GPIO_4_5_REG:
+	case DA9052_GPIO_6_7_REG:
+	case DA9052_GPIO_14_15_REG:
+	case DA9052_ID_0_1_REG:
+	case DA9052_ID_2_3_REG:
+	case DA9052_ID_4_5_REG:
+	case DA9052_ID_6_7_REG:
+	case DA9052_ID_8_9_REG:
+	case DA9052_ID_10_11_REG:
+	case DA9052_ID_12_13_REG:
+	case DA9052_ID_14_15_REG:
+	case DA9052_ID_16_17_REG:
+	case DA9052_ID_18_19_REG:
+	case DA9052_ID_20_21_REG:
+	case DA9052_SEQ_STATUS_REG:
+	case DA9052_SEQ_A_REG:
+	case DA9052_SEQ_B_REG:
+	case DA9052_SEQ_TIMER_REG:
+	case DA9052_BUCKA_REG:
+	case DA9052_BUCKB_REG:
+	case DA9052_BUCKCORE_REG:
+	case DA9052_BUCKPRO_REG:
+	case DA9052_BUCKMEM_REG:
+	case DA9052_BUCKPERI_REG:
+	case DA9052_LDO1_REG:
+	case DA9052_LDO2_REG:
+	case DA9052_LDO3_REG:
+	case DA9052_LDO4_REG:
+	case DA9052_LDO5_REG:
+	case DA9052_LDO6_REG:
+	case DA9052_LDO7_REG:
+	case DA9052_LDO8_REG:
+	case DA9052_LDO9_REG:
+	case DA9052_LDO10_REG:
+	case DA9052_SUPPLY_REG:
+	case DA9052_PULLDOWN_REG:
+	case DA9052_CHGBUCK_REG:
+	case DA9052_WAITCONT_REG:
+	case DA9052_ISET_REG:
+	case DA9052_BATCHG_REG:
+	case DA9052_CHG_CONT_REG:
+	case DA9052_INPUT_CONT_REG:
+	case DA9052_BBAT_CONT_REG:
+	case DA9052_BOOST_REG:
+	case DA9052_LED_CONT_REG:
+	case DA9052_LEDMIN123_REG:
+	case DA9052_LED1_CONF_REG:
+	case DA9052_LED2_CONF_REG:
+	case DA9052_LED3_CONF_REG:
+	case DA9052_LED1CONT_REG:
+	case DA9052_LED2CONT_REG:
+	case DA9052_LED3CONT_REG:
+	case DA9052_LED_CONT_4_REG:
+	case DA9052_LED_CONT_5_REG:
+	case DA9052_ADC_MAN_REG:
+	case DA9052_ADC_CONT_REG:
+	case DA9052_ADC_RES_L_REG:
+	case DA9052_ADC_RES_H_REG:
+	case DA9052_VDD_RES_REG:
+	case DA9052_VDD_MON_REG:
+	case DA9052_ICHG_THD_REG:
+	case DA9052_ICHG_END_REG:
+	case DA9052_TBAT_HIGHP_REG:
+	case DA9052_TBAT_HIGHN_REG:
+	case DA9052_TBAT_LOW_REG:
+	case DA9052_T_OFFSET_REG:
+	case DA9052_AUTO4_HIGH_REG:
+	case DA9052_AUTO4_LOW_REG:
+	case DA9052_AUTO5_HIGH_REG:
+	case DA9052_AUTO5_LOW_REG:
+	case DA9052_AUTO6_HIGH_REG:
+	case DA9052_AUTO6_LOW_REG:
+	case DA9052_TSI_CONT_A_REG:
+	case DA9052_TSI_CONT_B_REG:
+	case DA9052_COUNT_S_REG:
+	case DA9052_COUNT_MI_REG:
+	case DA9052_COUNT_H_REG:
+	case DA9052_COUNT_D_REG:
+	case DA9052_COUNT_MO_REG:
+	case DA9052_COUNT_Y_REG:
+	case DA9052_ALARM_MI_REG:
+	case DA9052_ALARM_H_REG:
+	case DA9052_ALARM_D_REG:
+	case DA9052_ALARM_MO_REG:
+	case DA9052_ALARM_Y_REG:
+	case DA9052_PAGE1_CON_REG:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool da9052_reg_volatile(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case DA9052_STATUS_A_REG:
+	case DA9052_STATUS_B_REG:
+	case DA9052_STATUS_C_REG:
+	case DA9052_STATUS_D_REG:
+	case DA9052_EVENT_A_REG:
+	case DA9052_EVENT_B_REG:
+	case DA9052_EVENT_C_REG:
+	case DA9052_EVENT_D_REG:
+	case DA9052_FAULTLOG_REG:
+	case DA9052_CHG_TIME_REG:
+	case DA9052_ADC_RES_L_REG:
+	case DA9052_ADC_RES_H_REG:
+	case DA9052_VDD_RES_REG:
+	case DA9052_ICHG_AV_REG:
+	case DA9052_TBAT_RES_REG:
+	case DA9052_ADCIN4_RES_REG:
+	case DA9052_ADCIN5_RES_REG:
+	case DA9052_ADCIN6_RES_REG:
+	case DA9052_TJUNC_RES_REG:
+	case DA9052_TSI_X_MSB_REG:
+	case DA9052_TSI_Y_MSB_REG:
+	case DA9052_TSI_LSB_REG:
+	case DA9052_TSI_Z_MSB_REG:
+	case DA9052_COUNT_S_REG:
+	case DA9052_COUNT_MI_REG:
+	case DA9052_COUNT_H_REG:
+	case DA9052_COUNT_D_REG:
+	case DA9052_COUNT_MO_REG:
+	case DA9052_COUNT_Y_REG:
+	case DA9052_ALARM_MI_REG:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static struct resource da9052_rtc_resource = {
+	.name = "ALM",
+	.start = DA9052_IRQ_ALARM,
+	.end   = DA9052_IRQ_ALARM,
+	.flags = IORESOURCE_IRQ,
+};
+
+static struct resource da9052_onkey_resource = {
+	.name = "ONKEY",
+	.start = DA9052_IRQ_NONKEY,
+	.end   = DA9052_IRQ_NONKEY,
+	.flags = IORESOURCE_IRQ,
+};
+
+static struct resource da9052_bat_resources[] = {
+	{
+		.name = "BATT TEMP",
+		.start = DA9052_IRQ_TBAT,
+		.end   = DA9052_IRQ_TBAT,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name = "DCIN DET",
+		.start = DA9052_IRQ_DCIN,
+		.end   = DA9052_IRQ_DCIN,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name = "DCIN REM",
+		.start = DA9052_IRQ_DCINREM,
+		.end   = DA9052_IRQ_DCINREM,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name = "VBUS DET",
+		.start = DA9052_IRQ_VBUS,
+		.end   = DA9052_IRQ_VBUS,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name = "VBUS REM",
+		.start = DA9052_IRQ_VBUSREM,
+		.end   = DA9052_IRQ_VBUSREM,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name = "CHG END",
+		.start = DA9052_IRQ_CHGEND,
+		.end   = DA9052_IRQ_CHGEND,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+static struct resource da9052_tsi_resources[] = {
+	{
+		.name = "PENDWN",
+		.start = DA9052_IRQ_PENDOWN,
+		.end   = DA9052_IRQ_PENDOWN,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name = "TSIRDY",
+		.start = DA9052_IRQ_TSIREADY,
+		.end   = DA9052_IRQ_TSIREADY,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+static struct mfd_cell __initdata da9052_subdev_info[] = {
+	{
+		.name = "da9052-regulator",
+		.id = 1,
+	},
+	{
+		.name = "da9052-regulator",
+		.id = 2,
+	},
+	{
+		.name = "da9052-regulator",
+		.id = 3,
+	},
+	{
+		.name = "da9052-regulator",
+		.id = 4,
+	},
+	{
+		.name = "da9052-regulator",
+		.id = 5,
+	},
+	{
+		.name = "da9052-regulator",
+		.id = 6,
+	},
+	{
+		.name = "da9052-regulator",
+		.id = 7,
+	},
+	{
+		.name = "da9052-regulator",
+		.id = 8,
+	},
+	{
+		.name = "da9052-regulator",
+		.id = 9,
+	},
+	{
+		.name = "da9052-regulator",
+		.id = 10,
+	},
+	{
+		.name = "da9052-regulator",
+		.id = 11,
+	},
+	{
+		.name = "da9052-regulator",
+		.id = 12,
+	},
+	{
+		.name = "da9052-regulator",
+		.id = 13,
+	},
+	{
+		.name = "da9052-regulator",
+		.id = 14,
+	},
+	{
+		.name = "da9052-onkey",
+		.resources = &da9052_onkey_resource,
+		.num_resources = 1,
+	},
+	{
+		.name = "da9052-rtc",
+		.resources = &da9052_rtc_resource,
+		.num_resources = 1,
+	},
+	{
+		.name = "da9052-gpio",
+	},
+	{
+		.name = "da9052-hwmon",
+	},
+	{
+		.name = "da9052-leds",
+	},
+	{
+		.name = "da9052-wled1",
+	},
+	{
+		.name = "da9052-wled2",
+	},
+	{
+		.name = "da9052-wled3",
+	},
+	{
+		.name = "da9052-tsi",
+		.resources = da9052_tsi_resources,
+		.num_resources = ARRAY_SIZE(da9052_tsi_resources),
+	},
+	{
+		.name = "da9052-bat",
+		.resources = da9052_bat_resources,
+		.num_resources = ARRAY_SIZE(da9052_bat_resources),
+	},
+	{
+		.name = "da9052-watchdog",
+	},
+};
+
+static struct regmap_irq da9052_irqs[] = {
+	[DA9052_IRQ_DCIN] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_VBUS] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_DCINREM] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_VBUSREM] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_VDDLOW] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_ALARM] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_SEQRDY] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_COMP1V2] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+	[DA9052_IRQ_NONKEY] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_IDFLOAT] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_IDGND] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_CHGEND] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_TBAT] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_ADC_EOM] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_PENDOWN] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_TSIREADY] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+	[DA9052_IRQ_GPI0] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_GPI1] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_GPI2] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_GPI3] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_GPI4] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_GPI5] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_GPI6] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_GPI7] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+	[DA9052_IRQ_GPI8] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_GPI9] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_GPI10] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_GPI11] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_GPI12] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_GPI13] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_GPI14] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_GPI15] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+};
+
+static struct regmap_irq_chip da9052_regmap_irq_chip = {
+	.name = "da9052_irq",
+	.status_base = DA9052_EVENT_A_REG,
+	.mask_base = DA9052_IRQ_MASK_A_REG,
+	.ack_base = DA9052_EVENT_A_REG,
+	.num_regs = DA9052_NUM_IRQ_REGS,
+	.irqs = da9052_irqs,
+	.num_irqs = ARRAY_SIZE(da9052_irqs),
+};
+
+struct regmap_config da9052_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+
+	.cache_type = REGCACHE_RBTREE,
+
+	.max_register = DA9052_PAGE1_CON_REG,
+	.readable_reg = da9052_reg_readable,
+	.writeable_reg = da9052_reg_writeable,
+	.volatile_reg = da9052_reg_volatile,
+};
+EXPORT_SYMBOL_GPL(da9052_regmap_config);
+
+int da9052_device_init(struct da9052 *da9052, u8 chip_id)
+{
+	struct da9052_pdata *pdata = da9052->dev->platform_data;
+	struct irq_desc *desc;
+	int ret;
+
+	mutex_init(&da9052->io_lock);
+
+	if (pdata && pdata->init != NULL)
+		pdata->init(da9052);
+
+	da9052->chip_id = chip_id;
+
+	if (!pdata || !pdata->irq_base)
+		da9052->irq_base = -1;
+	else
+		da9052->irq_base = pdata->irq_base;
+
+	ret = regmap_add_irq_chip(da9052->regmap, da9052->chip_irq,
+				  IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				  da9052->irq_base, &da9052_regmap_irq_chip,
+				  NULL);
+	if (ret < 0)
+		goto regmap_err;
+
+	desc = irq_to_desc(da9052->chip_irq);
+	da9052->irq_base = regmap_irq_chip_get_base(desc->action->dev_id);
+
+	ret = mfd_add_devices(da9052->dev, -1, da9052_subdev_info,
+			      ARRAY_SIZE(da9052_subdev_info), NULL, 0);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err:
+	mfd_remove_devices(da9052->dev);
+regmap_err:
+	return ret;
+}
+
+void da9052_device_exit(struct da9052 *da9052)
+{
+	regmap_del_irq_chip(da9052->chip_irq,
+			    irq_get_irq_data(da9052->irq_base)->chip_data);
+	mfd_remove_devices(da9052->dev);
+}
+
+MODULE_AUTHOR("David Dajun Chen <dchen@diasemi.com>");
+MODULE_DESCRIPTION("DA9052 MFD Core");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/da9052-i2c.c b/drivers/mfd/da9052-i2c.c
new file mode 100644
index 000000000000..44b97c70a61f
--- /dev/null
+++ b/drivers/mfd/da9052-i2c.c
@@ -0,0 +1,140 @@
+/*
+ * I2C access for DA9052 PMICs.
+ *
+ * Copyright(c) 2011 Dialog Semiconductor Ltd.
+ *
+ * Author: David Dajun Chen <dchen@diasemi.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/input.h>
+#include <linux/mfd/core.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+
+#include <linux/mfd/da9052/da9052.h>
+#include <linux/mfd/da9052/reg.h>
+
+static int da9052_i2c_enable_multiwrite(struct da9052 *da9052)
+{
+	int reg_val, ret;
+
+	ret = regmap_read(da9052->regmap, DA9052_CONTROL_B_REG, &reg_val);
+	if (ret < 0)
+		return ret;
+
+	if (reg_val & DA9052_CONTROL_B_WRITEMODE) {
+		reg_val &= ~DA9052_CONTROL_B_WRITEMODE;
+		ret = regmap_write(da9052->regmap, DA9052_CONTROL_B_REG,
+				   reg_val);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int __devinit da9052_i2c_probe(struct i2c_client *client,
+				       const struct i2c_device_id *id)
+{
+	struct da9052 *da9052;
+	int ret;
+
+	da9052 = kzalloc(sizeof(struct da9052), GFP_KERNEL);
+	if (!da9052)
+		return -ENOMEM;
+
+	if (!i2c_check_functionality(client->adapter,
+				     I2C_FUNC_SMBUS_BYTE_DATA)) {
+		dev_info(&client->dev, "Error in %s:i2c_check_functionality\n",
+			 __func__);
+		ret = -ENODEV;
+		goto err;
+	}
+
+	da9052->dev = &client->dev;
+	da9052->chip_irq = client->irq;
+
+	i2c_set_clientdata(client, da9052);
+
+	da9052->regmap = regmap_init_i2c(client, &da9052_regmap_config);
+	if (IS_ERR(da9052->regmap)) {
+		ret = PTR_ERR(da9052->regmap);
+		dev_err(&client->dev, "Failed to allocate register map: %d\n",
+			ret);
+		goto err;
+	}
+
+	ret = da9052_i2c_enable_multiwrite(da9052);
+	if (ret < 0)
+		goto err;
+
+	ret = da9052_device_init(da9052, id->driver_data);
+	if (ret != 0)
+		goto err;
+
+	return 0;
+
+err:
+	kfree(da9052);
+	return ret;
+}
+
+static int da9052_i2c_remove(struct i2c_client *client)
+{
+	struct da9052 *da9052 = i2c_get_clientdata(client);
+
+	da9052_device_exit(da9052);
+	kfree(da9052);
+
+	return 0;
+}
+
+static struct i2c_device_id da9052_i2c_id[] = {
+	{"da9052", DA9052},
+	{"da9053-aa", DA9053_AA},
+	{"da9053-ba", DA9053_BA},
+	{"da9053-bb", DA9053_BB},
+	{}
+};
+
+static struct i2c_driver da9052_i2c_driver = {
+	.probe = da9052_i2c_probe,
+	.remove = da9052_i2c_remove,
+	.id_table = da9052_i2c_id,
+	.driver = {
+		.name = "da9052",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init da9052_i2c_init(void)
+{
+	int ret;
+
+	ret = i2c_add_driver(&da9052_i2c_driver);
+	if (ret != 0) {
+		pr_err("DA9052 I2C registration failed %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+subsys_initcall(da9052_i2c_init);
+
+static void __exit da9052_i2c_exit(void)
+{
+	i2c_del_driver(&da9052_i2c_driver);
+}
+module_exit(da9052_i2c_exit);
+
+MODULE_AUTHOR("David Dajun Chen <dchen@diasemi.com>");
+MODULE_DESCRIPTION("I2C driver for Dialog DA9052 PMIC");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h
new file mode 100644
index 000000000000..c8899ab20549
--- /dev/null
+++ b/include/linux/mfd/da9052/da9052.h
@@ -0,0 +1,129 @@
+/*
+ * da9052 declarations for DA9052 PMICs.
+ *
+ * Copyright(c) 2011 Dialog Semiconductor Ltd.
+ *
+ * Author: David Dajun Chen <dchen@diasemi.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __MFD_DA9052_DA9052_H
+#define __MFD_DA9052_DA9052_H
+
+#include <linux/interrupt.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/completion.h>
+#include <linux/list.h>
+#include <linux/mfd/core.h>
+
+#include <linux/mfd/da9052/reg.h>
+
+#define DA9052_IRQ_DCIN	0
+#define DA9052_IRQ_VBUS	1
+#define DA9052_IRQ_DCINREM	2
+#define DA9052_IRQ_VBUSREM	3
+#define DA9052_IRQ_VDDLOW	4
+#define DA9052_IRQ_ALARM	5
+#define DA9052_IRQ_SEQRDY	6
+#define DA9052_IRQ_COMP1V2	7
+#define DA9052_IRQ_NONKEY	8
+#define DA9052_IRQ_IDFLOAT	9
+#define DA9052_IRQ_IDGND	10
+#define DA9052_IRQ_CHGEND	11
+#define DA9052_IRQ_TBAT	12
+#define DA9052_IRQ_ADC_EOM	13
+#define DA9052_IRQ_PENDOWN	14
+#define DA9052_IRQ_TSIREADY	15
+#define DA9052_IRQ_GPI0	16
+#define DA9052_IRQ_GPI1	17
+#define DA9052_IRQ_GPI2	18
+#define DA9052_IRQ_GPI3	19
+#define DA9052_IRQ_GPI4	20
+#define DA9052_IRQ_GPI5	21
+#define DA9052_IRQ_GPI6	22
+#define DA9052_IRQ_GPI7	23
+#define DA9052_IRQ_GPI8	24
+#define DA9052_IRQ_GPI9	25
+#define DA9052_IRQ_GPI10	26
+#define DA9052_IRQ_GPI11	27
+#define DA9052_IRQ_GPI12	28
+#define DA9052_IRQ_GPI13	29
+#define DA9052_IRQ_GPI14	30
+#define DA9052_IRQ_GPI15	31
+
+enum da9052_chip_id {
+	DA9052,
+	DA9053_AA,
+	DA9053_BA,
+	DA9053_BB,
+};
+
+struct da9052_pdata;
+
+struct da9052 {
+	struct mutex io_lock;
+
+	struct device *dev;
+	struct regmap *regmap;
+
+	int irq_base;
+	u8 chip_id;
+
+	int chip_irq;
+};
+
+/* Device I/O API */
+static inline int da9052_reg_read(struct da9052 *da9052, unsigned char reg)
+{
+	int val, ret;
+
+	ret = regmap_read(da9052->regmap, reg, &val);
+	if (ret < 0)
+		return ret;
+	return val;
+}
+
+static inline int da9052_reg_write(struct da9052 *da9052, unsigned char reg,
+				    unsigned char val)
+{
+	return regmap_write(da9052->regmap, reg, val);
+}
+
+static inline int da9052_group_read(struct da9052 *da9052, unsigned char reg,
+				     unsigned reg_cnt, unsigned char *val)
+{
+	return regmap_bulk_read(da9052->regmap, reg, val, reg_cnt);
+}
+
+static inline int da9052_group_write(struct da9052 *da9052, unsigned char reg,
+				      unsigned reg_cnt, unsigned char *val)
+{
+	return regmap_raw_write(da9052->regmap, reg, val, reg_cnt);
+}
+
+static inline int da9052_reg_update(struct da9052 *da9052, unsigned char reg,
+				     unsigned char bit_mask,
+				     unsigned char reg_val)
+{
+	return regmap_update_bits(da9052->regmap, reg, bit_mask, reg_val);
+}
+
+int da9052_device_init(struct da9052 *da9052, u8 chip_id);
+void da9052_device_exit(struct da9052 *da9052);
+
+#endif /* __MFD_DA9052_DA9052_H */
diff --git a/include/linux/mfd/da9052/pdata.h b/include/linux/mfd/da9052/pdata.h
new file mode 100644
index 000000000000..62c5c3c2992e
--- /dev/null
+++ b/include/linux/mfd/da9052/pdata.h
@@ -0,0 +1,40 @@
+/*
+ * Platform data declarations for DA9052 PMICs.
+ *
+ * Copyright(c) 2011 Dialog Semiconductor Ltd.
+ *
+ * Author: David Dajun Chen <dchen@diasemi.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __MFD_DA9052_PDATA_H__
+#define __MFD_DA9052_PDATA_H__
+
+#define DA9052_MAX_REGULATORS	14
+
+struct da9052;
+
+struct da9052_pdata {
+	struct led_platform_data *pled;
+	int (*init) (struct da9052 *da9052);
+	int irq_base;
+	int gpio_base;
+	int use_for_apm;
+	struct regulator_init_data *regulators[DA9052_MAX_REGULATORS];
+};
+
+#endif
diff --git a/include/linux/mfd/da9052/reg.h b/include/linux/mfd/da9052/reg.h
new file mode 100644
index 000000000000..b97f7309d7f6
--- /dev/null
+++ b/include/linux/mfd/da9052/reg.h
@@ -0,0 +1,749 @@
+/*
+ * Register declarations for DA9052 PMICs.
+ *
+ * Copyright(c) 2011 Dialog Semiconductor Ltd.
+ *
+ * Author: David Dajun Chen <dchen@diasemi.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LINUX_MFD_DA9052_REG_H
+#define __LINUX_MFD_DA9052_REG_H
+
+/* PAGE REGISTERS */
+#define DA9052_PAGE0_CON_REG		0
+#define DA9052_PAGE1_CON_REG		128
+
+/* STATUS REGISTERS */
+#define DA9052_STATUS_A_REG		1
+#define DA9052_STATUS_B_REG		2
+#define DA9052_STATUS_C_REG		3
+#define DA9052_STATUS_D_REG		4
+
+/* EVENT REGISTERS */
+#define DA9052_EVENT_A_REG		5
+#define DA9052_EVENT_B_REG		6
+#define DA9052_EVENT_C_REG		7
+#define DA9052_EVENT_D_REG		8
+#define DA9052_FAULTLOG_REG		9
+
+/* IRQ REGISTERS */
+#define DA9052_IRQ_MASK_A_REG		10
+#define DA9052_IRQ_MASK_B_REG		11
+#define DA9052_IRQ_MASK_C_REG		12
+#define DA9052_IRQ_MASK_D_REG		13
+
+/* CONTROL REGISTERS */
+#define DA9052_CONTROL_A_REG		14
+#define DA9052_CONTROL_B_REG		15
+#define DA9052_CONTROL_C_REG		16
+#define DA9052_CONTROL_D_REG		17
+
+#define DA9052_PDDIS_REG		18
+#define DA9052_INTERFACE_REG		19
+#define DA9052_RESET_REG		20
+
+/* GPIO REGISTERS */
+#define DA9052_GPIO_0_1_REG		21
+#define DA9052_GPIO_2_3_REG		22
+#define DA9052_GPIO_4_5_REG		23
+#define DA9052_GPIO_6_7_REG		24
+#define DA9052_GPIO_14_15_REG		28
+
+/* POWER SEQUENCER CONTROL REGISTERS */
+#define DA9052_ID_0_1_REG		29
+#define DA9052_ID_2_3_REG		30
+#define DA9052_ID_4_5_REG		31
+#define DA9052_ID_6_7_REG		32
+#define DA9052_ID_8_9_REG		33
+#define DA9052_ID_10_11_REG		34
+#define DA9052_ID_12_13_REG		35
+#define DA9052_ID_14_15_REG		36
+#define DA9052_ID_16_17_REG		37
+#define DA9052_ID_18_19_REG		38
+#define DA9052_ID_20_21_REG		39
+#define DA9052_SEQ_STATUS_REG		40
+#define DA9052_SEQ_A_REG		41
+#define DA9052_SEQ_B_REG		42
+#define DA9052_SEQ_TIMER_REG		43
+
+/* LDO AND BUCK REGISTERS */
+#define DA9052_BUCKA_REG		44
+#define DA9052_BUCKB_REG		45
+#define DA9052_BUCKCORE_REG		46
+#define DA9052_BUCKPRO_REG		47
+#define DA9052_BUCKMEM_REG		48
+#define DA9052_BUCKPERI_REG		49
+#define DA9052_LDO1_REG		50
+#define DA9052_LDO2_REG		51
+#define DA9052_LDO3_REG		52
+#define DA9052_LDO4_REG		53
+#define DA9052_LDO5_REG		54
+#define DA9052_LDO6_REG		55
+#define DA9052_LDO7_REG		56
+#define DA9052_LDO8_REG		57
+#define DA9052_LDO9_REG		58
+#define DA9052_LDO10_REG		59
+#define DA9052_SUPPLY_REG		60
+#define DA9052_PULLDOWN_REG		61
+#define DA9052_CHGBUCK_REG		62
+#define DA9052_WAITCONT_REG		63
+#define DA9052_ISET_REG		64
+#define DA9052_BATCHG_REG		65
+
+/* BATTERY CONTROL REGISTRS */
+#define DA9052_CHG_CONT_REG		66
+#define DA9052_INPUT_CONT_REG		67
+#define DA9052_CHG_TIME_REG		68
+#define DA9052_BBAT_CONT_REG		69
+
+/* LED CONTROL REGISTERS */
+#define DA9052_BOOST_REG		70
+#define DA9052_LED_CONT_REG		71
+#define DA9052_LEDMIN123_REG		72
+#define DA9052_LED1_CONF_REG		73
+#define DA9052_LED2_CONF_REG		74
+#define DA9052_LED3_CONF_REG		75
+#define DA9052_LED1CONT_REG		76
+#define DA9052_LED2CONT_REG		77
+#define DA9052_LED3CONT_REG		78
+#define DA9052_LED_CONT_4_REG		79
+#define DA9052_LED_CONT_5_REG		80
+
+/* ADC CONTROL REGISTERS */
+#define DA9052_ADC_MAN_REG		81
+#define DA9052_ADC_CONT_REG		82
+#define DA9052_ADC_RES_L_REG		83
+#define DA9052_ADC_RES_H_REG		84
+#define DA9052_VDD_RES_REG		85
+#define DA9052_VDD_MON_REG		86
+
+#define DA9052_ICHG_AV_REG		87
+#define DA9052_ICHG_THD_REG		88
+#define DA9052_ICHG_END_REG		89
+#define DA9052_TBAT_RES_REG		90
+#define DA9052_TBAT_HIGHP_REG		91
+#define DA9052_TBAT_HIGHN_REG		92
+#define DA9052_TBAT_LOW_REG		93
+#define DA9052_T_OFFSET_REG		94
+
+#define DA9052_ADCIN4_RES_REG		95
+#define DA9052_AUTO4_HIGH_REG		96
+#define DA9052_AUTO4_LOW_REG		97
+#define DA9052_ADCIN5_RES_REG		98
+#define DA9052_AUTO5_HIGH_REG		99
+#define DA9052_AUTO5_LOW_REG		100
+#define DA9052_ADCIN6_RES_REG		101
+#define DA9052_AUTO6_HIGH_REG		102
+#define DA9052_AUTO6_LOW_REG		103
+
+#define DA9052_TJUNC_RES_REG		104
+
+/* TSI CONTROL REGISTERS */
+#define DA9052_TSI_CONT_A_REG		105
+#define DA9052_TSI_CONT_B_REG		106
+#define DA9052_TSI_X_MSB_REG		107
+#define DA9052_TSI_Y_MSB_REG		108
+#define DA9052_TSI_LSB_REG		109
+#define DA9052_TSI_Z_MSB_REG		110
+
+/* RTC COUNT REGISTERS */
+#define DA9052_COUNT_S_REG		111
+#define DA9052_COUNT_MI_REG		112
+#define DA9052_COUNT_H_REG		113
+#define DA9052_COUNT_D_REG		114
+#define DA9052_COUNT_MO_REG		115
+#define DA9052_COUNT_Y_REG		116
+
+/* RTC CONTROL REGISTERS */
+#define DA9052_ALARM_MI_REG		117
+#define DA9052_ALARM_H_REG		118
+#define DA9052_ALARM_D_REG		119
+#define DA9052_ALARM_MO_REG		120
+#define DA9052_ALARM_Y_REG		121
+#define DA9052_SECOND_A_REG		122
+#define DA9052_SECOND_B_REG		123
+#define DA9052_SECOND_C_REG		124
+#define DA9052_SECOND_D_REG		125
+
+/* PAGE CONFIGURATION BIT */
+#define DA9052_PAGE_CONF		0X80
+
+/* STATUS REGISTER A BITS */
+#define DA9052_STATUSA_VDATDET		0X80
+#define DA9052_STATUSA_VBUSSEL		0X40
+#define DA9052_STATUSA_DCINSEL		0X20
+#define DA9052_STATUSA_VBUSDET		0X10
+#define DA9052_STATUSA_DCINDET		0X08
+#define DA9052_STATUSA_IDGND		0X04
+#define DA9052_STATUSA_IDFLOAT		0X02
+#define DA9052_STATUSA_NONKEY		0X01
+
+/* STATUS REGISTER B BITS */
+#define DA9052_STATUSB_COMPDET		0X80
+#define DA9052_STATUSB_SEQUENCING	0X40
+#define DA9052_STATUSB_GPFB2		0X20
+#define DA9052_STATUSB_CHGTO		0X10
+#define DA9052_STATUSB_CHGEND		0X08
+#define DA9052_STATUSB_CHGLIM		0X04
+#define DA9052_STATUSB_CHGPRE		0X02
+#define DA9052_STATUSB_CHGATT		0X01
+
+/* STATUS REGISTER C BITS */
+#define DA9052_STATUSC_GPI7		0X80
+#define DA9052_STATUSC_GPI6		0X40
+#define DA9052_STATUSC_GPI5		0X20
+#define DA9052_STATUSC_GPI4		0X10
+#define DA9052_STATUSC_GPI3		0X08
+#define DA9052_STATUSC_GPI2		0X04
+#define DA9052_STATUSC_GPI1		0X02
+#define DA9052_STATUSC_GPI0		0X01
+
+/* STATUS REGISTER D BITS */
+#define DA9052_STATUSD_GPI15		0X80
+#define DA9052_STATUSD_GPI14		0X40
+#define DA9052_STATUSD_GPI13		0X20
+#define DA9052_STATUSD_GPI12		0X10
+#define DA9052_STATUSD_GPI11		0X08
+#define DA9052_STATUSD_GPI10		0X04
+#define DA9052_STATUSD_GPI9		0X02
+#define DA9052_STATUSD_GPI8		0X01
+
+/* EVENT REGISTER A BITS */
+#define DA9052_EVENTA_ECOMP1V2		0X80
+#define DA9052_EVENTA_ESEQRDY		0X40
+#define DA9052_EVENTA_EALRAM		0X20
+#define DA9052_EVENTA_EVDDLOW		0X10
+#define DA9052_EVENTA_EVBUSREM		0X08
+#define DA9052_EVENTA_EDCINREM		0X04
+#define DA9052_EVENTA_EVBUSDET		0X02
+#define DA9052_EVENTA_EDCINDET		0X01
+
+/* EVENT REGISTER B BITS */
+#define DA9052_EVENTB_ETSIREADY	0X80
+#define DA9052_EVENTB_EPENDOWN		0X40
+#define DA9052_EVENTB_EADCEOM		0X20
+#define DA9052_EVENTB_ETBAT		0X10
+#define DA9052_EVENTB_ECHGEND		0X08
+#define DA9052_EVENTB_EIDGND		0X04
+#define DA9052_EVENTB_EIDFLOAT		0X02
+#define DA9052_EVENTB_ENONKEY		0X01
+
+/* EVENT REGISTER C BITS */
+#define DA9052_EVENTC_EGPI7		0X80
+#define DA9052_EVENTC_EGPI6		0X40
+#define DA9052_EVENTC_EGPI5		0X20
+#define DA9052_EVENTC_EGPI4		0X10
+#define DA9052_EVENTC_EGPI3		0X08
+#define DA9052_EVENTC_EGPI2		0X04
+#define DA9052_EVENTC_EGPI1		0X02
+#define DA9052_EVENTC_EGPI0		0X01
+
+/* EVENT REGISTER D BITS */
+#define DA9052_EVENTD_EGPI15		0X80
+#define DA9052_EVENTD_EGPI14		0X40
+#define DA9052_EVENTD_EGPI13		0X20
+#define DA9052_EVENTD_EGPI12		0X10
+#define DA9052_EVENTD_EGPI11		0X08
+#define DA9052_EVENTD_EGPI10		0X04
+#define DA9052_EVENTD_EGPI9		0X02
+#define DA9052_EVENTD_EGPI8		0X01
+
+/* IRQ MASK REGISTERS BITS */
+#define DA9052_M_NONKEY		0X0100
+
+/* TSI EVENT REGISTERS BITS */
+#define DA9052_E_PEN_DOWN		0X4000
+#define DA9052_E_TSI_READY		0X8000
+
+/* FAULT LOG REGISTER BITS */
+#define DA9052_FAULTLOG_WAITSET	0X80
+#define DA9052_FAULTLOG_NSDSET		0X40
+#define DA9052_FAULTLOG_KEYSHUT	0X20
+#define DA9052_FAULTLOG_TEMPOVER	0X08
+#define DA9052_FAULTLOG_VDDSTART	0X04
+#define DA9052_FAULTLOG_VDDFAULT	0X02
+#define DA9052_FAULTLOG_TWDERROR	0X01
+
+/* CONTROL REGISTER A BITS */
+#define DA9052_CONTROLA_GPIV		0X80
+#define DA9052_CONTROLA_PMOTYPE	0X20
+#define DA9052_CONTROLA_PMOV		0X10
+#define DA9052_CONTROLA_PMIV		0X08
+#define DA9052_CONTROLA_PMIFV		0X08
+#define DA9052_CONTROLA_PWR1EN		0X04
+#define DA9052_CONTROLA_PWREN		0X02
+#define DA9052_CONTROLA_SYSEN		0X01
+
+/* CONTROL REGISTER B BITS */
+#define DA9052_CONTROLB_SHUTDOWN	0X80
+#define DA9052_CONTROLB_DEEPSLEEP	0X40
+#define DA9052_CONTROL_B_WRITEMODE	0X20
+#define DA9052_CONTROLB_BBATEN		0X10
+#define DA9052_CONTROLB_OTPREADEN	0X08
+#define DA9052_CONTROLB_AUTOBOOT	0X04
+#define DA9052_CONTROLB_ACTDIODE	0X02
+#define DA9052_CONTROLB_BUCKMERGE	0X01
+
+/* CONTROL REGISTER C BITS */
+#define DA9052_CONTROLC_BLINKDUR	0X80
+#define DA9052_CONTROLC_BLINKFRQ	0X60
+#define DA9052_CONTROLC_DEBOUNCING	0X1C
+#define DA9052_CONTROLC_PMFB2PIN	0X02
+#define DA9052_CONTROLC_PMFB1PIN	0X01
+
+/* CONTROL REGISTER D BITS */
+#define DA9052_CONTROLD_WATCHDOG	0X80
+#define DA9052_CONTROLD_ACCDETEN	0X40
+#define DA9052_CONTROLD_GPI1415SD	0X20
+#define DA9052_CONTROLD_NONKEYSD	0X10
+#define DA9052_CONTROLD_KEEPACTEN	0X08
+#define DA9052_CONTROLD_TWDSCALE	0X07
+
+/* POWER DOWN DISABLE REGISTER BITS */
+#define DA9052_PDDIS_PMCONTPD		0X80
+#define DA9052_PDDIS_OUT32KPD		0X40
+#define DA9052_PDDIS_CHGBBATPD		0X20
+#define DA9052_PDDIS_CHGPD		0X10
+#define DA9052_PDDIS_HS2WIREPD		0X08
+#define DA9052_PDDIS_PMIFPD		0X04
+#define DA9052_PDDIS_GPADCPD		0X02
+#define DA9052_PDDIS_GPIOPD		0X01
+
+/* CONTROL REGISTER D BITS */
+#define DA9052_INTERFACE_IFBASEADDR	0XE0
+#define DA9052_INTERFACE_NCSPOL	0X10
+#define DA9052_INTERFACE_RWPOL		0X08
+#define DA9052_INTERFACE_CPHA		0X04
+#define DA9052_INTERFACE_CPOL		0X02
+#define DA9052_INTERFACE_IFTYPE	0X01
+
+/* CONTROL REGISTER D BITS */
+#define DA9052_RESET_RESETEVENT	0XC0
+#define DA9052_RESET_RESETTIMER	0X3F
+
+/* GPIO REGISTERS */
+/* GPIO CONTROL REGISTER BITS */
+#define DA9052_GPIO_EVEN_PORT_PIN	0X03
+#define DA9052_GPIO_EVEN_PORT_TYPE	0X04
+#define DA9052_GPIO_EVEN_PORT_MODE	0X08
+
+#define DA9052_GPIO_ODD_PORT_PIN	0X30
+#define DA9052_GPIO_ODD_PORT_TYPE	0X40
+#define DA9052_GPIO_ODD_PORT_MODE	0X80
+
+/*POWER SEQUENCER REGISTER BITS */
+/* SEQ CONTROL REGISTER BITS FOR ID 0 AND 1 */
+#define DA9052_ID01_LDO1STEP		0XF0
+#define DA9052_ID01_SYSPRE		0X04
+#define DA9052_ID01_DEFSUPPLY		0X02
+#define DA9052_ID01_NRESMODE		0X01
+
+/* SEQ CONTROL REGISTER BITS FOR ID 2 AND 3 */
+#define DA9052_ID23_LDO3STEP		0XF0
+#define DA9052_ID23_LDO2STEP		0X0F
+
+/* SEQ CONTROL REGISTER BITS FOR ID 4 AND 5 */
+#define DA9052_ID45_LDO5STEP		0XF0
+#define DA9052_ID45_LDO4STEP		0X0F
+
+/* SEQ CONTROL REGISTER BITS FOR ID 6 AND 7 */
+#define DA9052_ID67_LDO7STEP		0XF0
+#define DA9052_ID67_LDO6STEP		0X0F
+
+/* SEQ CONTROL REGISTER BITS FOR ID 8 AND 9 */
+#define DA9052_ID89_LDO9STEP		0XF0
+#define DA9052_ID89_LDO8STEP		0X0F
+
+/* SEQ CONTROL REGISTER BITS FOR ID 10 AND 11 */
+#define DA9052_ID1011_PDDISSTEP	0XF0
+#define DA9052_ID1011_LDO10STEP	0X0F
+
+/* SEQ CONTROL REGISTER BITS FOR ID 12 AND 13 */
+#define DA9052_ID1213_VMEMSWSTEP	0XF0
+#define DA9052_ID1213_VPERISWSTEP	0X0F
+
+/* SEQ CONTROL REGISTER BITS FOR ID 14 AND 15 */
+#define DA9052_ID1415_BUCKPROSTEP	0XF0
+#define DA9052_ID1415_BUCKCORESTEP	0X0F
+
+/* SEQ CONTROL REGISTER BITS FOR ID 16 AND 17 */
+#define DA9052_ID1617_BUCKPERISTEP	0XF0
+#define DA9052_ID1617_BUCKMEMSTEP	0X0F
+
+/* SEQ CONTROL REGISTER BITS FOR ID 18 AND 19 */
+#define DA9052_ID1819_GPRISE2STEP	0XF0
+#define DA9052_ID1819_GPRISE1STEP	0X0F
+
+/* SEQ CONTROL REGISTER BITS FOR ID 20 AND 21 */
+#define DA9052_ID2021_GPFALL2STEP	0XF0
+#define DA9052_ID2021_GPFALL1STEP	0X0F
+
+/* POWER SEQ STATUS REGISTER BITS */
+#define DA9052_SEQSTATUS_SEQPOINTER	0XF0
+#define DA9052_SEQSTATUS_WAITSTEP	0X0F
+
+/* POWER SEQ A REGISTER BITS */
+#define DA9052_SEQA_POWEREND		0XF0
+#define DA9052_SEQA_SYSTEMEND		0X0F
+
+/* POWER SEQ B REGISTER BITS */
+#define DA9052_SEQB_PARTDOWN		0XF0
+#define DA9052_SEQB_MAXCOUNT		0X0F
+
+/* POWER SEQ TIMER REGISTER BITS */
+#define DA9052_SEQTIMER_SEQDUMMY	0XF0
+#define DA9052_SEQTIMER_SEQTIME	0X0F
+
+/*POWER SUPPLY CONTROL REGISTER BITS */
+/* BUCK REGISTER A BITS */
+#define DA9052_BUCKA_BPROILIM		0XC0
+#define DA9052_BUCKA_BPROMODE		0X30
+#define DA9052_BUCKA_BCOREILIM		0X0C
+#define DA9052_BUCKA_BCOREMODE		0X03
+
+/* BUCK REGISTER B BITS */
+#define DA9052_BUCKB_BERIILIM		0XC0
+#define DA9052_BUCKB_BPERIMODE		0X30
+#define DA9052_BUCKB_BMEMILIM		0X0C
+#define DA9052_BUCKB_BMEMMODE		0X03
+
+/* BUCKCORE REGISTER BITS */
+#define DA9052_BUCKCORE_BCORECONF	0X80
+#define DA9052_BUCKCORE_BCOREEN	0X40
+#define DA9052_BUCKCORE_VBCORE		0X3F
+
+/* BUCKPRO REGISTER BITS */
+#define DA9052_BUCKPRO_BPROCONF	0X80
+#define DA9052_BUCKPRO_BPROEN		0X40
+#define DA9052_BUCKPRO_VBPRO		0X3F
+
+/* BUCKMEM REGISTER BITS */
+#define DA9052_BUCKMEM_BMEMCONF	0X80
+#define DA9052_BUCKMEM_BMEMEN		0X40
+#define DA9052_BUCKMEM_VBMEM		0X3F
+
+/* BUCKPERI REGISTER BITS */
+#define DA9052_BUCKPERI_BPERICONF	0X80
+#define DA9052_BUCKPERI_BPERIEN	0X40
+#define DA9052_BUCKPERI_BPERIHS	0X20
+#define DA9052_BUCKPERI_VBPERI		0X1F
+
+/* LDO1 REGISTER BITS */
+#define DA9052_LDO1_LDO1CONF		0X80
+#define DA9052_LDO1_LDO1EN		0X40
+#define DA9052_LDO1_VLDO1		0X1F
+
+/* LDO2 REGISTER BITS */
+#define DA9052_LDO2_LDO2CONF		0X80
+#define DA9052_LDO2_LDO2EN		0X40
+#define DA9052_LDO2_VLDO2		0X3F
+
+/* LDO3 REGISTER BITS */
+#define DA9052_LDO3_LDO3CONF		0X80
+#define DA9052_LDO3_LDO3EN		0X40
+#define DA9052_LDO3_VLDO3		0X3F
+
+/* LDO4 REGISTER BITS */
+#define DA9052_LDO4_LDO4CONF		0X80
+#define DA9052_LDO4_LDO4EN		0X40
+#define DA9052_LDO4_VLDO4		0X3F
+
+/* LDO5 REGISTER BITS */
+#define DA9052_LDO5_LDO5CONF		0X80
+#define DA9052_LDO5_LDO5EN		0X40
+#define DA9052_LDO5_VLDO5		0X3F
+
+/* LDO6 REGISTER BITS */
+#define DA9052_LDO6_LDO6CONF		0X80
+#define DA9052_LDO6_LDO6EN		0X40
+#define DA9052_LDO6_VLDO6		0X3F
+
+/* LDO7 REGISTER BITS */
+#define DA9052_LDO7_LDO7CONF		0X80
+#define DA9052_LDO7_LDO7EN		0X40
+#define DA9052_LDO7_VLDO7		0X3F
+
+/* LDO8 REGISTER BITS */
+#define DA9052_LDO8_LDO8CONF		0X80
+#define DA9052_LDO8_LDO8EN		0X40
+#define DA9052_LDO8_VLDO8		0X3F
+
+/* LDO9 REGISTER BITS */
+#define DA9052_LDO9_LDO9CONF		0X80
+#define DA9052_LDO9_LDO9EN		0X40
+#define DA9052_LDO9_VLDO9		0X3F
+
+/* LDO10 REGISTER BITS */
+#define DA9052_LDO10_LDO10CONF		0X80
+#define DA9052_LDO10_LDO10EN		0X40
+#define DA9052_LDO10_VLDO10		0X3F
+
+/* SUPPLY REGISTER BITS */
+#define DA9052_SUPPLY_VLOCK		0X80
+#define DA9052_SUPPLY_VMEMSWEN		0X40
+#define DA9052_SUPPLY_VPERISWEN	0X20
+#define DA9052_SUPPLY_VLDO3GO		0X10
+#define DA9052_SUPPLY_VLDO2GO		0X08
+#define DA9052_SUPPLY_VBMEMGO		0X04
+#define DA9052_SUPPLY_VBPROGO		0X02
+#define DA9052_SUPPLY_VBCOREGO		0X01
+
+/* PULLDOWN REGISTER BITS */
+#define DA9052_PULLDOWN_LDO5PDDIS	0X20
+#define DA9052_PULLDOWN_LDO2PDDIS	0X10
+#define DA9052_PULLDOWN_LDO1PDDIS	0X08
+#define DA9052_PULLDOWN_MEMPDDIS	0X04
+#define DA9052_PULLDOWN_PROPDDIS	0X02
+#define DA9052_PULLDOWN_COREPDDIS	0X01
+
+/* BAT CHARGER REGISTER BITS */
+/* CHARGER BUCK REGISTER BITS */
+#define DA9052_CHGBUCK_CHGTEMP		0X80
+#define DA9052_CHGBUCK_CHGUSBILIM	0X40
+#define DA9052_CHGBUCK_CHGBUCKLP	0X20
+#define DA9052_CHGBUCK_CHGBUCKEN	0X10
+#define DA9052_CHGBUCK_ISETBUCK	0X0F
+
+/* WAIT COUNTER REGISTER BITS */
+#define DA9052_WAITCONT_WAITDIR	0X80
+#define DA9052_WAITCONT_RTCCLOCK	0X40
+#define DA9052_WAITCONT_WAITMODE	0X20
+#define DA9052_WAITCONT_EN32KOUT	0X10
+#define DA9052_WAITCONT_DELAYTIME	0X0F
+
+/* ISET CONTROL REGISTER BITS */
+#define DA9052_ISET_ISETDCIN		0XF0
+#define DA9052_ISET_ISETVBUS		0X0F
+
+/* BATTERY CHARGER CONTROL REGISTER BITS */
+#define DA9052_BATCHG_ICHGPRE		0XC0
+#define DA9052_BATCHG_ICHGBAT		0X3F
+
+/* CHARGER COUNTER REGISTER BITS */
+#define DA9052_CHG_CONT_VCHG_BAT	0XF8
+#define DA9052_CHG_CONT_TCTR		0X07
+
+/* INPUT CONTROL REGISTER BITS */
+#define DA9052_INPUT_CONT_TCTR_MODE	0X80
+#define DA9052_INPUT_CONT_VBUS_SUSP	0X10
+#define DA9052_INPUT_CONT_DCIN_SUSP	0X08
+
+/* CHARGING TIME REGISTER BITS */
+#define DA9052_CHGTIME_CHGTIME		0XFF
+
+/* BACKUP BATTERY CONTROL REGISTER BITS */
+#define DA9052_BBATCONT_BCHARGERISET	0XF0
+#define DA9052_BBATCONT_BCHARGERVSET	0X0F
+
+/* LED REGISTERS BITS */
+/* LED BOOST REGISTER BITS */
+#define DA9052_BOOST_EBFAULT		0X80
+#define DA9052_BOOST_MBFAULT		0X40
+#define DA9052_BOOST_BOOSTFRQ		0X20
+#define DA9052_BOOST_BOOSTILIM		0X10
+#define DA9052_BOOST_LED3INEN		0X08
+#define DA9052_BOOST_LED2INEN		0X04
+#define DA9052_BOOST_LED1INEN		0X02
+#define DA9052_BOOST_BOOSTEN		0X01
+
+/* LED CONTROL REGISTER BITS */
+#define DA9052_LEDCONT_SELLEDMODE	0X80
+#define DA9052_LEDCONT_LED3ICONT	0X40
+#define DA9052_LEDCONT_LED3RAMP	0X20
+#define DA9052_LEDCONT_LED3EN		0X10
+#define DA9052_LEDCONT_LED2RAMP	0X08
+#define DA9052_LEDCONT_LED2EN		0X04
+#define DA9052_LEDCONT_LED1RAMP	0X02
+#define DA9052_LEDCONT_LED1EN		0X01
+
+/* LEDMIN123 REGISTER BIT */
+#define DA9052_LEDMIN123_LEDMINCURRENT	0XFF
+
+/* LED1CONF REGISTER BIT */
+#define DA9052_LED1CONF_LED1CURRENT	0XFF
+
+/* LED2CONF REGISTER BIT */
+#define DA9052_LED2CONF_LED2CURRENT	0XFF
+
+/* LED3CONF REGISTER BIT */
+#define DA9052_LED3CONF_LED3CURRENT	0XFF
+
+/* LED COUNT REGISTER BIT */
+#define DA9052_LED_CONT_DIM		0X80
+
+/* ADC MAN REGISTERS BITS */
+#define DA9052_ADC_MAN_MAN_CONV	0X10
+#define DA9052_ADC_MAN_MUXSEL_VDDOUT	0X00
+#define DA9052_ADC_MAN_MUXSEL_ICH	0X01
+#define DA9052_ADC_MAN_MUXSEL_TBAT	0X02
+#define DA9052_ADC_MAN_MUXSEL_VBAT	0X03
+#define DA9052_ADC_MAN_MUXSEL_AD4	0X04
+#define DA9052_ADC_MAN_MUXSEL_AD5	0X05
+#define DA9052_ADC_MAN_MUXSEL_AD6	0X06
+#define DA9052_ADC_MAN_MUXSEL_VBBAT	0X09
+
+/* ADC CONTROL REGSISTERS BITS */
+#define DA9052_ADCCONT_COMP1V2EN	0X80
+#define DA9052_ADCCONT_ADCMODE		0X40
+#define DA9052_ADCCONT_TBATISRCEN	0X20
+#define DA9052_ADCCONT_AD4ISRCEN	0X10
+#define DA9052_ADCCONT_AUTOAD6EN	0X08
+#define DA9052_ADCCONT_AUTOAD5EN	0X04
+#define DA9052_ADCCONT_AUTOAD4EN	0X02
+#define DA9052_ADCCONT_AUTOVDDEN	0X01
+
+/* ADC 10 BIT MANUAL CONVERSION RESULT LOW REGISTER */
+#define DA9052_ADC_RES_LSB		0X03
+
+/* ADC 10 BIT MANUAL CONVERSION RESULT HIGH REGISTER */
+#define DA9052_ADCRESH_ADCRESMSB	0XFF
+
+/* VDD RES REGSISTER BIT*/
+#define DA9052_VDDRES_VDDOUTRES	0XFF
+
+/* VDD MON REGSISTER BIT */
+#define DA9052_VDDMON_VDDOUTMON	0XFF
+
+/* ICHG_AV REGSISTER BIT */
+#define DA9052_ICHGAV_ICHGAV		0XFF
+
+/* ICHG_THD REGSISTER BIT */
+#define DA9052_ICHGTHD_ICHGTHD		0XFF
+
+/* ICHG_END REGSISTER BIT */
+#define DA9052_ICHGEND_ICHGEND		0XFF
+
+/* TBAT_RES REGSISTER BIT */
+#define DA9052_TBATRES_TBATRES		0XFF
+
+/* TBAT_HIGHP REGSISTER BIT */
+#define DA9052_TBATHIGHP_TBATHIGHP	0XFF
+
+/* TBAT_HIGHN REGSISTER BIT */
+#define DA9052_TBATHIGHN_TBATHIGHN	0XFF
+
+/* TBAT_LOW REGSISTER BIT */
+#define DA9052_TBATLOW_TBATLOW		0XFF
+
+/* T_OFFSET REGSISTER BIT */
+#define DA9052_TOFFSET_TOFFSET		0XFF
+
+/* ADCIN4_RES REGSISTER BIT */
+#define DA9052_ADCIN4RES_ADCIN4RES	0XFF
+
+/* ADCIN4_HIGH REGSISTER BIT */
+#define DA9052_AUTO4HIGH_AUTO4HIGH	0XFF
+
+/* ADCIN4_LOW REGSISTER BIT */
+#define DA9052_AUTO4LOW_AUTO4LOW	0XFF
+
+/* ADCIN5_RES REGSISTER BIT */
+#define DA9052_ADCIN5RES_ADCIN5RES	0XFF
+
+/* ADCIN5_HIGH REGSISTER BIT */
+#define DA9052_AUTO5HIGH_AUTOHIGH	0XFF
+
+/* ADCIN5_LOW REGSISTER BIT */
+#define DA9052_AUTO5LOW_AUTO5LOW	0XFF
+
+/* ADCIN6_RES REGSISTER BIT */
+#define DA9052_ADCIN6RES_ADCIN6RES	0XFF
+
+/* ADCIN6_HIGH REGSISTER BIT */
+#define DA9052_AUTO6HIGH_AUTO6HIGH	0XFF
+
+/* ADCIN6_LOW REGSISTER BIT */
+#define DA9052_AUTO6LOW_AUTO6LOW	0XFF
+
+/* TJUNC_RES REGSISTER BIT*/
+#define DA9052_TJUNCRES_TJUNCRES	0XFF
+
+/* TSI REGISTER */
+/* TSI CONTROL REGISTER A BITS */
+#define DA9052_TSICONTA_TSIDELAY	0XC0
+#define DA9052_TSICONTA_TSISKIP	0X38
+#define DA9052_TSICONTA_TSIMODE	0X04
+#define DA9052_TSICONTA_PENDETEN	0X02
+#define DA9052_TSICONTA_AUTOTSIEN	0X01
+
+/* TSI CONTROL REGISTER B BITS */
+#define DA9052_TSICONTB_ADCREF		0X80
+#define DA9052_TSICONTB_TSIMAN		0X40
+#define DA9052_TSICONTB_TSIMUX		0X30
+#define DA9052_TSICONTB_TSISEL3	0X08
+#define DA9052_TSICONTB_TSISEL2	0X04
+#define DA9052_TSICONTB_TSISEL1	0X02
+#define DA9052_TSICONTB_TSISEL0	0X01
+
+/* TSI X CO-ORDINATE MSB RESULT REGISTER BITS */
+#define DA9052_TSIXMSB_TSIXM		0XFF
+
+/* TSI Y CO-ORDINATE MSB RESULT REGISTER BITS */
+#define DA9052_TSIYMSB_TSIYM		0XFF
+
+/* TSI CO-ORDINATE LSB RESULT REGISTER BITS */
+#define DA9052_TSILSB_PENDOWN		0X40
+#define DA9052_TSILSB_TSIZL		0X30
+#define DA9052_TSILSB_TSIYL		0X0C
+#define DA9052_TSILSB_TSIXL		0X03
+
+/* TSI Z MEASUREMENT MSB RESULT REGISTER BIT */
+#define DA9052_TSIZMSB_TSIZM		0XFF
+
+/* RTC REGISTER */
+/* RTC TIMER SECONDS REGISTER BITS */
+#define DA9052_COUNTS_MONITOR		0X40
+#define DA9052_RTC_SEC			0X3F
+
+/* RTC TIMER MINUTES REGISTER BIT */
+#define DA9052_RTC_MIN			0X3F
+
+/* RTC TIMER HOUR REGISTER BIT */
+#define DA9052_RTC_HOUR		0X1F
+
+/* RTC TIMER DAYS REGISTER BIT */
+#define DA9052_RTC_DAY			0X1F
+
+/* RTC TIMER MONTHS REGISTER BIT */
+#define DA9052_RTC_MONTH		0X0F
+
+/* RTC TIMER YEARS REGISTER BIT */
+#define DA9052_RTC_YEAR		0X3F
+
+/* RTC ALARM MINUTES REGISTER BITS */
+#define DA9052_ALARMM_I_TICK_TYPE	0X80
+#define DA9052_ALARMMI_ALARMTYPE	0X40
+
+/* RTC ALARM YEARS REGISTER BITS */
+#define DA9052_ALARM_Y_TICK_ON		0X80
+#define DA9052_ALARM_Y_ALARM_ON	0X40
+
+/* RTC SECONDS REGISTER A BITS */
+#define DA9052_SECONDA_SECONDSA	0XFF
+
+/* RTC SECONDS REGISTER B BITS */
+#define DA9052_SECONDB_SECONDSB	0XFF
+
+/* RTC SECONDS REGISTER C BITS */
+#define DA9052_SECONDC_SECONDSC	0XFF
+
+/* RTC SECONDS REGISTER D BITS */
+#define DA9052_SECONDD_SECONDSD	0XFF
+
+#endif
+/* __LINUX_MFD_DA9052_REG_H */
-- 
cgit v1.2.3


From 6261ddee70174372d6a75601f40719b7a5392f3f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed, 14 Dec 2011 11:19:07 -0800
Subject: kref: fix up the kfree build problems

It turns out that some memory allocators use kobjects, which use krefs,
and kref.h was wanting to figure out the address of kfree(), which ended
up in a loop.

kfree was only being needed for a warning to tell the caller that they
were doing something stupid.  Now we just move that warning into the
comments for the functions, which results in a bit more fun as everyone
enjoys digging for people to mock at times of boredom.

So, remove the dependancy of slab.h on kref.h, and fix up the other
include file as well (we really only need bug.h and atomic.h, not
types.h).

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Oliver Neukum <oneukum@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kref.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index d66c88a3b48c..abc0120b09b7 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -15,8 +15,8 @@
 #ifndef _KREF_H_
 #define _KREF_H_
 
-#include <linux/types.h>
-#include <linux/slab.h>
+#include <linux/bug.h>
+#include <linux/atomic.h>
 
 struct kref {
 	atomic_t refcount;
@@ -48,7 +48,10 @@ static inline void kref_get(struct kref *kref)
  * @release: pointer to the function that will clean up the object when the
  *	     last reference to the object is released.
  *	     This pointer is required, and it is not acceptable to pass kfree
- *	     in as this function.
+ *	     in as this function.  If the caller does pass kfree to this
+ *	     function, you will be publicly mocked mercilessly by the kref
+ *	     maintainer, and anyone else who happens to notice it.  You have
+ *	     been warned.
  *
  * Subtract @count from the refcount, and if 0, call release().
  * Return 1 if the object was removed, otherwise return 0.  Beware, if this
@@ -60,7 +63,6 @@ static inline int kref_sub(struct kref *kref, unsigned int count,
 	     void (*release)(struct kref *kref))
 {
 	WARN_ON(release == NULL);
-	WARN_ON(release == (void (*)(struct kref *))kfree);
 
 	if (atomic_sub_and_test((int) count, &kref->refcount)) {
 		release(kref);
@@ -75,7 +77,10 @@ static inline int kref_sub(struct kref *kref, unsigned int count,
  * @release: pointer to the function that will clean up the object when the
  *	     last reference to the object is released.
  *	     This pointer is required, and it is not acceptable to pass kfree
- *	     in as this function.
+ *	     in as this function.  If the caller does pass kfree to this
+ *	     function, you will be publicly mocked mercilessly by the kref
+ *	     maintainer, and anyone else who happens to notice it.  You have
+ *	     been warned.
  *
  * Decrement the refcount, and if 0, call release().
  * Return 1 if the object was removed, otherwise return 0.  Beware, if this
-- 
cgit v1.2.3


From 1ed28f610653e9b18433c6d87e9d333b7e3e886e Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Wed, 14 Dec 2011 16:43:09 +0100
Subject: NFC: Add a DEP link control netlink command

NFC-DEP (Data Exchange Protocol) is an NFC MAC layer.
This command allows to enable and disable the DEP link on to which e.g.
LLCP can run.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nfc.h   |  14 +++++
 include/net/nfc/nfc.h |  11 ++++
 net/nfc/core.c        |  77 +++++++++++++++++++++++++++
 net/nfc/netlink.c     | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/nfc/nfc.h         |   9 ++++
 5 files changed, 255 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfc.h b/include/linux/nfc.h
index 36cb955b05cc..34d8303111f0 100644
--- a/include/linux/nfc.h
+++ b/include/linux/nfc.h
@@ -62,6 +62,8 @@ enum nfc_commands {
 	NFC_CMD_GET_DEVICE,
 	NFC_CMD_DEV_UP,
 	NFC_CMD_DEV_DOWN,
+	NFC_CMD_DEP_LINK_UP,
+	NFC_CMD_DEP_LINK_DOWN,
 	NFC_CMD_START_POLL,
 	NFC_CMD_STOP_POLL,
 	NFC_CMD_GET_TARGET,
@@ -86,6 +88,8 @@ enum nfc_commands {
  * @NFC_ATTR_TARGET_SENS_RES: NFC-A targets extra information such as NFCID
  * @NFC_ATTR_TARGET_SEL_RES: NFC-A targets extra information (useful if the
  *	target is not NFC-Forum compliant)
+ * @NFC_ATTR_COMM_MODE: Passive or active mode
+ * @NFC_ATTR_RF_MODE: Initiator or target
  */
 enum nfc_attrs {
 	NFC_ATTR_UNSPEC,
@@ -95,6 +99,8 @@ enum nfc_attrs {
 	NFC_ATTR_TARGET_INDEX,
 	NFC_ATTR_TARGET_SENS_RES,
 	NFC_ATTR_TARGET_SEL_RES,
+	NFC_ATTR_COMM_MODE,
+	NFC_ATTR_RF_MODE,
 /* private: internal use only */
 	__NFC_ATTR_AFTER_LAST
 };
@@ -111,6 +117,14 @@ enum nfc_attrs {
 
 #define NFC_PROTO_MAX		6
 
+/* NFC communication modes */
+#define NFC_COMM_ACTIVE  0
+#define NFC_COMM_PASSIVE 1
+
+/* NFC RF modes */
+#define NFC_RF_INITIATOR 0
+#define NFC_RF_TARGET    1
+
 /* NFC protocols masks used in bitsets */
 #define NFC_PROTO_JEWEL_MASK	(1 << NFC_PROTO_JEWEL)
 #define NFC_PROTO_MIFARE_MASK	(1 << NFC_PROTO_MIFARE)
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 3a3304c094d7..bf82d292d68c 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -52,6 +52,9 @@ struct nfc_ops {
 	int (*dev_down)(struct nfc_dev *dev);
 	int (*start_poll)(struct nfc_dev *dev, u32 protocols);
 	void (*stop_poll)(struct nfc_dev *dev);
+	int (*dep_link_up)(struct nfc_dev *dev, int target_idx,
+				u8 comm_mode, u8 rf_mode);
+	int (*dep_link_down)(struct nfc_dev *dev);
 	int (*activate_target)(struct nfc_dev *dev, u32 target_idx,
 							u32 protocol);
 	void (*deactivate_target)(struct nfc_dev *dev, u32 target_idx);
@@ -60,6 +63,9 @@ struct nfc_ops {
 							void *cb_context);
 };
 
+#define NFC_TARGET_IDX_ANY -1
+#define NFC_MAX_GT_LEN 48
+
 struct nfc_target {
 	u32 idx;
 	u32 supported_protocols;
@@ -83,6 +89,8 @@ struct nfc_dev {
 	bool dev_up;
 	bool polling;
 	bool remote_activated;
+	bool dep_link_up;
+	u32 dep_rf_mode;
 	struct nfc_genl_data genl_data;
 	u32 supported_protocols;
 
@@ -165,4 +173,7 @@ struct sk_buff *nfc_alloc_recv_skb(unsigned int size, gfp_t gfp);
 int nfc_targets_found(struct nfc_dev *dev, struct nfc_target *targets,
 							int ntargets);
 
+int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx,
+		       u8 comm_mode, u8 rf_mode);
+
 #endif /* __NET_NFC_H */
diff --git a/net/nfc/core.c b/net/nfc/core.c
index f53f88ada687..785f1f20c7ba 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -181,6 +181,83 @@ error:
 	return rc;
 }
 
+int nfc_dep_link_up(struct nfc_dev *dev, int target_index,
+					u8 comm_mode, u8 rf_mode)
+{
+	int rc = 0;
+
+	pr_debug("dev_name=%s comm:%d rf:%d\n",
+			dev_name(&dev->dev), comm_mode, rf_mode);
+
+	if (!dev->ops->dep_link_up)
+		return -EOPNOTSUPP;
+
+	device_lock(&dev->dev);
+
+	if (!device_is_registered(&dev->dev)) {
+		rc = -ENODEV;
+		goto error;
+	}
+
+	if (dev->dep_link_up == true) {
+		rc = -EALREADY;
+		goto error;
+	}
+
+	rc = dev->ops->dep_link_up(dev, target_index, comm_mode, rf_mode);
+
+error:
+	device_unlock(&dev->dev);
+	return rc;
+}
+
+int nfc_dep_link_down(struct nfc_dev *dev)
+{
+	int rc = 0;
+
+	pr_debug("dev_name=%s\n", dev_name(&dev->dev));
+
+	if (!dev->ops->dep_link_down)
+		return -EOPNOTSUPP;
+
+	device_lock(&dev->dev);
+
+	if (!device_is_registered(&dev->dev)) {
+		rc = -ENODEV;
+		goto error;
+	}
+
+	if (dev->dep_link_up == false) {
+		rc = -EALREADY;
+		goto error;
+	}
+
+	if (dev->dep_rf_mode == NFC_RF_TARGET) {
+		rc = -EOPNOTSUPP;
+		goto error;
+	}
+
+	rc = dev->ops->dep_link_down(dev);
+	if (!rc) {
+		dev->dep_link_up = false;
+		nfc_genl_dep_link_down_event(dev);
+	}
+
+error:
+	device_unlock(&dev->dev);
+	return rc;
+}
+
+int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx,
+					u8 comm_mode, u8 rf_mode)
+{
+	dev->dep_link_up = true;
+	dev->dep_rf_mode = rf_mode;
+
+	return nfc_genl_dep_link_up_event(dev, target_idx, comm_mode, rf_mode);
+}
+EXPORT_SYMBOL(nfc_dep_link_is_up);
+
 /**
  * nfc_activate_target - prepare the target for data exchange
  *
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 1d76d38c4a24..43a1c47756a7 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -46,6 +46,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
 	[NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING,
 				.len = NFC_DEVICE_NAME_MAXSIZE },
 	[NFC_ATTR_PROTOCOLS] = { .type = NLA_U32 },
+	[NFC_ATTR_COMM_MODE] = { .type = NLA_U8 },
+	[NFC_ATTR_RF_MODE] = { .type = NLA_U8 },
 };
 
 static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
@@ -311,6 +313,75 @@ static int nfc_genl_dump_devices_done(struct netlink_callback *cb)
 	return 0;
 }
 
+int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx,
+						u8 comm_mode, u8 rf_mode)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	pr_debug("DEP link is up\n");
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
+				NFC_CMD_DEP_LINK_UP);
+	if (!hdr)
+		goto free_msg;
+
+	NLA_PUT_U32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx);
+	if (rf_mode == NFC_RF_INITIATOR)
+		NLA_PUT_U32(msg, NFC_ATTR_TARGET_INDEX, target_idx);
+	NLA_PUT_U8(msg, NFC_ATTR_COMM_MODE, comm_mode);
+	NLA_PUT_U8(msg, NFC_ATTR_RF_MODE, rf_mode);
+
+	genlmsg_end(msg, hdr);
+
+	dev->dep_link_up = true;
+
+	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+free_msg:
+	nlmsg_free(msg);
+	return -EMSGSIZE;
+}
+
+int nfc_genl_dep_link_down_event(struct nfc_dev *dev)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	pr_debug("DEP link is down\n");
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
+				NFC_CMD_DEP_LINK_DOWN);
+	if (!hdr)
+		goto free_msg;
+
+	NLA_PUT_U32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx);
+
+	genlmsg_end(msg, hdr);
+
+	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+free_msg:
+	nlmsg_free(msg);
+	return -EMSGSIZE;
+}
+
 static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info)
 {
 	struct sk_buff *msg;
@@ -398,6 +469,8 @@ static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info)
 	u32 idx;
 	u32 protocols;
 
+	pr_debug("Poll start\n");
+
 	if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
 		!info->attrs[NFC_ATTR_PROTOCOLS])
 		return -EINVAL;
@@ -452,6 +525,67 @@ out:
 	return rc;
 }
 
+static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nfc_dev *dev;
+	int rc, tgt_idx;
+	u32 idx;
+	u8 comm, rf;
+
+	pr_debug("DEP link up\n");
+
+	if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+			!info->attrs[NFC_ATTR_COMM_MODE] ||
+			!info->attrs[NFC_ATTR_RF_MODE])
+		return -EINVAL;
+
+	idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+	if (!info->attrs[NFC_ATTR_TARGET_INDEX])
+		tgt_idx = NFC_TARGET_IDX_ANY;
+	else
+		tgt_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]);
+
+	comm = nla_get_u8(info->attrs[NFC_ATTR_COMM_MODE]);
+	rf = nla_get_u8(info->attrs[NFC_ATTR_RF_MODE]);
+
+	if (comm != NFC_COMM_ACTIVE && comm != NFC_COMM_PASSIVE)
+		return -EINVAL;
+
+	if (rf != NFC_RF_INITIATOR && comm != NFC_RF_TARGET)
+		return -EINVAL;
+
+	dev = nfc_get_device(idx);
+	if (!dev)
+		return -ENODEV;
+
+	rc = nfc_dep_link_up(dev, tgt_idx, comm, rf);
+
+	nfc_put_device(dev);
+
+	return rc;
+}
+
+static int nfc_genl_dep_link_down(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nfc_dev *dev;
+	int rc;
+	u32 idx;
+
+	if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+		return -EINVAL;
+
+	idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+
+	dev = nfc_get_device(idx);
+	if (!dev)
+		return -ENODEV;
+
+	rc = nfc_dep_link_down(dev);
+
+	nfc_put_device(dev);
+	return rc;
+}
+
 static struct genl_ops nfc_genl_ops[] = {
 	{
 		.cmd = NFC_CMD_GET_DEVICE,
@@ -480,6 +614,16 @@ static struct genl_ops nfc_genl_ops[] = {
 		.doit = nfc_genl_stop_poll,
 		.policy = nfc_genl_policy,
 	},
+	{
+		.cmd = NFC_CMD_DEP_LINK_UP,
+		.doit = nfc_genl_dep_link_up,
+		.policy = nfc_genl_policy,
+	},
+	{
+		.cmd = NFC_CMD_DEP_LINK_DOWN,
+		.doit = nfc_genl_dep_link_down,
+		.policy = nfc_genl_policy,
+	},
 	{
 		.cmd = NFC_CMD_GET_TARGET,
 		.dumpit = nfc_genl_dump_targets,
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 67d605015304..4d0fb125d033 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -68,6 +68,10 @@ int nfc_genl_targets_found(struct nfc_dev *dev);
 int nfc_genl_device_added(struct nfc_dev *dev);
 int nfc_genl_device_removed(struct nfc_dev *dev);
 
+int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx,
+			       u8 comm_mode, u8 rf_mode);
+int nfc_genl_dep_link_down_event(struct nfc_dev *dev);
+
 struct nfc_dev *nfc_get_device(unsigned idx);
 
 static inline void nfc_put_device(struct nfc_dev *dev)
@@ -102,6 +106,11 @@ int nfc_start_poll(struct nfc_dev *dev, u32 protocols);
 
 int nfc_stop_poll(struct nfc_dev *dev);
 
+int nfc_dep_link_up(struct nfc_dev *dev, int target_idx,
+				u8 comm_mode, u8 rf_mode);
+
+int nfc_dep_link_down(struct nfc_dev *dev);
+
 int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol);
 
 int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx);
-- 
cgit v1.2.3


From d646960f7986fefb460a2b062d5ccc8ccfeacc3a Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Wed, 14 Dec 2011 16:43:12 +0100
Subject: NFC: Initial LLCP support

This patch is an initial implementation for the NFC Logical Link Control
protocol. It's also known as NFC peer to peer mode.
This is a basic implementation as it lacks SDP (services Discovery
Protocol), frames aggregation support, and frame rejecion parsing.
Follow up patches will implement those missing features.
This code has been tested against a Nexus S phone implementing LLCP 1.0.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nfc.h     |  15 +-
 net/nfc/Kconfig         |   1 +
 net/nfc/Makefile        |   1 +
 net/nfc/core.c          |  20 +-
 net/nfc/llcp/Kconfig    |   7 +
 net/nfc/llcp/commands.c | 399 ++++++++++++++++++++
 net/nfc/llcp/llcp.c     | 973 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/nfc/llcp/llcp.h     | 193 ++++++++++
 net/nfc/llcp/sock.c     | 675 +++++++++++++++++++++++++++++++++
 net/nfc/nfc.h           |  54 +++
 10 files changed, 2335 insertions(+), 3 deletions(-)
 create mode 100644 net/nfc/llcp/Kconfig
 create mode 100644 net/nfc/llcp/commands.c
 create mode 100644 net/nfc/llcp/llcp.c
 create mode 100644 net/nfc/llcp/llcp.h
 create mode 100644 net/nfc/llcp/sock.c

(limited to 'include/linux')

diff --git a/include/linux/nfc.h b/include/linux/nfc.h
index 34d8303111f0..89fee4ab1904 100644
--- a/include/linux/nfc.h
+++ b/include/linux/nfc.h
@@ -139,9 +139,22 @@ struct sockaddr_nfc {
 	__u32 nfc_protocol;
 };
 
+#define NFC_LLCP_MAX_SERVICE_NAME 63
+struct sockaddr_nfc_llcp {
+	sa_family_t sa_family;
+	__u32 dev_idx;
+	__u32 target_idx;
+	__u32 nfc_protocol;
+	__u8 dsap; /* Destination SAP, if known */
+	__u8 ssap; /* Source SAP to be bound to */
+	char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */;
+	size_t service_name_len;
+};
+
 /* NFC socket protocols */
 #define NFC_SOCKPROTO_RAW	0
-#define NFC_SOCKPROTO_MAX	1
+#define NFC_SOCKPROTO_LLCP	1
+#define NFC_SOCKPROTO_MAX	2
 
 #define NFC_HEADER_SIZE 1
 
diff --git a/net/nfc/Kconfig b/net/nfc/Kconfig
index 58cddadf8e8e..44c865b86d6f 100644
--- a/net/nfc/Kconfig
+++ b/net/nfc/Kconfig
@@ -14,5 +14,6 @@ menuconfig NFC
 	  be called nfc.
 
 source "net/nfc/nci/Kconfig"
+source "net/nfc/llcp/Kconfig"
 
 source "drivers/nfc/Kconfig"
diff --git a/net/nfc/Makefile b/net/nfc/Makefile
index fbb550f2377b..7b4a6dcfa566 100644
--- a/net/nfc/Makefile
+++ b/net/nfc/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_NFC) += nfc.o
 obj-$(CONFIG_NFC_NCI) += nci/
 
 nfc-objs := core.o netlink.o af_nfc.o rawsock.o
+nfc-$(CONFIG_NFC_LLCP)	+= llcp/llcp.o llcp/commands.o llcp/sock.o
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 3a45f21b3b97..3ddf6e698df0 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -240,6 +240,7 @@ int nfc_dep_link_down(struct nfc_dev *dev)
 	rc = dev->ops->dep_link_down(dev);
 	if (!rc) {
 		dev->dep_link_up = false;
+		nfc_llcp_mac_is_down(dev);
 		nfc_genl_dep_link_down_event(dev);
 	}
 
@@ -254,6 +255,8 @@ int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx,
 	dev->dep_link_up = true;
 	dev->dep_rf_mode = rf_mode;
 
+	nfc_llcp_mac_is_up(dev, target_idx, comm_mode, rf_mode);
+
 	return nfc_genl_dep_link_up_event(dev, target_idx, comm_mode, rf_mode);
 }
 EXPORT_SYMBOL(nfc_dep_link_is_up);
@@ -360,13 +363,13 @@ int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len)
 	if (gb_len > NFC_MAX_GT_LEN)
 		return -EINVAL;
 
-	return 0;
+	return nfc_llcp_set_remote_gb(dev, gb, gb_len);
 }
 EXPORT_SYMBOL(nfc_set_remote_general_bytes);
 
 u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, u8 *gt_len)
 {
-	return NULL;
+	return nfc_llcp_general_bytes(dev, gt_len);
 }
 EXPORT_SYMBOL(nfc_get_local_general_bytes);
 
@@ -560,6 +563,10 @@ int nfc_register_device(struct nfc_dev *dev)
 	if (rc < 0)
 		return rc;
 
+	rc = nfc_llcp_register_device(dev);
+	if (rc)
+		pr_err("Could not register llcp device\n");
+
 	rc = nfc_genl_device_added(dev);
 	if (rc)
 		pr_debug("The userspace won't be notified that the device %s was added\n",
@@ -591,6 +598,8 @@ void nfc_unregister_device(struct nfc_dev *dev)
 
 	mutex_unlock(&nfc_devlist_mutex);
 
+	nfc_llcp_unregister_device(dev);
+
 	rc = nfc_genl_device_removed(dev);
 	if (rc)
 		pr_debug("The userspace won't be notified that the device %s was removed\n",
@@ -620,6 +629,10 @@ static int __init nfc_init(void)
 	if (rc)
 		goto err_rawsock;
 
+	rc = nfc_llcp_init();
+	if (rc)
+		goto err_llcp_sock;
+
 	rc = af_nfc_init();
 	if (rc)
 		goto err_af_nfc;
@@ -627,6 +640,8 @@ static int __init nfc_init(void)
 	return 0;
 
 err_af_nfc:
+	nfc_llcp_exit();
+err_llcp_sock:
 	rawsock_exit();
 err_rawsock:
 	nfc_genl_exit();
@@ -638,6 +653,7 @@ err_genl:
 static void __exit nfc_exit(void)
 {
 	af_nfc_exit();
+	nfc_llcp_exit();
 	rawsock_exit();
 	nfc_genl_exit();
 	class_unregister(&nfc_class);
diff --git a/net/nfc/llcp/Kconfig b/net/nfc/llcp/Kconfig
new file mode 100644
index 000000000000..fbf5e8150908
--- /dev/null
+++ b/net/nfc/llcp/Kconfig
@@ -0,0 +1,7 @@
+config NFC_LLCP
+       depends on NFC && EXPERIMENTAL
+       bool "NFC LLCP support (EXPERIMENTAL)"
+       default n
+       help
+	 Say Y here if you want to build support for a kernel NFC LLCP
+	 implementation.
\ No newline at end of file
diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c
new file mode 100644
index 000000000000..151f2ef429c4
--- /dev/null
+++ b/net/nfc/llcp/commands.c
@@ -0,0 +1,399 @@
+/*
+ * Copyright (C) 2011  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#define pr_fmt(fmt) "llcp: %s: " fmt, __func__
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/nfc.h>
+
+#include <net/nfc/nfc.h>
+
+#include "../nfc.h"
+#include "llcp.h"
+
+static u8 llcp_tlv_length[LLCP_TLV_MAX] = {
+	0,
+	1, /* VERSION */
+	2, /* MIUX */
+	2, /* WKS */
+	1, /* LTO */
+	1, /* RW */
+	0, /* SN */
+	1, /* OPT */
+	0, /* SDREQ */
+	2, /* SDRES */
+
+};
+
+static u8 llcp_tlv8(u8 *tlv, u8 type)
+{
+	if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]])
+		return 0;
+
+	return tlv[2];
+}
+
+static u8 llcp_tlv16(u8 *tlv, u8 type)
+{
+	if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]])
+		return 0;
+
+	return be16_to_cpu(*((__be16 *)(tlv + 2)));
+}
+
+
+static u8 llcp_tlv_version(u8 *tlv)
+{
+	return llcp_tlv8(tlv, LLCP_TLV_VERSION);
+}
+
+static u16 llcp_tlv_miux(u8 *tlv)
+{
+	return llcp_tlv16(tlv, LLCP_TLV_MIUX) & 0x7f;
+}
+
+static u16 llcp_tlv_wks(u8 *tlv)
+{
+	return llcp_tlv16(tlv, LLCP_TLV_WKS);
+}
+
+static u16 llcp_tlv_lto(u8 *tlv)
+{
+	return llcp_tlv8(tlv, LLCP_TLV_LTO);
+}
+
+static u8 llcp_tlv_opt(u8 *tlv)
+{
+	return llcp_tlv8(tlv, LLCP_TLV_OPT);
+}
+
+static u8 llcp_tlv_rw(u8 *tlv)
+{
+	return llcp_tlv8(tlv, LLCP_TLV_RW) & 0xf;
+}
+
+u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length)
+{
+	u8 *tlv, length;
+
+	pr_debug("type %d\n", type);
+
+	if (type >= LLCP_TLV_MAX)
+		return NULL;
+
+	length = llcp_tlv_length[type];
+	if (length == 0 && value_length == 0)
+		return NULL;
+	else
+		length = value_length;
+
+	*tlv_length = 2 + length;
+	tlv = kzalloc(2 + length, GFP_KERNEL);
+	if (tlv == NULL)
+		return tlv;
+
+	tlv[0] = type;
+	tlv[1] = length;
+	memcpy(tlv + 2, value, length);
+
+	return tlv;
+}
+
+int nfc_llcp_parse_tlv(struct nfc_llcp_local *local,
+			u8 *tlv_array, u16 tlv_array_len)
+{
+	u8 *tlv = tlv_array, type, length, offset = 0;
+
+	pr_debug("TLV array length %d\n", tlv_array_len);
+
+	if (local == NULL)
+		return -ENODEV;
+
+	while (offset < tlv_array_len) {
+		type = tlv[0];
+		length = tlv[1];
+
+		pr_debug("type 0x%x length %d\n", type, length);
+
+		switch (type) {
+		case LLCP_TLV_VERSION:
+			local->remote_version = llcp_tlv_version(tlv);
+			break;
+		case LLCP_TLV_MIUX:
+			local->remote_miu = llcp_tlv_miux(tlv) + 128;
+			break;
+		case LLCP_TLV_WKS:
+			local->remote_wks = llcp_tlv_wks(tlv);
+			break;
+		case LLCP_TLV_LTO:
+			local->remote_lto = llcp_tlv_lto(tlv) * 10;
+			break;
+		case LLCP_TLV_OPT:
+			local->remote_opt = llcp_tlv_opt(tlv);
+			break;
+		case LLCP_TLV_RW:
+			local->remote_rw = llcp_tlv_rw(tlv);
+			break;
+		default:
+			pr_err("Invalid gt tlv value 0x%x\n", type);
+			break;
+		}
+
+		offset += length + 2;
+		tlv += length + 2;
+	}
+
+	pr_debug("version 0x%x miu %d lto %d opt 0x%x wks 0x%x rw %d\n",
+		local->remote_version, local->remote_miu,
+		local->remote_lto, local->remote_opt,
+		local->remote_wks, local->remote_rw);
+
+	return 0;
+}
+
+static struct sk_buff *llcp_add_header(struct sk_buff *pdu,
+					u8 dsap, u8 ssap, u8 ptype)
+{
+	u8 header[2];
+
+	pr_debug("ptype 0x%x dsap 0x%x ssap 0x%x\n", ptype, dsap, ssap);
+
+	header[0] = (u8)((dsap << 2) | (ptype >> 2));
+	header[1] = (u8)((ptype << 6) | ssap);
+
+	pr_debug("header 0x%x 0x%x\n", header[0], header[1]);
+
+	memcpy(skb_put(pdu, LLCP_HEADER_SIZE), header, LLCP_HEADER_SIZE);
+
+	return pdu;
+}
+
+static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, u8 *tlv, u8 tlv_length)
+{
+	/* XXX Add an skb length check */
+
+	if (tlv == NULL)
+		return NULL;
+
+	memcpy(skb_put(pdu, tlv_length), tlv, tlv_length);
+
+	return pdu;
+}
+
+static struct sk_buff *llcp_allocate_pdu(struct nfc_llcp_sock *sock,
+							u8 cmd, u16 size)
+{
+	struct sk_buff *skb;
+	int err;
+
+	if (sock->ssap == 0)
+		return NULL;
+
+	skb = nfc_alloc_send_skb(sock->dev, &sock->sk, MSG_DONTWAIT,
+					size + LLCP_HEADER_SIZE, &err);
+	if (skb == NULL) {
+		pr_err("Could not allocate PDU\n");
+		return NULL;
+	}
+
+	skb = llcp_add_header(skb, sock->dsap, sock->ssap, cmd);
+
+	return skb;
+}
+
+int nfc_llcp_disconnect(struct nfc_llcp_sock *sock)
+{
+	struct sk_buff *skb;
+	struct nfc_dev *dev;
+	struct nfc_llcp_local *local;
+	u16 size = 0;
+
+	pr_debug("Sending DISC\n");
+
+	local = sock->local;
+	if (local == NULL)
+		return -ENODEV;
+
+	dev = sock->dev;
+	if (dev == NULL)
+		return -ENODEV;
+
+	size += LLCP_HEADER_SIZE;
+	size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE;
+
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE);
+
+	skb = llcp_add_header(skb, sock->ssap, sock->dsap, LLCP_PDU_DISC);
+
+	skb_queue_tail(&local->tx_queue, skb);
+
+	return 0;
+}
+
+int nfc_llcp_send_symm(struct nfc_dev *dev)
+{
+	struct sk_buff *skb;
+	struct nfc_llcp_local *local;
+	u16 size = 0;
+
+	pr_debug("Sending SYMM\n");
+
+	local = nfc_llcp_find_local(dev);
+	if (local == NULL)
+		return -ENODEV;
+
+	size += LLCP_HEADER_SIZE;
+	size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE;
+
+	skb = alloc_skb(size, GFP_KERNEL);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE);
+
+	skb = llcp_add_header(skb, 0, 0, LLCP_PDU_SYMM);
+
+	return nfc_data_exchange(dev, local->target_idx, skb,
+					nfc_llcp_recv, local);
+}
+
+int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
+{
+	struct nfc_llcp_local *local;
+	struct sk_buff *skb;
+	u8 *service_name_tlv = NULL, service_name_tlv_length;
+	int err;
+	u16 size = 0;
+
+	pr_debug("Sending CONNECT\n");
+
+	local = sock->local;
+	if (local == NULL)
+		return -ENODEV;
+
+	if (sock->service_name != NULL) {
+		service_name_tlv = nfc_llcp_build_tlv(LLCP_TLV_SN,
+					sock->service_name,
+					sock->service_name_len,
+					&service_name_tlv_length);
+		size += service_name_tlv_length;
+	}
+
+	pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len);
+
+	skb = llcp_allocate_pdu(sock, LLCP_PDU_CONNECT, size);
+	if (skb == NULL) {
+		err = -ENOMEM;
+		goto error_tlv;
+	}
+
+	if (service_name_tlv != NULL)
+		skb = llcp_add_tlv(skb, service_name_tlv,
+					service_name_tlv_length);
+
+	skb_queue_tail(&local->tx_queue, skb);
+
+	return 0;
+
+error_tlv:
+	pr_err("error %d\n", err);
+
+	kfree(service_name_tlv);
+
+	return err;
+}
+
+int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
+{
+	struct nfc_llcp_local *local;
+	struct sk_buff *skb;
+
+	pr_debug("Sending CC\n");
+
+	local = sock->local;
+	if (local == NULL)
+		return -ENODEV;
+
+	skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, 0);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	skb_queue_tail(&local->tx_queue, skb);
+
+	return 0;
+}
+
+int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason)
+{
+	struct sk_buff *skb;
+	struct nfc_dev *dev;
+	u16 size = 1; /* Reason code */
+
+	pr_debug("Sending DM reason 0x%x\n", reason);
+
+	if (local == NULL)
+		return -ENODEV;
+
+	dev = local->dev;
+	if (dev == NULL)
+		return -ENODEV;
+
+	size += LLCP_HEADER_SIZE;
+	size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE;
+
+	skb = alloc_skb(size, GFP_KERNEL);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE);
+
+	skb = llcp_add_header(skb, ssap, dsap, LLCP_PDU_DM);
+
+	memcpy(skb_put(skb, 1), &reason, 1);
+
+	skb_queue_head(&local->tx_queue, skb);
+
+	return 0;
+}
+
+int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock)
+{
+	struct sk_buff *skb;
+	struct nfc_llcp_local *local;
+
+	pr_debug("Send DISC\n");
+
+	local = sock->local;
+	if (local == NULL)
+		return -ENODEV;
+
+	skb = llcp_allocate_pdu(sock, LLCP_PDU_DISC, 0);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	skb_queue_head(&local->tx_queue, skb);
+
+	return 0;
+}
diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c
new file mode 100644
index 000000000000..67756b23eac5
--- /dev/null
+++ b/net/nfc/llcp/llcp.c
@@ -0,0 +1,973 @@
+/*
+ * Copyright (C) 2011  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#define pr_fmt(fmt) "llcp: %s: " fmt, __func__
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/nfc.h>
+
+#include "../nfc.h"
+#include "llcp.h"
+
+static u8 llcp_magic[3] = {0x46, 0x66, 0x6d};
+
+static struct list_head llcp_devices;
+
+static void nfc_llcp_socket_release(struct nfc_llcp_local *local)
+{
+	struct nfc_llcp_sock *parent, *s, *n;
+	struct sock *sk, *parent_sk;
+	int i;
+
+
+	mutex_lock(&local->socket_lock);
+
+	for (i = 0; i < LLCP_MAX_SAP; i++) {
+		parent = local->sockets[i];
+		if (parent == NULL)
+			continue;
+
+		/* Release all child sockets */
+		list_for_each_entry_safe(s, n, &parent->list, list) {
+			list_del(&s->list);
+			sk = &s->sk;
+
+			lock_sock(sk);
+
+			if (sk->sk_state == LLCP_CONNECTED)
+				nfc_put_device(s->dev);
+
+			sk->sk_state = LLCP_CLOSED;
+			sock_set_flag(sk, SOCK_DEAD);
+
+			release_sock(sk);
+		}
+
+		parent_sk = &parent->sk;
+
+		lock_sock(parent_sk);
+
+		if (parent_sk->sk_state == LLCP_LISTEN) {
+			struct nfc_llcp_sock *lsk, *n;
+			struct sock *accept_sk;
+
+			list_for_each_entry_safe(lsk, n, &parent->accept_queue,
+								accept_queue) {
+				accept_sk = &lsk->sk;
+				lock_sock(accept_sk);
+
+				nfc_llcp_accept_unlink(accept_sk);
+
+				accept_sk->sk_state = LLCP_CLOSED;
+				sock_set_flag(accept_sk, SOCK_DEAD);
+
+				release_sock(accept_sk);
+
+				sock_orphan(accept_sk);
+			}
+		}
+
+		if (parent_sk->sk_state == LLCP_CONNECTED)
+			nfc_put_device(parent->dev);
+
+		parent_sk->sk_state = LLCP_CLOSED;
+		sock_set_flag(parent_sk, SOCK_DEAD);
+
+		release_sock(parent_sk);
+	}
+
+	mutex_unlock(&local->socket_lock);
+}
+
+static void nfc_llcp_timeout_work(struct work_struct *work)
+{
+	struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local,
+							timeout_work);
+
+	nfc_dep_link_down(local->dev);
+}
+
+static void nfc_llcp_symm_timer(unsigned long data)
+{
+	struct nfc_llcp_local *local = (struct nfc_llcp_local *) data;
+
+	pr_err("SYMM timeout\n");
+
+	queue_work(local->timeout_wq, &local->timeout_work);
+}
+
+struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev)
+{
+	struct nfc_llcp_local *local, *n;
+
+	list_for_each_entry_safe(local, n, &llcp_devices, list)
+		if (local->dev == dev)
+			return local;
+
+	pr_debug("No device found\n");
+
+	return NULL;
+}
+
+static char *wks[] = {
+	NULL,
+	NULL, /* SDP */
+	"urn:nfc:sn:ip",
+	"urn:nfc:sn:obex",
+	"urn:nfc:sn:snep",
+};
+
+static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len)
+{
+	int sap, num_wks;
+
+	pr_debug("%s\n", service_name);
+
+	if (service_name == NULL)
+		return -EINVAL;
+
+	num_wks = ARRAY_SIZE(wks);
+
+	for (sap = 0 ; sap < num_wks; sap++) {
+		if (wks[sap] == NULL)
+			continue;
+
+		if (strncmp(wks[sap], service_name, service_name_len) == 0)
+			return sap;
+	}
+
+	return -EINVAL;
+}
+
+u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local,
+				struct nfc_llcp_sock *sock)
+{
+	mutex_lock(&local->sdp_lock);
+
+	if (sock->service_name != NULL && sock->service_name_len > 0) {
+		int ssap = nfc_llcp_wks_sap(sock->service_name,
+						sock->service_name_len);
+
+		if (ssap > 0) {
+			pr_debug("WKS %d\n", ssap);
+
+			/* This is a WKS, let's check if it's free */
+			if (local->local_wks & BIT(ssap)) {
+				mutex_unlock(&local->sdp_lock);
+
+				return LLCP_SAP_MAX;
+			}
+
+			set_bit(BIT(ssap), &local->local_wks);
+			mutex_unlock(&local->sdp_lock);
+
+			return ssap;
+		}
+
+		/*
+		 * This is not a well known service,
+		 * we should try to find a local SDP free spot
+		 */
+		ssap = find_first_zero_bit(&local->local_sdp, LLCP_SDP_NUM_SAP);
+		if (ssap == LLCP_SDP_NUM_SAP) {
+			mutex_unlock(&local->sdp_lock);
+
+			return LLCP_SAP_MAX;
+		}
+
+		pr_debug("SDP ssap %d\n", LLCP_WKS_NUM_SAP + ssap);
+
+		set_bit(BIT(ssap), &local->local_sdp);
+		mutex_unlock(&local->sdp_lock);
+
+		return LLCP_WKS_NUM_SAP + ssap;
+
+	} else if (sock->ssap != 0) {
+		if (sock->ssap < LLCP_WKS_NUM_SAP) {
+			if (!(local->local_wks & BIT(sock->ssap))) {
+				set_bit(BIT(sock->ssap), &local->local_wks);
+				mutex_unlock(&local->sdp_lock);
+
+				return sock->ssap;
+			}
+
+		} else if (sock->ssap < LLCP_SDP_NUM_SAP) {
+			if (!(local->local_sdp &
+				BIT(sock->ssap - LLCP_WKS_NUM_SAP))) {
+				set_bit(BIT(sock->ssap - LLCP_WKS_NUM_SAP),
+							&local->local_sdp);
+				mutex_unlock(&local->sdp_lock);
+
+				return sock->ssap;
+			}
+		}
+	}
+
+	mutex_unlock(&local->sdp_lock);
+
+	return LLCP_SAP_MAX;
+}
+
+u8 nfc_llcp_get_local_ssap(struct nfc_llcp_local *local)
+{
+	u8 local_ssap;
+
+	mutex_lock(&local->sdp_lock);
+
+	local_ssap = find_first_zero_bit(&local->local_sap, LLCP_LOCAL_NUM_SAP);
+	if (local_ssap == LLCP_LOCAL_NUM_SAP) {
+		mutex_unlock(&local->sdp_lock);
+		return LLCP_SAP_MAX;
+	}
+
+	set_bit(BIT(local_ssap), &local->local_sap);
+
+	mutex_unlock(&local->sdp_lock);
+
+	return local_ssap + LLCP_LOCAL_SAP_OFFSET;
+}
+
+void nfc_llcp_put_ssap(struct nfc_llcp_local *local, u8 ssap)
+{
+	u8 local_ssap;
+	unsigned long *sdp;
+
+	if (ssap < LLCP_WKS_NUM_SAP) {
+		local_ssap = ssap;
+		sdp = &local->local_wks;
+	} else if (ssap < LLCP_LOCAL_NUM_SAP) {
+		local_ssap = ssap - LLCP_WKS_NUM_SAP;
+		sdp = &local->local_sdp;
+	} else if (ssap < LLCP_MAX_SAP) {
+		local_ssap = ssap - LLCP_LOCAL_NUM_SAP;
+		sdp = &local->local_sap;
+	} else {
+		return;
+	}
+
+	mutex_lock(&local->sdp_lock);
+
+	clear_bit(1 << local_ssap, sdp);
+
+	mutex_unlock(&local->sdp_lock);
+}
+
+u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, u8 *general_bytes_len)
+{
+	struct nfc_llcp_local *local;
+
+	local = nfc_llcp_find_local(dev);
+	if (local == NULL) {
+		*general_bytes_len = 0;
+		return NULL;
+	}
+
+	*general_bytes_len = local->gb_len;
+
+	return local->gb;
+}
+
+static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
+{
+	u8 *gb_cur, *version_tlv, version, version_length;
+	u8 *lto_tlv, lto, lto_length;
+	u8 *wks_tlv, wks_length;
+	u8 gb_len = 0;
+
+	version = LLCP_VERSION_11;
+	version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version,
+							1, &version_length);
+	gb_len += version_length;
+
+	/* 1500 ms */
+	lto = 150;
+	lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &lto, 1, &lto_length);
+	gb_len += lto_length;
+
+	pr_debug("Local wks 0x%lx\n", local->local_wks);
+	wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&local->local_wks, 2,
+								&wks_length);
+	gb_len += wks_length;
+
+	gb_len += ARRAY_SIZE(llcp_magic);
+
+	if (gb_len > NFC_MAX_GT_LEN) {
+		kfree(version_tlv);
+		return -EINVAL;
+	}
+
+	gb_cur = local->gb;
+
+	memcpy(gb_cur, llcp_magic, ARRAY_SIZE(llcp_magic));
+	gb_cur += ARRAY_SIZE(llcp_magic);
+
+	memcpy(gb_cur, version_tlv, version_length);
+	gb_cur += version_length;
+
+	memcpy(gb_cur, lto_tlv, lto_length);
+	gb_cur += lto_length;
+
+	memcpy(gb_cur, wks_tlv, wks_length);
+	gb_cur += wks_length;
+
+	kfree(version_tlv);
+	kfree(lto_tlv);
+
+	local->gb_len = gb_len;
+
+	return 0;
+}
+
+int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len)
+{
+	struct nfc_llcp_local *local = nfc_llcp_find_local(dev);
+
+	if (local == NULL) {
+		pr_err("No LLCP device\n");
+		return -ENODEV;
+	}
+
+	memset(local->remote_gb, 0, NFC_MAX_GT_LEN);
+	memcpy(local->remote_gb, gb, gb_len);
+	local->remote_gb_len = gb_len;
+
+	if (local->remote_gb == NULL ||
+			local->remote_gb_len == 0)
+		return -ENODEV;
+
+	if (memcmp(local->remote_gb, llcp_magic, 3)) {
+		pr_err("MAC does not support LLCP\n");
+		return -EINVAL;
+	}
+
+	return nfc_llcp_parse_tlv(local,
+			&local->remote_gb[3], local->remote_gb_len - 3);
+}
+
+static void nfc_llcp_tx_work(struct work_struct *work)
+{
+	struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local,
+							tx_work);
+	struct sk_buff *skb;
+
+	skb = skb_dequeue(&local->tx_queue);
+	if (skb != NULL) {
+		pr_debug("Sending pending skb\n");
+		nfc_data_exchange(local->dev, local->target_idx,
+					skb, nfc_llcp_recv, local);
+	} else {
+		nfc_llcp_send_symm(local->dev);
+	}
+
+	mod_timer(&local->link_timer,
+			jiffies + msecs_to_jiffies(local->remote_lto));
+}
+
+static u8 nfc_llcp_dsap(struct sk_buff *pdu)
+{
+	return (pdu->data[0] & 0xfc) >> 2;
+}
+
+static u8 nfc_llcp_ptype(struct sk_buff *pdu)
+{
+	return ((pdu->data[0] & 0x03) << 2) | ((pdu->data[1] & 0xc0) >> 6);
+}
+
+static u8 nfc_llcp_ssap(struct sk_buff *pdu)
+{
+	return pdu->data[1] & 0x3f;
+}
+
+static u8 nfc_llcp_ns(struct sk_buff *pdu)
+{
+	return pdu->data[2] >> 4;
+}
+
+static u8 nfc_llcp_nr(struct sk_buff *pdu)
+{
+	return pdu->data[2] & 0xf;
+}
+
+static void nfc_llcp_set_nrns(struct nfc_llcp_sock *sock, struct sk_buff *pdu)
+{
+	pdu->data[2] = (sock->send_n << 4) | ((sock->recv_n - 1) % 16);
+	sock->send_n = (sock->send_n + 1) % 16;
+	sock->recv_ack_n = (sock->recv_n - 1) % 16;
+}
+
+static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local,
+						u8 ssap, u8 dsap)
+{
+	struct nfc_llcp_sock *sock, *llcp_sock, *n;
+
+	if (ssap == 0 && dsap == 0)
+		return NULL;
+
+	mutex_lock(&local->socket_lock);
+	sock = local->sockets[ssap];
+	if (sock == NULL) {
+		mutex_unlock(&local->socket_lock);
+		return NULL;
+	}
+
+	pr_debug("root dsap %d (%d)\n", sock->dsap, dsap);
+
+	if (sock->dsap == dsap) {
+		sock_hold(&sock->sk);
+		mutex_unlock(&local->socket_lock);
+		return sock;
+	}
+
+	list_for_each_entry_safe(llcp_sock, n, &sock->list, list) {
+		pr_debug("llcp_sock %p sk %p dsap %d\n", llcp_sock,
+				&llcp_sock->sk, llcp_sock->dsap);
+		if (llcp_sock->dsap == dsap) {
+			sock_hold(&llcp_sock->sk);
+			mutex_unlock(&local->socket_lock);
+			return llcp_sock;
+		}
+	}
+
+	pr_err("Could not find socket for %d %d\n", ssap, dsap);
+
+	mutex_unlock(&local->socket_lock);
+
+	return NULL;
+}
+
+static void nfc_llcp_sock_put(struct nfc_llcp_sock *sock)
+{
+	sock_put(&sock->sk);
+}
+
+static u8 *nfc_llcp_connect_sn(struct sk_buff *skb, size_t *sn_len)
+{
+	u8 *tlv = &skb->data[2], type, length;
+	size_t tlv_array_len = skb->len - LLCP_HEADER_SIZE, offset = 0;
+
+	while (offset < tlv_array_len) {
+		type = tlv[0];
+		length = tlv[1];
+
+		pr_debug("type 0x%x length %d\n", type, length);
+
+		if (type == LLCP_TLV_SN) {
+			*sn_len = length;
+			return &tlv[2];
+		}
+
+		offset += length + 2;
+		tlv += length + 2;
+	}
+
+	return NULL;
+}
+
+static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
+				struct sk_buff *skb)
+{
+	struct sock *new_sk, *parent;
+	struct nfc_llcp_sock *sock, *new_sock;
+	u8 dsap, ssap, bound_sap, reason;
+
+	dsap = nfc_llcp_dsap(skb);
+	ssap = nfc_llcp_ssap(skb);
+
+	pr_debug("%d %d\n", dsap, ssap);
+
+	nfc_llcp_parse_tlv(local, &skb->data[LLCP_HEADER_SIZE],
+				skb->len - LLCP_HEADER_SIZE);
+
+	if (dsap != LLCP_SAP_SDP) {
+		bound_sap = dsap;
+
+		mutex_lock(&local->socket_lock);
+		sock = local->sockets[dsap];
+		if (sock == NULL) {
+			mutex_unlock(&local->socket_lock);
+			reason = LLCP_DM_NOBOUND;
+			goto fail;
+		}
+
+		sock_hold(&sock->sk);
+		mutex_unlock(&local->socket_lock);
+
+		lock_sock(&sock->sk);
+
+		if (sock->dsap == LLCP_SAP_SDP &&
+				sock->sk.sk_state == LLCP_LISTEN)
+			goto enqueue;
+	} else {
+		u8 *sn;
+		size_t sn_len;
+
+		sn = nfc_llcp_connect_sn(skb, &sn_len);
+		if (sn == NULL) {
+			reason = LLCP_DM_NOBOUND;
+			goto fail;
+		}
+
+		pr_debug("Service name length %zu\n", sn_len);
+
+		mutex_lock(&local->socket_lock);
+		for (bound_sap = 0; bound_sap < LLCP_LOCAL_SAP_OFFSET;
+								bound_sap++) {
+			sock = local->sockets[bound_sap];
+			if (sock == NULL)
+				continue;
+
+			if (sock->service_name == NULL ||
+				sock->service_name_len == 0)
+					continue;
+
+			if (sock->service_name_len != sn_len)
+				continue;
+
+			if (sock->dsap == LLCP_SAP_SDP &&
+					sock->sk.sk_state == LLCP_LISTEN &&
+					!memcmp(sn, sock->service_name, sn_len)) {
+				pr_debug("Found service name at SAP %d\n",
+								bound_sap);
+				sock_hold(&sock->sk);
+				mutex_unlock(&local->socket_lock);
+
+				lock_sock(&sock->sk);
+
+				goto enqueue;
+			}
+		}
+
+	}
+
+	mutex_unlock(&local->socket_lock);
+
+	reason = LLCP_DM_NOBOUND;
+	goto fail;
+
+enqueue:
+	parent = &sock->sk;
+
+	if (sk_acceptq_is_full(parent)) {
+		reason = LLCP_DM_REJ;
+		release_sock(&sock->sk);
+		sock_put(&sock->sk);
+		goto fail;
+	}
+
+	new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type,
+				     GFP_ATOMIC);
+	if (new_sk == NULL) {
+		reason = LLCP_DM_REJ;
+		release_sock(&sock->sk);
+		sock_put(&sock->sk);
+		goto fail;
+	}
+
+	new_sock = nfc_llcp_sock(new_sk);
+	new_sock->dev = local->dev;
+	new_sock->local = local;
+	new_sock->nfc_protocol = sock->nfc_protocol;
+	new_sock->ssap = bound_sap;
+	new_sock->dsap = ssap;
+	new_sock->parent = parent;
+
+	pr_debug("new sock %p sk %p\n", new_sock, &new_sock->sk);
+
+	list_add_tail(&new_sock->list, &sock->list);
+
+	nfc_llcp_accept_enqueue(&sock->sk, new_sk);
+
+	nfc_get_device(local->dev->idx);
+
+	new_sk->sk_state = LLCP_CONNECTED;
+
+	/* Wake the listening processes */
+	parent->sk_data_ready(parent, 0);
+
+	/* Send CC */
+	nfc_llcp_send_cc(new_sock);
+
+	release_sock(&sock->sk);
+	sock_put(&sock->sk);
+
+	return;
+
+fail:
+	/* Send DM */
+	nfc_llcp_send_dm(local, dsap, ssap, reason);
+
+	return;
+
+}
+
+static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local,
+				struct sk_buff *skb)
+{
+	struct nfc_llcp_sock *llcp_sock;
+	struct sock *sk;
+	u8 dsap, ssap, ptype, ns, nr;
+
+	ptype = nfc_llcp_ptype(skb);
+	dsap = nfc_llcp_dsap(skb);
+	ssap = nfc_llcp_ssap(skb);
+	ns = nfc_llcp_ns(skb);
+	nr = nfc_llcp_nr(skb);
+
+	pr_debug("%d %d R %d S %d\n", dsap, ssap, nr, ns);
+
+	llcp_sock = nfc_llcp_sock_get(local, dsap, ssap);
+	if (llcp_sock == NULL) {
+		nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN);
+		return;
+	}
+
+	sk = &llcp_sock->sk;
+	lock_sock(sk);
+	if (sk->sk_state == LLCP_CLOSED) {
+		release_sock(sk);
+		nfc_llcp_sock_put(llcp_sock);
+	}
+
+	if (ns == llcp_sock->recv_n)
+		llcp_sock->recv_n = (llcp_sock->recv_n + 1) % 16;
+	else
+		pr_err("Received out of sequence I PDU\n");
+
+	/* Pass the payload upstream */
+	if (ptype == LLCP_PDU_I) {
+		pr_debug("I frame, queueing on %p\n", &llcp_sock->sk);
+
+		skb_pull(skb, LLCP_HEADER_SIZE + LLCP_SEQUENCE_SIZE);
+		if (sock_queue_rcv_skb(&llcp_sock->sk, skb)) {
+			pr_err("receive queue is full\n");
+			skb_queue_head(&llcp_sock->tx_backlog_queue, skb);
+		}
+	}
+
+	/* Remove skbs from the pending queue */
+	if (llcp_sock->send_ack_n != nr) {
+		struct sk_buff *s, *tmp;
+
+		llcp_sock->send_ack_n = nr;
+
+		skb_queue_walk_safe(&llcp_sock->tx_pending_queue, s, tmp)
+			if (nfc_llcp_ns(s) <= nr) {
+				skb_unlink(s, &llcp_sock->tx_pending_queue);
+				kfree_skb(s);
+			}
+	}
+
+	/* Queue some I frames for transmission */
+	while (llcp_sock->remote_ready &&
+		skb_queue_len(&llcp_sock->tx_pending_queue) <= local->remote_rw) {
+		struct sk_buff *pdu, *pending_pdu;
+
+		pdu = skb_dequeue(&llcp_sock->tx_queue);
+		if (pdu == NULL)
+			break;
+
+		/* Update N(S)/N(R) */
+		nfc_llcp_set_nrns(llcp_sock, pdu);
+
+		pending_pdu = skb_clone(pdu, GFP_KERNEL);
+
+		skb_queue_tail(&local->tx_queue, pdu);
+		skb_queue_tail(&llcp_sock->tx_pending_queue, pending_pdu);
+	}
+
+	release_sock(sk);
+	nfc_llcp_sock_put(llcp_sock);
+}
+
+static void nfc_llcp_recv_disc(struct nfc_llcp_local *local,
+				struct sk_buff *skb)
+{
+	struct nfc_llcp_sock *llcp_sock;
+	struct sock *sk;
+	u8 dsap, ssap;
+
+	dsap = nfc_llcp_dsap(skb);
+	ssap = nfc_llcp_ssap(skb);
+
+	llcp_sock = nfc_llcp_sock_get(local, dsap, ssap);
+	if (llcp_sock == NULL) {
+		nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN);
+		return;
+	}
+
+	sk = &llcp_sock->sk;
+	lock_sock(sk);
+	if (sk->sk_state == LLCP_CLOSED) {
+		release_sock(sk);
+		nfc_llcp_sock_put(llcp_sock);
+	}
+
+
+	if (sk->sk_state == LLCP_CONNECTED) {
+		nfc_put_device(local->dev);
+		sk->sk_state = LLCP_CLOSED;
+		sk->sk_state_change(sk);
+	}
+
+	nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_DISC);
+
+	release_sock(sk);
+	nfc_llcp_sock_put(llcp_sock);
+}
+
+static void nfc_llcp_recv_cc(struct nfc_llcp_local *local,
+				struct sk_buff *skb)
+{
+	struct nfc_llcp_sock *llcp_sock;
+	u8 dsap, ssap;
+
+
+	dsap = nfc_llcp_dsap(skb);
+	ssap = nfc_llcp_ssap(skb);
+
+	llcp_sock = nfc_llcp_sock_get(local, dsap, ssap);
+
+	if (llcp_sock == NULL)
+		llcp_sock = nfc_llcp_sock_get(local, dsap, LLCP_SAP_SDP);
+
+	if (llcp_sock == NULL) {
+		pr_err("Invalid CC\n");
+		nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN);
+
+		return;
+	}
+
+	llcp_sock->dsap = ssap;
+
+	nfc_llcp_parse_tlv(local, &skb->data[LLCP_HEADER_SIZE],
+				skb->len - LLCP_HEADER_SIZE);
+
+	nfc_llcp_sock_put(llcp_sock);
+}
+
+static void nfc_llcp_rx_work(struct work_struct *work)
+{
+	struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local,
+								rx_work);
+	u8 dsap, ssap, ptype;
+	struct sk_buff *skb;
+
+	skb = local->rx_pending;
+	if (skb == NULL) {
+		pr_debug("No pending SKB\n");
+		return;
+	}
+
+	ptype = nfc_llcp_ptype(skb);
+	dsap = nfc_llcp_dsap(skb);
+	ssap = nfc_llcp_ssap(skb);
+
+	pr_debug("ptype 0x%x dsap 0x%x ssap 0x%x\n", ptype, dsap, ssap);
+
+	switch (ptype) {
+	case LLCP_PDU_SYMM:
+		pr_debug("SYMM\n");
+		break;
+
+	case LLCP_PDU_CONNECT:
+		pr_debug("CONNECT\n");
+		nfc_llcp_recv_connect(local, skb);
+		break;
+
+	case LLCP_PDU_DISC:
+		pr_debug("DISC\n");
+		nfc_llcp_recv_disc(local, skb);
+		break;
+
+	case LLCP_PDU_CC:
+		pr_debug("CC\n");
+		nfc_llcp_recv_cc(local, skb);
+		break;
+
+	case LLCP_PDU_I:
+	case LLCP_PDU_RR:
+		pr_debug("I frame\n");
+		nfc_llcp_recv_hdlc(local, skb);
+		break;
+
+	}
+
+	queue_work(local->tx_wq, &local->tx_work);
+	kfree_skb(local->rx_pending);
+	local->rx_pending = NULL;
+
+	return;
+}
+
+void nfc_llcp_recv(void *data, struct sk_buff *skb, int err)
+{
+	struct nfc_llcp_local *local = (struct nfc_llcp_local *) data;
+
+	pr_debug("Received an LLCP PDU\n");
+	if (err < 0) {
+		pr_err("err %d", err);
+		return;
+	}
+
+	local->rx_pending = skb_get(skb);
+	del_timer(&local->link_timer);
+	queue_work(local->rx_wq, &local->rx_work);
+
+	return;
+}
+
+void nfc_llcp_mac_is_down(struct nfc_dev *dev)
+{
+	struct nfc_llcp_local *local;
+
+	local = nfc_llcp_find_local(dev);
+	if (local == NULL)
+		return;
+
+	/* Close and purge all existing sockets */
+	nfc_llcp_socket_release(local);
+}
+
+void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
+			u8 comm_mode, u8 rf_mode)
+{
+	struct nfc_llcp_local *local;
+
+	pr_debug("rf mode %d\n", rf_mode);
+
+	local = nfc_llcp_find_local(dev);
+	if (local == NULL)
+		return;
+
+	local->target_idx = target_idx;
+	local->comm_mode = comm_mode;
+	local->rf_mode = rf_mode;
+
+	if (rf_mode == NFC_RF_INITIATOR) {
+		pr_debug("Queueing Tx work\n");
+
+		queue_work(local->tx_wq, &local->tx_work);
+	} else {
+		mod_timer(&local->link_timer,
+			jiffies + msecs_to_jiffies(local->remote_lto));
+	}
+}
+
+int nfc_llcp_register_device(struct nfc_dev *ndev)
+{
+	struct device *dev = &ndev->dev;
+	struct nfc_llcp_local *local;
+	char name[32];
+	int err;
+
+	local = kzalloc(sizeof(struct nfc_llcp_local), GFP_KERNEL);
+	if (local == NULL)
+		return -ENOMEM;
+
+	local->dev = ndev;
+	INIT_LIST_HEAD(&local->list);
+	mutex_init(&local->sdp_lock);
+	mutex_init(&local->socket_lock);
+	init_timer(&local->link_timer);
+	local->link_timer.data = (unsigned long) local;
+	local->link_timer.function = nfc_llcp_symm_timer;
+
+	skb_queue_head_init(&local->tx_queue);
+	INIT_WORK(&local->tx_work, nfc_llcp_tx_work);
+	snprintf(name, sizeof(name), "%s_llcp_tx_wq", dev_name(dev));
+	local->tx_wq = alloc_workqueue(name,
+			WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
+	if (local->tx_wq == NULL) {
+		err = -ENOMEM;
+		goto err_local;
+	}
+
+	local->rx_pending = NULL;
+	INIT_WORK(&local->rx_work, nfc_llcp_rx_work);
+	snprintf(name, sizeof(name), "%s_llcp_rx_wq", dev_name(dev));
+	local->rx_wq = alloc_workqueue(name,
+			WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
+	if (local->rx_wq == NULL) {
+		err = -ENOMEM;
+		goto err_tx_wq;
+	}
+
+	INIT_WORK(&local->timeout_work, nfc_llcp_timeout_work);
+	snprintf(name, sizeof(name), "%s_llcp_timeout_wq", dev_name(dev));
+	local->timeout_wq = alloc_workqueue(name,
+			WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
+	if (local->timeout_wq == NULL) {
+		err = -ENOMEM;
+		goto err_rx_wq;
+	}
+
+	nfc_llcp_build_gb(local);
+
+	local->remote_miu = LLCP_DEFAULT_MIU;
+	local->remote_lto = LLCP_DEFAULT_LTO;
+	local->remote_rw = LLCP_DEFAULT_RW;
+
+	list_add(&llcp_devices, &local->list);
+
+	return 0;
+
+err_rx_wq:
+	destroy_workqueue(local->rx_wq);
+
+err_tx_wq:
+	destroy_workqueue(local->tx_wq);
+
+err_local:
+	kfree(local);
+
+	return 0;
+}
+
+void nfc_llcp_unregister_device(struct nfc_dev *dev)
+{
+	struct nfc_llcp_local *local = nfc_llcp_find_local(dev);
+
+	if (local == NULL) {
+		pr_debug("No such device\n");
+		return;
+	}
+
+	list_del(&local->list);
+	nfc_llcp_socket_release(local);
+	del_timer_sync(&local->link_timer);
+	skb_queue_purge(&local->tx_queue);
+	destroy_workqueue(local->tx_wq);
+	destroy_workqueue(local->rx_wq);
+	kfree(local->rx_pending);
+	kfree(local);
+}
+
+int __init nfc_llcp_init(void)
+{
+	INIT_LIST_HEAD(&llcp_devices);
+
+	return nfc_llcp_sock_init();
+}
+
+void nfc_llcp_exit(void)
+{
+	nfc_llcp_sock_exit();
+}
diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h
new file mode 100644
index 000000000000..0ad2e3361584
--- /dev/null
+++ b/net/nfc/llcp/llcp.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2011  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+enum llcp_state {
+	LLCP_CONNECTED = 1, /* wait_for_packet() wants that */
+	LLCP_CLOSED,
+	LLCP_BOUND,
+	LLCP_LISTEN,
+};
+
+#define LLCP_DEFAULT_LTO 100
+#define LLCP_DEFAULT_RW  1
+#define LLCP_DEFAULT_MIU 128
+
+#define LLCP_WKS_NUM_SAP   16
+#define LLCP_SDP_NUM_SAP   16
+#define LLCP_LOCAL_NUM_SAP 32
+#define LLCP_LOCAL_SAP_OFFSET (LLCP_WKS_NUM_SAP + LLCP_SDP_NUM_SAP)
+#define LLCP_MAX_SAP (LLCP_WKS_NUM_SAP + LLCP_SDP_NUM_SAP + LLCP_LOCAL_NUM_SAP)
+
+struct nfc_llcp_sock;
+
+struct nfc_llcp_local {
+	struct list_head list;
+	struct nfc_dev *dev;
+
+	struct mutex sdp_lock;
+	struct mutex socket_lock;
+
+	struct timer_list link_timer;
+	struct sk_buff_head tx_queue;
+	struct workqueue_struct	*tx_wq;
+	struct work_struct	 tx_work;
+	struct workqueue_struct	*rx_wq;
+	struct work_struct	 rx_work;
+	struct sk_buff *rx_pending;
+	struct workqueue_struct	*timeout_wq;
+	struct work_struct	 timeout_work;
+
+	u32 target_idx;
+	u8 rf_mode;
+	u8 comm_mode;
+	unsigned long local_wks;      /* Well known services */
+	unsigned long local_sdp;      /* Local services  */
+	unsigned long local_sap; /* Local SAPs, not available for discovery */
+
+	/* local */
+	u8 gb[NFC_MAX_GT_LEN];
+	u8 gb_len;
+
+	/* remote */
+	u8 remote_gb[NFC_MAX_GT_LEN];
+	u8 remote_gb_len;
+
+	u8  remote_version;
+	u16 remote_miu;
+	u16 remote_lto;
+	u8  remote_opt;
+	u16 remote_wks;
+	u8  remote_rw;
+
+	/* sockets array */
+	struct nfc_llcp_sock *sockets[LLCP_MAX_SAP];
+};
+
+struct nfc_llcp_sock {
+	struct sock sk;
+	struct list_head list;
+	struct nfc_dev *dev;
+	struct nfc_llcp_local *local;
+	u32 target_idx;
+	u32 nfc_protocol;
+
+	u8 ssap;
+	u8 dsap;
+	char *service_name;
+	size_t service_name_len;
+
+	/* Link variables */
+	u8 send_n;
+	u8 send_ack_n;
+	u8 recv_n;
+	u8 recv_ack_n;
+
+	/* Is the remote peer ready to receive */
+	u8 remote_ready;
+
+	struct sk_buff_head tx_queue;
+	struct sk_buff_head tx_pending_queue;
+	struct sk_buff_head tx_backlog_queue;
+
+	struct list_head accept_queue;
+	struct sock *parent;
+};
+
+#define nfc_llcp_sock(sk) ((struct nfc_llcp_sock *) (sk))
+#define nfc_llcp_dev(sk)  (nfc_llcp_sock((sk))->dev)
+
+#define LLCP_HEADER_SIZE   2
+#define LLCP_SEQUENCE_SIZE 1
+
+/* LLCP versions: 1.1 is 1.0 plus SDP */
+#define LLCP_VERSION_10 0x10
+#define LLCP_VERSION_11 0x11
+
+/* LLCP PDU types */
+#define LLCP_PDU_SYMM     0x0
+#define LLCP_PDU_PAX      0x1
+#define LLCP_PDU_AGF      0x2
+#define LLCP_PDU_UI       0x3
+#define LLCP_PDU_CONNECT  0x4
+#define LLCP_PDU_DISC     0x5
+#define LLCP_PDU_CC       0x6
+#define LLCP_PDU_DM       0x7
+#define LLCP_PDU_FRMR     0x8
+#define LLCP_PDU_SNL      0x9
+#define LLCP_PDU_I        0xc
+#define LLCP_PDU_RR       0xd
+#define LLCP_PDU_RNR      0xe
+
+/* Parameters TLV types */
+#define LLCP_TLV_VERSION 0x1
+#define LLCP_TLV_MIUX    0x2
+#define LLCP_TLV_WKS     0x3
+#define LLCP_TLV_LTO     0x4
+#define LLCP_TLV_RW      0x5
+#define LLCP_TLV_SN      0x6
+#define LLCP_TLV_OPT     0x7
+#define LLCP_TLV_SDREQ   0x8
+#define LLCP_TLV_SDRES   0x9
+#define LLCP_TLV_MAX     0xa
+
+/* Well known LLCP SAP */
+#define LLCP_SAP_SDP   0x1
+#define LLCP_SAP_IP    0x2
+#define LLCP_SAP_OBEX  0x3
+#define LLCP_SAP_SNEP  0x4
+#define LLCP_SAP_MAX   0xff
+
+/* Disconnection reason code */
+#define LLCP_DM_DISC    0x00
+#define LLCP_DM_NOCONN  0x01
+#define LLCP_DM_NOBOUND 0x02
+#define LLCP_DM_REJ     0x03
+
+
+struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);
+u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local,
+				struct nfc_llcp_sock *sock);
+u8 nfc_llcp_get_local_ssap(struct nfc_llcp_local *local);
+void nfc_llcp_put_ssap(struct nfc_llcp_local *local, u8 ssap);
+
+/* Sock API */
+struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp);
+void nfc_llcp_sock_free(struct nfc_llcp_sock *sock);
+void nfc_llcp_accept_unlink(struct sock *sk);
+void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk);
+struct sock *nfc_llcp_accept_dequeue(struct sock *sk, struct socket *newsock);
+
+/* TLV API */
+int nfc_llcp_parse_tlv(struct nfc_llcp_local *local,
+			u8 *tlv_array, u16 tlv_array_len);
+
+/* Commands API */
+void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
+u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length);
+void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
+int nfc_llcp_disconnect(struct nfc_llcp_sock *sock);
+int nfc_llcp_send_symm(struct nfc_dev *dev);
+int nfc_llcp_send_connect(struct nfc_llcp_sock *sock);
+int nfc_llcp_send_cc(struct nfc_llcp_sock *sock);
+int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason);
+int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock);
+
+/* Socket API */
+int __init nfc_llcp_sock_init(void);
+void nfc_llcp_sock_exit(void);
diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c
new file mode 100644
index 000000000000..f738ccd535f1
--- /dev/null
+++ b/net/nfc/llcp/sock.c
@@ -0,0 +1,675 @@
+/*
+ * Copyright (C) 2011  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#define pr_fmt(fmt) "llcp: %s: " fmt, __func__
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/nfc.h>
+
+#include "../nfc.h"
+#include "llcp.h"
+
+static struct proto llcp_sock_proto = {
+	.name     = "NFC_LLCP",
+	.owner    = THIS_MODULE,
+	.obj_size = sizeof(struct nfc_llcp_sock),
+};
+
+static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
+{
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+	struct nfc_llcp_local *local;
+	struct nfc_dev *dev;
+	struct sockaddr_nfc_llcp llcp_addr;
+	int len, ret = 0;
+
+	pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family);
+
+	if (!addr || addr->sa_family != AF_NFC)
+		return -EINVAL;
+
+	memset(&llcp_addr, 0, sizeof(llcp_addr));
+	len = min_t(unsigned int, sizeof(llcp_addr), alen);
+	memcpy(&llcp_addr, addr, len);
+
+	/* This is going to be a listening socket, dsap must be 0 */
+	if (llcp_addr.dsap != 0)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != LLCP_CLOSED) {
+		ret = -EBADFD;
+		goto error;
+	}
+
+	dev = nfc_get_device(llcp_addr.dev_idx);
+	if (dev == NULL) {
+		ret = -ENODEV;
+		goto error;
+	}
+
+	local = nfc_llcp_find_local(dev);
+	if (local == NULL) {
+		ret = -ENODEV;
+		goto put_dev;
+	}
+
+	llcp_sock->dev = dev;
+	llcp_sock->local = local;
+	llcp_sock->nfc_protocol = llcp_addr.nfc_protocol;
+	llcp_sock->service_name_len = min_t(unsigned int,
+			llcp_addr.service_name_len, NFC_LLCP_MAX_SERVICE_NAME);
+	llcp_sock->service_name = kmemdup(llcp_addr.service_name,
+				llcp_sock->service_name_len, GFP_KERNEL);
+
+	llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock);
+	if (llcp_sock->ssap == LLCP_MAX_SAP)
+		goto put_dev;
+
+	local->sockets[llcp_sock->ssap] = llcp_sock;
+
+	pr_debug("Socket bound to SAP %d\n", llcp_sock->ssap);
+
+	sk->sk_state = LLCP_BOUND;
+
+put_dev:
+	nfc_put_device(dev);
+
+error:
+	release_sock(sk);
+	return ret;
+}
+
+static int llcp_sock_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	int ret = 0;
+
+	pr_debug("sk %p backlog %d\n", sk, backlog);
+
+	lock_sock(sk);
+
+	if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM)
+			|| sk->sk_state != LLCP_BOUND) {
+		ret = -EBADFD;
+		goto error;
+	}
+
+	sk->sk_max_ack_backlog = backlog;
+	sk->sk_ack_backlog = 0;
+
+	pr_debug("Socket listening\n");
+	sk->sk_state = LLCP_LISTEN;
+
+error:
+	release_sock(sk);
+
+	return ret;
+}
+
+void nfc_llcp_accept_unlink(struct sock *sk)
+{
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+
+	pr_debug("state %d\n", sk->sk_state);
+
+	list_del_init(&llcp_sock->accept_queue);
+	sk_acceptq_removed(llcp_sock->parent);
+	llcp_sock->parent = NULL;
+
+	sock_put(sk);
+}
+
+void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk)
+{
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+	struct nfc_llcp_sock *llcp_sock_parent = nfc_llcp_sock(parent);
+
+	/* Lock will be free from unlink */
+	sock_hold(sk);
+
+	list_add_tail(&llcp_sock->accept_queue,
+			&llcp_sock_parent->accept_queue);
+	llcp_sock->parent = parent;
+	sk_acceptq_added(parent);
+}
+
+struct sock *nfc_llcp_accept_dequeue(struct sock *parent,
+					struct socket *newsock)
+{
+	struct nfc_llcp_sock *lsk, *n, *llcp_parent;
+	struct sock *sk;
+
+	llcp_parent = nfc_llcp_sock(parent);
+
+	list_for_each_entry_safe(lsk, n, &llcp_parent->accept_queue,
+							accept_queue) {
+		sk = &lsk->sk;
+		lock_sock(sk);
+
+		if (sk->sk_state == LLCP_CLOSED) {
+			release_sock(sk);
+			nfc_llcp_accept_unlink(sk);
+			continue;
+		}
+
+		if (sk->sk_state == LLCP_CONNECTED || !newsock) {
+			nfc_llcp_accept_unlink(sk);
+			if (newsock)
+				sock_graft(sk, newsock);
+
+			release_sock(sk);
+
+			pr_debug("Returning sk state %d\n", sk->sk_state);
+
+			return sk;
+		}
+
+		release_sock(sk);
+	}
+
+	return NULL;
+}
+
+static int llcp_sock_accept(struct socket *sock, struct socket *newsock,
+								int flags)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct sock *sk = sock->sk, *new_sk;
+	long timeo;
+	int ret = 0;
+
+	pr_debug("parent %p\n", sk);
+
+	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+
+	if (sk->sk_state != LLCP_LISTEN) {
+		ret = -EBADFD;
+		goto error;
+	}
+
+	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+
+	/* Wait for an incoming connection. */
+	add_wait_queue_exclusive(sk_sleep(sk), &wait);
+	while (!(new_sk = nfc_llcp_accept_dequeue(sk, newsock))) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (!timeo) {
+			ret = -EAGAIN;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			ret = sock_intr_errno(timeo);
+			break;
+		}
+
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk_sleep(sk), &wait);
+
+	if (ret)
+		goto error;
+
+	newsock->state = SS_CONNECTED;
+
+	pr_debug("new socket %p\n", new_sk);
+
+error:
+	release_sock(sk);
+
+	return ret;
+}
+
+static int llcp_sock_getname(struct socket *sock, struct sockaddr *addr,
+			     int *len, int peer)
+{
+	struct sockaddr_nfc_llcp *llcp_addr = (struct sockaddr_nfc_llcp *) addr;
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+
+	pr_debug("%p\n", sk);
+
+	addr->sa_family = AF_NFC;
+	*len = sizeof(struct sockaddr_nfc_llcp);
+
+	llcp_addr->dev_idx = llcp_sock->dev->idx;
+	llcp_addr->dsap = llcp_sock->dsap;
+	llcp_addr->ssap = llcp_sock->ssap;
+	llcp_addr->service_name_len = llcp_sock->service_name_len;
+	memcpy(llcp_addr->service_name, llcp_sock->service_name,
+					llcp_addr->service_name_len);
+
+	return 0;
+}
+
+static inline unsigned int llcp_accept_poll(struct sock *parent)
+{
+	struct nfc_llcp_sock *llcp_sock, *n, *parent_sock;
+	struct sock *sk;
+
+	parent_sock = nfc_llcp_sock(parent);
+
+	list_for_each_entry_safe(llcp_sock, n, &parent_sock->accept_queue,
+								accept_queue) {
+		sk = &llcp_sock->sk;
+
+		if (sk->sk_state == LLCP_CONNECTED)
+			return POLLIN | POLLRDNORM;
+	}
+
+	return 0;
+}
+
+static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
+							poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	unsigned int mask = 0;
+
+	pr_debug("%p\n", sk);
+
+	sock_poll_wait(file, sk_sleep(sk), wait);
+
+	if (sk->sk_state == LLCP_LISTEN)
+		return llcp_accept_poll(sk);
+
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+		mask |= POLLERR;
+
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		mask |= POLLIN;
+
+	if (sk->sk_state == LLCP_CLOSED)
+		mask |= POLLHUP;
+
+	return mask;
+}
+
+static int llcp_sock_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_local *local;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+
+	if (!sk)
+		return 0;
+
+	pr_debug("%p\n", sk);
+
+	local = llcp_sock->local;
+	if (local == NULL)
+		return -ENODEV;
+
+	mutex_lock(&local->socket_lock);
+
+	if (llcp_sock == local->sockets[llcp_sock->ssap]) {
+		local->sockets[llcp_sock->ssap] = NULL;
+	} else {
+		struct nfc_llcp_sock *parent, *s, *n;
+
+		parent = local->sockets[llcp_sock->ssap];
+
+		list_for_each_entry_safe(s, n, &parent->list, list)
+			if (llcp_sock == s) {
+				list_del(&s->list);
+				break;
+			}
+
+	}
+
+	mutex_unlock(&local->socket_lock);
+
+	lock_sock(sk);
+
+	/* Send a DISC */
+	if (sk->sk_state == LLCP_CONNECTED)
+		nfc_llcp_disconnect(llcp_sock);
+
+	if (sk->sk_state == LLCP_LISTEN) {
+		struct nfc_llcp_sock *lsk, *n;
+		struct sock *accept_sk;
+
+		list_for_each_entry_safe(lsk, n, &llcp_sock->accept_queue,
+								accept_queue) {
+			accept_sk = &lsk->sk;
+			lock_sock(accept_sk);
+
+			nfc_llcp_disconnect(lsk);
+			nfc_llcp_accept_unlink(accept_sk);
+
+			release_sock(accept_sk);
+
+			sock_set_flag(sk, SOCK_DEAD);
+			sock_orphan(accept_sk);
+			sock_put(accept_sk);
+		}
+	}
+
+	/* Freeing the SAP */
+	if ((sk->sk_state == LLCP_CONNECTED
+			&& llcp_sock->ssap > LLCP_LOCAL_SAP_OFFSET) ||
+	    sk->sk_state == LLCP_BOUND ||
+	    sk->sk_state == LLCP_LISTEN)
+		nfc_llcp_put_ssap(llcp_sock->local, llcp_sock->ssap);
+
+	sock_set_flag(sk, SOCK_DEAD);
+
+	release_sock(sk);
+
+	sock_orphan(sk);
+	sock_put(sk);
+
+	return 0;
+}
+
+static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
+							int len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+	struct sockaddr_nfc_llcp *addr = (struct sockaddr_nfc_llcp *)_addr;
+	struct nfc_dev *dev;
+	struct nfc_llcp_local *local;
+	int ret = 0;
+
+	pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags);
+
+	if (!addr || len < sizeof(struct sockaddr_nfc) ||
+			addr->sa_family != AF_NFC) {
+		pr_err("Invalid socket\n");
+		return -EINVAL;
+	}
+
+	if (addr->service_name_len == 0 && addr->dsap == 0) {
+		pr_err("Missing service name or dsap\n");
+		return -EINVAL;
+	}
+
+	pr_debug("addr dev_idx=%u target_idx=%u protocol=%u\n", addr->dev_idx,
+					addr->target_idx, addr->nfc_protocol);
+
+	lock_sock(sk);
+
+	if (sk->sk_state == LLCP_CONNECTED) {
+		ret = -EISCONN;
+		goto error;
+	}
+
+	dev = nfc_get_device(addr->dev_idx);
+	if (dev == NULL) {
+		ret = -ENODEV;
+		goto error;
+	}
+
+	local = nfc_llcp_find_local(dev);
+	if (local == NULL) {
+		ret = -ENODEV;
+		goto put_dev;
+	}
+
+	device_lock(&dev->dev);
+	if (dev->dep_link_up == false) {
+		ret = -ENOLINK;
+		device_unlock(&dev->dev);
+		goto put_dev;
+	}
+	device_unlock(&dev->dev);
+
+	if (local->rf_mode == NFC_RF_INITIATOR &&
+			addr->target_idx != local->target_idx) {
+		ret = -ENOLINK;
+		goto put_dev;
+	}
+
+	llcp_sock->dev = dev;
+	llcp_sock->local = local;
+	llcp_sock->ssap = nfc_llcp_get_local_ssap(local);
+	if (llcp_sock->ssap == LLCP_SAP_MAX) {
+		ret = -ENOMEM;
+		goto put_dev;
+	}
+	if (addr->service_name_len == 0)
+		llcp_sock->dsap = addr->dsap;
+	else
+		llcp_sock->dsap = LLCP_SAP_SDP;
+	llcp_sock->nfc_protocol = addr->nfc_protocol;
+	llcp_sock->service_name_len = min_t(unsigned int,
+			addr->service_name_len, NFC_LLCP_MAX_SERVICE_NAME);
+	llcp_sock->service_name = kmemdup(addr->service_name,
+				 llcp_sock->service_name_len, GFP_KERNEL);
+
+	local->sockets[llcp_sock->ssap] = llcp_sock;
+
+	ret = nfc_llcp_send_connect(llcp_sock);
+	if (ret)
+		goto put_dev;
+
+	sk->sk_state = LLCP_CONNECTED;
+
+	release_sock(sk);
+	return 0;
+
+put_dev:
+	nfc_put_device(dev);
+
+error:
+	release_sock(sk);
+	return ret;
+}
+
+static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
+			     struct msghdr *msg, size_t len, int flags)
+{
+	int noblock = flags & MSG_DONTWAIT;
+	struct sock *sk = sock->sk;
+	unsigned int copied, rlen;
+	struct sk_buff *skb, *cskb;
+	int err = 0;
+
+	pr_debug("%p %zu\n", sk, len);
+
+	lock_sock(sk);
+
+	if (sk->sk_state == LLCP_CLOSED &&
+			skb_queue_empty(&sk->sk_receive_queue)) {
+		release_sock(sk);
+		return 0;
+	}
+
+	release_sock(sk);
+
+	if (flags & (MSG_OOB))
+		return -EOPNOTSUPP;
+
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb) {
+		pr_err("Recv datagram failed state %d %d %d",
+				sk->sk_state, err, sock_error(sk));
+
+		if (sk->sk_shutdown & RCV_SHUTDOWN)
+			return 0;
+
+		return err;
+	}
+
+	rlen   = skb->len;		/* real length of skb */
+	copied = min_t(unsigned int, rlen, len);
+
+	cskb = skb;
+	if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) {
+		if (!(flags & MSG_PEEK))
+			skb_queue_head(&sk->sk_receive_queue, skb);
+		return -EFAULT;
+	}
+
+	/* Mark read part of skb as used */
+	if (!(flags & MSG_PEEK)) {
+
+		/* SOCK_STREAM: re-queue skb if it contains unreceived data */
+		if (sk->sk_type == SOCK_STREAM) {
+			skb_pull(skb, copied);
+			if (skb->len) {
+				skb_queue_head(&sk->sk_receive_queue, skb);
+				goto done;
+			}
+		}
+
+		kfree_skb(skb);
+	}
+
+	/* XXX Queue backlogged skbs */
+
+done:
+	/* SOCK_SEQPACKET: return real length if MSG_TRUNC is set */
+	if (sk->sk_type == SOCK_SEQPACKET && (flags & MSG_TRUNC))
+		copied = rlen;
+
+	return copied;
+}
+
+static const struct proto_ops llcp_sock_ops = {
+	.family         = PF_NFC,
+	.owner          = THIS_MODULE,
+	.bind           = llcp_sock_bind,
+	.connect        = llcp_sock_connect,
+	.release        = llcp_sock_release,
+	.socketpair     = sock_no_socketpair,
+	.accept         = llcp_sock_accept,
+	.getname        = llcp_sock_getname,
+	.poll           = llcp_sock_poll,
+	.ioctl          = sock_no_ioctl,
+	.listen         = llcp_sock_listen,
+	.shutdown       = sock_no_shutdown,
+	.setsockopt     = sock_no_setsockopt,
+	.getsockopt     = sock_no_getsockopt,
+	.sendmsg        = sock_no_sendmsg,
+	.recvmsg        = llcp_sock_recvmsg,
+	.mmap           = sock_no_mmap,
+};
+
+static void llcp_sock_destruct(struct sock *sk)
+{
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+
+	pr_debug("%p\n", sk);
+
+	if (sk->sk_state == LLCP_CONNECTED)
+		nfc_put_device(llcp_sock->dev);
+
+	skb_queue_purge(&sk->sk_receive_queue);
+
+	nfc_llcp_sock_free(llcp_sock);
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		pr_err("Freeing alive NFC LLCP socket %p\n", sk);
+		return;
+	}
+}
+
+struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp)
+{
+	struct sock *sk;
+	struct nfc_llcp_sock *llcp_sock;
+
+	sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto);
+	if (!sk)
+		return NULL;
+
+	llcp_sock = nfc_llcp_sock(sk);
+
+	sock_init_data(sock, sk);
+	sk->sk_state = LLCP_CLOSED;
+	sk->sk_protocol = NFC_SOCKPROTO_LLCP;
+	sk->sk_type = type;
+	sk->sk_destruct = llcp_sock_destruct;
+
+	llcp_sock->ssap = 0;
+	llcp_sock->dsap = LLCP_SAP_SDP;
+	llcp_sock->send_n = llcp_sock->send_ack_n = 0;
+	llcp_sock->recv_n = llcp_sock->recv_ack_n = 0;
+	llcp_sock->remote_ready = 1;
+	skb_queue_head_init(&llcp_sock->tx_queue);
+	skb_queue_head_init(&llcp_sock->tx_pending_queue);
+	skb_queue_head_init(&llcp_sock->tx_backlog_queue);
+	INIT_LIST_HEAD(&llcp_sock->list);
+	INIT_LIST_HEAD(&llcp_sock->accept_queue);
+
+	if (sock != NULL)
+		sock->state = SS_UNCONNECTED;
+
+	return sk;
+}
+
+void nfc_llcp_sock_free(struct nfc_llcp_sock *sock)
+{
+	kfree(sock->service_name);
+
+	skb_queue_purge(&sock->tx_queue);
+	skb_queue_purge(&sock->tx_pending_queue);
+	skb_queue_purge(&sock->tx_backlog_queue);
+
+	list_del_init(&sock->accept_queue);
+
+	sock->parent = NULL;
+}
+
+static int llcp_sock_create(struct net *net, struct socket *sock,
+				const struct nfc_protocol *nfc_proto)
+{
+	struct sock *sk;
+
+	pr_debug("%p\n", sock);
+
+	if (sock->type != SOCK_STREAM && sock->type != SOCK_DGRAM)
+		return -ESOCKTNOSUPPORT;
+
+	sock->ops = &llcp_sock_ops;
+
+	sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC);
+	if (sk == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static const struct nfc_protocol llcp_nfc_proto = {
+	.id	  = NFC_SOCKPROTO_LLCP,
+	.proto    = &llcp_sock_proto,
+	.owner    = THIS_MODULE,
+	.create   = llcp_sock_create
+};
+
+int __init nfc_llcp_sock_init(void)
+{
+	return nfc_proto_register(&llcp_nfc_proto);
+}
+
+void nfc_llcp_sock_exit(void)
+{
+	nfc_proto_unregister(&llcp_nfc_proto);
+}
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 4d0fb125d033..2c2c4015c68b 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -46,6 +46,60 @@ struct nfc_rawsock {
 #define to_rawsock_sk(_tx_work) \
 	((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work))
 
+#ifdef CONFIG_NFC_LLCP
+
+void nfc_llcp_mac_is_down(struct nfc_dev *dev);
+void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
+			u8 comm_mode, u8 rf_mode);
+int nfc_llcp_register_device(struct nfc_dev *dev);
+void nfc_llcp_unregister_device(struct nfc_dev *dev);
+int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len);
+u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, u8 *general_bytes_len);
+int __init nfc_llcp_init(void);
+void nfc_llcp_exit(void);
+
+#else
+
+void nfc_llcp_mac_is_down(struct nfc_dev *dev)
+{
+}
+
+void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
+			u8 comm_mode, u8 rf_mode)
+{
+}
+
+static inline int nfc_llcp_register_device(struct nfc_dev *dev)
+{
+	return 0;
+}
+
+static inline void nfc_llcp_unregister_device(struct nfc_dev *dev)
+{
+}
+
+static inline int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len)
+{
+	return 0;
+}
+
+static inline u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, u8 *gb_len)
+{
+	*gb_len = 0;
+	return NULL;
+}
+
+static inline int nfc_llcp_init(void)
+{
+	return 0;
+}
+
+static inline void nfc_llcp_exit(void)
+{
+}
+
+#endif
+
 int __init rawsock_init(void);
 void rawsock_exit(void);
 
-- 
cgit v1.2.3


From ca22e56debc57b47c422b749c93217ba62644be2 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Wed, 14 Dec 2011 14:29:38 -0800
Subject: driver-core: implement 'sysdev' functionality for regular devices and
 buses

All sysdev classes and sysdev devices will converted to regular devices
and buses to properly hook userspace into the event processing.

There is no interesting difference between a 'sysdev' and 'device' which
would justify to roll an entire own subsystem with different userspace
export semantics. Userspace relies on events and generic sysfs subsystem
infrastructure from sysdev devices, which are currently not properly
available.

Every converted sysdev class will create a regular device with the class
name in /sys/devices/system and all registered devices will becom a children
of theses devices.

For compatibility reasons, the sysdev class-wide attributes are created
at this parent device. (Do not copy that logic for anything new, subsystem-
wide properties belong to the subsystem, not to some fake parent device
created in /sys/devices.)

Every sysdev driver is implemented as a simple subsystem interface now,
and no longer called a driver.

After all sysdev classes are ported to regular driver core entities, the
sysdev implementation will be entirely removed from the kernel.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/base.h    |  12 +-
 drivers/base/bus.c     | 293 +++++++++++++++++++++++++++++++++++++++++++++----
 drivers/base/class.c   |  14 +--
 drivers/base/core.c    |  85 +++++++++++---
 drivers/base/init.c    |   1 -
 drivers/base/sys.c     |  10 +-
 include/linux/device.h |  78 ++++++++++++-
 7 files changed, 431 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/base.h b/drivers/base/base.h
index 21c1b96c34c6..7a6ae4228761 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -4,7 +4,9 @@
  * struct subsys_private - structure to hold the private to the driver core portions of the bus_type/class structure.
  *
  * @subsys - the struct kset that defines this subsystem
- * @devices_kset - the list of devices associated
+ * @devices_kset - the subsystem's 'devices' directory
+ * @interfaces - list of subsystem interfaces associated
+ * @mutex - protect the devices, and interfaces lists.
  *
  * @drivers_kset - the list of drivers associated
  * @klist_devices - the klist to iterate over the @devices_kset
@@ -14,10 +16,8 @@
  * @bus - pointer back to the struct bus_type that this structure is associated
  *        with.
  *
- * @class_interfaces - list of class_interfaces associated
  * @glue_dirs - "glue" directory to put in-between the parent device to
  *              avoid namespace conflicts
- * @class_mutex - mutex to protect the children, devices, and interfaces lists.
  * @class - pointer back to the struct class that this structure is associated
  *          with.
  *
@@ -28,6 +28,8 @@
 struct subsys_private {
 	struct kset subsys;
 	struct kset *devices_kset;
+	struct list_head interfaces;
+	struct mutex mutex;
 
 	struct kset *drivers_kset;
 	struct klist klist_devices;
@@ -36,9 +38,7 @@ struct subsys_private {
 	unsigned int drivers_autoprobe:1;
 	struct bus_type *bus;
 
-	struct list_head class_interfaces;
 	struct kset glue_dirs;
-	struct mutex class_mutex;
 	struct class *class;
 };
 #define to_subsys_private(obj) container_of(obj, struct subsys_private, subsys.kobj)
@@ -94,7 +94,6 @@ extern int hypervisor_init(void);
 static inline int hypervisor_init(void) { return 0; }
 #endif
 extern int platform_bus_init(void);
-extern int system_bus_init(void);
 extern int cpu_dev_init(void);
 
 extern int bus_add_device(struct device *dev);
@@ -116,6 +115,7 @@ extern char *make_class_name(const char *name, struct kobject *kobj);
 
 extern int devres_release_all(struct device *dev);
 
+/* /sys/devices directory */
 extern struct kset *devices_kset;
 
 #if defined(CONFIG_MODULES) && defined(CONFIG_SYSFS)
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 000e7b2006f8..99dc5921e1dd 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -16,9 +16,14 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/string.h>
+#include <linux/mutex.h>
 #include "base.h"
 #include "power/power.h"
 
+/* /sys/devices/system */
+/* FIXME: make static after drivers/base/sys.c is deleted */
+struct kset *system_kset;
+
 #define to_bus_attr(_attr) container_of(_attr, struct bus_attribute, attr)
 
 /*
@@ -360,6 +365,47 @@ struct device *bus_find_device_by_name(struct bus_type *bus,
 }
 EXPORT_SYMBOL_GPL(bus_find_device_by_name);
 
+/**
+ * subsys_find_device_by_id - find a device with a specific enumeration number
+ * @subsys: subsystem
+ * @id: index 'id' in struct device
+ * @hint: device to check first
+ *
+ * Check the hint's next object and if it is a match return it directly,
+ * otherwise, fall back to a full list search. Either way a reference for
+ * the returned object is taken.
+ */
+struct device *subsys_find_device_by_id(struct bus_type *subsys, unsigned int id,
+					struct device *hint)
+{
+	struct klist_iter i;
+	struct device *dev;
+
+	if (!subsys)
+		return NULL;
+
+	if (hint) {
+		klist_iter_init_node(&subsys->p->klist_devices, &i, &hint->p->knode_bus);
+		dev = next_device(&i);
+		if (dev && dev->id == id && get_device(dev)) {
+			klist_iter_exit(&i);
+			return dev;
+		}
+		klist_iter_exit(&i);
+	}
+
+	klist_iter_init_node(&subsys->p->klist_devices, &i, NULL);
+	while ((dev = next_device(&i))) {
+		if (dev->id == id && get_device(dev)) {
+			klist_iter_exit(&i);
+			return dev;
+		}
+	}
+	klist_iter_exit(&i);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(subsys_find_device_by_id);
+
 static struct device_driver *next_driver(struct klist_iter *i)
 {
 	struct klist_node *n = klist_next(i);
@@ -487,38 +533,59 @@ out_put:
 void bus_probe_device(struct device *dev)
 {
 	struct bus_type *bus = dev->bus;
+	struct subsys_interface *sif;
 	int ret;
 
-	if (bus && bus->p->drivers_autoprobe) {
+	if (!bus)
+		return;
+
+	if (bus->p->drivers_autoprobe) {
 		ret = device_attach(dev);
 		WARN_ON(ret < 0);
 	}
+
+	mutex_lock(&bus->p->mutex);
+	list_for_each_entry(sif, &bus->p->interfaces, node)
+		if (sif->add_dev)
+			sif->add_dev(dev, sif);
+	mutex_unlock(&bus->p->mutex);
 }
 
 /**
  * bus_remove_device - remove device from bus
  * @dev: device to be removed
  *
- * - Remove symlink from bus's directory.
+ * - Remove device from all interfaces.
+ * - Remove symlink from bus' directory.
  * - Delete device from bus's list.
  * - Detach from its driver.
  * - Drop reference taken in bus_add_device().
  */
 void bus_remove_device(struct device *dev)
 {
-	if (dev->bus) {
-		sysfs_remove_link(&dev->kobj, "subsystem");
-		sysfs_remove_link(&dev->bus->p->devices_kset->kobj,
-				  dev_name(dev));
-		device_remove_attrs(dev->bus, dev);
-		if (klist_node_attached(&dev->p->knode_bus))
-			klist_del(&dev->p->knode_bus);
-
-		pr_debug("bus: '%s': remove device %s\n",
-			 dev->bus->name, dev_name(dev));
-		device_release_driver(dev);
-		bus_put(dev->bus);
-	}
+	struct bus_type *bus = dev->bus;
+	struct subsys_interface *sif;
+
+	if (!bus)
+		return;
+
+	mutex_lock(&bus->p->mutex);
+	list_for_each_entry(sif, &bus->p->interfaces, node)
+		if (sif->remove_dev)
+			sif->remove_dev(dev, sif);
+	mutex_unlock(&bus->p->mutex);
+
+	sysfs_remove_link(&dev->kobj, "subsystem");
+	sysfs_remove_link(&dev->bus->p->devices_kset->kobj,
+			  dev_name(dev));
+	device_remove_attrs(dev->bus, dev);
+	if (klist_node_attached(&dev->p->knode_bus))
+		klist_del(&dev->p->knode_bus);
+
+	pr_debug("bus: '%s': remove device %s\n",
+		 dev->bus->name, dev_name(dev));
+	device_release_driver(dev);
+	bus_put(dev->bus);
 }
 
 static int driver_add_attrs(struct bus_type *bus, struct device_driver *drv)
@@ -847,14 +914,14 @@ static ssize_t bus_uevent_store(struct bus_type *bus,
 static BUS_ATTR(uevent, S_IWUSR, NULL, bus_uevent_store);
 
 /**
- * bus_register - register a bus with the system.
+ * __bus_register - register a driver-core subsystem
  * @bus: bus.
  *
  * Once we have that, we registered the bus with the kobject
  * infrastructure, then register the children subsystems it has:
- * the devices and drivers that belong to the bus.
+ * the devices and drivers that belong to the subsystem.
  */
-int bus_register(struct bus_type *bus)
+int __bus_register(struct bus_type *bus, struct lock_class_key *key)
 {
 	int retval;
 	struct subsys_private *priv;
@@ -898,6 +965,8 @@ int bus_register(struct bus_type *bus)
 		goto bus_drivers_fail;
 	}
 
+	INIT_LIST_HEAD(&priv->interfaces);
+	__mutex_init(&priv->mutex, "subsys mutex", key);
 	klist_init(&priv->klist_devices, klist_devices_get, klist_devices_put);
 	klist_init(&priv->klist_drivers, NULL, NULL);
 
@@ -927,7 +996,7 @@ out:
 	bus->p = NULL;
 	return retval;
 }
-EXPORT_SYMBOL_GPL(bus_register);
+EXPORT_SYMBOL_GPL(__bus_register);
 
 /**
  * bus_unregister - remove a bus from the system
@@ -939,6 +1008,8 @@ EXPORT_SYMBOL_GPL(bus_register);
 void bus_unregister(struct bus_type *bus)
 {
 	pr_debug("bus: '%s': unregistering\n", bus->name);
+	if (bus->dev_root)
+		device_unregister(bus->dev_root);
 	bus_remove_attrs(bus);
 	remove_probe_files(bus);
 	kset_unregister(bus->p->drivers_kset);
@@ -1028,10 +1099,194 @@ void bus_sort_breadthfirst(struct bus_type *bus,
 }
 EXPORT_SYMBOL_GPL(bus_sort_breadthfirst);
 
+/**
+ * subsys_dev_iter_init - initialize subsys device iterator
+ * @iter: subsys iterator to initialize
+ * @subsys: the subsys we wanna iterate over
+ * @start: the device to start iterating from, if any
+ * @type: device_type of the devices to iterate over, NULL for all
+ *
+ * Initialize subsys iterator @iter such that it iterates over devices
+ * of @subsys.  If @start is set, the list iteration will start there,
+ * otherwise if it is NULL, the iteration starts at the beginning of
+ * the list.
+ */
+void subsys_dev_iter_init(struct subsys_dev_iter *iter, struct bus_type *subsys,
+			  struct device *start, const struct device_type *type)
+{
+	struct klist_node *start_knode = NULL;
+
+	if (start)
+		start_knode = &start->p->knode_bus;
+	klist_iter_init_node(&subsys->p->klist_devices, &iter->ki, start_knode);
+	iter->type = type;
+}
+EXPORT_SYMBOL_GPL(subsys_dev_iter_init);
+
+/**
+ * subsys_dev_iter_next - iterate to the next device
+ * @iter: subsys iterator to proceed
+ *
+ * Proceed @iter to the next device and return it.  Returns NULL if
+ * iteration is complete.
+ *
+ * The returned device is referenced and won't be released till
+ * iterator is proceed to the next device or exited.  The caller is
+ * free to do whatever it wants to do with the device including
+ * calling back into subsys code.
+ */
+struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter)
+{
+	struct klist_node *knode;
+	struct device *dev;
+
+	for (;;) {
+		knode = klist_next(&iter->ki);
+		if (!knode)
+			return NULL;
+		dev = container_of(knode, struct device_private, knode_bus)->device;
+		if (!iter->type || iter->type == dev->type)
+			return dev;
+	}
+}
+EXPORT_SYMBOL_GPL(subsys_dev_iter_next);
+
+/**
+ * subsys_dev_iter_exit - finish iteration
+ * @iter: subsys iterator to finish
+ *
+ * Finish an iteration.  Always call this function after iteration is
+ * complete whether the iteration ran till the end or not.
+ */
+void subsys_dev_iter_exit(struct subsys_dev_iter *iter)
+{
+	klist_iter_exit(&iter->ki);
+}
+EXPORT_SYMBOL_GPL(subsys_dev_iter_exit);
+
+int subsys_interface_register(struct subsys_interface *sif)
+{
+	struct bus_type *subsys;
+	struct subsys_dev_iter iter;
+	struct device *dev;
+
+	if (!sif || !sif->subsys)
+		return -ENODEV;
+
+	subsys = bus_get(sif->subsys);
+	if (!subsys)
+		return -EINVAL;
+
+	mutex_lock(&subsys->p->mutex);
+	list_add_tail(&sif->node, &subsys->p->interfaces);
+	if (sif->add_dev) {
+		subsys_dev_iter_init(&iter, subsys, NULL, NULL);
+		while ((dev = subsys_dev_iter_next(&iter)))
+			sif->add_dev(dev, sif);
+		subsys_dev_iter_exit(&iter);
+	}
+	mutex_unlock(&subsys->p->mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(subsys_interface_register);
+
+void subsys_interface_unregister(struct subsys_interface *sif)
+{
+	struct bus_type *subsys = sif->subsys;
+	struct subsys_dev_iter iter;
+	struct device *dev;
+
+	if (!sif)
+		return;
+
+	mutex_lock(&subsys->p->mutex);
+	list_del_init(&sif->node);
+	if (sif->remove_dev) {
+		subsys_dev_iter_init(&iter, subsys, NULL, NULL);
+		while ((dev = subsys_dev_iter_next(&iter)))
+			sif->remove_dev(dev, sif);
+		subsys_dev_iter_exit(&iter);
+	}
+	mutex_unlock(&subsys->p->mutex);
+
+	bus_put(subsys);
+}
+EXPORT_SYMBOL_GPL(subsys_interface_unregister);
+
+static void system_root_device_release(struct device *dev)
+{
+	kfree(dev);
+}
+/**
+ * subsys_system_register - register a subsystem at /sys/devices/system/
+ * @subsys - system subsystem
+ * @groups - default attributes for the root device
+ *
+ * All 'system' subsystems have a /sys/devices/system/<name> root device
+ * with the name of the subsystem. The root device can carry subsystem-
+ * wide attributes. All registered devices are below this single root
+ * device and are named after the subsystem with a simple enumeration
+ * number appended. The registered devices are not explicitely named;
+ * only 'id' in the device needs to be set.
+ *
+ * Do not use this interface for anything new, it exists for compatibility
+ * with bad ideas only. New subsystems should use plain subsystems; and
+ * add the subsystem-wide attributes should be added to the subsystem
+ * directory itself and not some create fake root-device placed in
+ * /sys/devices/system/<name>.
+ */
+int subsys_system_register(struct bus_type *subsys,
+			   const struct attribute_group **groups)
+{
+	struct device *dev;
+	int err;
+
+	err = bus_register(subsys);
+	if (err < 0)
+		return err;
+
+	dev = kzalloc(sizeof(struct device), GFP_KERNEL);
+	if (!dev) {
+		err = -ENOMEM;
+		goto err_dev;
+	}
+
+	err = dev_set_name(dev, "%s", subsys->name);
+	if (err < 0)
+		goto err_name;
+
+	dev->kobj.parent = &system_kset->kobj;
+	dev->groups = groups;
+	dev->release = system_root_device_release;
+
+	err = device_register(dev);
+	if (err < 0)
+		goto err_dev_reg;
+
+	subsys->dev_root = dev;
+	return 0;
+
+err_dev_reg:
+	put_device(dev);
+	dev = NULL;
+err_name:
+	kfree(dev);
+err_dev:
+	bus_unregister(subsys);
+	return err;
+}
+EXPORT_SYMBOL_GPL(subsys_system_register);
+
 int __init buses_init(void)
 {
 	bus_kset = kset_create_and_add("bus", &bus_uevent_ops, NULL);
 	if (!bus_kset)
 		return -ENOMEM;
+
+	system_kset = kset_create_and_add("system", NULL, &devices_kset->kobj);
+	if (!system_kset)
+		return -ENOMEM;
+
 	return 0;
 }
diff --git a/drivers/base/class.c b/drivers/base/class.c
index b80d91cc8c3a..03243d4002fd 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -184,9 +184,9 @@ int __class_register(struct class *cls, struct lock_class_key *key)
 	if (!cp)
 		return -ENOMEM;
 	klist_init(&cp->klist_devices, klist_class_dev_get, klist_class_dev_put);
-	INIT_LIST_HEAD(&cp->class_interfaces);
+	INIT_LIST_HEAD(&cp->interfaces);
 	kset_init(&cp->glue_dirs);
-	__mutex_init(&cp->class_mutex, "struct class mutex", key);
+	__mutex_init(&cp->mutex, "subsys mutex", key);
 	error = kobject_set_name(&cp->subsys.kobj, "%s", cls->name);
 	if (error) {
 		kfree(cp);
@@ -460,15 +460,15 @@ int class_interface_register(struct class_interface *class_intf)
 	if (!parent)
 		return -EINVAL;
 
-	mutex_lock(&parent->p->class_mutex);
-	list_add_tail(&class_intf->node, &parent->p->class_interfaces);
+	mutex_lock(&parent->p->mutex);
+	list_add_tail(&class_intf->node, &parent->p->interfaces);
 	if (class_intf->add_dev) {
 		class_dev_iter_init(&iter, parent, NULL, NULL);
 		while ((dev = class_dev_iter_next(&iter)))
 			class_intf->add_dev(dev, class_intf);
 		class_dev_iter_exit(&iter);
 	}
-	mutex_unlock(&parent->p->class_mutex);
+	mutex_unlock(&parent->p->mutex);
 
 	return 0;
 }
@@ -482,7 +482,7 @@ void class_interface_unregister(struct class_interface *class_intf)
 	if (!parent)
 		return;
 
-	mutex_lock(&parent->p->class_mutex);
+	mutex_lock(&parent->p->mutex);
 	list_del_init(&class_intf->node);
 	if (class_intf->remove_dev) {
 		class_dev_iter_init(&iter, parent, NULL, NULL);
@@ -490,7 +490,7 @@ void class_interface_unregister(struct class_interface *class_intf)
 			class_intf->remove_dev(dev, class_intf);
 		class_dev_iter_exit(&iter);
 	}
-	mutex_unlock(&parent->p->class_mutex);
+	mutex_unlock(&parent->p->mutex);
 
 	class_put(parent);
 }
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 82c865452c70..a31ea193fba0 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -117,6 +117,56 @@ static const struct sysfs_ops dev_sysfs_ops = {
 	.store	= dev_attr_store,
 };
 
+#define to_ext_attr(x) container_of(x, struct dev_ext_attribute, attr)
+
+ssize_t device_store_ulong(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t size)
+{
+	struct dev_ext_attribute *ea = to_ext_attr(attr);
+	char *end;
+	unsigned long new = simple_strtoul(buf, &end, 0);
+	if (end == buf)
+		return -EINVAL;
+	*(unsigned long *)(ea->var) = new;
+	/* Always return full write size even if we didn't consume all */
+	return size;
+}
+EXPORT_SYMBOL_GPL(device_store_ulong);
+
+ssize_t device_show_ulong(struct device *dev,
+			  struct device_attribute *attr,
+			  char *buf)
+{
+	struct dev_ext_attribute *ea = to_ext_attr(attr);
+	return snprintf(buf, PAGE_SIZE, "%lx\n", *(unsigned long *)(ea->var));
+}
+EXPORT_SYMBOL_GPL(device_show_ulong);
+
+ssize_t device_store_int(struct device *dev,
+			 struct device_attribute *attr,
+			 const char *buf, size_t size)
+{
+	struct dev_ext_attribute *ea = to_ext_attr(attr);
+	char *end;
+	long new = simple_strtol(buf, &end, 0);
+	if (end == buf || new > INT_MAX || new < INT_MIN)
+		return -EINVAL;
+	*(int *)(ea->var) = new;
+	/* Always return full write size even if we didn't consume all */
+	return size;
+}
+EXPORT_SYMBOL_GPL(device_store_int);
+
+ssize_t device_show_int(struct device *dev,
+			struct device_attribute *attr,
+			char *buf)
+{
+	struct dev_ext_attribute *ea = to_ext_attr(attr);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", *(int *)(ea->var));
+}
+EXPORT_SYMBOL_GPL(device_show_int);
 
 /**
  *	device_release - free device structure.
@@ -463,7 +513,7 @@ static ssize_t show_dev(struct device *dev, struct device_attribute *attr,
 static struct device_attribute devt_attr =
 	__ATTR(dev, S_IRUGO, show_dev, NULL);
 
-/* kset to create /sys/devices/  */
+/* /sys/devices/ */
 struct kset *devices_kset;
 
 /**
@@ -710,6 +760,10 @@ static struct kobject *get_device_parent(struct device *dev,
 		return k;
 	}
 
+	/* subsystems can specify a default root directory for their devices */
+	if (!parent && dev->bus && dev->bus->dev_root)
+		return &dev->bus->dev_root->kobj;
+
 	if (parent)
 		return &parent->kobj;
 	return NULL;
@@ -730,14 +784,6 @@ static void cleanup_device_parent(struct device *dev)
 	cleanup_glue_dir(dev, dev->kobj.parent);
 }
 
-static void setup_parent(struct device *dev, struct device *parent)
-{
-	struct kobject *kobj;
-	kobj = get_device_parent(dev, parent);
-	if (kobj)
-		dev->kobj.parent = kobj;
-}
-
 static int device_add_class_symlinks(struct device *dev)
 {
 	int error;
@@ -890,6 +936,7 @@ int device_private_init(struct device *dev)
 int device_add(struct device *dev)
 {
 	struct device *parent = NULL;
+	struct kobject *kobj;
 	struct class_interface *class_intf;
 	int error = -EINVAL;
 
@@ -913,6 +960,10 @@ int device_add(struct device *dev)
 		dev->init_name = NULL;
 	}
 
+	/* subsystems can specify simple device enumeration */
+	if (!dev_name(dev) && dev->bus && dev->bus->dev_name)
+		dev_set_name(dev, "%s%u", dev->bus->dev_name, dev->id);
+
 	if (!dev_name(dev)) {
 		error = -EINVAL;
 		goto name_error;
@@ -921,7 +972,9 @@ int device_add(struct device *dev)
 	pr_debug("device: '%s': %s\n", dev_name(dev), __func__);
 
 	parent = get_device(dev->parent);
-	setup_parent(dev, parent);
+	kobj = get_device_parent(dev, parent);
+	if (kobj)
+		dev->kobj.parent = kobj;
 
 	/* use parent numa_node */
 	if (parent)
@@ -981,17 +1034,17 @@ int device_add(struct device *dev)
 			       &parent->p->klist_children);
 
 	if (dev->class) {
-		mutex_lock(&dev->class->p->class_mutex);
+		mutex_lock(&dev->class->p->mutex);
 		/* tie the class to the device */
 		klist_add_tail(&dev->knode_class,
 			       &dev->class->p->klist_devices);
 
 		/* notify any interfaces that the device is here */
 		list_for_each_entry(class_intf,
-				    &dev->class->p->class_interfaces, node)
+				    &dev->class->p->interfaces, node)
 			if (class_intf->add_dev)
 				class_intf->add_dev(dev, class_intf);
-		mutex_unlock(&dev->class->p->class_mutex);
+		mutex_unlock(&dev->class->p->mutex);
 	}
 done:
 	put_device(dev);
@@ -1106,15 +1159,15 @@ void device_del(struct device *dev)
 	if (dev->class) {
 		device_remove_class_symlinks(dev);
 
-		mutex_lock(&dev->class->p->class_mutex);
+		mutex_lock(&dev->class->p->mutex);
 		/* notify any interfaces that the device is now gone */
 		list_for_each_entry(class_intf,
-				    &dev->class->p->class_interfaces, node)
+				    &dev->class->p->interfaces, node)
 			if (class_intf->remove_dev)
 				class_intf->remove_dev(dev, class_intf);
 		/* remove the device from the class list */
 		klist_del(&dev->knode_class);
-		mutex_unlock(&dev->class->p->class_mutex);
+		mutex_unlock(&dev->class->p->mutex);
 	}
 	device_remove_file(dev, &uevent_attr);
 	device_remove_attrs(dev);
diff --git a/drivers/base/init.c b/drivers/base/init.c
index c8a934e79421..c16f0b808a17 100644
--- a/drivers/base/init.c
+++ b/drivers/base/init.c
@@ -31,7 +31,6 @@ void __init driver_init(void)
 	 * core core pieces.
 	 */
 	platform_bus_init();
-	system_bus_init();
 	cpu_dev_init();
 	memory_dev_init();
 }
diff --git a/drivers/base/sys.c b/drivers/base/sys.c
index 9dff77bfe1e3..409f5ce78829 100644
--- a/drivers/base/sys.c
+++ b/drivers/base/sys.c
@@ -126,7 +126,7 @@ void sysdev_class_remove_file(struct sysdev_class *c,
 }
 EXPORT_SYMBOL_GPL(sysdev_class_remove_file);
 
-static struct kset *system_kset;
+extern struct kset *system_kset;
 
 int sysdev_class_register(struct sysdev_class *cls)
 {
@@ -331,14 +331,6 @@ void sysdev_unregister(struct sys_device *sysdev)
 EXPORT_SYMBOL_GPL(sysdev_register);
 EXPORT_SYMBOL_GPL(sysdev_unregister);
 
-int __init system_bus_init(void)
-{
-	system_kset = kset_create_and_add("system", NULL, &devices_kset->kobj);
-	if (!system_kset)
-		return -ENOMEM;
-	return 0;
-}
-
 #define to_ext_attr(x) container_of(x, struct sysdev_ext_attribute, attr)
 
 ssize_t sysdev_store_ulong(struct sys_device *sysdev,
diff --git a/include/linux/device.h b/include/linux/device.h
index 341fb740d851..7f9fc1505e94 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -53,6 +53,8 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  * struct bus_type - The bus type of the device
  *
  * @name:	The name of the bus.
+ * @dev_name:	Used for subsystems to enumerate devices like ("foo%u", dev->id).
+ * @dev_root:	Default device to use as the parent.
  * @bus_attrs:	Default attributes of the bus.
  * @dev_attrs:	Default attributes of the devices on the bus.
  * @drv_attrs:	Default attributes of the device drivers on the bus.
@@ -86,6 +88,8 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  */
 struct bus_type {
 	const char		*name;
+	const char		*dev_name;
+	struct device		*dev_root;
 	struct bus_attribute	*bus_attrs;
 	struct device_attribute	*dev_attrs;
 	struct driver_attribute	*drv_attrs;
@@ -106,12 +110,30 @@ struct bus_type {
 	struct subsys_private *p;
 };
 
-extern int __must_check bus_register(struct bus_type *bus);
+/* This is a #define to keep the compiler from merging different
+ * instances of the __key variable */
+#define bus_register(subsys)			\
+({						\
+	static struct lock_class_key __key;	\
+	__bus_register(subsys, &__key);	\
+})
+extern int __must_check __bus_register(struct bus_type *bus,
+				       struct lock_class_key *key);
 extern void bus_unregister(struct bus_type *bus);
 
 extern int __must_check bus_rescan_devices(struct bus_type *bus);
 
 /* iterator helpers for buses */
+struct subsys_dev_iter {
+	struct klist_iter		ki;
+	const struct device_type	*type;
+};
+void subsys_dev_iter_init(struct subsys_dev_iter *iter,
+			 struct bus_type *subsys,
+			 struct device *start,
+			 const struct device_type *type);
+struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter);
+void subsys_dev_iter_exit(struct subsys_dev_iter *iter);
 
 int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data,
 		     int (*fn)(struct device *dev, void *data));
@@ -121,10 +143,10 @@ struct device *bus_find_device(struct bus_type *bus, struct device *start,
 struct device *bus_find_device_by_name(struct bus_type *bus,
 				       struct device *start,
 				       const char *name);
-
+struct device *subsys_find_device_by_id(struct bus_type *bus, unsigned int id,
+					struct device *hint);
 int bus_for_each_drv(struct bus_type *bus, struct device_driver *start,
 		     void *data, int (*fn)(struct device_driver *, void *));
-
 void bus_sort_breadthfirst(struct bus_type *bus,
 			   int (*compare)(const struct device *a,
 					  const struct device *b));
@@ -255,6 +277,33 @@ struct device *driver_find_device(struct device_driver *drv,
 				  struct device *start, void *data,
 				  int (*match)(struct device *dev, void *data));
 
+/**
+ * struct subsys_interface - interfaces to device functions
+ * @name        name of the device function
+ * @subsystem   subsytem of the devices to attach to
+ * @node        the list of functions registered at the subsystem
+ * @add         device hookup to device function handler
+ * @remove      device hookup to device function handler
+ *
+ * Simple interfaces attached to a subsystem. Multiple interfaces can
+ * attach to a subsystem and its devices. Unlike drivers, they do not
+ * exclusively claim or control devices. Interfaces usually represent
+ * a specific functionality of a subsystem/class of devices.
+ */
+struct subsys_interface {
+	const char *name;
+	struct bus_type *subsys;
+	struct list_head node;
+	int (*add_dev)(struct device *dev, struct subsys_interface *sif);
+	int (*remove_dev)(struct device *dev, struct subsys_interface *sif);
+};
+
+int subsys_interface_register(struct subsys_interface *sif);
+void subsys_interface_unregister(struct subsys_interface *sif);
+
+int subsys_system_register(struct bus_type *subsys,
+			   const struct attribute_group **groups);
+
 /**
  * struct class - device classes
  * @name:	Name of the class.
@@ -438,8 +487,28 @@ struct device_attribute {
 			 const char *buf, size_t count);
 };
 
+struct dev_ext_attribute {
+	struct device_attribute attr;
+	void *var;
+};
+
+ssize_t device_show_ulong(struct device *dev, struct device_attribute *attr,
+			  char *buf);
+ssize_t device_store_ulong(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count);
+ssize_t device_show_int(struct device *dev, struct device_attribute *attr,
+			char *buf);
+ssize_t device_store_int(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count);
+
 #define DEVICE_ATTR(_name, _mode, _show, _store) \
-struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
+	struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
+#define DEVICE_ULONG_ATTR(_name, _mode, _var) \
+	struct dev_ext_attribute dev_attr_##_name = \
+		{ __ATTR(_name, _mode, device_show_ulong, device_store_ulong), &(_var) }
+#define DEVICE_INT_ATTR(_name, _mode, _var) \
+	struct dev_ext_attribute dev_attr_##_name = \
+		{ __ATTR(_name, _mode, device_show_ulong, device_store_ulong), &(_var) }
 
 extern int __must_check device_create_file(struct device *device,
 					const struct device_attribute *entry);
@@ -603,6 +672,7 @@ struct device {
 	struct device_node	*of_node; /* associated device tree node */
 
 	dev_t			devt;	/* dev_t, creates the sysfs "dev" */
+	u32			id;	/* device instance */
 
 	spinlock_t		devres_lock;
 	struct list_head	devres_head;
-- 
cgit v1.2.3


From fe5ff8b84c8b03348a2f64ea9d884348faec2217 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Wed, 14 Dec 2011 15:21:07 -0800
Subject: edac: convert sysdev_class to a regular subsystem

After all sysdev classes are ported to regular driver core entities, the
sysdev implementation will be entirely removed from the kernel.

Cc: Doug Thompson <dougthompson@xmission.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Cc: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/edac/edac_core.h         |  7 +++----
 drivers/edac/edac_device.c       |  1 -
 drivers/edac/edac_device_sysfs.c | 20 ++++++++++----------
 drivers/edac/edac_mc.c           |  1 -
 drivers/edac/edac_mc_sysfs.c     | 16 ++++++++--------
 drivers/edac/edac_module.h       |  2 --
 drivers/edac/edac_pci.c          |  1 -
 drivers/edac/edac_pci_sysfs.c    | 16 ++++++++--------
 drivers/edac/edac_stub.c         | 27 ++++++++++++++-------------
 drivers/edac/mce_amd_inj.c       | 13 ++++++-------
 include/linux/edac.h             |  8 ++++----
 11 files changed, 53 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index fe90cd4a7ebc..e48ab3108ad8 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -32,7 +32,6 @@
 #include <linux/completion.h>
 #include <linux/kobject.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/workqueue.h>
 #include <linux/edac.h>
 
@@ -243,8 +242,8 @@ struct edac_device_ctl_info {
 	 */
 	struct edac_dev_sysfs_attribute *sysfs_attributes;
 
-	/* pointer to main 'edac' class in sysfs */
-	struct sysdev_class *edac_class;
+	/* pointer to main 'edac' subsys in sysfs */
+	struct bus_type *edac_subsys;
 
 	/* the internal state of this controller instance */
 	int op_state;
@@ -342,7 +341,7 @@ struct edac_pci_ctl_info {
 
 	int pci_idx;
 
-	struct sysdev_class *edac_class;	/* pointer to class */
+	struct bus_type *edac_subsys;	/* pointer to subsystem */
 
 	/* the internal state of this controller instance */
 	int op_state;
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index c3f67437afb6..4b154593343a 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -23,7 +23,6 @@
 #include <linux/jiffies.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
-#include <linux/sysdev.h>
 #include <linux/ctype.h>
 #include <linux/workqueue.h>
 #include <asm/uaccess.h>
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index 86649df00285..b4ea185ccebf 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -1,5 +1,5 @@
 /*
- * file for managing the edac_device class of devices for EDAC
+ * file for managing the edac_device subsystem of devices for EDAC
  *
  * (C) 2007 SoftwareBitMaker 
  *
@@ -230,21 +230,21 @@ static struct kobj_type ktype_device_ctrl = {
  */
 int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
 {
-	struct sysdev_class *edac_class;
+	struct bus_type *edac_subsys;
 	int err;
 
 	debugf1("%s()\n", __func__);
 
 	/* get the /sys/devices/system/edac reference */
-	edac_class = edac_get_sysfs_class();
-	if (edac_class == NULL) {
-		debugf1("%s() no edac_class error\n", __func__);
+	edac_subsys = edac_get_sysfs_subsys();
+	if (edac_subsys == NULL) {
+		debugf1("%s() no edac_subsys error\n", __func__);
 		err = -ENODEV;
 		goto err_out;
 	}
 
-	/* Point to the 'edac_class' this instance 'reports' to */
-	edac_dev->edac_class = edac_class;
+	/* Point to the 'edac_subsys' this instance 'reports' to */
+	edac_dev->edac_subsys = edac_subsys;
 
 	/* Init the devices's kobject */
 	memset(&edac_dev->kobj, 0, sizeof(struct kobject));
@@ -261,7 +261,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
 
 	/* register */
 	err = kobject_init_and_add(&edac_dev->kobj, &ktype_device_ctrl,
-				   &edac_class->kset.kobj,
+				   &edac_subsys->dev_root->kobj,
 				   "%s", edac_dev->name);
 	if (err) {
 		debugf1("%s()Failed to register '.../edac/%s'\n",
@@ -284,7 +284,7 @@ err_kobj_reg:
 	module_put(edac_dev->owner);
 
 err_mod_get:
-	edac_put_sysfs_class();
+	edac_put_sysfs_subsys();
 
 err_out:
 	return err;
@@ -308,7 +308,7 @@ void edac_device_unregister_sysfs_main_kobj(struct edac_device_ctl_info *dev)
 	 *   b) 'kfree' the memory
 	 */
 	kobject_put(&dev->kobj);
-	edac_put_sysfs_class();
+	edac_put_sysfs_subsys();
 }
 
 /* edac_dev -> instance information */
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index d69144a09043..ca6c04d350ee 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -25,7 +25,6 @@
 #include <linux/jiffies.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
-#include <linux/sysdev.h>
 #include <linux/ctype.h>
 #include <linux/edac.h>
 #include <asm/uaccess.h>
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 29ffa350bfbe..d56e63477d5c 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -1021,19 +1021,19 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
 int edac_sysfs_setup_mc_kset(void)
 {
 	int err = -EINVAL;
-	struct sysdev_class *edac_class;
+	struct bus_type *edac_subsys;
 
 	debugf1("%s()\n", __func__);
 
-	/* get the /sys/devices/system/edac class reference */
-	edac_class = edac_get_sysfs_class();
-	if (edac_class == NULL) {
-		debugf1("%s() no edac_class error=%d\n", __func__, err);
+	/* get the /sys/devices/system/edac subsys reference */
+	edac_subsys = edac_get_sysfs_subsys();
+	if (edac_subsys == NULL) {
+		debugf1("%s() no edac_subsys error=%d\n", __func__, err);
 		goto fail_out;
 	}
 
 	/* Init the MC's kobject */
-	mc_kset = kset_create_and_add("mc", NULL, &edac_class->kset.kobj);
+	mc_kset = kset_create_and_add("mc", NULL, &edac_subsys->dev_root->kobj);
 	if (!mc_kset) {
 		err = -ENOMEM;
 		debugf1("%s() Failed to register '.../edac/mc'\n", __func__);
@@ -1045,7 +1045,7 @@ int edac_sysfs_setup_mc_kset(void)
 	return 0;
 
 fail_kset:
-	edac_put_sysfs_class();
+	edac_put_sysfs_subsys();
 
 fail_out:
 	return err;
@@ -1059,6 +1059,6 @@ fail_out:
 void edac_sysfs_teardown_mc_kset(void)
 {
 	kset_unregister(mc_kset);
-	edac_put_sysfs_class();
+	edac_put_sysfs_subsys();
 }
 
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index 17aabb7b90ec..00f81b47a51f 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -10,8 +10,6 @@
 #ifndef	__EDAC_MODULE_H__
 #define	__EDAC_MODULE_H__
 
-#include <linux/sysdev.h>
-
 #include "edac_core.h"
 
 /*
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index 2b378207d571..63af1c5673d1 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -19,7 +19,6 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
-#include <linux/sysdev.h>
 #include <linux/ctype.h>
 #include <linux/workqueue.h>
 #include <asm/uaccess.h>
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 495198ad059c..97f5064e3992 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -338,12 +338,12 @@ static struct kobj_type ktype_edac_pci_main_kobj = {
  * edac_pci_main_kobj_setup()
  *
  *	setup the sysfs for EDAC PCI attributes
- *	assumes edac_class has already been initialized
+ *	assumes edac_subsys has already been initialized
  */
 static int edac_pci_main_kobj_setup(void)
 {
 	int err;
-	struct sysdev_class *edac_class;
+	struct bus_type *edac_subsys;
 
 	debugf0("%s()\n", __func__);
 
@@ -354,9 +354,9 @@ static int edac_pci_main_kobj_setup(void)
 	/* First time, so create the main kobject and its
 	 * controls and attributes
 	 */
-	edac_class = edac_get_sysfs_class();
-	if (edac_class == NULL) {
-		debugf1("%s() no edac_class\n", __func__);
+	edac_subsys = edac_get_sysfs_subsys();
+	if (edac_subsys == NULL) {
+		debugf1("%s() no edac_subsys\n", __func__);
 		err = -ENODEV;
 		goto decrement_count_fail;
 	}
@@ -381,7 +381,7 @@ static int edac_pci_main_kobj_setup(void)
 	/* Instanstiate the pci object */
 	err = kobject_init_and_add(edac_pci_top_main_kobj,
 				   &ktype_edac_pci_main_kobj,
-				   &edac_class->kset.kobj, "pci");
+				   &edac_subsys->dev_root->kobj, "pci");
 	if (err) {
 		debugf1("Failed to register '.../edac/pci'\n");
 		goto kobject_init_and_add_fail;
@@ -404,7 +404,7 @@ kzalloc_fail:
 	module_put(THIS_MODULE);
 
 mod_get_fail:
-	edac_put_sysfs_class();
+	edac_put_sysfs_subsys();
 
 decrement_count_fail:
 	/* if are on this error exit, nothing to tear down */
@@ -432,7 +432,7 @@ static void edac_pci_main_kobj_teardown(void)
 			__func__);
 		kobject_put(edac_pci_top_main_kobj);
 	}
-	edac_put_sysfs_class();
+	edac_put_sysfs_subsys();
 }
 
 /*
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
index 86ad2eee1201..670c4481453b 100644
--- a/drivers/edac/edac_stub.c
+++ b/drivers/edac/edac_stub.c
@@ -26,7 +26,7 @@ EXPORT_SYMBOL_GPL(edac_handlers);
 int edac_err_assert = 0;
 EXPORT_SYMBOL_GPL(edac_err_assert);
 
-static atomic_t edac_class_valid = ATOMIC_INIT(0);
+static atomic_t edac_subsys_valid = ATOMIC_INIT(0);
 
 /*
  * called to determine if there is an EDAC driver interested in
@@ -54,36 +54,37 @@ EXPORT_SYMBOL_GPL(edac_atomic_assert_error);
  * sysfs object: /sys/devices/system/edac
  *	need to export to other files
  */
-struct sysdev_class edac_class = {
+struct bus_type edac_subsys = {
 	.name = "edac",
+	.dev_name = "edac",
 };
-EXPORT_SYMBOL_GPL(edac_class);
+EXPORT_SYMBOL_GPL(edac_subsys);
 
 /* return pointer to the 'edac' node in sysfs */
-struct sysdev_class *edac_get_sysfs_class(void)
+struct bus_type *edac_get_sysfs_subsys(void)
 {
 	int err = 0;
 
-	if (atomic_read(&edac_class_valid))
+	if (atomic_read(&edac_subsys_valid))
 		goto out;
 
 	/* create the /sys/devices/system/edac directory */
-	err = sysdev_class_register(&edac_class);
+	err = subsys_system_register(&edac_subsys, NULL);
 	if (err) {
 		printk(KERN_ERR "Error registering toplevel EDAC sysfs dir\n");
 		return NULL;
 	}
 
 out:
-	atomic_inc(&edac_class_valid);
-	return &edac_class;
+	atomic_inc(&edac_subsys_valid);
+	return &edac_subsys;
 }
-EXPORT_SYMBOL_GPL(edac_get_sysfs_class);
+EXPORT_SYMBOL_GPL(edac_get_sysfs_subsys);
 
-void edac_put_sysfs_class(void)
+void edac_put_sysfs_subsys(void)
 {
 	/* last user unregisters it */
-	if (atomic_dec_and_test(&edac_class_valid))
-		sysdev_class_unregister(&edac_class);
+	if (atomic_dec_and_test(&edac_subsys_valid))
+		bus_unregister(&edac_subsys);
 }
-EXPORT_SYMBOL_GPL(edac_put_sysfs_class);
+EXPORT_SYMBOL_GPL(edac_put_sysfs_subsys);
diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
index 73c3e26a0bce..885e8ad8fdcf 100644
--- a/drivers/edac/mce_amd_inj.c
+++ b/drivers/edac/mce_amd_inj.c
@@ -11,7 +11,6 @@
  */
 
 #include <linux/kobject.h>
-#include <linux/sysdev.h>
 #include <linux/edac.h>
 #include <linux/module.h>
 #include <asm/mce.h>
@@ -116,14 +115,14 @@ static struct edac_mce_attr *sysfs_attrs[] = { &mce_attr_status, &mce_attr_misc,
 
 static int __init edac_init_mce_inject(void)
 {
-	struct sysdev_class *edac_class = NULL;
+	struct bus_type *edac_subsys = NULL;
 	int i, err = 0;
 
-	edac_class = edac_get_sysfs_class();
-	if (!edac_class)
+	edac_subsys = edac_get_sysfs_subsys();
+	if (!edac_subsys)
 		return -EINVAL;
 
-	mce_kobj = kobject_create_and_add("mce", &edac_class->kset.kobj);
+	mce_kobj = kobject_create_and_add("mce", &edac_subsys->dev_root->kobj);
 	if (!mce_kobj) {
 		printk(KERN_ERR "Error creating a mce kset.\n");
 		err = -ENOMEM;
@@ -147,7 +146,7 @@ err_sysfs_create:
 	kobject_del(mce_kobj);
 
 err_mce_kobj:
-	edac_put_sysfs_class();
+	edac_put_sysfs_subsys();
 
 	return err;
 }
@@ -161,7 +160,7 @@ static void __exit edac_exit_mce_inject(void)
 
 	kobject_del(mce_kobj);
 
-	edac_put_sysfs_class();
+	edac_put_sysfs_subsys();
 }
 
 module_init(edac_init_mce_inject);
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 055b248bdd53..1cd3947987e5 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -13,7 +13,7 @@
 #define _LINUX_EDAC_H_
 
 #include <linux/atomic.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
 
 #define EDAC_OPSTATE_INVAL	-1
 #define EDAC_OPSTATE_POLL	0
@@ -23,12 +23,12 @@
 extern int edac_op_state;
 extern int edac_err_assert;
 extern atomic_t edac_handlers;
-extern struct sysdev_class edac_class;
+extern struct bus_type edac_subsys;
 
 extern int edac_handler_set(void);
 extern void edac_atomic_assert_error(void);
-extern struct sysdev_class *edac_get_sysfs_class(void);
-extern void edac_put_sysfs_class(void);
+extern struct bus_type *edac_get_sysfs_subsys(void);
+extern void edac_put_sysfs_subsys(void);
 
 static inline void opstate_init(void)
 {
-- 
cgit v1.2.3


From 3c8bedb7e42dacc141b1c42b01d9c309dc4ac462 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 15 Dec 2011 14:52:37 +0800
Subject: mfd: Declare da9052_regmap_config for the bus drivers

Fixes build failures.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/mfd/da9052/da9052.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h
index c8899ab20549..5702d1be13b4 100644
--- a/include/linux/mfd/da9052/da9052.h
+++ b/include/linux/mfd/da9052/da9052.h
@@ -126,4 +126,6 @@ static inline int da9052_reg_update(struct da9052 *da9052, unsigned char reg,
 int da9052_device_init(struct da9052 *da9052, u8 chip_id);
 void da9052_device_exit(struct da9052 *da9052);
 
+extern struct regmap_config da9052_regmap_config;
+
 #endif /* __MFD_DA9052_DA9052_H */
-- 
cgit v1.2.3


From abd63bc3a0f65ae9d85bc3b1bb067d3e3c2b2cc2 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 14 Dec 2011 14:39:26 -0800
Subject: sched: Mark parent and real_parent as __rcu

The parent and real_parent pointers should be considered __rcu,
since they should be held under either tasklist_lock or
rcu_read_lock.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Link: http://lkml.kernel.org/r/20111214223925.GA27578@www.outflux.net
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index cc8c6206657f..5ef09012a629 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1330,8 +1330,8 @@ struct task_struct {
 	 * older sibling, respectively.  (p->father can be replaced with 
 	 * p->real_parent->pid)
 	 */
-	struct task_struct *real_parent; /* real parent process */
-	struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */
+	struct task_struct __rcu *real_parent; /* real parent process */
+	struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
 	/*
 	 * children/sibling forms the list of my natural children
 	 */
-- 
cgit v1.2.3


From 52efdb89d60a0f19949129a08af3437a7aab70be Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 7 Dec 2011 12:01:36 +0100
Subject: iommu/amd: Add amd_iommu_device_info() function

This function can be used to find out which features
necessary for IOMMUv2 usage are available on a given device.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/amd-iommu.h | 26 ++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index d5074f428423..03944e76b700 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3565,3 +3565,46 @@ void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum)
 	dev_data->errata |= (1 << erratum);
 }
 EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
+
+int amd_iommu_device_info(struct pci_dev *pdev,
+                          struct amd_iommu_device_info *info)
+{
+	int max_pasids;
+	int pos;
+
+	if (pdev == NULL || info == NULL)
+		return -EINVAL;
+
+	if (!amd_iommu_v2_supported())
+		return -EINVAL;
+
+	memset(info, 0, sizeof(*info));
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS);
+	if (pos)
+		info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
+	if (pos)
+		info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
+	if (pos) {
+		int features;
+
+		max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1));
+		max_pasids = min(max_pasids, (1 << 20));
+
+		info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
+		info->max_pasids = min(pci_max_pasids(pdev), max_pasids);
+
+		features = pci_pasid_features(pdev);
+		if (features & PCI_PASID_CAP_EXEC)
+			info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
+		if (features & PCI_PASID_CAP_PRIV)
+			info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(amd_iommu_device_info);
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 06688c42167d..c03c281ae6ee 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -119,6 +119,32 @@ typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev,
 extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
 					amd_iommu_invalid_ppr_cb cb);
 
+/**
+ * amd_iommu_device_info() - Get information about IOMMUv2 support of a
+ *			     PCI device
+ * @pdev: PCI device to query information from
+ * @info: A pointer to an amd_iommu_device_info structure which will contain
+ *	  the information about the PCI device
+ *
+ * Returns 0 on success, negative value on error
+ */
+
+#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP     0x1    /* ATS feature supported */
+#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP     0x2    /* PRI feature supported */
+#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP   0x4    /* PASID context supported */
+#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP    0x8    /* Device may request execution
+						    on memory pages */
+#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP   0x10    /* Device may request
+						    super-user privileges */
+
+struct amd_iommu_device_info {
+	int max_pasids;
+	u32 flags;
+};
+
+extern int amd_iommu_device_info(struct pci_dev *pdev,
+				 struct amd_iommu_device_info *info);
+
 #else
 
 static inline int amd_iommu_detect(void) { return -ENODEV; }
-- 
cgit v1.2.3


From bc21662f729cd17d2af93e149f4eccafc7b10181 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 7 Dec 2011 12:24:42 +0100
Subject: iommu/amd: Add invalidate-context call-back

This call-back is invoked when the task that is bound to a
pasid is about to exit. The driver can use it to shutdown
all context related to that context in a safe way.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu_v2.c | 35 +++++++++++++++++++++++++++++++++++
 include/linux/amd-iommu.h    | 17 +++++++++++++++++
 2 files changed, 52 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index fe812e2a0474..8add9f125d3e 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -63,6 +63,7 @@ struct device_state {
 	int pasid_levels;
 	int max_pasids;
 	amd_iommu_invalid_ppr_cb inv_ppr_cb;
+	amd_iommu_invalidate_ctx inv_ctx_cb;
 	spinlock_t lock;
 	wait_queue_head_t wq;
 };
@@ -637,6 +638,9 @@ again:
 		dev_state = pasid_state->device_state;
 		pasid     = pasid_state->pasid;
 
+		if (pasid_state->device_state->inv_ctx_cb)
+			dev_state->inv_ctx_cb(dev_state->pdev, pasid);
+
 		unbind_pasid(dev_state, pasid);
 
 		/* Task may be in the list multiple times */
@@ -881,6 +885,37 @@ out_unlock:
 }
 EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);
 
+int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
+				    amd_iommu_invalidate_ctx cb)
+{
+	struct device_state *dev_state;
+	unsigned long flags;
+	u16 devid;
+	int ret;
+
+	if (!amd_iommu_v2_supported())
+		return -ENODEV;
+
+	devid = device_id(pdev);
+
+	spin_lock_irqsave(&state_lock, flags);
+
+	ret = -EINVAL;
+	dev_state = state_table[devid];
+	if (dev_state == NULL)
+		goto out_unlock;
+
+	dev_state->inv_ctx_cb = cb;
+
+	ret = 0;
+
+out_unlock:
+	spin_unlock_irqrestore(&state_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);
+
 static int __init amd_iommu_v2_init(void)
 {
 	size_t state_table_size;
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index c03c281ae6ee..ef00610837d4 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -145,6 +145,23 @@ struct amd_iommu_device_info {
 extern int amd_iommu_device_info(struct pci_dev *pdev,
 				 struct amd_iommu_device_info *info);
 
+/**
+ * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating
+ *				       a pasid context. This call-back is
+ *				       invoked when the IOMMUv2 driver needs to
+ *				       invalidate a PASID context, for example
+ *				       because the task that is bound to that
+ *				       context is about to exit.
+ *
+ * @pdev: The PCI device the call-back should be registered for
+ * @cb: The call-back function
+ */
+
+typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, int pasid);
+
+extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
+					   amd_iommu_invalidate_ctx cb);
+
 #else
 
 static inline int amd_iommu_detect(void) { return -ENODEV; }
-- 
cgit v1.2.3


From 648616343cdbe904c585a6c12e323d3b3c72e46f Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Thu, 15 Dec 2011 14:56:09 +0100
Subject: [S390] cputime: add sparse checking and cleanup

Make cputime_t and cputime64_t nocast to enable sparse checking to
detect incorrect use of cputime. Drop the cputime macros for simple
scalar operations. The conversion macros are still needed.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/ia64/include/asm/cputime.h        |  69 ++++++++--------
 arch/powerpc/include/asm/cputime.h     |  70 +++++++----------
 arch/s390/include/asm/cputime.h        | 140 +++++++++++++++------------------
 drivers/cpufreq/cpufreq_conservative.c |  29 ++++---
 drivers/cpufreq/cpufreq_ondemand.c     |  33 ++++----
 drivers/cpufreq/cpufreq_stats.c        |   5 +-
 drivers/macintosh/rack-meter.c         |  11 +--
 fs/proc/array.c                        |   8 +-
 fs/proc/stat.c                         |  27 +++----
 fs/proc/uptime.c                       |   4 +-
 include/asm-generic/cputime.h          |  62 +++++++--------
 include/linux/sched.h                  |   4 +-
 kernel/acct.c                          |   4 +-
 kernel/cpu.c                           |   3 +-
 kernel/exit.c                          |  22 ++----
 kernel/fork.c                          |  14 ++--
 kernel/itimer.c                        |  15 ++--
 kernel/posix-cpu-timers.c              | 132 ++++++++++++-------------------
 kernel/sched.c                         |  80 +++++++++----------
 kernel/sched_stats.h                   |   6 +-
 kernel/signal.c                        |   6 +-
 kernel/sys.c                           |   6 +-
 kernel/tsacct.c                        |   2 +-
 23 files changed, 323 insertions(+), 429 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index 6073b187528a..461e52f0277f 100644
--- a/arch/ia64/include/asm/cputime.h
+++ b/arch/ia64/include/asm/cputime.h
@@ -26,59 +26,51 @@
 #include <linux/jiffies.h>
 #include <asm/processor.h>
 
-typedef u64 cputime_t;
-typedef u64 cputime64_t;
+typedef u64 __nocast cputime_t;
+typedef u64 __nocast cputime64_t;
 
-#define cputime_zero			((cputime_t)0)
 #define cputime_one_jiffy		jiffies_to_cputime(1)
-#define cputime_max			((~((cputime_t)0) >> 1) - 1)
-#define cputime_add(__a, __b)		((__a) +  (__b))
-#define cputime_sub(__a, __b)		((__a) -  (__b))
-#define cputime_div(__a, __n)		((__a) /  (__n))
-#define cputime_halve(__a)		((__a) >> 1)
-#define cputime_eq(__a, __b)		((__a) == (__b))
-#define cputime_gt(__a, __b)		((__a) >  (__b))
-#define cputime_ge(__a, __b)		((__a) >= (__b))
-#define cputime_lt(__a, __b)		((__a) <  (__b))
-#define cputime_le(__a, __b)		((__a) <= (__b))
-
-#define cputime64_zero			((cputime64_t)0)
-#define cputime64_add(__a, __b)		((__a) + (__b))
-#define cputime64_sub(__a, __b)		((__a) - (__b))
-#define cputime_to_cputime64(__ct)	(__ct)
 
 /*
  * Convert cputime <-> jiffies (HZ)
  */
-#define cputime_to_jiffies(__ct)	((__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies_to_cputime(__jif)	((__jif) * (NSEC_PER_SEC / HZ))
-#define cputime64_to_jiffies64(__ct)	((__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies64_to_cputime64(__jif)	((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime_to_jiffies(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies_to_cputime(__jif)	\
+	(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime64_to_jiffies64(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies64_to_cputime64(__jif)	\
+	(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
 
 /*
  * Convert cputime <-> microseconds
  */
-#define cputime_to_usecs(__ct)		((__ct) / NSEC_PER_USEC)
-#define usecs_to_cputime(__usecs)	((__usecs) * NSEC_PER_USEC)
+#define cputime_to_usecs(__ct)		\
+	((__force u64)(__ct) / NSEC_PER_USEC)
+#define usecs_to_cputime(__usecs)	\
+	(__force cputime_t)((__usecs) * NSEC_PER_USEC)
 
 /*
  * Convert cputime <-> seconds
  */
-#define cputime_to_secs(__ct)		((__ct) / NSEC_PER_SEC)
-#define secs_to_cputime(__secs)		((__secs) * NSEC_PER_SEC)
+#define cputime_to_secs(__ct)		\
+	((__force u64)(__ct) / NSEC_PER_SEC)
+#define secs_to_cputime(__secs)		\
+	(__force cputime_t)((__secs) * NSEC_PER_SEC)
 
 /*
  * Convert cputime <-> timespec (nsec)
  */
 static inline cputime_t timespec_to_cputime(const struct timespec *val)
 {
-	cputime_t ret = val->tv_sec * NSEC_PER_SEC;
-	return (ret + val->tv_nsec);
+	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
+	return (__force cputime_t) ret;
 }
 static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
 {
-	val->tv_sec  = ct / NSEC_PER_SEC;
-	val->tv_nsec = ct % NSEC_PER_SEC;
+	val->tv_sec  = (__force u64) ct / NSEC_PER_SEC;
+	val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
 }
 
 /*
@@ -86,25 +78,28 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
  */
 static inline cputime_t timeval_to_cputime(struct timeval *val)
 {
-	cputime_t ret = val->tv_sec * NSEC_PER_SEC;
-	return (ret + val->tv_usec * NSEC_PER_USEC);
+	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
+	return (__force cputime_t) ret;
 }
 static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
 {
-	val->tv_sec = ct / NSEC_PER_SEC;
-	val->tv_usec = (ct % NSEC_PER_SEC) / NSEC_PER_USEC;
+	val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
+	val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
 }
 
 /*
  * Convert cputime <-> clock (USER_HZ)
  */
-#define cputime_to_clock_t(__ct)	((__ct) / (NSEC_PER_SEC / USER_HZ))
-#define clock_t_to_cputime(__x)		((__x) * (NSEC_PER_SEC / USER_HZ))
+#define cputime_to_clock_t(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
+#define clock_t_to_cputime(__x)		\
+	(__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
 
 /*
  * Convert cputime64 to clock.
  */
-#define cputime64_to_clock_t(__ct)      cputime_to_clock_t((cputime_t)__ct)
+#define cputime64_to_clock_t(__ct)	\
+	cputime_to_clock_t((__force cputime_t)__ct)
 
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 #endif /* __IA64_CPUTIME_H */
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 1cf20bdfbeca..e94935c52019 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -29,25 +29,8 @@ static inline void setup_cputime_one_jiffy(void) { }
 #include <asm/time.h>
 #include <asm/param.h>
 
-typedef u64 cputime_t;
-typedef u64 cputime64_t;
-
-#define cputime_zero			((cputime_t)0)
-#define cputime_max			((~((cputime_t)0) >> 1) - 1)
-#define cputime_add(__a, __b)		((__a) +  (__b))
-#define cputime_sub(__a, __b)		((__a) -  (__b))
-#define cputime_div(__a, __n)		((__a) /  (__n))
-#define cputime_halve(__a)		((__a) >> 1)
-#define cputime_eq(__a, __b)		((__a) == (__b))
-#define cputime_gt(__a, __b)		((__a) >  (__b))
-#define cputime_ge(__a, __b)		((__a) >= (__b))
-#define cputime_lt(__a, __b)		((__a) <  (__b))
-#define cputime_le(__a, __b)		((__a) <= (__b))
-
-#define cputime64_zero			((cputime64_t)0)
-#define cputime64_add(__a, __b)		((__a) + (__b))
-#define cputime64_sub(__a, __b)		((__a) - (__b))
-#define cputime_to_cputime64(__ct)	(__ct)
+typedef u64 __nocast cputime_t;
+typedef u64 __nocast cputime64_t;
 
 #ifdef __KERNEL__
 
@@ -65,7 +48,7 @@ DECLARE_PER_CPU(unsigned long, cputime_scaled_last_delta);
 
 static inline unsigned long cputime_to_jiffies(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_jiffies_factor);
+	return mulhdu((__force u64) ct, __cputime_jiffies_factor);
 }
 
 /* Estimate the scaled cputime by scaling the real cputime based on
@@ -74,14 +57,15 @@ static inline cputime_t cputime_to_scaled(const cputime_t ct)
 {
 	if (cpu_has_feature(CPU_FTR_SPURR) &&
 	    __get_cpu_var(cputime_last_delta))
-		return ct * __get_cpu_var(cputime_scaled_last_delta) /
-			    __get_cpu_var(cputime_last_delta);
+		return (__force u64) ct *
+			__get_cpu_var(cputime_scaled_last_delta) /
+			__get_cpu_var(cputime_last_delta);
 	return ct;
 }
 
 static inline cputime_t jiffies_to_cputime(const unsigned long jif)
 {
-	cputime_t ct;
+	u64 ct;
 	unsigned long sec;
 
 	/* have to be a little careful about overflow */
@@ -93,7 +77,7 @@ static inline cputime_t jiffies_to_cputime(const unsigned long jif)
 	}
 	if (sec)
 		ct += (cputime_t) sec * tb_ticks_per_sec;
-	return ct;
+	return (__force cputime_t) ct;
 }
 
 static inline void setup_cputime_one_jiffy(void)
@@ -103,7 +87,7 @@ static inline void setup_cputime_one_jiffy(void)
 
 static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
 {
-	cputime_t ct;
+	u64 ct;
 	u64 sec;
 
 	/* have to be a little careful about overflow */
@@ -114,13 +98,13 @@ static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
 		do_div(ct, HZ);
 	}
 	if (sec)
-		ct += (cputime_t) sec * tb_ticks_per_sec;
-	return ct;
+		ct += (u64) sec * tb_ticks_per_sec;
+	return (__force cputime64_t) ct;
 }
 
 static inline u64 cputime64_to_jiffies64(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_jiffies_factor);
+	return mulhdu((__force u64) ct, __cputime_jiffies_factor);
 }
 
 /*
@@ -130,12 +114,12 @@ extern u64 __cputime_msec_factor;
 
 static inline unsigned long cputime_to_usecs(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_msec_factor) * USEC_PER_MSEC;
+	return mulhdu((__force u64) ct, __cputime_msec_factor) * USEC_PER_MSEC;
 }
 
 static inline cputime_t usecs_to_cputime(const unsigned long us)
 {
-	cputime_t ct;
+	u64 ct;
 	unsigned long sec;
 
 	/* have to be a little careful about overflow */
@@ -147,7 +131,7 @@ static inline cputime_t usecs_to_cputime(const unsigned long us)
 	}
 	if (sec)
 		ct += (cputime_t) sec * tb_ticks_per_sec;
-	return ct;
+	return (__force cputime_t) ct;
 }
 
 /*
@@ -157,12 +141,12 @@ extern u64 __cputime_sec_factor;
 
 static inline unsigned long cputime_to_secs(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_sec_factor);
+	return mulhdu((__force u64) ct, __cputime_sec_factor);
 }
 
 static inline cputime_t secs_to_cputime(const unsigned long sec)
 {
-	return (cputime_t) sec * tb_ticks_per_sec;
+	return (__force cputime_t)((u64) sec * tb_ticks_per_sec);
 }
 
 /*
@@ -170,7 +154,7 @@ static inline cputime_t secs_to_cputime(const unsigned long sec)
  */
 static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p)
 {
-	u64 x = ct;
+	u64 x = (__force u64) ct;
 	unsigned int frac;
 
 	frac = do_div(x, tb_ticks_per_sec);
@@ -182,11 +166,11 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p)
 
 static inline cputime_t timespec_to_cputime(const struct timespec *p)
 {
-	cputime_t ct;
+	u64 ct;
 
 	ct = (u64) p->tv_nsec * tb_ticks_per_sec;
 	do_div(ct, 1000000000);
-	return ct + (u64) p->tv_sec * tb_ticks_per_sec;
+	return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec);
 }
 
 /*
@@ -194,7 +178,7 @@ static inline cputime_t timespec_to_cputime(const struct timespec *p)
  */
 static inline void cputime_to_timeval(const cputime_t ct, struct timeval *p)
 {
-	u64 x = ct;
+	u64 x = (__force u64) ct;
 	unsigned int frac;
 
 	frac = do_div(x, tb_ticks_per_sec);
@@ -206,11 +190,11 @@ static inline void cputime_to_timeval(const cputime_t ct, struct timeval *p)
 
 static inline cputime_t timeval_to_cputime(const struct timeval *p)
 {
-	cputime_t ct;
+	u64 ct;
 
 	ct = (u64) p->tv_usec * tb_ticks_per_sec;
 	do_div(ct, 1000000);
-	return ct + (u64) p->tv_sec * tb_ticks_per_sec;
+	return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec);
 }
 
 /*
@@ -220,12 +204,12 @@ extern u64 __cputime_clockt_factor;
 
 static inline unsigned long cputime_to_clock_t(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_clockt_factor);
+	return mulhdu((__force u64) ct, __cputime_clockt_factor);
 }
 
 static inline cputime_t clock_t_to_cputime(const unsigned long clk)
 {
-	cputime_t ct;
+	u64 ct;
 	unsigned long sec;
 
 	/* have to be a little careful about overflow */
@@ -236,8 +220,8 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
 		do_div(ct, USER_HZ);
 	}
 	if (sec)
-		ct += (cputime_t) sec * tb_ticks_per_sec;
-	return ct;
+		ct += (u64) sec * tb_ticks_per_sec;
+	return (__force cputime_t) ct;
 }
 
 #define cputime64_to_clock_t(ct)	cputime_to_clock_t((cputime_t)(ct))
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 081434878296..0887a0463e33 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -16,114 +16,98 @@
 
 /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
 
-typedef unsigned long long cputime_t;
-typedef unsigned long long cputime64_t;
+typedef unsigned long long __nocast cputime_t;
+typedef unsigned long long __nocast cputime64_t;
 
-#ifndef __s390x__
-
-static inline unsigned int
-__div(unsigned long long n, unsigned int base)
+static inline unsigned long __div(unsigned long long n, unsigned long base)
 {
+#ifndef __s390x__
 	register_pair rp;
 
 	rp.pair = n >> 1;
 	asm ("dr %0,%1" : "+d" (rp) : "d" (base >> 1));
 	return rp.subreg.odd;
+#else /* __s390x__ */
+	return n / base;
+#endif /* __s390x__ */
 }
 
-#else /* __s390x__ */
+#define cputime_one_jiffy		jiffies_to_cputime(1)
 
-static inline unsigned int
-__div(unsigned long long n, unsigned int base)
+/*
+ * Convert cputime to jiffies and back.
+ */
+static inline unsigned long cputime_to_jiffies(const cputime_t cputime)
 {
-	return n / base;
+	return __div((__force unsigned long long) cputime, 4096000000ULL / HZ);
 }
 
-#endif /* __s390x__ */
+static inline cputime_t jiffies_to_cputime(const unsigned int jif)
+{
+	return (__force cputime_t)(jif * (4096000000ULL / HZ));
+}
 
-#define cputime_zero			(0ULL)
-#define cputime_one_jiffy		jiffies_to_cputime(1)
-#define cputime_max			((~0UL >> 1) - 1)
-#define cputime_add(__a, __b)		((__a) +  (__b))
-#define cputime_sub(__a, __b)		((__a) -  (__b))
-#define cputime_div(__a, __n) ({		\
-	unsigned long long __div = (__a);	\
-	do_div(__div,__n);			\
-	__div;					\
-})
-#define cputime_halve(__a)		((__a) >> 1)
-#define cputime_eq(__a, __b)		((__a) == (__b))
-#define cputime_gt(__a, __b)		((__a) >  (__b))
-#define cputime_ge(__a, __b)		((__a) >= (__b))
-#define cputime_lt(__a, __b)		((__a) <  (__b))
-#define cputime_le(__a, __b)		((__a) <= (__b))
-#define cputime_to_jiffies(__ct)	(__div((__ct), 4096000000ULL / HZ))
-#define cputime_to_scaled(__ct)		(__ct)
-#define jiffies_to_cputime(__hz)	((cputime_t)(__hz) * (4096000000ULL / HZ))
-
-#define cputime64_zero			(0ULL)
-#define cputime64_add(__a, __b)		((__a) + (__b))
-#define cputime_to_cputime64(__ct)	(__ct)
-
-static inline u64
-cputime64_to_jiffies64(cputime64_t cputime)
-{
-	do_div(cputime, 4096000000ULL / HZ);
-	return cputime;
+static inline u64 cputime64_to_jiffies64(cputime64_t cputime)
+{
+	unsigned long long jif = (__force unsigned long long) cputime;
+	do_div(jif, 4096000000ULL / HZ);
+	return jif;
+}
+
+static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
+{
+	return (__force cputime64_t)(jif * (4096000000ULL / HZ));
 }
 
 /*
  * Convert cputime to microseconds and back.
  */
-static inline unsigned int
-cputime_to_usecs(const cputime_t cputime)
+static inline unsigned int cputime_to_usecs(const cputime_t cputime)
 {
-	return cputime_div(cputime, 4096);
+	return (__force unsigned long long) cputime >> 12;
 }
 
-static inline cputime_t
-usecs_to_cputime(const unsigned int m)
+static inline cputime_t usecs_to_cputime(const unsigned int m)
 {
-	return (cputime_t) m * 4096;
+	return (__force cputime_t)(m * 4096ULL);
 }
 
 /*
  * Convert cputime to milliseconds and back.
  */
-static inline unsigned int
-cputime_to_secs(const cputime_t cputime)
+static inline unsigned int cputime_to_secs(const cputime_t cputime)
 {
-	return __div(cputime, 2048000000) >> 1;
+	return __div((__force unsigned long long) cputime, 2048000000) >> 1;
 }
 
-static inline cputime_t
-secs_to_cputime(const unsigned int s)
+static inline cputime_t secs_to_cputime(const unsigned int s)
 {
-	return (cputime_t) s * 4096000000ULL;
+	return (__force cputime_t)(s * 4096000000ULL);
 }
 
 /*
  * Convert cputime to timespec and back.
  */
-static inline cputime_t
-timespec_to_cputime(const struct timespec *value)
+static inline cputime_t timespec_to_cputime(const struct timespec *value)
 {
-	return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL;
+	unsigned long long ret = value->tv_sec * 4096000000ULL;
+	return (__force cputime_t)(ret + value->tv_nsec * 4096 / 1000);
 }
 
-static inline void
-cputime_to_timespec(const cputime_t cputime, struct timespec *value)
+static inline void cputime_to_timespec(const cputime_t cputime,
+				       struct timespec *value)
 {
+	unsigned long long __cputime = (__force unsigned long long) cputime;
 #ifndef __s390x__
 	register_pair rp;
 
-	rp.pair = cputime >> 1;
+	rp.pair = __cputime >> 1;
 	asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
 	value->tv_nsec = rp.subreg.even * 1000 / 4096;
 	value->tv_sec = rp.subreg.odd;
 #else
-	value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096;
-	value->tv_sec = cputime / 4096000000ULL;
+	value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096;
+	value->tv_sec = __cputime / 4096000000ULL;
 #endif
 }
 
@@ -132,50 +116,52 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
  * Since cputime and timeval have the same resolution (microseconds)
  * this is easy.
  */
-static inline cputime_t
-timeval_to_cputime(const struct timeval *value)
+static inline cputime_t timeval_to_cputime(const struct timeval *value)
 {
-	return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL;
+	unsigned long long ret = value->tv_sec * 4096000000ULL;
+	return (__force cputime_t)(ret + value->tv_usec * 4096ULL);
 }
 
-static inline void
-cputime_to_timeval(const cputime_t cputime, struct timeval *value)
+static inline void cputime_to_timeval(const cputime_t cputime,
+				      struct timeval *value)
 {
+	unsigned long long __cputime = (__force unsigned long long) cputime;
 #ifndef __s390x__
 	register_pair rp;
 
-	rp.pair = cputime >> 1;
+	rp.pair = __cputime >> 1;
 	asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
 	value->tv_usec = rp.subreg.even / 4096;
 	value->tv_sec = rp.subreg.odd;
 #else
-	value->tv_usec = (cputime % 4096000000ULL) / 4096;
-	value->tv_sec = cputime / 4096000000ULL;
+	value->tv_usec = (__cputime % 4096000000ULL) / 4096;
+	value->tv_sec = __cputime / 4096000000ULL;
 #endif
 }
 
 /*
  * Convert cputime to clock and back.
  */
-static inline clock_t
-cputime_to_clock_t(cputime_t cputime)
+static inline clock_t cputime_to_clock_t(cputime_t cputime)
 {
-	return cputime_div(cputime, 4096000000ULL / USER_HZ);
+	unsigned long long clock = (__force unsigned long long) cputime;
+	do_div(clock, 4096000000ULL / USER_HZ);
+	return clock;
 }
 
-static inline cputime_t
-clock_t_to_cputime(unsigned long x)
+static inline cputime_t clock_t_to_cputime(unsigned long x)
 {
-	return (cputime_t) x * (4096000000ULL / USER_HZ);
+	return (__force cputime_t)(x * (4096000000ULL / USER_HZ));
 }
 
 /*
  * Convert cputime64 to clock.
  */
-static inline clock_t
-cputime64_to_clock_t(cputime64_t cputime)
+static inline clock_t cputime64_to_clock_t(cputime64_t cputime)
 {
-       return cputime_div(cputime, 4096000000ULL / USER_HZ);
+	unsigned long long clock = (__force unsigned long long) cputime;
+	do_div(clock, 4096000000ULL / USER_HZ);
+	return clock;
 }
 
 struct s390_idle_data {
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index c97b468ee9f7..7f31a031c0b5 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -103,15 +103,14 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
 	cputime64_t busy_time;
 
 	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
-	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
-			kstat_cpu(cpu).cpustat.system);
-
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);
-
-	idle_time = cputime64_sub(cur_wall_time, busy_time);
+	busy_time  = kstat_cpu(cpu).cpustat.user;
+	busy_time += kstat_cpu(cpu).cpustat.system;
+	busy_time += kstat_cpu(cpu).cpustat.irq;
+	busy_time += kstat_cpu(cpu).cpustat.softirq;
+	busy_time += kstat_cpu(cpu).cpustat.steal;
+	busy_time += kstat_cpu(cpu).cpustat.nice;
+
+	idle_time = cur_wall_time - busy_time;
 	if (wall)
 		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
 
@@ -353,20 +352,20 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 
 		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
 
-		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
-				j_dbs_info->prev_cpu_wall);
+		wall_time = (unsigned int)
+			(cur_wall_time - j_dbs_info->prev_cpu_wall);
 		j_dbs_info->prev_cpu_wall = cur_wall_time;
 
-		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
-				j_dbs_info->prev_cpu_idle);
+		idle_time = (unsigned int)
+			(cur_idle_time - j_dbs_info->prev_cpu_idle);
 		j_dbs_info->prev_cpu_idle = cur_idle_time;
 
 		if (dbs_tuners_ins.ignore_nice) {
 			cputime64_t cur_nice;
 			unsigned long cur_nice_jiffies;
 
-			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
-					 j_dbs_info->prev_cpu_nice);
+			cur_nice = kstat_cpu(j).cpustat.nice -
+					j_dbs_info->prev_cpu_nice;
 			/*
 			 * Assumption: nice time between sampling periods will
 			 * be less than 2^32 jiffies for 32 bit sys
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index fa8af4ebb1d6..07cffe2f6cff 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -127,15 +127,14 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
 	cputime64_t busy_time;
 
 	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
-	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
-			kstat_cpu(cpu).cpustat.system);
-
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);
-
-	idle_time = cputime64_sub(cur_wall_time, busy_time);
+	busy_time  = kstat_cpu(cpu).cpustat.user;
+	busy_time += kstat_cpu(cpu).cpustat.system;
+	busy_time += kstat_cpu(cpu).cpustat.irq;
+	busy_time += kstat_cpu(cpu).cpustat.softirq;
+	busy_time += kstat_cpu(cpu).cpustat.steal;
+	busy_time += kstat_cpu(cpu).cpustat.nice;
+
+	idle_time = cur_wall_time - busy_time;
 	if (wall)
 		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
 
@@ -442,24 +441,24 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
 		cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);
 
-		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
-				j_dbs_info->prev_cpu_wall);
+		wall_time = (unsigned int)
+			(cur_wall_time - j_dbs_info->prev_cpu_wall);
 		j_dbs_info->prev_cpu_wall = cur_wall_time;
 
-		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
-				j_dbs_info->prev_cpu_idle);
+		idle_time = (unsigned int)
+			(cur_idle_time - j_dbs_info->prev_cpu_idle);
 		j_dbs_info->prev_cpu_idle = cur_idle_time;
 
-		iowait_time = (unsigned int) cputime64_sub(cur_iowait_time,
-				j_dbs_info->prev_cpu_iowait);
+		iowait_time = (unsigned int)
+			(cur_iowait_time - j_dbs_info->prev_cpu_iowait);
 		j_dbs_info->prev_cpu_iowait = cur_iowait_time;
 
 		if (dbs_tuners_ins.ignore_nice) {
 			cputime64_t cur_nice;
 			unsigned long cur_nice_jiffies;
 
-			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
-					 j_dbs_info->prev_cpu_nice);
+			cur_nice = kstat_cpu(j).cpustat.nice -
+					j_dbs_info->prev_cpu_nice;
 			/*
 			 * Assumption: nice time between sampling periods will
 			 * be less than 2^32 jiffies for 32 bit sys
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index c5072a91e848..2a508edd768b 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -61,9 +61,8 @@ static int cpufreq_stats_update(unsigned int cpu)
 	spin_lock(&cpufreq_stats_lock);
 	stat = per_cpu(cpufreq_stats_table, cpu);
 	if (stat->time_in_state)
-		stat->time_in_state[stat->last_index] =
-			cputime64_add(stat->time_in_state[stat->last_index],
-				      cputime_sub(cur_time, stat->last_time));
+		stat->time_in_state[stat->last_index] +=
+			cur_time - stat->last_time;
 	stat->last_time = cur_time;
 	spin_unlock(&cpufreq_stats_lock);
 	return 0;
diff --git a/drivers/macintosh/rack-meter.c b/drivers/macintosh/rack-meter.c
index 2637c139777b..909908ebf164 100644
--- a/drivers/macintosh/rack-meter.c
+++ b/drivers/macintosh/rack-meter.c
@@ -83,11 +83,10 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
 {
 	cputime64_t retval;
 
-	retval = cputime64_add(kstat_cpu(cpu).cpustat.idle,
-			kstat_cpu(cpu).cpustat.iowait);
+	retval = kstat_cpu(cpu).cpustat.idle + kstat_cpu(cpu).cpustat.iowait;
 
 	if (rackmeter_ignore_nice)
-		retval = cputime64_add(retval, kstat_cpu(cpu).cpustat.nice);
+		retval += kstat_cpu(cpu).cpustat.nice;
 
 	return retval;
 }
@@ -220,13 +219,11 @@ static void rackmeter_do_timer(struct work_struct *work)
 	int i, offset, load, cumm, pause;
 
 	cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
-	total_ticks = (unsigned int)cputime64_sub(cur_jiffies,
-						  rcpu->prev_wall);
+	total_ticks = (unsigned int) (cur_jiffies - rcpu->prev_wall);
 	rcpu->prev_wall = cur_jiffies;
 
 	total_idle_ticks = get_cpu_idle_time(cpu);
-	idle_ticks = (unsigned int) cputime64_sub(total_idle_ticks,
-				rcpu->prev_idle);
+	idle_ticks = (unsigned int) (total_idle_ticks - rcpu->prev_idle);
 	rcpu->prev_idle = total_idle_ticks;
 
 	/* We do a very dumb calculation to update the LEDs for now,
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3a1dafd228d1..8c344f037bd0 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -394,8 +394,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 	sigemptyset(&sigign);
 	sigemptyset(&sigcatch);
-	cutime = cstime = utime = stime = cputime_zero;
-	cgtime = gtime = cputime_zero;
+	cutime = cstime = utime = stime = 0;
+	cgtime = gtime = 0;
 
 	if (lock_task_sighand(task, &flags)) {
 		struct signal_struct *sig = task->signal;
@@ -423,14 +423,14 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
-				gtime = cputime_add(gtime, t->gtime);
+				gtime += t->gtime;
 				t = next_thread(t);
 			} while (t != task);
 
 			min_flt += sig->min_flt;
 			maj_flt += sig->maj_flt;
 			thread_group_times(task, &utime, &stime);
-			gtime = cputime_add(gtime, sig->gtime);
+			gtime += sig->gtime;
 		}
 
 		sid = task_session_nr_ns(task, ns);
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 2a30d67dd6b8..714d5d131e76 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -30,7 +30,7 @@ static cputime64_t get_idle_time(int cpu)
 	if (idle_time == -1ULL) {
 		/* !NO_HZ so we can rely on cpustat.idle */
 		idle = kstat_cpu(cpu).cpustat.idle;
-		idle = cputime64_add(idle, arch_idle_time(cpu));
+		idle += arch_idle_time(cpu);
 	} else
 		idle = nsecs_to_jiffies64(1000 * idle_time);
 
@@ -63,23 +63,22 @@ static int show_stat(struct seq_file *p, void *v)
 	struct timespec boottime;
 
 	user = nice = system = idle = iowait =
-		irq = softirq = steal = cputime64_zero;
-	guest = guest_nice = cputime64_zero;
+		irq = softirq = steal = 0;
+	guest = guest_nice = 0;
 	getboottime(&boottime);
 	jif = boottime.tv_sec;
 
 	for_each_possible_cpu(i) {
-		user = cputime64_add(user, kstat_cpu(i).cpustat.user);
-		nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
-		system = cputime64_add(system, kstat_cpu(i).cpustat.system);
-		idle = cputime64_add(idle, get_idle_time(i));
-		iowait = cputime64_add(iowait, get_iowait_time(i));
-		irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
-		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
-		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
-		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
-		guest_nice = cputime64_add(guest_nice,
-			kstat_cpu(i).cpustat.guest_nice);
+		user += kstat_cpu(i).cpustat.user;
+		nice += kstat_cpu(i).cpustat.nice;
+		system += kstat_cpu(i).cpustat.system;
+		idle += get_idle_time(i);
+		iowait += get_iowait_time(i);
+		irq += kstat_cpu(i).cpustat.irq;
+		softirq += kstat_cpu(i).cpustat.softirq;
+		steal += kstat_cpu(i).cpustat.steal;
+		guest += kstat_cpu(i).cpustat.guest;
+		guest_nice += kstat_cpu(i).cpustat.guest_nice;
 		sum += kstat_cpu_irqs_sum(i);
 		sum += arch_irq_stat_cpu(i);
 
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 766b1d456050..ac5243657da3 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -12,10 +12,10 @@ static int uptime_proc_show(struct seq_file *m, void *v)
 	struct timespec uptime;
 	struct timespec idle;
 	int i;
-	cputime_t idletime = cputime_zero;
+	cputime_t idletime = 0;
 
 	for_each_possible_cpu(i)
-		idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
+		idletime += kstat_cpu(i).cpustat.idle;
 
 	do_posix_clock_monotonic_gettime(&uptime);
 	monotonic_to_bootbased(&uptime);
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 62ce6823c0f2..77202e2c9fc5 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -4,70 +4,64 @@
 #include <linux/time.h>
 #include <linux/jiffies.h>
 
-typedef unsigned long cputime_t;
+typedef unsigned long __nocast cputime_t;
 
-#define cputime_zero			(0UL)
 #define cputime_one_jiffy		jiffies_to_cputime(1)
-#define cputime_max			((~0UL >> 1) - 1)
-#define cputime_add(__a, __b)		((__a) +  (__b))
-#define cputime_sub(__a, __b)		((__a) -  (__b))
-#define cputime_div(__a, __n)		((__a) /  (__n))
-#define cputime_halve(__a)		((__a) >> 1)
-#define cputime_eq(__a, __b)		((__a) == (__b))
-#define cputime_gt(__a, __b)		((__a) >  (__b))
-#define cputime_ge(__a, __b)		((__a) >= (__b))
-#define cputime_lt(__a, __b)		((__a) <  (__b))
-#define cputime_le(__a, __b)		((__a) <= (__b))
-#define cputime_to_jiffies(__ct)	(__ct)
+#define cputime_to_jiffies(__ct)	(__force unsigned long)(__ct)
 #define cputime_to_scaled(__ct)		(__ct)
-#define jiffies_to_cputime(__hz)	(__hz)
+#define jiffies_to_cputime(__hz)	(__force cputime_t)(__hz)
 
-typedef u64 cputime64_t;
+typedef u64 __nocast cputime64_t;
 
-#define cputime64_zero (0ULL)
-#define cputime64_add(__a, __b)		((__a) + (__b))
-#define cputime64_sub(__a, __b)		((__a) - (__b))
-#define cputime64_to_jiffies64(__ct)	(__ct)
-#define jiffies64_to_cputime64(__jif)	(__jif)
-#define cputime_to_cputime64(__ct)	((u64) __ct)
-#define cputime64_gt(__a, __b)		((__a) >  (__b))
+#define cputime64_to_jiffies64(__ct)	(__force u64)(__ct)
+#define jiffies64_to_cputime64(__jif)	(__force cputime64_t)(__jif)
 
-#define nsecs_to_cputime64(__ct)	nsecs_to_jiffies64(__ct)
+#define nsecs_to_cputime64(__ct)	\
+	jiffies64_to_cputime64(nsecs_to_jiffies64(__ct))
 
 
 /*
  * Convert cputime to microseconds and back.
  */
-#define cputime_to_usecs(__ct)		jiffies_to_usecs(__ct)
-#define usecs_to_cputime(__msecs)	usecs_to_jiffies(__msecs)
+#define cputime_to_usecs(__ct)		\
+	jiffies_to_usecs(cputime_to_jiffies(__ct));
+#define usecs_to_cputime(__msecs)	\
+	jiffies_to_cputime(usecs_to_jiffies(__msecs));
 
 /*
  * Convert cputime to seconds and back.
  */
-#define cputime_to_secs(jif)		((jif) / HZ)
-#define secs_to_cputime(sec)		((sec) * HZ)
+#define cputime_to_secs(jif)		(cputime_to_jiffies(jif) / HZ)
+#define secs_to_cputime(sec)		jiffies_to_cputime((sec) * HZ)
 
 /*
  * Convert cputime to timespec and back.
  */
-#define timespec_to_cputime(__val)	timespec_to_jiffies(__val)
-#define cputime_to_timespec(__ct,__val)	jiffies_to_timespec(__ct,__val)
+#define timespec_to_cputime(__val)	\
+	jiffies_to_cputime(timespec_to_jiffies(__val))
+#define cputime_to_timespec(__ct,__val)	\
+	jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
 
 /*
  * Convert cputime to timeval and back.
  */
-#define timeval_to_cputime(__val)	timeval_to_jiffies(__val)
-#define cputime_to_timeval(__ct,__val)	jiffies_to_timeval(__ct,__val)
+#define timeval_to_cputime(__val)	\
+	jiffies_to_cputime(timeval_to_jiffies(__val))
+#define cputime_to_timeval(__ct,__val)	\
+	jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
 
 /*
  * Convert cputime to clock and back.
  */
-#define cputime_to_clock_t(__ct)	jiffies_to_clock_t(__ct)
-#define clock_t_to_cputime(__x)		clock_t_to_jiffies(__x)
+#define cputime_to_clock_t(__ct)	\
+	jiffies_to_clock_t(cputime_to_jiffies(__ct))
+#define clock_t_to_cputime(__x)		\
+	jiffies_to_cputime(clock_t_to_jiffies(__x))
 
 /*
  * Convert cputime64 to clock.
  */
-#define cputime64_to_clock_t(__ct)	jiffies_64_to_clock_t(__ct)
+#define cputime64_to_clock_t(__ct)	\
+	jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
 
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c4f3e9b9bc5..5649032d73fe 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -483,8 +483,8 @@ struct task_cputime {
 
 #define INIT_CPUTIME	\
 	(struct task_cputime) {					\
-		.utime = cputime_zero,				\
-		.stime = cputime_zero,				\
+		.utime = 0,					\
+		.stime = 0,					\
 		.sum_exec_runtime = 0,				\
 	}
 
diff --git a/kernel/acct.c b/kernel/acct.c
index fa7eb3de2ddc..203dfead2e06 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -613,8 +613,8 @@ void acct_collect(long exitcode, int group_dead)
 		pacct->ac_flag |= ACORE;
 	if (current->flags & PF_SIGNALED)
 		pacct->ac_flag |= AXSIG;
-	pacct->ac_utime = cputime_add(pacct->ac_utime, current->utime);
-	pacct->ac_stime = cputime_add(pacct->ac_stime, current->stime);
+	pacct->ac_utime += current->utime;
+	pacct->ac_stime += current->stime;
 	pacct->ac_minflt += current->min_flt;
 	pacct->ac_majflt += current->maj_flt;
 	spin_unlock_irq(&current->sighand->siglock);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 563f13609470..3f8ee8a138c4 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -178,8 +178,7 @@ static inline void check_for_tasks(int cpu)
 	write_lock_irq(&tasklist_lock);
 	for_each_process(p) {
 		if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
-		    (!cputime_eq(p->utime, cputime_zero) ||
-		     !cputime_eq(p->stime, cputime_zero)))
+		    (p->utime || p->stime))
 			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
 				"(state = %ld, flags = %x)\n",
 				p->comm, task_pid_nr(p), cpu,
diff --git a/kernel/exit.c b/kernel/exit.c
index d0b7d988f873..5e0d1f4c696e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -121,9 +121,9 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime = cputime_add(sig->utime, tsk->utime);
-		sig->stime = cputime_add(sig->stime, tsk->stime);
-		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
+		sig->utime += tsk->utime;
+		sig->stime += tsk->stime;
+		sig->gtime += tsk->gtime;
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
@@ -1255,19 +1255,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		spin_lock_irq(&p->real_parent->sighand->siglock);
 		psig = p->real_parent->signal;
 		sig = p->signal;
-		psig->cutime =
-			cputime_add(psig->cutime,
-			cputime_add(tgutime,
-				    sig->cutime));
-		psig->cstime =
-			cputime_add(psig->cstime,
-			cputime_add(tgstime,
-				    sig->cstime));
-		psig->cgtime =
-			cputime_add(psig->cgtime,
-			cputime_add(p->gtime,
-			cputime_add(sig->gtime,
-				    sig->cgtime)));
+		psig->cutime += tgutime + sig->cutime;
+		psig->cstime += tgstime + sig->cstime;
+		psig->cgtime += p->gtime + sig->gtime + sig->cgtime;
 		psig->cmin_flt +=
 			p->min_flt + sig->min_flt + sig->cmin_flt;
 		psig->cmaj_flt +=
diff --git a/kernel/fork.c b/kernel/fork.c
index da4a6a10d088..b058c5820ecd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1023,8 +1023,8 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
  */
 static void posix_cpu_timers_init(struct task_struct *tsk)
 {
-	tsk->cputime_expires.prof_exp = cputime_zero;
-	tsk->cputime_expires.virt_exp = cputime_zero;
+	tsk->cputime_expires.prof_exp = 0;
+	tsk->cputime_expires.virt_exp = 0;
 	tsk->cputime_expires.sched_exp = 0;
 	INIT_LIST_HEAD(&tsk->cpu_timers[0]);
 	INIT_LIST_HEAD(&tsk->cpu_timers[1]);
@@ -1132,14 +1132,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	init_sigpending(&p->pending);
 
-	p->utime = cputime_zero;
-	p->stime = cputime_zero;
-	p->gtime = cputime_zero;
-	p->utimescaled = cputime_zero;
-	p->stimescaled = cputime_zero;
+	p->utime = p->stime = p->gtime = 0;
+	p->utimescaled = p->stimescaled = 0;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
-	p->prev_utime = cputime_zero;
-	p->prev_stime = cputime_zero;
+	p->prev_utime = p->prev_stime = 0;
 #endif
 #if defined(SPLIT_RSS_COUNTING)
 	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
diff --git a/kernel/itimer.c b/kernel/itimer.c
index d802883153da..22000c3db0dd 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -52,22 +52,22 @@ static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
 
 	cval = it->expires;
 	cinterval = it->incr;
-	if (!cputime_eq(cval, cputime_zero)) {
+	if (cval) {
 		struct task_cputime cputime;
 		cputime_t t;
 
 		thread_group_cputimer(tsk, &cputime);
 		if (clock_id == CPUCLOCK_PROF)
-			t = cputime_add(cputime.utime, cputime.stime);
+			t = cputime.utime + cputime.stime;
 		else
 			/* CPUCLOCK_VIRT */
 			t = cputime.utime;
 
-		if (cputime_le(cval, t))
+		if (cval < t)
 			/* about to fire */
 			cval = cputime_one_jiffy;
 		else
-			cval = cputime_sub(cval, t);
+			cval = cval - t;
 	}
 
 	spin_unlock_irq(&tsk->sighand->siglock);
@@ -161,10 +161,9 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
 
 	cval = it->expires;
 	cinterval = it->incr;
-	if (!cputime_eq(cval, cputime_zero) ||
-	    !cputime_eq(nval, cputime_zero)) {
-		if (cputime_gt(nval, cputime_zero))
-			nval = cputime_add(nval, cputime_one_jiffy);
+	if (cval || nval) {
+		if (nval > 0)
+			nval += cputime_one_jiffy;
 		set_process_cpu_timer(tsk, clock_id, &nval, &cval);
 	}
 	it->expires = nval;
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index e7cb76dc18f5..125cb67daa21 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -78,7 +78,7 @@ static inline int cpu_time_before(const clockid_t which_clock,
 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
 		return now.sched < then.sched;
 	}  else {
-		return cputime_lt(now.cpu, then.cpu);
+		return now.cpu < then.cpu;
 	}
 }
 static inline void cpu_time_add(const clockid_t which_clock,
@@ -88,7 +88,7 @@ static inline void cpu_time_add(const clockid_t which_clock,
 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
 		acc->sched += val.sched;
 	}  else {
-		acc->cpu = cputime_add(acc->cpu, val.cpu);
+		acc->cpu += val.cpu;
 	}
 }
 static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
@@ -98,24 +98,11 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
 		a.sched -= b.sched;
 	}  else {
-		a.cpu = cputime_sub(a.cpu, b.cpu);
+		a.cpu -= b.cpu;
 	}
 	return a;
 }
 
-/*
- * Divide and limit the result to res >= 1
- *
- * This is necessary to prevent signal delivery starvation, when the result of
- * the division would be rounded down to 0.
- */
-static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div)
-{
-	cputime_t res = cputime_div(time, div);
-
-	return max_t(cputime_t, res, 1);
-}
-
 /*
  * Update expiry time from increment, and increase overrun count,
  * given the current clock sample.
@@ -148,28 +135,26 @@ static void bump_cpu_timer(struct k_itimer *timer,
 	} else {
 		cputime_t delta, incr;
 
-		if (cputime_lt(now.cpu, timer->it.cpu.expires.cpu))
+		if (now.cpu < timer->it.cpu.expires.cpu)
 			return;
 		incr = timer->it.cpu.incr.cpu;
-		delta = cputime_sub(cputime_add(now.cpu, incr),
-				    timer->it.cpu.expires.cpu);
+		delta = now.cpu + incr - timer->it.cpu.expires.cpu;
 		/* Don't use (incr*2 < delta), incr*2 might overflow. */
-		for (i = 0; cputime_lt(incr, cputime_sub(delta, incr)); i++)
-			     incr = cputime_add(incr, incr);
-		for (; i >= 0; incr = cputime_halve(incr), i--) {
-			if (cputime_lt(delta, incr))
+		for (i = 0; incr < delta - incr; i++)
+			     incr += incr;
+		for (; i >= 0; incr = incr >> 1, i--) {
+			if (delta < incr)
 				continue;
-			timer->it.cpu.expires.cpu =
-				cputime_add(timer->it.cpu.expires.cpu, incr);
+			timer->it.cpu.expires.cpu += incr;
 			timer->it_overrun += 1 << i;
-			delta = cputime_sub(delta, incr);
+			delta -= incr;
 		}
 	}
 }
 
 static inline cputime_t prof_ticks(struct task_struct *p)
 {
-	return cputime_add(p->utime, p->stime);
+	return p->utime + p->stime;
 }
 static inline cputime_t virt_ticks(struct task_struct *p)
 {
@@ -248,8 +233,8 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 
 	t = tsk;
 	do {
-		times->utime = cputime_add(times->utime, t->utime);
-		times->stime = cputime_add(times->stime, t->stime);
+		times->utime += t->utime;
+		times->stime += t->stime;
 		times->sum_exec_runtime += task_sched_runtime(t);
 	} while_each_thread(tsk, t);
 out:
@@ -258,10 +243,10 @@ out:
 
 static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
 {
-	if (cputime_gt(b->utime, a->utime))
+	if (b->utime > a->utime)
 		a->utime = b->utime;
 
-	if (cputime_gt(b->stime, a->stime))
+	if (b->stime > a->stime)
 		a->stime = b->stime;
 
 	if (b->sum_exec_runtime > a->sum_exec_runtime)
@@ -306,7 +291,7 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
 		return -EINVAL;
 	case CPUCLOCK_PROF:
 		thread_group_cputime(p, &cputime);
-		cpu->cpu = cputime_add(cputime.utime, cputime.stime);
+		cpu->cpu = cputime.utime + cputime.stime;
 		break;
 	case CPUCLOCK_VIRT:
 		thread_group_cputime(p, &cputime);
@@ -470,26 +455,24 @@ static void cleanup_timers(struct list_head *head,
 			   unsigned long long sum_exec_runtime)
 {
 	struct cpu_timer_list *timer, *next;
-	cputime_t ptime = cputime_add(utime, stime);
+	cputime_t ptime = utime + stime;
 
 	list_for_each_entry_safe(timer, next, head, entry) {
 		list_del_init(&timer->entry);
-		if (cputime_lt(timer->expires.cpu, ptime)) {
-			timer->expires.cpu = cputime_zero;
+		if (timer->expires.cpu < ptime) {
+			timer->expires.cpu = 0;
 		} else {
-			timer->expires.cpu = cputime_sub(timer->expires.cpu,
-							 ptime);
+			timer->expires.cpu -= ptime;
 		}
 	}
 
 	++head;
 	list_for_each_entry_safe(timer, next, head, entry) {
 		list_del_init(&timer->entry);
-		if (cputime_lt(timer->expires.cpu, utime)) {
-			timer->expires.cpu = cputime_zero;
+		if (timer->expires.cpu < utime) {
+			timer->expires.cpu = 0;
 		} else {
-			timer->expires.cpu = cputime_sub(timer->expires.cpu,
-							 utime);
+			timer->expires.cpu -= utime;
 		}
 	}
 
@@ -520,8 +503,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
 	struct signal_struct *const sig = tsk->signal;
 
 	cleanup_timers(tsk->signal->cpu_timers,
-		       cputime_add(tsk->utime, sig->utime),
-		       cputime_add(tsk->stime, sig->stime),
+		       tsk->utime + sig->utime, tsk->stime + sig->stime,
 		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
 }
 
@@ -540,8 +522,7 @@ static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
 
 static inline int expires_gt(cputime_t expires, cputime_t new_exp)
 {
-	return cputime_eq(expires, cputime_zero) ||
-	       cputime_gt(expires, new_exp);
+	return expires == 0 || expires > new_exp;
 }
 
 /*
@@ -651,7 +632,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
-		cpu->cpu = cputime_add(cputime.utime, cputime.stime);
+		cpu->cpu = cputime.utime + cputime.stime;
 		break;
 	case CPUCLOCK_VIRT:
 		cpu->cpu = cputime.utime;
@@ -918,12 +899,12 @@ static void check_thread_timers(struct task_struct *tsk,
 	unsigned long soft;
 
 	maxfire = 20;
-	tsk->cputime_expires.prof_exp = cputime_zero;
+	tsk->cputime_expires.prof_exp = 0;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
-		if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) {
+		if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) {
 			tsk->cputime_expires.prof_exp = t->expires.cpu;
 			break;
 		}
@@ -933,12 +914,12 @@ static void check_thread_timers(struct task_struct *tsk,
 
 	++timers;
 	maxfire = 20;
-	tsk->cputime_expires.virt_exp = cputime_zero;
+	tsk->cputime_expires.virt_exp = 0;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
-		if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) {
+		if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) {
 			tsk->cputime_expires.virt_exp = t->expires.cpu;
 			break;
 		}
@@ -1009,20 +990,19 @@ static u32 onecputick;
 static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 			     cputime_t *expires, cputime_t cur_time, int signo)
 {
-	if (cputime_eq(it->expires, cputime_zero))
+	if (!it->expires)
 		return;
 
-	if (cputime_ge(cur_time, it->expires)) {
-		if (!cputime_eq(it->incr, cputime_zero)) {
-			it->expires = cputime_add(it->expires, it->incr);
+	if (cur_time >= it->expires) {
+		if (it->incr) {
+			it->expires += it->incr;
 			it->error += it->incr_error;
 			if (it->error >= onecputick) {
-				it->expires = cputime_sub(it->expires,
-							  cputime_one_jiffy);
+				it->expires -= cputime_one_jiffy;
 				it->error -= onecputick;
 			}
 		} else {
-			it->expires = cputime_zero;
+			it->expires = 0;
 		}
 
 		trace_itimer_expire(signo == SIGPROF ?
@@ -1031,9 +1011,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 	}
 
-	if (!cputime_eq(it->expires, cputime_zero) &&
-	    (cputime_eq(*expires, cputime_zero) ||
-	     cputime_lt(it->expires, *expires))) {
+	if (it->expires && (!*expires || it->expires < *expires)) {
 		*expires = it->expires;
 	}
 }
@@ -1048,9 +1026,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
  */
 static inline int task_cputime_zero(const struct task_cputime *cputime)
 {
-	if (cputime_eq(cputime->utime, cputime_zero) &&
-	    cputime_eq(cputime->stime, cputime_zero) &&
-	    cputime->sum_exec_runtime == 0)
+	if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
 		return 1;
 	return 0;
 }
@@ -1076,15 +1052,15 @@ static void check_process_timers(struct task_struct *tsk,
 	 */
 	thread_group_cputimer(tsk, &cputime);
 	utime = cputime.utime;
-	ptime = cputime_add(utime, cputime.stime);
+	ptime = utime + cputime.stime;
 	sum_sched_runtime = cputime.sum_exec_runtime;
 	maxfire = 20;
-	prof_expires = cputime_zero;
+	prof_expires = 0;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *tl = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
-		if (!--maxfire || cputime_lt(ptime, tl->expires.cpu)) {
+		if (!--maxfire || ptime < tl->expires.cpu) {
 			prof_expires = tl->expires.cpu;
 			break;
 		}
@@ -1094,12 +1070,12 @@ static void check_process_timers(struct task_struct *tsk,
 
 	++timers;
 	maxfire = 20;
-	virt_expires = cputime_zero;
+	virt_expires = 0;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *tl = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
-		if (!--maxfire || cputime_lt(utime, tl->expires.cpu)) {
+		if (!--maxfire || utime < tl->expires.cpu) {
 			virt_expires = tl->expires.cpu;
 			break;
 		}
@@ -1154,8 +1130,7 @@ static void check_process_timers(struct task_struct *tsk,
 			}
 		}
 		x = secs_to_cputime(soft);
-		if (cputime_eq(prof_expires, cputime_zero) ||
-		    cputime_lt(x, prof_expires)) {
+		if (!prof_expires || x < prof_expires) {
 			prof_expires = x;
 		}
 	}
@@ -1249,12 +1224,9 @@ out:
 static inline int task_cputime_expired(const struct task_cputime *sample,
 					const struct task_cputime *expires)
 {
-	if (!cputime_eq(expires->utime, cputime_zero) &&
-	    cputime_ge(sample->utime, expires->utime))
+	if (expires->utime && sample->utime >= expires->utime)
 		return 1;
-	if (!cputime_eq(expires->stime, cputime_zero) &&
-	    cputime_ge(cputime_add(sample->utime, sample->stime),
-		       expires->stime))
+	if (expires->stime && sample->utime + sample->stime >= expires->stime)
 		return 1;
 	if (expires->sum_exec_runtime != 0 &&
 	    sample->sum_exec_runtime >= expires->sum_exec_runtime)
@@ -1389,18 +1361,18 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 		 * it to be relative, *newval argument is relative and we update
 		 * it to be absolute.
 		 */
-		if (!cputime_eq(*oldval, cputime_zero)) {
-			if (cputime_le(*oldval, now.cpu)) {
+		if (*oldval) {
+			if (*oldval <= now.cpu) {
 				/* Just about to fire. */
 				*oldval = cputime_one_jiffy;
 			} else {
-				*oldval = cputime_sub(*oldval, now.cpu);
+				*oldval -= now.cpu;
 			}
 		}
 
-		if (cputime_eq(*newval, cputime_zero))
+		if (!*newval)
 			return;
-		*newval = cputime_add(*newval, now.cpu);
+		*newval += now.cpu;
 	}
 
 	/*
diff --git a/kernel/sched.c b/kernel/sched.c
index d6b149ccf925..18cad4467e61 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2166,7 +2166,7 @@ static int irqtime_account_hi_update(void)
 
 	local_irq_save(flags);
 	latest_ns = this_cpu_read(cpu_hardirq_time);
-	if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq))
+	if (nsecs_to_cputime64(latest_ns) > cpustat->irq)
 		ret = 1;
 	local_irq_restore(flags);
 	return ret;
@@ -2181,7 +2181,7 @@ static int irqtime_account_si_update(void)
 
 	local_irq_save(flags);
 	latest_ns = this_cpu_read(cpu_softirq_time);
-	if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq))
+	if (nsecs_to_cputime64(latest_ns) > cpustat->softirq)
 		ret = 1;
 	local_irq_restore(flags);
 	return ret;
@@ -3868,19 +3868,17 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 		       cputime_t cputime_scaled)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	cputime64_t tmp;
 
 	/* Add user time to process. */
-	p->utime = cputime_add(p->utime, cputime);
-	p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
+	p->utime += cputime;
+	p->utimescaled += cputime_scaled;
 	account_group_user_time(p, cputime);
 
 	/* Add user time to cpustat. */
-	tmp = cputime_to_cputime64(cputime);
 	if (TASK_NICE(p) > 0)
-		cpustat->nice = cputime64_add(cpustat->nice, tmp);
+		cpustat->nice += (__force cputime64_t) cputime;
 	else
-		cpustat->user = cputime64_add(cpustat->user, tmp);
+		cpustat->user += (__force cputime64_t) cputime;
 
 	cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
 	/* Account for user time used */
@@ -3896,24 +3894,21 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 static void account_guest_time(struct task_struct *p, cputime_t cputime,
 			       cputime_t cputime_scaled)
 {
-	cputime64_t tmp;
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 
-	tmp = cputime_to_cputime64(cputime);
-
 	/* Add guest time to process. */
-	p->utime = cputime_add(p->utime, cputime);
-	p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
+	p->utime += cputime;
+	p->utimescaled += cputime_scaled;
 	account_group_user_time(p, cputime);
-	p->gtime = cputime_add(p->gtime, cputime);
+	p->gtime += cputime;
 
 	/* Add guest time to cpustat. */
 	if (TASK_NICE(p) > 0) {
-		cpustat->nice = cputime64_add(cpustat->nice, tmp);
-		cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp);
+		cpustat->nice += (__force cputime64_t) cputime;
+		cpustat->guest_nice += (__force cputime64_t) cputime;
 	} else {
-		cpustat->user = cputime64_add(cpustat->user, tmp);
-		cpustat->guest = cputime64_add(cpustat->guest, tmp);
+		cpustat->user += (__force cputime64_t) cputime;
+		cpustat->guest += (__force cputime64_t) cputime;
 	}
 }
 
@@ -3928,15 +3923,13 @@ static inline
 void __account_system_time(struct task_struct *p, cputime_t cputime,
 			cputime_t cputime_scaled, cputime64_t *target_cputime64)
 {
-	cputime64_t tmp = cputime_to_cputime64(cputime);
-
 	/* Add system time to process. */
-	p->stime = cputime_add(p->stime, cputime);
-	p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
+	p->stime += cputime;
+	p->stimescaled += cputime_scaled;
 	account_group_system_time(p, cputime);
 
 	/* Add system time to cpustat. */
-	*target_cputime64 = cputime64_add(*target_cputime64, tmp);
+	*target_cputime64 += (__force cputime64_t) cputime;
 	cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
 
 	/* Account for system time used */
@@ -3978,9 +3971,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 void account_steal_time(cputime_t cputime)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	cputime64_t cputime64 = cputime_to_cputime64(cputime);
 
-	cpustat->steal = cputime64_add(cpustat->steal, cputime64);
+	cpustat->steal += (__force cputime64_t) cputime;
 }
 
 /*
@@ -3990,13 +3982,12 @@ void account_steal_time(cputime_t cputime)
 void account_idle_time(cputime_t cputime)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	cputime64_t cputime64 = cputime_to_cputime64(cputime);
 	struct rq *rq = this_rq();
 
 	if (atomic_read(&rq->nr_iowait) > 0)
-		cpustat->iowait = cputime64_add(cpustat->iowait, cputime64);
+		cpustat->iowait += (__force cputime64_t) cputime;
 	else
-		cpustat->idle = cputime64_add(cpustat->idle, cputime64);
+		cpustat->idle += (__force cputime64_t) cputime;
 }
 
 static __always_inline bool steal_account_process_tick(void)
@@ -4046,16 +4037,15 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 						struct rq *rq)
 {
 	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
-	cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 
 	if (steal_account_process_tick())
 		return;
 
 	if (irqtime_account_hi_update()) {
-		cpustat->irq = cputime64_add(cpustat->irq, tmp);
+		cpustat->irq += (__force cputime64_t) cputime_one_jiffy;
 	} else if (irqtime_account_si_update()) {
-		cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
+		cpustat->softirq += (__force cputime64_t) cputime_one_jiffy;
 	} else if (this_cpu_ksoftirqd() == p) {
 		/*
 		 * ksoftirqd time do not get accounted in cpu_softirq_time.
@@ -4171,7 +4161,7 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
-	cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime);
+	cputime_t rtime, utime = p->utime, total = utime + p->stime;
 
 	/*
 	 * Use CFS's precise accounting:
@@ -4179,11 +4169,11 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
 
 	if (total) {
-		u64 temp = rtime;
+		u64 temp = (__force u64) rtime;
 
-		temp *= utime;
-		do_div(temp, total);
-		utime = (cputime_t)temp;
+		temp *= (__force u64) utime;
+		do_div(temp, (__force u32) total);
+		utime = (__force cputime_t) temp;
 	} else
 		utime = rtime;
 
@@ -4191,7 +4181,7 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	 * Compare with previous values, to keep monotonicity:
 	 */
 	p->prev_utime = max(p->prev_utime, utime);
-	p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime));
+	p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);
 
 	*ut = p->prev_utime;
 	*st = p->prev_stime;
@@ -4208,21 +4198,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 
 	thread_group_cputime(p, &cputime);
 
-	total = cputime_add(cputime.utime, cputime.stime);
+	total = cputime.utime + cputime.stime;
 	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
 
 	if (total) {
-		u64 temp = rtime;
+		u64 temp = (__force u64) rtime;
 
-		temp *= cputime.utime;
-		do_div(temp, total);
-		utime = (cputime_t)temp;
+		temp *= (__force u64) cputime.utime;
+		do_div(temp, (__force u32) total);
+		utime = (__force cputime_t) temp;
 	} else
 		utime = rtime;
 
 	sig->prev_utime = max(sig->prev_utime, utime);
-	sig->prev_stime = max(sig->prev_stime,
-			      cputime_sub(rtime, sig->prev_utime));
+	sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime);
 
 	*ut = sig->prev_utime;
 	*st = sig->prev_stime;
@@ -9769,7 +9758,8 @@ static void cpuacct_update_stats(struct task_struct *tsk,
 	ca = task_ca(tsk);
 
 	do {
-		__percpu_counter_add(&ca->cpustat[idx], val, batch);
+		__percpu_counter_add(&ca->cpustat[idx],
+				     (__force s64) val, batch);
 		ca = ca->parent;
 	} while (ca);
 	rcu_read_unlock();
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 87f9e36ea56e..4b71dbef271d 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -283,8 +283,7 @@ static inline void account_group_user_time(struct task_struct *tsk,
 		return;
 
 	raw_spin_lock(&cputimer->lock);
-	cputimer->cputime.utime =
-		cputime_add(cputimer->cputime.utime, cputime);
+	cputimer->cputime.utime += cputime;
 	raw_spin_unlock(&cputimer->lock);
 }
 
@@ -307,8 +306,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
 		return;
 
 	raw_spin_lock(&cputimer->lock);
-	cputimer->cputime.stime =
-		cputime_add(cputimer->cputime.stime, cputime);
+	cputimer->cputime.stime += cputime;
 	raw_spin_unlock(&cputimer->lock);
 }
 
diff --git a/kernel/signal.c b/kernel/signal.c
index b3f78d09a105..739ef2bf105c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1629,10 +1629,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 	info.si_uid = __task_cred(tsk)->uid;
 	rcu_read_unlock();
 
-	info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
-				tsk->signal->utime));
-	info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
-				tsk->signal->stime));
+	info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime);
+	info.si_stime = cputime_to_clock_t(tsk->stime + tsk->signal->stime);
 
 	info.si_status = tsk->exit_code & 0x7f;
 	if (tsk->exit_code & 0x80)
diff --git a/kernel/sys.c b/kernel/sys.c
index 481611fbd079..ddf8155bf3f8 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1605,7 +1605,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 	unsigned long maxrss = 0;
 
 	memset((char *) r, 0, sizeof *r);
-	utime = stime = cputime_zero;
+	utime = stime = 0;
 
 	if (who == RUSAGE_THREAD) {
 		task_times(current, &utime, &stime);
@@ -1635,8 +1635,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 
 		case RUSAGE_SELF:
 			thread_group_times(p, &tgutime, &tgstime);
-			utime = cputime_add(utime, tgutime);
-			stime = cputime_add(stime, tgstime);
+			utime += tgutime;
+			stime += tgstime;
 			r->ru_nvcsw += p->signal->nvcsw;
 			r->ru_nivcsw += p->signal->nivcsw;
 			r->ru_minflt += p->signal->min_flt;
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 5bbfac85866e..23b4d784ebdd 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -127,7 +127,7 @@ void acct_update_integrals(struct task_struct *tsk)
 
 		local_irq_save(flags);
 		time = tsk->stime + tsk->utime;
-		dtime = cputime_sub(time, tsk->acct_timexpd);
+		dtime = time - tsk->acct_timexpd;
 		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
 		delta = value.tv_sec;
 		delta = delta * USEC_PER_SEC + value.tv_usec;
-- 
cgit v1.2.3


From 888bdaa9b2c426dcca214e6efd388080938082cb Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Wed, 14 Dec 2011 23:34:31 +0000
Subject: Move limit definitions outside CONFIG_INET

They need to be available for other protocols as well, since
they are used in sock.c openly

Signed-off-by: Glauber Costa <glommer@parallels.com>
CC: Hiroyouki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com>
CC: David S. Miller <davem@davemloft.net>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/memcontrol.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1513994ce207..9b296ea41bb8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -384,13 +384,13 @@ mem_cgroup_print_bad_page(struct page *page)
 }
 #endif
 
-#ifdef CONFIG_INET
 enum {
 	UNDER_LIMIT,
 	SOFT_LIMIT,
 	OVER_LIMIT,
 };
 
+#ifdef CONFIG_INET
 struct sock;
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
 void sock_update_memcg(struct sock *sk);
-- 
cgit v1.2.3


From bdd90d5e36a55271beb957b3d7ca3e29b2a90207 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 14 Dec 2011 12:20:27 +0100
Subject: cfg80211: validate nl80211 station handling better

The nl80211 station handling code is a bit messy
and doesn't do a lot of validation. It seems like
this could be an issue for drivers that don't use
mac80211 to validate everything.

As cfg80211 doesn't keep station state, move the
validation of allowing supported_rates to change
for TDLS only in station mode to mac80211.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |   6 +-
 include/net/cfg80211.h  |   7 +-
 net/mac80211/cfg.c      |   8 ++
 net/wireless/nl80211.c  | 199 +++++++++++++++++++++++++++---------------------
 4 files changed, 131 insertions(+), 89 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index a18760684fc9..f795cb7dccdd 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1536,7 +1536,11 @@ enum nl80211_iftype {
  * @NL80211_STA_FLAG_WME: station is WME/QoS capable
  * @NL80211_STA_FLAG_MFP: station uses management frame protection
  * @NL80211_STA_FLAG_AUTHENTICATED: station is authenticated
- * @NL80211_STA_FLAG_TDLS_PEER: station is a TDLS peer
+ * @NL80211_STA_FLAG_TDLS_PEER: station is a TDLS peer -- this flag should
+ *	only be used in managed mode (even in the flags mask). Note that the
+ *	flag can't be changed, it is only valid while adding a station, and
+ *	attempts to change it will silently be ignored (rather than rejected
+ *	as errors.)
  * @NL80211_STA_FLAG_MAX: highest station flag number currently defined
  * @__NL80211_STA_FLAG_AFTER_LAST: internal use
  */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 150c0ee714c2..5eda5933ae01 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1346,7 +1346,12 @@ struct cfg80211_gtk_rekey_data {
  *
  * @add_station: Add a new station.
  * @del_station: Remove a station; @mac may be NULL to remove all stations.
- * @change_station: Modify a given station.
+ * @change_station: Modify a given station. Note that flags changes are not much
+ *	validated in cfg80211, in particular the auth/assoc/authorized flags
+ *	might come to the driver in invalid combinations -- make sure to check
+ *	them, also against the existing state! Also, supported_rates changes are
+ *	not checked in station mode -- drivers need to reject (or ignore) them
+ *	for anything but TDLS peers.
  * @get_station: get station information for the station identified by @mac
  * @dump_station: dump station callback -- resume dump at index @idx
  *
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 393b2a4445b8..944051b43bad 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -976,6 +976,14 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 		return -EINVAL;
 	}
 
+	/* in station mode, supported rates are only valid with TDLS */
+	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    params->supported_rates &&
+	    !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
 	if (params->vlan && params->vlan != sta->sdata->dev) {
 		vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d86428145c32..b07c4fc4ae22 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2579,6 +2579,9 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 		params.ht_capa =
 			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
 
+	if (!rdev->ops->change_station)
+		return -EOPNOTSUPP;
+
 	if (parse_station_flags(info, &params))
 		return -EINVAL;
 
@@ -2590,73 +2593,84 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 		params.plink_state =
 		    nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]);
 
-	params.vlan = get_vlan(info, rdev);
-	if (IS_ERR(params.vlan))
-		return PTR_ERR(params.vlan);
-
-	/* validate settings */
-	err = 0;
-
 	switch (dev->ieee80211_ptr->iftype) {
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_P2P_GO:
 		/* disallow mesh-specific things */
 		if (params.plink_action)
-			err = -EINVAL;
+			return -EINVAL;
+
+		/* TDLS can't be set, ... */
+		if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
+			return -EINVAL;
+		/*
+		 * ... but don't bother the driver with it. This works around
+		 * a hostapd/wpa_supplicant issue -- it always includes the
+		 * TLDS_PEER flag in the mask even for AP mode.
+		 */
+		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
+
+		/* accept only the listed bits */
+		if (params.sta_flags_mask &
+				~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
+				  BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
+				  BIT(NL80211_STA_FLAG_WME) |
+				  BIT(NL80211_STA_FLAG_MFP)))
+			return -EINVAL;
+
+		/* must be last in here for error handling */
+		params.vlan = get_vlan(info, rdev);
+		if (IS_ERR(params.vlan))
+			return PTR_ERR(params.vlan);
 		break;
 	case NL80211_IFTYPE_P2P_CLIENT:
 	case NL80211_IFTYPE_STATION:
 		/* disallow things sta doesn't support */
 		if (params.plink_action)
-			err = -EINVAL;
-		if (params.vlan)
-			err = -EINVAL;
-		if (params.supported_rates &&
-		    !(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
-			err = -EINVAL;
+			return -EINVAL;
 		if (params.ht_capa)
-			err = -EINVAL;
+			return -EINVAL;
 		if (params.listen_interval >= 0)
-			err = -EINVAL;
-		if (params.sta_flags_mask &
-				~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
-				  BIT(NL80211_STA_FLAG_TDLS_PEER)))
-			err = -EINVAL;
-		/* can't change the TDLS bit */
-		if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) &&
-		    (params.sta_flags_mask & BIT(NL80211_STA_FLAG_TDLS_PEER)))
-			err = -EINVAL;
+			return -EINVAL;
+		/*
+		 * Don't allow userspace to change the TDLS_PEER flag,
+		 * but silently ignore attempts to change it since we
+		 * don't have state here to verify that it doesn't try
+		 * to change the flag.
+		 */
+		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
+
+		/* reject any changes other than AUTHORIZED */
+		if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED))
+			return -EINVAL;
 		break;
 	case NL80211_IFTYPE_MESH_POINT:
 		/* disallow things mesh doesn't support */
 		if (params.vlan)
-			err = -EINVAL;
+			return -EINVAL;
 		if (params.ht_capa)
-			err = -EINVAL;
+			return -EINVAL;
 		if (params.listen_interval >= 0)
-			err = -EINVAL;
+			return -EINVAL;
+		/*
+		 * No special handling for TDLS here -- the userspace
+		 * mesh code doesn't have this bug.
+		 */
 		if (params.sta_flags_mask &
 				~(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
 				  BIT(NL80211_STA_FLAG_MFP) |
 				  BIT(NL80211_STA_FLAG_AUTHORIZED)))
-			err = -EINVAL;
+			return -EINVAL;
 		break;
 	default:
-		err = -EINVAL;
+		return -EOPNOTSUPP;
 	}
 
-	if (err)
-		goto out;
-
-	if (!rdev->ops->change_station) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
+	/* be aware of params.vlan when changing code here */
 
 	err = rdev->ops->change_station(&rdev->wiphy, dev, mac_addr, &params);
 
- out:
 	if (params.vlan)
 		dev_put(params.vlan);
 
@@ -2711,70 +2725,81 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 		params.plink_action =
 		    nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
 
+	if (!rdev->ops->add_station)
+		return -EOPNOTSUPP;
+
 	if (parse_station_flags(info, &params))
 		return -EINVAL;
 
-	/* parse WME attributes if sta is WME capable */
-	if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
-	    (params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)) &&
-	    info->attrs[NL80211_ATTR_STA_WME]) {
-		struct nlattr *tb[NL80211_STA_WME_MAX + 1];
-		struct nlattr *nla;
+	switch (dev->ieee80211_ptr->iftype) {
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_AP_VLAN:
+	case NL80211_IFTYPE_P2P_GO:
+		/* parse WME attributes if sta is WME capable */
+		if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
+		    (params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)) &&
+		    info->attrs[NL80211_ATTR_STA_WME]) {
+			struct nlattr *tb[NL80211_STA_WME_MAX + 1];
+			struct nlattr *nla;
+
+			nla = info->attrs[NL80211_ATTR_STA_WME];
+			err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla,
+					       nl80211_sta_wme_policy);
+			if (err)
+				return err;
 
-		nla = info->attrs[NL80211_ATTR_STA_WME];
-		err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla,
-				       nl80211_sta_wme_policy);
-		if (err)
-			return err;
+			if (tb[NL80211_STA_WME_UAPSD_QUEUES])
+				params.uapsd_queues =
+				     nla_get_u8(tb[NL80211_STA_WME_UAPSD_QUEUES]);
+			if (params.uapsd_queues &
+					~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK)
+				return -EINVAL;
 
-		if (tb[NL80211_STA_WME_UAPSD_QUEUES])
-			params.uapsd_queues =
-			     nla_get_u8(tb[NL80211_STA_WME_UAPSD_QUEUES]);
-		if (params.uapsd_queues & ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK)
-			return -EINVAL;
+			if (tb[NL80211_STA_WME_MAX_SP])
+				params.max_sp =
+				     nla_get_u8(tb[NL80211_STA_WME_MAX_SP]);
 
-		if (tb[NL80211_STA_WME_MAX_SP])
-			params.max_sp =
-			     nla_get_u8(tb[NL80211_STA_WME_MAX_SP]);
+			if (params.max_sp &
+					~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK)
+				return -EINVAL;
 
-		if (params.max_sp & ~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK)
+			params.sta_modify_mask |= STATION_PARAM_APPLY_UAPSD;
+		}
+		/* TDLS peers cannot be added */
+		if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
 			return -EINVAL;
+		/* but don't bother the driver with it */
+		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
 
-		params.sta_modify_mask |= STATION_PARAM_APPLY_UAPSD;
+		/* must be last in here for error handling */
+		params.vlan = get_vlan(info, rdev);
+		if (IS_ERR(params.vlan))
+			return PTR_ERR(params.vlan);
+		break;
+	case NL80211_IFTYPE_MESH_POINT:
+		/* TDLS peers cannot be added */
+		if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
+			return -EINVAL;
+		break;
+	case NL80211_IFTYPE_STATION:
+		/* Only TDLS peers can be added */
+		if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
+			return -EINVAL;
+		/* Can only add if TDLS ... */
+		if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS))
+			return -EOPNOTSUPP;
+		/* ... with external setup is supported */
+		if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP))
+			return -EOPNOTSUPP;
+		break;
+	default:
+		return -EOPNOTSUPP;
 	}
 
-	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
-	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
-	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT &&
-	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO &&
-	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION)
-		return -EINVAL;
-
-	/*
-	 * Only managed stations can add TDLS peers, and only when the
-	 * wiphy supports external TDLS setup.
-	 */
-	if (dev->ieee80211_ptr->iftype == NL80211_IFTYPE_STATION &&
-	    !((params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) &&
-	      (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) &&
-	      (rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP)))
-		return -EINVAL;
-
-	params.vlan = get_vlan(info, rdev);
-	if (IS_ERR(params.vlan))
-		return PTR_ERR(params.vlan);
-
-	/* validate settings */
-	err = 0;
-
-	if (!rdev->ops->add_station) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
+	/* be aware of params.vlan when changing code here */
 
 	err = rdev->ops->add_station(&rdev->wiphy, dev, mac_addr, &params);
 
- out:
 	if (params.vlan)
 		dev_put(params.vlan);
 	return err;
-- 
cgit v1.2.3


From 8bc1f85c02a20a59956b00b3acea12c04dce9ae8 Mon Sep 17 00:00:00 2001
From: Eugeni Dodonov <eugeni.dodonov@intel.com>
Date: Wed, 23 Nov 2011 16:42:14 -0200
Subject: iommu: Export intel_iommu_enabled to signal when iommu is in use

In i915 driver, we do not enable either rc6 or semaphores on SNB when dmar
is enabled. The new 'intel_iommu_enabled' variable signals when the
iommu code is in operation.

Cc: Ted Phelps <phelps@gnusto.com>
Cc: Peter <pab1612@gmail.com>
Cc: Lukas Hejtmanek <xhejtman@fi.muni.cz>
Cc: Andrew Lutomirski <luto@mit.edu>
CC: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-off-by: Keith Packard <keithp@keithp.com>
---
 drivers/iommu/intel-iommu.c   | 5 +++++
 include/linux/dma_remapping.h | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index c0c7820d4c46..8dc19b8f5d45 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -405,6 +405,9 @@ int dmar_disabled = 0;
 int dmar_disabled = 1;
 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
 
+int intel_iommu_enabled = 0;
+EXPORT_SYMBOL_GPL(intel_iommu_enabled);
+
 static int dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int intel_iommu_strict;
@@ -3647,6 +3650,8 @@ int __init intel_iommu_init(void)
 
 	bus_register_notifier(&pci_bus_type, &device_nb);
 
+	intel_iommu_enabled = 1;
+
 	return 0;
 }
 
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index ef90cbd8e173..57c9a8ae4f2d 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -31,6 +31,7 @@ extern void free_dmar_iommu(struct intel_iommu *iommu);
 extern int iommu_calculate_agaw(struct intel_iommu *iommu);
 extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
 extern int dmar_disabled;
+extern int intel_iommu_enabled;
 #else
 static inline int iommu_calculate_agaw(struct intel_iommu *iommu)
 {
@@ -44,6 +45,7 @@ static inline void free_dmar_iommu(struct intel_iommu *iommu)
 {
 }
 #define dmar_disabled	(1)
+#define intel_iommu_enabled (0)
 #endif
 
 
-- 
cgit v1.2.3


From e7c466e58eb1ff9bf49c2f3902622dc11a8c7022 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:42:42 +0000
Subject: sock_diag: Move the SOCK_DIAG_BY_FAMILY cmd declaration

It should belong to sock_diag, not inet_diag.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 1 -
 include/linux/sock_diag.h | 3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 78972a149dff..a27e62178d43 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -6,7 +6,6 @@
 /* Just some random number */
 #define TCPDIAG_GETSOCK 18
 #define DCCPDIAG_GETSOCK 19
-#define SOCK_DIAG_BY_FAMILY 20
 
 #define INET_DIAG_GETSOCK_MAX 24
 
diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index ba4933b1213b..7999778ad08d 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -1,5 +1,8 @@
 #ifndef __SOCK_DIAG_H__
 #define __SOCK_DIAG_H__
+
+#define SOCK_DIAG_BY_FAMILY 20
+
 struct sk_buff;
 struct nlmsghdr;
 
-- 
cgit v1.2.3


From f65c1b534b99aef1809b893387b295963821549f Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:43:44 +0000
Subject: sock_diag: Generalize requests cookies managements

The sk address is used as a cookie between dump/get_exact calls.
It will be required for unix socket sdumping, so move it from
inet_diag to sock_diag.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h |  1 -
 include/linux/sock_diag.h |  3 +++
 net/core/sock_diag.c      | 19 +++++++++++++++++++
 net/ipv4/inet_diag.c      | 23 ++++-------------------
 net/ipv4/udp_diag.c       |  2 +-
 5 files changed, 27 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index a27e62178d43..afa5d5c74169 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -168,7 +168,6 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 		struct inet_diag_req *req);
 
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
-int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
 extern void inet_diag_unregister(const struct inet_diag_handler *handler);
diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 7999778ad08d..379d5dccf8e1 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -22,5 +22,8 @@ void sock_diag_unregister(struct sock_diag_handler *h);
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 
+int sock_diag_check_cookie(void *sk, __u32 *cookie);
+void sock_diag_save_cookie(void *sk, __u32 *cookie);
+
 extern struct sock *sock_diag_nlsk;
 #endif
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index cee96f368108..711bdefe7753 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -12,6 +12,25 @@ static struct sock_diag_handler *sock_diag_handlers[AF_MAX];
 static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
 static DEFINE_MUTEX(sock_diag_table_mutex);
 
+int sock_diag_check_cookie(void *sk, __u32 *cookie)
+{
+	if ((cookie[0] != INET_DIAG_NOCOOKIE ||
+	     cookie[1] != INET_DIAG_NOCOOKIE) &&
+	    ((u32)(unsigned long)sk != cookie[0] ||
+	     (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1]))
+		return -ESTALE;
+	else
+		return 0;
+}
+EXPORT_SYMBOL_GPL(sock_diag_check_cookie);
+
+void sock_diag_save_cookie(void *sk, __u32 *cookie)
+{
+	cookie[0] = (u32)(unsigned long)sk;
+	cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
+}
+EXPORT_SYMBOL_GPL(sock_diag_save_cookie);
+
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
 {
 	mutex_lock(&sock_diag_table_mutex);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index fa27313765f3..fb2e47ff59f7 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -102,8 +102,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 	r->idiag_retrans = 0;
 
 	r->id.idiag_if = sk->sk_bound_dev_if;
-	r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
-	r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
+	sock_diag_save_cookie(sk, r->id.idiag_cookie);
 
 	r->id.idiag_sport = inet->inet_sport;
 	r->id.idiag_dport = inet->inet_dport;
@@ -221,8 +220,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 	r->idiag_family	      = tw->tw_family;
 	r->idiag_retrans      = 0;
 	r->id.idiag_if	      = tw->tw_bound_dev_if;
-	r->id.idiag_cookie[0] = (u32)(unsigned long)tw;
-	r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1);
+	sock_diag_save_cookie(tw, r->id.idiag_cookie);
 	r->id.idiag_sport     = tw->tw_sport;
 	r->id.idiag_dport     = tw->tw_dport;
 	r->id.idiag_src[0]    = tw->tw_rcv_saddr;
@@ -261,18 +259,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 	return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh);
 }
 
-int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req)
-{
-	if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE ||
-	     req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) &&
-	    ((u32)(unsigned long)sk != req->id.idiag_cookie[0] ||
-	     (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1]))
-		return -ESTALE;
-	else
-		return 0;
-}
-EXPORT_SYMBOL_GPL(inet_diag_check_cookie);
-
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
 		const struct nlmsghdr *nlh, struct inet_diag_req *req)
 {
@@ -304,7 +290,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
 	if (sk == NULL)
 		goto out_nosk;
 
-	err = inet_diag_check_cookie(sk, req);
+	err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
 	if (err)
 		goto out;
 
@@ -617,8 +603,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 	r->idiag_retrans = req->retrans;
 
 	r->id.idiag_if = sk->sk_bound_dev_if;
-	r->id.idiag_cookie[0] = (u32)(unsigned long)req;
-	r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1);
+	sock_diag_save_cookie(req, r->id.idiag_cookie);
 
 	tmo = req->expires - jiffies;
 	if (tmo < 0)
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index fe9db8675acb..69f8a7ca63dd 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -57,7 +57,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
 	if (sk == NULL)
 		goto out_nosk;
 
-	err = inet_diag_check_cookie(sk, req);
+	err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
 	if (err)
 		goto out;
 
-- 
cgit v1.2.3


From 22931d3b906cd0a1726a49a09713f9220a5fab8a Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:44:35 +0000
Subject: unix_diag: Basic module skeleton

Includes basic module_init/_exit functionality, dump/get_exact stubs
and declares the basic API structures for request and response.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h | 24 ++++++++++++++++++++
 net/unix/diag.c           | 57 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 include/linux/unix_diag.h
 create mode 100644 net/unix/diag.c

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
new file mode 100644
index 000000000000..445184a85763
--- /dev/null
+++ b/include/linux/unix_diag.h
@@ -0,0 +1,24 @@
+#ifndef __UNIX_DIAG_H__
+#define __UNIX_DIAG_H__
+
+struct unix_diag_req {
+	__u8	sdiag_family;
+	__u8	sdiag_protocol;
+	__u16	pad;
+	__u32	udiag_states;
+	__u32	udiag_ino;
+	__u32	udiag_show;
+	__u32	udiag_cookie[2];
+};
+
+struct unix_diag_msg {
+	__u8	udiag_family;
+	__u8	udiag_type;
+	__u8	udiag_state;
+	__u8	pad;
+
+	__u32	udiag_ino;
+	__u32	udiag_cookie[2];
+};
+
+#endif
diff --git a/net/unix/diag.c b/net/unix/diag.c
new file mode 100644
index 000000000000..6be16c0ad38f
--- /dev/null
+++ b/net/unix/diag.c
@@ -0,0 +1,57 @@
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/sock_diag.h>
+#include <linux/unix_diag.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/af_unix.h>
+#include <net/tcp_states.h>
+
+#define UNIX_DIAG_PUT(skb, attrtype, attrlen) \
+	RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
+
+static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return 0;
+}
+
+static int unix_diag_get_exact(struct sk_buff *in_skb,
+			       const struct nlmsghdr *nlh,
+			       struct unix_diag_req *req)
+{
+	return -EAFNOSUPPORT;
+}
+
+static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+	int hdrlen = sizeof(struct unix_diag_req);
+
+	if (nlmsg_len(h) < hdrlen)
+		return -EINVAL;
+
+	if (h->nlmsg_flags & NLM_F_DUMP)
+		return netlink_dump_start(sock_diag_nlsk, skb, h,
+					  unix_diag_dump, NULL, 0);
+	else
+		return unix_diag_get_exact(skb, h, (struct unix_diag_req *)NLMSG_DATA(h));
+}
+
+static struct sock_diag_handler unix_diag_handler = {
+	.family = AF_UNIX,
+	.dump = unix_diag_handler_dump,
+};
+
+static int __init unix_diag_init(void)
+{
+	return sock_diag_register(&unix_diag_handler);
+}
+
+static void __exit unix_diag_exit(void)
+{
+	sock_diag_unregister(&unix_diag_handler);
+}
+
+module_init(unix_diag_init);
+module_exit(unix_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 1 /* AF_LOCAL */);
-- 
cgit v1.2.3


From f5248b48a64c221dd6157ab9cbee5a36ee45e6ed Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:45:24 +0000
Subject: unix_diag: Unix socket name NLA

Report the sun_path when requested as NLA. With leading '\0' if
present but without the leading AF_UNIX bits.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h |  8 ++++++++
 net/unix/diag.c           | 20 ++++++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index 445184a85763..cc4df34d4c14 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -11,6 +11,8 @@ struct unix_diag_req {
 	__u32	udiag_cookie[2];
 };
 
+#define UDIAG_SHOW_NAME		0x00000001	/* show name (not path) */
+
 struct unix_diag_msg {
 	__u8	udiag_family;
 	__u8	udiag_type;
@@ -21,4 +23,10 @@ struct unix_diag_msg {
 	__u32	udiag_cookie[2];
 };
 
+enum {
+	UNIX_DIAG_NAME,
+
+	UNIX_DIAG_MAX,
+};
+
 #endif
diff --git a/net/unix/diag.c b/net/unix/diag.c
index d7bd48c49ee5..161ce6c05e31 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -10,6 +10,22 @@
 #define UNIX_DIAG_PUT(skb, attrtype, attrlen) \
 	RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
 
+static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
+{
+	struct unix_address *addr = unix_sk(sk)->addr;
+	char *s;
+
+	if (addr) {
+		s = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short));
+		memcpy(s, addr->name->sun_path, addr->len - sizeof(short));
+	}
+
+	return 0;
+
+rtattr_failure:
+	return -EMSGSIZE;
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
 		u32 pid, u32 seq, u32 flags, int sk_ino)
 {
@@ -28,6 +44,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 	rep->udiag_ino = sk_ino;
 	sock_diag_save_cookie(sk, rep->udiag_cookie);
 
+	if ((req->udiag_show & UDIAG_SHOW_NAME) &&
+			sk_diag_dump_name(sk, skb))
+		goto nlmsg_failure;
+
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-- 
cgit v1.2.3


From 5f7b0569460b7d8d01ca776430a00505a68b7584 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:45:43 +0000
Subject: unix_diag: Unix inode info NLA

Actually, the socket path if it's not anonymous doesn't give
a clue to which file the socket is bound to. Even if the path
is absolute, it can be unlinked and then new socket can be
bound to it.

With this NLA it's possible to check which file a particular
socket is really bound to.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h |  7 +++++++
 net/unix/diag.c           | 21 +++++++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index cc4df34d4c14..3e53adbe9c7f 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -12,6 +12,7 @@ struct unix_diag_req {
 };
 
 #define UDIAG_SHOW_NAME		0x00000001	/* show name (not path) */
+#define UDIAG_SHOW_VFS		0x00000002	/* show VFS inode info */
 
 struct unix_diag_msg {
 	__u8	udiag_family;
@@ -25,8 +26,14 @@ struct unix_diag_msg {
 
 enum {
 	UNIX_DIAG_NAME,
+	UNIX_DIAG_VFS,
 
 	UNIX_DIAG_MAX,
 };
 
+struct unix_diag_vfs {
+	__u32	udiag_vfs_ino;
+	__u32	udiag_vfs_dev;
+};
+
 #endif
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 161ce6c05e31..83799ef19b49 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -26,6 +26,23 @@ rtattr_failure:
 	return -EMSGSIZE;
 }
 
+static int sk_diag_dump_vfs(struct sock *sk, struct sk_buff *nlskb)
+{
+	struct dentry *dentry = unix_sk(sk)->dentry;
+	struct unix_diag_vfs *uv;
+
+	if (dentry) {
+		uv = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_VFS, sizeof(*uv));
+		uv->udiag_vfs_ino = dentry->d_inode->i_ino;
+		uv->udiag_vfs_dev = dentry->d_sb->s_dev;
+	}
+
+	return 0;
+
+rtattr_failure:
+	return -EMSGSIZE;
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
 		u32 pid, u32 seq, u32 flags, int sk_ino)
 {
@@ -48,6 +65,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 			sk_diag_dump_name(sk, skb))
 		goto nlmsg_failure;
 
+	if ((req->udiag_show & UDIAG_SHOW_VFS) &&
+			sk_diag_dump_vfs(sk, skb))
+		goto nlmsg_failure;
+
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-- 
cgit v1.2.3


From ac02be8d96af9f66a4de86781ee9facc2dff99d4 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:45:58 +0000
Subject: unix_diag: Unix peer inode NLA

Report the peer socket inode ID as NLA. With this it's finally
possible to find out the other end of an interesting unix connection.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h |  2 ++
 net/unix/diag.c           | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index 3e53adbe9c7f..2d74a86024ac 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -13,6 +13,7 @@ struct unix_diag_req {
 
 #define UDIAG_SHOW_NAME		0x00000001	/* show name (not path) */
 #define UDIAG_SHOW_VFS		0x00000002	/* show VFS inode info */
+#define UDIAG_SHOW_PEER		0x00000004	/* show peer socket info */
 
 struct unix_diag_msg {
 	__u8	udiag_family;
@@ -27,6 +28,7 @@ struct unix_diag_msg {
 enum {
 	UNIX_DIAG_NAME,
 	UNIX_DIAG_VFS,
+	UNIX_DIAG_PEER,
 
 	UNIX_DIAG_MAX,
 };
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 83799ef19b49..0e0fda786afe 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -43,6 +43,26 @@ rtattr_failure:
 	return -EMSGSIZE;
 }
 
+static int sk_diag_dump_peer(struct sock *sk, struct sk_buff *nlskb)
+{
+	struct sock *peer;
+	int ino;
+
+	peer = unix_peer_get(sk);
+	if (peer) {
+		unix_state_lock(peer);
+		ino = sock_i_ino(peer);
+		unix_state_unlock(peer);
+		sock_put(peer);
+
+		RTA_PUT_U32(nlskb, UNIX_DIAG_PEER, ino);
+	}
+
+	return 0;
+rtattr_failure:
+	return -EMSGSIZE;
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
 		u32 pid, u32 seq, u32 flags, int sk_ino)
 {
@@ -69,6 +89,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 			sk_diag_dump_vfs(sk, skb))
 		goto nlmsg_failure;
 
+	if ((req->udiag_show & UDIAG_SHOW_PEER) &&
+			sk_diag_dump_peer(sk, skb))
+		goto nlmsg_failure;
+
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-- 
cgit v1.2.3


From 2aac7a2cb0d9d8c65fc7dde3e19e46b3e878d23d Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:46:14 +0000
Subject: unix_diag: Pending connections IDs NLA

When establishing a unix connection on stream sockets the
server end receives an skb with socket in its receive queue.

Report who is waiting for these ends to be accepted for
listening sockets via NLA.

There's a lokcing issue with this -- the unix sk state lock is
required to access the peer, and it is taken under the listening
sk's queue lock. Strictly speaking the queue lock should be taken
inside the state lock, but since in this case these two sockets
are different it shouldn't lead to deadlock.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h |  2 ++
 net/unix/diag.c           | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index 2d74a86024ac..03ffb7de15b6 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -14,6 +14,7 @@ struct unix_diag_req {
 #define UDIAG_SHOW_NAME		0x00000001	/* show name (not path) */
 #define UDIAG_SHOW_VFS		0x00000002	/* show VFS inode info */
 #define UDIAG_SHOW_PEER		0x00000004	/* show peer socket info */
+#define UDIAG_SHOW_ICONS	0x00000008	/* show pending connections */
 
 struct unix_diag_msg {
 	__u8	udiag_family;
@@ -29,6 +30,7 @@ enum {
 	UNIX_DIAG_NAME,
 	UNIX_DIAG_VFS,
 	UNIX_DIAG_PEER,
+	UNIX_DIAG_ICONS,
 
 	UNIX_DIAG_MAX,
 };
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 0e0fda786afe..24c7a65d9cb1 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -63,6 +63,41 @@ rtattr_failure:
 	return -EMSGSIZE;
 }
 
+static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
+{
+	struct sk_buff *skb;
+	u32 *buf;
+	int i;
+
+	if (sk->sk_state == TCP_LISTEN) {
+		spin_lock(&sk->sk_receive_queue.lock);
+		buf = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_ICONS, sk->sk_receive_queue.qlen);
+		i = 0;
+		skb_queue_walk(&sk->sk_receive_queue, skb) {
+			struct sock *req, *peer;
+
+			req = skb->sk;
+			/*
+			 * The state lock is outer for the same sk's
+			 * queue lock. With the other's queue locked it's
+			 * OK to lock the state.
+			 */
+			unix_state_lock_nested(req);
+			peer = unix_sk(req)->peer;
+			if (peer)
+				buf[i++] = sock_i_ino(peer);
+			unix_state_unlock(req);
+		}
+		spin_unlock(&sk->sk_receive_queue.lock);
+	}
+
+	return 0;
+
+rtattr_failure:
+	spin_unlock(&sk->sk_receive_queue.lock);
+	return -EMSGSIZE;
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
 		u32 pid, u32 seq, u32 flags, int sk_ino)
 {
@@ -93,6 +128,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 			sk_diag_dump_peer(sk, skb))
 		goto nlmsg_failure;
 
+	if ((req->udiag_show & UDIAG_SHOW_ICONS) &&
+			sk_diag_dump_icons(sk, skb))
+		goto nlmsg_failure;
+
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-- 
cgit v1.2.3


From cbf391958afb9b82c72324a15891eb3102200085 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:46:31 +0000
Subject: unix_diag: Receive queue lenght NLA

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h |  2 ++
 net/unix/diag.c           | 13 +++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index 03ffb7de15b6..3f7afb007d70 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -15,6 +15,7 @@ struct unix_diag_req {
 #define UDIAG_SHOW_VFS		0x00000002	/* show VFS inode info */
 #define UDIAG_SHOW_PEER		0x00000004	/* show peer socket info */
 #define UDIAG_SHOW_ICONS	0x00000008	/* show pending connections */
+#define UDIAG_SHOW_RQLEN	0x00000010	/* show skb receive queue len */
 
 struct unix_diag_msg {
 	__u8	udiag_family;
@@ -31,6 +32,7 @@ enum {
 	UNIX_DIAG_VFS,
 	UNIX_DIAG_PEER,
 	UNIX_DIAG_ICONS,
+	UNIX_DIAG_RQLEN,
 
 	UNIX_DIAG_MAX,
 };
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 24c7a65d9cb1..a5c4aab0380d 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -98,6 +98,15 @@ rtattr_failure:
 	return -EMSGSIZE;
 }
 
+static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
+{
+	RTA_PUT_U32(nlskb, UNIX_DIAG_RQLEN, sk->sk_receive_queue.qlen);
+	return 0;
+
+rtattr_failure:
+	return -EMSGSIZE;
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
 		u32 pid, u32 seq, u32 flags, int sk_ino)
 {
@@ -132,6 +141,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 			sk_diag_dump_icons(sk, skb))
 		goto nlmsg_failure;
 
+	if ((req->udiag_show & UDIAG_SHOW_RQLEN) &&
+			sk_diag_show_rqlen(sk, skb))
+		goto nlmsg_failure;
+
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-- 
cgit v1.2.3


From 14596f7006297b67516e2b6a2b26bcb11fe08fb3 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 15 Dec 2011 13:51:16 +0000
Subject: ethtool: Clarify use of size field for ETHTOOL_GRXFHINDIR

In order to find out the device's RX flow hash table size, ethtool
initially uses ETHTOOL_GRXFHINDIR with a buffer size of zero.  This
must be supported, but it is not necessary to support any other user
buffer size less than the device table size.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 20db5b275c3f..0ec2fd412d03 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -543,8 +543,9 @@ struct compat_ethtool_rxnfc {
 /**
  * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection
  * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR
- * @size: On entry, the array size of the user buffer.  On return from
- *	%ETHTOOL_GRXFHINDIR, the array size of the hardware indirection table.
+ * @size: On entry, the array size of the user buffer, which may be zero
+ *	for %ETHTOOL_GRXFHINDIR.  On return from %ETHTOOL_GRXFHINDIR, the
+ *	array size of the hardware indirection table.
  * @ring_index: RX ring/queue index for each hash value
  */
 struct ethtool_rxfh_indir {
-- 
cgit v1.2.3


From 7850f63f1620512631445b901ae11cd149e7375c Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 15 Dec 2011 13:55:01 +0000
Subject: ethtool: Centralise validation of ETHTOOL_{G, S}RXFHINDIR parameters

Add a new ethtool operation (get_rxfh_indir_size) to get the
indirectional table size.  Use this to validate the user buffer size
before calling get_rxfh_indir or set_rxfh_indir.  Use get_rxnfc to get
the number of RX rings, and validate the contents of the new
indirection table before calling set_rxfh_indir.  Remove this
validation from drivers.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Dimitris Michailidis <dm@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c    | 39 ++++-------
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    | 27 ++++----
 drivers/net/ethernet/sfc/ethtool.c                 | 35 ++++------
 drivers/net/vmxnet3/vmxnet3_ethtool.c              | 35 ++++------
 include/linux/ethtool.h                            | 11 +--
 net/core/ethtool.c                                 | 81 +++++++++++++++-------
 6 files changed, 117 insertions(+), 111 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 90d44af85600..a688b9d975a2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -2302,18 +2302,20 @@ static int bnx2x_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
 	}
 }
 
-static int bnx2x_get_rxfh_indir(struct net_device *dev,
-				struct ethtool_rxfh_indir *indir)
+static u32 bnx2x_get_rxfh_indir_size(struct net_device *dev)
+{
+	struct bnx2x *bp = netdev_priv(dev);
+
+	return (bp->multi_mode == ETH_RSS_MODE_DISABLED ?
+		0 : T_ETH_INDIRECTION_TABLE_SIZE);
+}
+
+static int bnx2x_get_rxfh_indir(struct net_device *dev, u32 *indir)
 {
 	struct bnx2x *bp = netdev_priv(dev);
-	size_t copy_size =
-		min_t(size_t, indir->size, T_ETH_INDIRECTION_TABLE_SIZE);
 	u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0};
 	size_t i;
 
-	if (bp->multi_mode == ETH_RSS_MODE_DISABLED)
-		return -EOPNOTSUPP;
-
 	/* Get the current configuration of the RSS indirection table */
 	bnx2x_get_rss_ind_table(&bp->rss_conf_obj, ind_table);
 
@@ -2326,33 +2328,19 @@ static int bnx2x_get_rxfh_indir(struct net_device *dev,
 	 * align the returned table to the Client ID of the leading RSS
 	 * queue.
 	 */
-	for (i = 0; i < copy_size; i++)
-		indir->ring_index[i] = ind_table[i] - bp->fp->cl_id;
-
-	indir->size = T_ETH_INDIRECTION_TABLE_SIZE;
+	for (i = 0; i < T_ETH_INDIRECTION_TABLE_SIZE; i++)
+		indir[i] = ind_table[i] - bp->fp->cl_id;
 
 	return 0;
 }
 
-static int bnx2x_set_rxfh_indir(struct net_device *dev,
-				const struct ethtool_rxfh_indir *indir)
+static int bnx2x_set_rxfh_indir(struct net_device *dev, const u32 *indir)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	size_t i;
 	u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0};
-	u32 num_eth_queues = BNX2X_NUM_ETH_QUEUES(bp);
-
-	if (bp->multi_mode == ETH_RSS_MODE_DISABLED)
-		return -EOPNOTSUPP;
-
-	/* validate the size */
-	if (indir->size != T_ETH_INDIRECTION_TABLE_SIZE)
-		return -EINVAL;
 
 	for (i = 0; i < T_ETH_INDIRECTION_TABLE_SIZE; i++) {
-		/* validate the indices */
-		if (indir->ring_index[i] >= num_eth_queues)
-			return -EINVAL;
 		/*
 		 * The same as in bnx2x_get_rxfh_indir: we can't use a memcpy()
 		 * as an internal storage of an indirection table is a u8 array
@@ -2362,7 +2350,7 @@ static int bnx2x_set_rxfh_indir(struct net_device *dev,
 		 * align the received table to the Client ID of the leading RSS
 		 * queue
 		 */
-		ind_table[i] = indir->ring_index[i] + bp->fp->cl_id;
+		ind_table[i] = indir[i] + bp->fp->cl_id;
 	}
 
 	return bnx2x_config_rss_pf(bp, ind_table, false);
@@ -2395,6 +2383,7 @@ static const struct ethtool_ops bnx2x_ethtool_ops = {
 	.set_phys_id		= bnx2x_set_phys_id,
 	.get_ethtool_stats	= bnx2x_get_ethtool_stats,
 	.get_rxnfc		= bnx2x_get_rxnfc,
+	.get_rxfh_indir_size	= bnx2x_get_rxfh_indir_size,
 	.get_rxfh_indir		= bnx2x_get_rxfh_indir,
 	.set_rxfh_indir		= bnx2x_set_rxfh_indir,
 };
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index a34e7ce7e214..8ffd55bdef3d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1871,30 +1871,30 @@ static int cxgb_set_features(struct net_device *dev, netdev_features_t features)
 	return err;
 }
 
-static int get_rss_table(struct net_device *dev, struct ethtool_rxfh_indir *p)
+static u32 get_rss_table_size(struct net_device *dev)
 {
 	const struct port_info *pi = netdev_priv(dev);
-	unsigned int n = min_t(unsigned int, p->size, pi->rss_size);
 
-	p->size = pi->rss_size;
+	return pi->rss_size;
+}
+
+static int get_rss_table(struct net_device *dev, u32 *p)
+{
+	const struct port_info *pi = netdev_priv(dev);
+	unsigned int n = pi->rss_size;
+
 	while (n--)
-		p->ring_index[n] = pi->rss[n];
+		p[n] = pi->rss[n];
 	return 0;
 }
 
-static int set_rss_table(struct net_device *dev,
-			 const struct ethtool_rxfh_indir *p)
+static int set_rss_table(struct net_device *dev, const u32 *p)
 {
 	unsigned int i;
 	struct port_info *pi = netdev_priv(dev);
 
-	if (p->size != pi->rss_size)
-		return -EINVAL;
-	for (i = 0; i < p->size; i++)
-		if (p->ring_index[i] >= pi->nqsets)
-			return -EINVAL;
-	for (i = 0; i < p->size; i++)
-		pi->rss[i] = p->ring_index[i];
+	for (i = 0; i < pi->rss_size; i++)
+		pi->rss[i] = p[i];
 	if (pi->adapter->flags & FULL_INIT_DONE)
 		return write_rss(pi, pi->rss);
 	return 0;
@@ -1989,6 +1989,7 @@ static struct ethtool_ops cxgb_ethtool_ops = {
 	.get_wol           = get_wol,
 	.set_wol           = set_wol,
 	.get_rxnfc         = get_rxnfc,
+	.get_rxfh_indir_size = get_rss_table_size,
 	.get_rxfh_indir    = get_rss_table,
 	.set_rxfh_indir    = set_rss_table,
 	.flash_device      = set_flash,
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index f3cd96dfa398..1be51b2bfa42 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -956,40 +956,28 @@ static int efx_ethtool_set_rx_ntuple(struct net_device *net_dev,
 	return rc < 0 ? rc : 0;
 }
 
-static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev,
-				      struct ethtool_rxfh_indir *indir)
+static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
-	size_t copy_size =
-		min_t(size_t, indir->size, ARRAY_SIZE(efx->rx_indir_table));
 
-	if (efx_nic_rev(efx) < EFX_REV_FALCON_B0)
-		return -EOPNOTSUPP;
+	return (efx_nic_rev(efx) < EFX_REV_FALCON_B0 ?
+		0 : ARRAY_SIZE(efx->rx_indir_table));
+}
+
+static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev, u32 *indir)
+{
+	struct efx_nic *efx = netdev_priv(net_dev);
 
-	indir->size = ARRAY_SIZE(efx->rx_indir_table);
-	memcpy(indir->ring_index, efx->rx_indir_table,
-	       copy_size * sizeof(indir->ring_index[0]));
+	memcpy(indir, efx->rx_indir_table, sizeof(efx->rx_indir_table));
 	return 0;
 }
 
 static int efx_ethtool_set_rxfh_indir(struct net_device *net_dev,
-				      const struct ethtool_rxfh_indir *indir)
+				      const u32 *indir)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
-	size_t i;
-
-	if (efx_nic_rev(efx) < EFX_REV_FALCON_B0)
-		return -EOPNOTSUPP;
-
-	/* Validate size and indices */
-	if (indir->size != ARRAY_SIZE(efx->rx_indir_table))
-		return -EINVAL;
-	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
-		if (indir->ring_index[i] >= efx->n_rx_channels)
-			return -EINVAL;
 
-	memcpy(efx->rx_indir_table, indir->ring_index,
-	       sizeof(efx->rx_indir_table));
+	memcpy(efx->rx_indir_table, indir, sizeof(efx->rx_indir_table));
 	efx_nic_push_rx_indir_table(efx);
 	return 0;
 }
@@ -1020,6 +1008,7 @@ const struct ethtool_ops efx_ethtool_ops = {
 	.reset			= efx_ethtool_reset,
 	.get_rxnfc		= efx_ethtool_get_rxnfc,
 	.set_rx_ntuple		= efx_ethtool_set_rx_ntuple,
+	.get_rxfh_indir_size	= efx_ethtool_get_rxfh_indir_size,
 	.get_rxfh_indir		= efx_ethtool_get_rxfh_indir,
 	.set_rxfh_indir		= efx_ethtool_set_rxfh_indir,
 };
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index b492ee1e5f17..a3eb75a62ea9 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -565,44 +565,38 @@ vmxnet3_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info,
 }
 
 #ifdef VMXNET3_RSS
+static u32
+vmxnet3_get_rss_indir_size(struct net_device *netdev)
+{
+	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+	struct UPT1_RSSConf *rssConf = adapter->rss_conf;
+
+	return rssConf->indTableSize;
+}
+
 static int
-vmxnet3_get_rss_indir(struct net_device *netdev,
-		      struct ethtool_rxfh_indir *p)
+vmxnet3_get_rss_indir(struct net_device *netdev, u32 *p)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 	struct UPT1_RSSConf *rssConf = adapter->rss_conf;
-	unsigned int n = min_t(unsigned int, p->size, rssConf->indTableSize);
+	unsigned int n = rssConf->indTableSize;
 
-	p->size = rssConf->indTableSize;
 	while (n--)
-		p->ring_index[n] = rssConf->indTable[n];
+		p[n] = rssConf->indTable[n];
 	return 0;
 
 }
 
 static int
-vmxnet3_set_rss_indir(struct net_device *netdev,
-		      const struct ethtool_rxfh_indir *p)
+vmxnet3_set_rss_indir(struct net_device *netdev, const u32 *p)
 {
 	unsigned int i;
 	unsigned long flags;
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 	struct UPT1_RSSConf *rssConf = adapter->rss_conf;
 
-	if (p->size != rssConf->indTableSize)
-		return -EINVAL;
-	for (i = 0; i < rssConf->indTableSize; i++) {
-		/*
-		 * Return with error code if any of the queue indices
-		 * is out of range
-		 */
-		if (p->ring_index[i] < 0 ||
-		    p->ring_index[i] >= adapter->num_rx_queues)
-			return -EINVAL;
-	}
-
 	for (i = 0; i < rssConf->indTableSize; i++)
-		rssConf->indTable[i] = p->ring_index[i];
+		rssConf->indTable[i] = p[i];
 
 	spin_lock_irqsave(&adapter->cmd_lock, flags);
 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
@@ -629,6 +623,7 @@ static struct ethtool_ops vmxnet3_ethtool_ops = {
 	.set_ringparam     = vmxnet3_set_ringparam,
 	.get_rxnfc         = vmxnet3_get_rxnfc,
 #ifdef VMXNET3_RSS
+	.get_rxfh_indir_size = vmxnet3_get_rss_indir_size,
 	.get_rxfh_indir    = vmxnet3_get_rss_indir,
 	.set_rxfh_indir    = vmxnet3_set_rss_indir,
 #endif
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 0ec2fd412d03..3b9f09d55b5c 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -828,9 +828,13 @@ u32 ethtool_op_get_link(struct net_device *dev);
  *	error code or zero.
  * @set_rx_ntuple: Set an RX n-tuple rule.  Returns a negative error code
  *	or zero.
+ * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table.
+ *	Returns zero if not supported for this specific device.
  * @get_rxfh_indir: Get the contents of the RX flow hash indirection table.
+ *	Will not be called if @get_rxfh_indir_size returns zero.
  *	Returns a negative error code or zero.
  * @set_rxfh_indir: Set the contents of the RX flow hash indirection table.
+ *	Will not be called if @get_rxfh_indir_size returns zero.
  *	Returns a negative error code or zero.
  * @get_channels: Get number of channels.
  * @set_channels: Set number of channels.  Returns a negative error code or
@@ -894,10 +898,9 @@ struct ethtool_ops {
 	int	(*reset)(struct net_device *, u32 *);
 	int	(*set_rx_ntuple)(struct net_device *,
 				 struct ethtool_rx_ntuple *);
-	int	(*get_rxfh_indir)(struct net_device *,
-				  struct ethtool_rxfh_indir *);
-	int	(*set_rxfh_indir)(struct net_device *,
-				  const struct ethtool_rxfh_indir *);
+	u32	(*get_rxfh_indir_size)(struct net_device *);
+	int	(*get_rxfh_indir)(struct net_device *, u32 *);
+	int	(*set_rxfh_indir)(struct net_device *, const u32 *);
 	void	(*get_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*set_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*get_dump_flag)(struct net_device *, struct ethtool_dump *);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 31b0b7f5383e..69f71b86b035 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -515,34 +515,44 @@ err_out:
 static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
 						     void __user *useraddr)
 {
-	struct ethtool_rxfh_indir *indir;
-	u32 table_size;
-	size_t full_size;
+	u32 user_size, dev_size;
+	u32 *indir;
 	int ret;
 
-	if (!dev->ethtool_ops->get_rxfh_indir)
+	if (!dev->ethtool_ops->get_rxfh_indir_size ||
+	    !dev->ethtool_ops->get_rxfh_indir)
+		return -EOPNOTSUPP;
+	dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
+	if (dev_size == 0)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&table_size,
+	if (copy_from_user(&user_size,
 			   useraddr + offsetof(struct ethtool_rxfh_indir, size),
-			   sizeof(table_size)))
+			   sizeof(user_size)))
 		return -EFAULT;
 
-	if (table_size >
-	    (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
-		return -ENOMEM;
-	full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
-	indir = kzalloc(full_size, GFP_USER);
+	if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, size),
+			 &dev_size, sizeof(dev_size)))
+		return -EFAULT;
+
+	/* If the user buffer size is 0, this is just a query for the
+	 * device table size.  Otherwise, if it's smaller than the
+	 * device table size it's an error.
+	 */
+	if (user_size < dev_size)
+		return user_size == 0 ? 0 : -EINVAL;
+
+	indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
 	if (!indir)
 		return -ENOMEM;
 
-	indir->cmd = ETHTOOL_GRXFHINDIR;
-	indir->size = table_size;
 	ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
 	if (ret)
 		goto out;
 
-	if (copy_to_user(useraddr, indir, full_size))
+	if (copy_to_user(useraddr +
+			 offsetof(struct ethtool_rxfh_indir, ring_index[0]),
+			 indir, dev_size * sizeof(indir[0])))
 		ret = -EFAULT;
 
 out:
@@ -553,32 +563,51 @@ out:
 static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 						     void __user *useraddr)
 {
-	struct ethtool_rxfh_indir *indir;
-	u32 table_size;
-	size_t full_size;
+	struct ethtool_rxnfc rx_rings;
+	u32 user_size, dev_size, i;
+	u32 *indir;
 	int ret;
 
-	if (!dev->ethtool_ops->set_rxfh_indir)
+	if (!dev->ethtool_ops->get_rxfh_indir_size ||
+	    !dev->ethtool_ops->set_rxfh_indir ||
+	    !dev->ethtool_ops->get_rxnfc)
+		return -EOPNOTSUPP;
+	dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
+	if (dev_size == 0)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&table_size,
+	if (copy_from_user(&user_size,
 			   useraddr + offsetof(struct ethtool_rxfh_indir, size),
-			   sizeof(table_size)))
+			   sizeof(user_size)))
 		return -EFAULT;
 
-	if (table_size >
-	    (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
-		return -ENOMEM;
-	full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
-	indir = kmalloc(full_size, GFP_USER);
+	if (user_size != dev_size)
+		return -EINVAL;
+
+	indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
 	if (!indir)
 		return -ENOMEM;
 
-	if (copy_from_user(indir, useraddr, full_size)) {
+	if (copy_from_user(indir,
+			   useraddr +
+			   offsetof(struct ethtool_rxfh_indir, ring_index[0]),
+			   dev_size * sizeof(indir[0]))) {
 		ret = -EFAULT;
 		goto out;
 	}
 
+	/* Validate ring indices */
+	rx_rings.cmd = ETHTOOL_GRXRINGS;
+	ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL);
+	if (ret)
+		goto out;
+	for (i = 0; i < dev_size; i++) {
+		if (indir[i] >= rx_rings.data) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
 	ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
 
 out:
-- 
cgit v1.2.3


From 278bc4296bd64ffd1d3913b487dc8a520e423a7a Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 15 Dec 2011 13:56:49 +0000
Subject: ethtool: Define and apply a default policy for RX flow hash
 indirection

All drivers that support modification of the RX flow hash indirection
table initialise it in the same way: RX rings are assigned to table
entries in rotation.  Make that default policy explicit by having them
call a ethtool_rxfh_indir_default() function.

In the ethtool core, add support for a zero size value for
ETHTOOL_SRXFHINDIR, which resets the table to this default.

Partly-suggested-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Shreyas N Bhatewara <sbhatewara@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |  3 ++-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |  2 +-
 drivers/net/ethernet/sfc/efx.c                  |  3 ++-
 drivers/net/vmxnet3/vmxnet3_drv.c               |  3 ++-
 include/linux/ethtool.h                         | 23 ++++++++++++++---
 net/core/ethtool.c                              | 33 +++++++++++++++----------
 6 files changed, 47 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 64f5cf5c68d1..2b731b253598 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1545,7 +1545,8 @@ static inline int bnx2x_init_rss_pf(struct bnx2x *bp)
 	if (bp->multi_mode != ETH_RSS_MODE_DISABLED) {
 		for (i = 0; i < sizeof(ind_table); i++)
 			ind_table[i] =
-				bp->fp->cl_id +	(i % num_eth_queues);
+				bp->fp->cl_id +
+				ethtool_rxfh_indir_default(i, num_eth_queues);
 	}
 
 	/*
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 8ffd55bdef3d..fccbe490c7f0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3449,7 +3449,7 @@ static int __devinit init_rss(struct adapter *adap)
 		if (!pi->rss)
 			return -ENOMEM;
 		for (j = 0; j < pi->rss_size; j++)
-			pi->rss[j] = j % pi->nqsets;
+			pi->rss[j] = ethtool_rxfh_indir_default(j, pi->nqsets);
 	}
 	return 0;
 }
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 14e134d3b4d7..44a82c6c60a7 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1336,7 +1336,8 @@ static int efx_probe_nic(struct efx_nic *efx)
 	if (efx->n_channels > 1)
 		get_random_bytes(&efx->rx_hash_key, sizeof(efx->rx_hash_key));
 	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
-		efx->rx_indir_table[i] = i % efx->n_rx_channels;
+		efx->rx_indir_table[i] =
+			ethtool_rxfh_indir_default(i, efx->n_rx_channels);
 
 	efx_set_channels(efx);
 	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 1c2ae11a9e35..de7fc345148a 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2167,7 +2167,8 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
 		rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
 		get_random_bytes(&rssConf->hashKey[0], rssConf->hashKeySize);
 		for (i = 0; i < rssConf->indTableSize; i++)
-			rssConf->indTable[i] = i % adapter->num_rx_queues;
+			rssConf->indTable[i] = ethtool_rxfh_indir_default(
+				i, adapter->num_rx_queues);
 
 		devRead->rssConfDesc.confVer = 1;
 		devRead->rssConfDesc.confLen = sizeof(*rssConf);
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 3b9f09d55b5c..b38bf69310ee 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -543,10 +543,15 @@ struct compat_ethtool_rxnfc {
 /**
  * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection
  * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR
- * @size: On entry, the array size of the user buffer, which may be zero
- *	for %ETHTOOL_GRXFHINDIR.  On return from %ETHTOOL_GRXFHINDIR, the
- *	array size of the hardware indirection table.
+ * @size: On entry, the array size of the user buffer, which may be zero.
+ *	On return from %ETHTOOL_GRXFHINDIR, the array size of the hardware
+ *	indirection table.
  * @ring_index: RX ring/queue index for each hash value
+ *
+ * For %ETHTOOL_GRXFHINDIR, a @size of zero means that only the size
+ * should be returned.  For %ETHTOOL_SRXFHINDIR, a @size of zero means
+ * the table should be reset to default values.  This last feature
+ * is not supported by the original implementations.
  */
 struct ethtool_rxfh_indir {
 	__u32	cmd;
@@ -749,6 +754,18 @@ struct net_device;
 /* Some generic methods drivers may use in their ethtool_ops */
 u32 ethtool_op_get_link(struct net_device *dev);
 
+/**
+ * ethtool_rxfh_indir_default - get default value for RX flow hash indirection
+ * @index: Index in RX flow hash indirection table
+ * @n_rx_rings: Number of RX rings to use
+ *
+ * This function provides the default policy for RX flow hash indirection.
+ */
+static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
+{
+	return index % n_rx_rings;
+}
+
 /**
  * struct ethtool_ops - optional netdev operations
  * @get_settings: Get various device settings including Ethernet link
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 69f71b86b035..597732c989ca 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -581,31 +581,38 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 			   sizeof(user_size)))
 		return -EFAULT;
 
-	if (user_size != dev_size)
+	if (user_size != 0 && user_size != dev_size)
 		return -EINVAL;
 
 	indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
 	if (!indir)
 		return -ENOMEM;
 
-	if (copy_from_user(indir,
-			   useraddr +
-			   offsetof(struct ethtool_rxfh_indir, ring_index[0]),
-			   dev_size * sizeof(indir[0]))) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	/* Validate ring indices */
 	rx_rings.cmd = ETHTOOL_GRXRINGS;
 	ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL);
 	if (ret)
 		goto out;
-	for (i = 0; i < dev_size; i++) {
-		if (indir[i] >= rx_rings.data) {
-			ret = -EINVAL;
+
+	if (user_size == 0) {
+		for (i = 0; i < dev_size; i++)
+			indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+	} else {
+		if (copy_from_user(indir,
+				  useraddr +
+				  offsetof(struct ethtool_rxfh_indir,
+					   ring_index[0]),
+				  dev_size * sizeof(indir[0]))) {
+			ret = -EFAULT;
 			goto out;
 		}
+
+		/* Validate ring indices */
+		for (i = 0; i < dev_size; i++) {
+			if (indir[i] >= rx_rings.data) {
+				ret = -EINVAL;
+				goto out;
+			}
+		}
 	}
 
 	ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
-- 
cgit v1.2.3


From 54848d73f9f254631303d6eab9b976855988b266 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Tue, 5 Apr 2011 13:21:19 -0600
Subject: writeback: charge leaked page dirties to active tasks

It's a years long problem that a large number of short-lived dirtiers
(eg. gcc instances in a fast kernel build) may starve long-run dirtiers
(eg. dd) as well as pushing the dirty pages to the global hard limit.

The solution is to charge the pages dirtied by the exited gcc to the
other random dirtying tasks. It sounds not perfect, however should
behave good enough in practice, seeing as that throttled tasks aren't
actually running so those that are running are more likely to pick it up
and get throttled, therefore promoting an equal spread.

Randy: fix compile error: 'dirty_throttle_leaks' undeclared in exit.c

Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/writeback.h |  2 ++
 kernel/exit.c             |  3 +++
 mm/page-writeback.c       | 27 +++++++++++++++++++++++++++
 3 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a378c295851f..05eaf5e3aad7 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -7,6 +7,8 @@
 #include <linux/sched.h>
 #include <linux/fs.h>
 
+DECLARE_PER_CPU(int, dirty_throttle_leaks);
+
 /*
  * The 1/4 region under the global dirty thresh is for smooth dirty throttling:
  *
diff --git a/kernel/exit.c b/kernel/exit.c
index d0b7d988f873..d4aac24cc469 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -51,6 +51,7 @@
 #include <trace/events/sched.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/oom.h>
+#include <linux/writeback.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -1037,6 +1038,8 @@ NORET_TYPE void do_exit(long code)
 	validate_creds_for_do_exit(tsk);
 
 	preempt_disable();
+	if (tsk->nr_dirtied)
+		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
 	exit_rcu();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 50f08241f981..619c445fc03c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1214,6 +1214,22 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
 
 static DEFINE_PER_CPU(int, bdp_ratelimits);
 
+/*
+ * Normal tasks are throttled by
+ *	loop {
+ *		dirty tsk->nr_dirtied_pause pages;
+ *		take a snap in balance_dirty_pages();
+ *	}
+ * However there is a worst case. If every task exit immediately when dirtied
+ * (tsk->nr_dirtied_pause - 1) pages, balance_dirty_pages() will never be
+ * called to throttle the page dirties. The solution is to save the not yet
+ * throttled page dirties in dirty_throttle_leaks on task exit and charge them
+ * randomly into the running tasks. This works well for the above worst case,
+ * as the new task will pick up and accumulate the old task's leaked dirty
+ * count and eventually get throttled.
+ */
+DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
+
 /**
  * balance_dirty_pages_ratelimited_nr - balance dirty memory state
  * @mapping: address_space which was dirtied
@@ -1261,6 +1277,17 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
 			ratelimit = 0;
 		}
 	}
+	/*
+	 * Pick up the dirtied pages by the exited tasks. This avoids lots of
+	 * short-lived tasks (eg. gcc invocations in a kernel build) escaping
+	 * the dirty throttling and livelock other long-run dirtiers.
+	 */
+	p = &__get_cpu_var(dirty_throttle_leaks);
+	if (*p > 0 && current->nr_dirtied < ratelimit) {
+		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
+		*p -= nr_pages_dirtied;
+		current->nr_dirtied += nr_pages_dirtied;
+	}
 	preempt_enable();
 
 	if (unlikely(current->nr_dirtied >= ratelimit))
-- 
cgit v1.2.3


From 2f800fbd777b792de54187088df19a7df0251254 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Mon, 8 Aug 2011 15:22:00 -0600
Subject: writeback: fix dirtied pages accounting on redirty

De-account the accumulative dirty counters on page redirty.

Page redirties (very common in ext4) will introduce mismatch between
counters (a) and (b)

a) NR_DIRTIED, BDI_DIRTIED, tsk->nr_dirtied
b) NR_WRITTEN, BDI_WRITTEN

This will introduce systematic errors in balanced_rate and result in
dirty page position errors (ie. the dirty pages are no longer balanced
around the global/bdi setpoints).

Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/writeback.h |  2 ++
 mm/page-writeback.c       | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 05eaf5e3aad7..b30419cd425e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -197,6 +197,8 @@ void writeback_set_ratelimit(void);
 void tag_pages_for_writeback(struct address_space *mapping,
 			     pgoff_t start, pgoff_t end);
 
+void account_page_redirty(struct page *page);
+
 /* pdflush.c */
 extern int nr_pdflush_threads;	/* Global so it can be exported to sysctl
 				   read-only. */
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5d1ef5d8613a..96b3e7aa705c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1824,6 +1824,24 @@ int __set_page_dirty_nobuffers(struct page *page)
 }
 EXPORT_SYMBOL(__set_page_dirty_nobuffers);
 
+/*
+ * Call this whenever redirtying a page, to de-account the dirty counters
+ * (NR_DIRTIED, BDI_DIRTIED, tsk->nr_dirtied), so that they match the written
+ * counters (NR_WRITTEN, BDI_WRITTEN) in long term. The mismatches will lead to
+ * systematic errors in balanced_dirty_ratelimit and the dirty pages position
+ * control.
+ */
+void account_page_redirty(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	if (mapping && mapping_cap_account_dirty(mapping)) {
+		current->nr_dirtied--;
+		dec_zone_page_state(page, NR_DIRTIED);
+		dec_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
+	}
+}
+EXPORT_SYMBOL(account_page_redirty);
+
 /*
  * When a writepage implementation decides that it doesn't want to write this
  * page for some reason, it should redirty the locked page via
@@ -1832,6 +1850,7 @@ EXPORT_SYMBOL(__set_page_dirty_nobuffers);
 int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
 {
 	wbc->pages_skipped++;
+	account_page_redirty(page);
 	return __set_page_dirty_nobuffers(page);
 }
 EXPORT_SYMBOL(redirty_page_for_writepage);
-- 
cgit v1.2.3


From 83712358ba0a1497ce59a4f84ce4dd0f803fe6fc Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Sat, 11 Jun 2011 19:25:42 -0600
Subject: writeback: dirty ratelimit - think time compensation

Compensate the task's think time when computing the final pause time,
so that ->dirty_ratelimit can be executed accurately.

        think time := time spend outside of balance_dirty_pages()

In the rare case that the task slept longer than the 200ms period time
(result in negative pause time), the sleep time will be compensated in
the following periods, too, if it's less than 1 second.

Accumulated errors are carefully avoided as long as the max pause area
is not hitted.

Pseudo code:

        period = pages_dirtied / task_ratelimit;
        think = jiffies - dirty_paused_when;
        pause = period - think;

1) normal case: period > think

        pause = period - think
        dirty_paused_when = jiffies + pause
        nr_dirtied = 0

                             period time
              |===============================>|
                  think time      pause time
              |===============>|==============>|
        ------|----------------|---------------|------------------------
        dirty_paused_when   jiffies

2) no pause case: period <= think

        don't pause; reduce future pause time by:
        dirty_paused_when += period
        nr_dirtied = 0

                           period time
              |===============================>|
                                  think time
              |===================================================>|
        ------|--------------------------------+-------------------|----
        dirty_paused_when                                       jiffies

Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/sched.h            |  1 +
 include/trace/events/writeback.h | 14 +++++++++++---
 kernel/fork.c                    |  1 +
 mm/page-writeback.c              | 36 ++++++++++++++++++++++++++++++++----
 4 files changed, 45 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c4f3e9b9bc5..984c3b295978 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1527,6 +1527,7 @@ struct task_struct {
 	 */
 	int nr_dirtied;
 	int nr_dirtied_pause;
+	unsigned long dirty_paused_when; /* start of a write-and-pause period */
 
 #ifdef CONFIG_LATENCYTOP
 	int latency_record_count;
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 99d1d0decf88..8588a8918023 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -300,12 +300,13 @@ TRACE_EVENT(balance_dirty_pages,
 		 unsigned long dirty_ratelimit,
 		 unsigned long task_ratelimit,
 		 unsigned long dirtied,
+		 unsigned long period,
 		 long pause,
 		 unsigned long start_time),
 
 	TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
 		dirty_ratelimit, task_ratelimit,
-		dirtied, pause, start_time),
+		dirtied, period, pause, start_time),
 
 	TP_STRUCT__entry(
 		__array(	 char,	bdi, 32)
@@ -320,6 +321,8 @@ TRACE_EVENT(balance_dirty_pages,
 		__field(unsigned int,	dirtied_pause)
 		__field(unsigned long,	paused)
 		__field(	 long,	pause)
+		__field(unsigned long,	period)
+		__field(	 long,	think)
 	),
 
 	TP_fast_assign(
@@ -336,6 +339,9 @@ TRACE_EVENT(balance_dirty_pages,
 		__entry->task_ratelimit	= KBps(task_ratelimit);
 		__entry->dirtied	= dirtied;
 		__entry->dirtied_pause	= current->nr_dirtied_pause;
+		__entry->think		= current->dirty_paused_when == 0 ? 0 :
+			 (long)(jiffies - current->dirty_paused_when) * 1000/HZ;
+		__entry->period		= period * 1000 / HZ;
 		__entry->pause		= pause * 1000 / HZ;
 		__entry->paused		= (jiffies - start_time) * 1000 / HZ;
 	),
@@ -346,7 +352,7 @@ TRACE_EVENT(balance_dirty_pages,
 		  "bdi_setpoint=%lu bdi_dirty=%lu "
 		  "dirty_ratelimit=%lu task_ratelimit=%lu "
 		  "dirtied=%u dirtied_pause=%u "
-		  "paused=%lu pause=%ld",
+		  "paused=%lu pause=%ld period=%lu think=%ld",
 		  __entry->bdi,
 		  __entry->limit,
 		  __entry->setpoint,
@@ -358,7 +364,9 @@ TRACE_EVENT(balance_dirty_pages,
 		  __entry->dirtied,
 		  __entry->dirtied_pause,
 		  __entry->paused,	/* ms */
-		  __entry->pause	/* ms */
+		  __entry->pause,	/* ms */
+		  __entry->period,	/* ms */
+		  __entry->think	/* ms */
 	  )
 );
 
diff --git a/kernel/fork.c b/kernel/fork.c
index da4a6a10d088..f8668cf6a32d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1296,6 +1296,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	p->nr_dirtied = 0;
 	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
+	p->dirty_paused_when = 0;
 
 	/*
 	 * Ok, make it visible to the rest of the system.
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 96b3e7aa705c..491932155825 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1016,6 +1016,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
+	long period;
 	long pause = 0;
 	long uninitialized_var(max_pause);
 	bool dirty_exceeded = false;
@@ -1026,6 +1027,8 @@ static void balance_dirty_pages(struct address_space *mapping,
 	unsigned long start_time = jiffies;
 
 	for (;;) {
+		unsigned long now = jiffies;
+
 		/*
 		 * Unstable writes are a feature of certain networked
 		 * filesystems (i.e. NFS) in which data may have been
@@ -1045,8 +1048,11 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 */
 		freerun = dirty_freerun_ceiling(dirty_thresh,
 						background_thresh);
-		if (nr_dirty <= freerun)
+		if (nr_dirty <= freerun) {
+			current->dirty_paused_when = now;
+			current->nr_dirtied = 0;
 			break;
+		}
 
 		if (unlikely(!writeback_in_progress(bdi)))
 			bdi_start_background_writeback(bdi);
@@ -1104,10 +1110,21 @@ static void balance_dirty_pages(struct address_space *mapping,
 		task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >>
 							RATELIMIT_CALC_SHIFT;
 		if (unlikely(task_ratelimit == 0)) {
+			period = max_pause;
 			pause = max_pause;
 			goto pause;
 		}
-		pause = HZ * pages_dirtied / task_ratelimit;
+		period = HZ * pages_dirtied / task_ratelimit;
+		pause = period;
+		if (current->dirty_paused_when)
+			pause -= now - current->dirty_paused_when;
+		/*
+		 * For less than 1s think time (ext3/4 may block the dirtier
+		 * for up to 800ms from time to time on 1-HDD; so does xfs,
+		 * however at much less frequency), try to compensate it in
+		 * future periods by updating the virtual time; otherwise just
+		 * do a reset, as it may be a light dirtier.
+		 */
 		if (unlikely(pause <= 0)) {
 			trace_balance_dirty_pages(bdi,
 						  dirty_thresh,
@@ -1118,8 +1135,16 @@ static void balance_dirty_pages(struct address_space *mapping,
 						  dirty_ratelimit,
 						  task_ratelimit,
 						  pages_dirtied,
+						  period,
 						  pause,
 						  start_time);
+			if (pause < -HZ) {
+				current->dirty_paused_when = now;
+				current->nr_dirtied = 0;
+			} else if (period) {
+				current->dirty_paused_when += period;
+				current->nr_dirtied = 0;
+			}
 			pause = 1; /* avoid resetting nr_dirtied_pause below */
 			break;
 		}
@@ -1135,11 +1160,15 @@ pause:
 					  dirty_ratelimit,
 					  task_ratelimit,
 					  pages_dirtied,
+					  period,
 					  pause,
 					  start_time);
 		__set_current_state(TASK_KILLABLE);
 		io_schedule_timeout(pause);
 
+		current->dirty_paused_when = now + pause;
+		current->nr_dirtied = 0;
+
 		/*
 		 * This is typically equal to (nr_dirty < dirty_thresh) and can
 		 * also keep "1000+ dd on a slow USB stick" under control.
@@ -1167,11 +1196,10 @@ pause:
 	if (!dirty_exceeded && bdi->dirty_exceeded)
 		bdi->dirty_exceeded = 0;
 
-	current->nr_dirtied = 0;
 	if (pause == 0) { /* in freerun area */
 		current->nr_dirtied_pause =
 				dirty_poll_interval(nr_dirty, dirty_thresh);
-	} else if (pause <= max_pause / 4 &&
+	} else if (period <= max_pause / 4 &&
 		   pages_dirtied >= current->nr_dirtied_pause) {
 		current->nr_dirtied_pause = clamp_val(
 					dirty_ratelimit * (max_pause / 2) / HZ,
-- 
cgit v1.2.3


From a7250db36308424ae040f1b2eeb5bfd0cbee0b0d Mon Sep 17 00:00:00 2001
From: Yu Xu <yuxu@marvell.com>
Date: Mon, 19 Dec 2011 17:33:03 +0800
Subject: usb: gadget: enlarge maxburst bit width.

For super speed bulk transfer, the max burst size
is 16, so that 4 bits of maxburst cannot store it.

Signed-off-by: Yu Xu <yuxu@marvell.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/gadget.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 317d8925387c..4d99805bcbb7 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -164,7 +164,7 @@ struct usb_ep {
 	unsigned		maxpacket:16;
 	unsigned		max_streams:16;
 	unsigned		mult:2;
-	unsigned		maxburst:4;
+	unsigned		maxburst:5;
 	u8			address;
 	const struct usb_endpoint_descriptor	*desc;
 	const struct usb_ss_ep_comp_descriptor	*comp_desc;
-- 
cgit v1.2.3


From b1b73d095084e754562961c443aa8f6587a55f8e Mon Sep 17 00:00:00 2001
From: Kusanagi Kouichi <slash@ac.auone-net.jp>
Date: Mon, 19 Dec 2011 18:13:19 +0900
Subject: time/clocksource: Fix kernel-doc warnings

Fix various KernelDoc build warnings.

Signed-off-by: Kusanagi Kouichi <slash@ac.auone-net.jp>
Cc: John Stultz <johnstul@us.ibm.com>
Link: http://lkml.kernel.org/r/20111219091320.0D5AF6FC03D@msa105.auone-net.jp
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/clocksource.h | 11 ++++++++---
 kernel/time/clocksource.c   | 12 +++++++++---
 2 files changed, 17 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index c86c940d1de3..081147da0564 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -71,7 +71,7 @@ struct timecounter {
 
 /**
  * cyclecounter_cyc2ns - converts cycle counter cycles to nanoseconds
- * @tc:		Pointer to cycle counter.
+ * @cc:		Pointer to cycle counter.
  * @cycles:	Cycles
  *
  * XXX - This could use some mult_lxl_ll() asm optimization. Same code
@@ -114,7 +114,7 @@ extern u64 timecounter_read(struct timecounter *tc);
  *                        time base as values returned by
  *                        timecounter_read()
  * @tc:		Pointer to time counter.
- * @cycle:	a value returned by tc->cc->read()
+ * @cycle_tstamp:	a value returned by tc->cc->read()
  *
  * Cycle counts that are converted correctly as long as they
  * fall into the interval [-1/2 max cycle count, +1/2 max cycle count],
@@ -156,11 +156,12 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
  * @mult:		cycle to nanosecond multiplier
  * @shift:		cycle to nanosecond divisor (power of two)
  * @max_idle_ns:	max idle time permitted by the clocksource (nsecs)
- * @maxadj		maximum adjustment value to mult (~11%)
+ * @maxadj:		maximum adjustment value to mult (~11%)
  * @flags:		flags describing special properties
  * @archdata:		arch-specific data
  * @suspend:		suspend function for the clocksource, if necessary
  * @resume:		resume function for the clocksource, if necessary
+ * @cycle_last:		most recent cycle counter value seen by ::read()
  */
 struct clocksource {
 	/*
@@ -187,6 +188,7 @@ struct clocksource {
 	void (*suspend)(struct clocksource *cs);
 	void (*resume)(struct clocksource *cs);
 
+	/* private: */
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 	/* Watchdog related data, used by the framework */
 	struct list_head wd_list;
@@ -261,6 +263,9 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant)
 
 /**
  * clocksource_cyc2ns - converts clocksource cycles to nanoseconds
+ * @cycles:	cycles
+ * @mult:	cycle to nanosecond multiplier
+ * @shift:	cycle to nanosecond divisor (power of two)
  *
  * Converts cycles to nanoseconds, using the given mult and shift.
  *
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index da2f760e780c..d3ad022136e5 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -647,7 +647,7 @@ static void clocksource_enqueue(struct clocksource *cs)
 
 /**
  * __clocksource_updatefreq_scale - Used update clocksource with new freq
- * @t:		clocksource to be registered
+ * @cs:		clocksource to be registered
  * @scale:	Scale factor multiplied against freq to get clocksource hz
  * @freq:	clocksource frequency (cycles per second) divided by scale
  *
@@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
 
 /**
  * __clocksource_register_scale - Used to install new clocksources
- * @t:		clocksource to be registered
+ * @cs:		clocksource to be registered
  * @scale:	Scale factor multiplied against freq to get clocksource hz
  * @freq:	clocksource frequency (cycles per second) divided by scale
  *
@@ -727,7 +727,7 @@ EXPORT_SYMBOL_GPL(__clocksource_register_scale);
 
 /**
  * clocksource_register - Used to install new clocksources
- * @t:		clocksource to be registered
+ * @cs:		clocksource to be registered
  *
  * Returns -EBUSY if registration fails, zero otherwise.
  */
@@ -761,6 +761,8 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)
 
 /**
  * clocksource_change_rating - Change the rating of a registered clocksource
+ * @cs:		clocksource to be changed
+ * @rating:	new rating
  */
 void clocksource_change_rating(struct clocksource *cs, int rating)
 {
@@ -772,6 +774,7 @@ EXPORT_SYMBOL(clocksource_change_rating);
 
 /**
  * clocksource_unregister - remove a registered clocksource
+ * @cs:	clocksource to be unregistered
  */
 void clocksource_unregister(struct clocksource *cs)
 {
@@ -787,6 +790,7 @@ EXPORT_SYMBOL(clocksource_unregister);
 /**
  * sysfs_show_current_clocksources - sysfs interface for current clocksource
  * @dev:	unused
+ * @attr:	unused
  * @buf:	char buffer to be filled with clocksource list
  *
  * Provides sysfs interface for listing current clocksource.
@@ -807,6 +811,7 @@ sysfs_show_current_clocksources(struct sys_device *dev,
 /**
  * sysfs_override_clocksource - interface for manually overriding clocksource
  * @dev:	unused
+ * @attr:	unused
  * @buf:	name of override clocksource
  * @count:	length of buffer
  *
@@ -842,6 +847,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
 /**
  * sysfs_show_available_clocksources - sysfs interface for listing clocksource
  * @dev:	unused
+ * @attr:	unused
  * @buf:	char buffer to be filled with clocksource list
  *
  * Provides sysfs interface for listing registered clocksources
-- 
cgit v1.2.3


From a85e1d55974646a442d95911e3f7d7a891ea9ac5 Mon Sep 17 00:00:00 2001
From: Paul Stewart <pstew@chromium.org>
Date: Fri, 9 Dec 2011 11:01:49 -0800
Subject: cfg80211: Return beacon loss count in station

If station info contains a beacon loss count, return
it to userspace.

Signed-off-by: Paul Stewart <pstew@chromium.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 2 ++
 include/net/cfg80211.h  | 7 ++++++-
 net/mac80211/cfg.c      | 4 +++-
 net/mac80211/mlme.c     | 8 ++++++++
 net/mac80211/sta_info.h | 2 ++
 net/wireless/nl80211.c  | 3 +++
 6 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index f795cb7dccdd..0f5ff3739820 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1655,6 +1655,7 @@ enum nl80211_sta_bss_param {
  *     containing info as possible, see &enum nl80211_sta_bss_param
  * @NL80211_STA_INFO_CONNECTED_TIME: time since the station is last connected
  * @NL80211_STA_INFO_STA_FLAGS: Contains a struct nl80211_sta_flag_update.
+ * @NL80211_STA_INFO_BEACON_LOSS: count of times beacon loss was detected (u32)
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  */
@@ -1677,6 +1678,7 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_BSS_PARAM,
 	NL80211_STA_INFO_CONNECTED_TIME,
 	NL80211_STA_INFO_STA_FLAGS,
+	NL80211_STA_INFO_BEACON_LOSS,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9f85fca0b676..15f4be7d768e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -505,6 +505,7 @@ struct station_parameters {
  * @STATION_INFO_CONNECTED_TIME: @connected_time filled
  * @STATION_INFO_ASSOC_REQ_IES: @assoc_req_ies filled
  * @STATION_INFO_STA_FLAGS: @sta_flags filled
+ * @STATION_INFO_BEACON_LOSS_COUNT: @beacon_loss_count filled
  */
 enum station_info_flags {
 	STATION_INFO_INACTIVE_TIME	= 1<<0,
@@ -525,7 +526,8 @@ enum station_info_flags {
 	STATION_INFO_BSS_PARAM          = 1<<15,
 	STATION_INFO_CONNECTED_TIME	= 1<<16,
 	STATION_INFO_ASSOC_REQ_IES	= 1<<17,
-	STATION_INFO_STA_FLAGS		= 1<<18
+	STATION_INFO_STA_FLAGS		= 1<<18,
+	STATION_INFO_BEACON_LOSS_COUNT	= 1<<19
 };
 
 /**
@@ -623,6 +625,7 @@ struct sta_bss_parameters {
  *	the cfg80211_new_sta() calls to notify user space of the IEs.
  * @assoc_req_ies_len: Length of assoc_req_ies buffer in octets.
  * @sta_flags: station flags mask & values
+ * @beacon_loss_count: Number of times beacon loss event has triggered.
  */
 struct station_info {
 	u32 filled;
@@ -650,6 +653,8 @@ struct station_info {
 	const u8 *assoc_req_ies;
 	size_t assoc_req_ies_len;
 
+	u32 beacon_loss_count;
+
 	/*
 	 * Note: Add a new enum station_info_flags value for each new field and
 	 * use it to check which fields are initialized.
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 66ad9d9af87f..850bb96bd680 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -355,7 +355,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 			STATION_INFO_RX_DROP_MISC |
 			STATION_INFO_BSS_PARAM |
 			STATION_INFO_CONNECTED_TIME |
-			STATION_INFO_STA_FLAGS;
+			STATION_INFO_STA_FLAGS |
+			STATION_INFO_BEACON_LOSS_COUNT;
 
 	do_posix_clock_monotonic_gettime(&uptime);
 	sinfo->connected_time = uptime.tv_sec - sta->last_connected;
@@ -368,6 +369,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 	sinfo->tx_retries = sta->tx_retry_count;
 	sinfo->tx_failed = sta->tx_retry_failed;
 	sinfo->rx_dropped_misc = sta->rx_dropped;
+	sinfo->beacon_loss_count = sta->beacon_loss_count;
 
 	if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
 	    (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a984f1f60ddb..57989a046fca 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1381,6 +1381,14 @@ void ieee80211_beacon_connection_loss_work(struct work_struct *work)
 	struct ieee80211_sub_if_data *sdata =
 		container_of(work, struct ieee80211_sub_if_data,
 			     u.mgd.beacon_connection_loss_work);
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	struct sta_info *sta;
+
+	if (ifmgd->associated) {
+		sta = sta_info_get(sdata, ifmgd->bssid);
+		if (sta)
+			sta->beacon_loss_count++;
+	}
 
 	if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
 		__ieee80211_connection_loss(sdata);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index dee284290464..6f77f12dc3fc 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -275,6 +275,7 @@ struct sta_ampdu_mlme {
  *	EAP frames before association
  * @sta: station information we share with the driver
  * @sta_state: duplicates information about station state (for debug)
+ * @beacon_loss_count: number of times beacon loss has triggered
  */
 struct sta_info {
 	/* General information, mostly static */
@@ -367,6 +368,7 @@ struct sta_info {
 #endif
 
 	unsigned int lost_packets;
+	unsigned int beacon_loss_count;
 
 	/* should be right in front of sta to be in the same cache line */
 	bool dummy;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b07c4fc4ae22..b3d3cf8931cb 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2390,6 +2390,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 	if (sinfo->filled & STATION_INFO_TX_FAILED)
 		NLA_PUT_U32(msg, NL80211_STA_INFO_TX_FAILED,
 			    sinfo->tx_failed);
+	if (sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT)
+		NLA_PUT_U32(msg, NL80211_STA_INFO_BEACON_LOSS,
+			    sinfo->beacon_loss_count);
 	if (sinfo->filled & STATION_INFO_BSS_PARAM) {
 		bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM);
 		if (!bss_param)
-- 
cgit v1.2.3


From 58a60168d12c4e5be21c29420a3de4a41ef3470f Mon Sep 17 00:00:00 2001
From: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Date: Mon, 19 Dec 2011 04:00:26 +0000
Subject: mlx4: capability for link sensing

For ConnectX3 devices, we allow link sensing only if FW explicitly
reported it supports the feature.
For older versions (ConnectX1 and 2), if the card supports both link layer types
(Ethenet and Infiniband), link sensing is supported.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 10 ++++++++--
 include/linux/mlx4/device.h               |  3 ++-
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index b969bfb569e3..8f7314394cc2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -274,6 +274,10 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
 	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
 
+	/* Sense port always allowed on supported devices for ConnectX1 and 2 */
+	if (dev->pdev->device != 0x1003)
+		dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
+
 	dev->caps.log_num_macs  = log_num_mac;
 	dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
 	dev->caps.log_num_prios = use_prio ? 3 : 0;
@@ -311,7 +315,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		}
 		dev->caps.possible_type[i] = dev->caps.port_type[i];
 		mlx4_priv(dev)->sense.sense_allowed[i] =
-			dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO;
+			((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
+			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));
 
 		if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
 			dev->caps.log_num_macs = dev_cap->log_max_macs[i];
@@ -583,7 +588,8 @@ static ssize_t set_port_type(struct device *dev,
 			types[i] = mdev->caps.port_type[i+1];
 	}
 
-	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
+	    !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
 		for (i = 1; i <= mdev->caps.num_ports; i++) {
 			if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
 				mdev->caps.possible_type[i] = mdev->caps.port_type[i];
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 5f784ff6a36e..b06a44ba1565 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -94,7 +94,8 @@ enum {
 	MLX4_DEV_CAP_FLAG_UDP_RSS	= 1LL << 40,
 	MLX4_DEV_CAP_FLAG_VEP_UC_STEER	= 1LL << 41,
 	MLX4_DEV_CAP_FLAG_VEP_MC_STEER	= 1LL << 42,
-	MLX4_DEV_CAP_FLAG_COUNTERS	= 1LL << 48
+	MLX4_DEV_CAP_FLAG_COUNTERS	= 1LL << 48,
+	MLX4_DEV_CAP_FLAG_SENSE_SUPPORT	= 1LL << 55
 };
 
 #define MLX4_ATTR_EXTENDED_PORT_INFO	cpu_to_be16(0xff90)
-- 
cgit v1.2.3


From 8d0fc7b61191c9433a4f738987b89e1d962eb637 Mon Sep 17 00:00:00 2001
From: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Date: Mon, 19 Dec 2011 04:00:34 +0000
Subject: mlx4_core: Changing link sensing logic

New FW can give clues to driver regarding default port type
and whether or not we should default to link sensing on the port.

2 bits are added to QUERY_PORT command:
1. suggested_type: This bit gives a hint whether the default port type should be
   IB or Ethernet.
   The driver will use this hint in case the user didn't specify explicitly the link layer
   type he wants to set.
2. default_sense: If this bit is set, we would sense the port type on start-up
   and default the port to link sensing

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c   |  2 ++
 drivers/net/ethernet/mellanox/mlx4/fw.h   |  2 ++
 drivers/net/ethernet/mellanox/mlx4/main.c | 50 +++++++++++++++++++++----------
 include/linux/mlx4/device.h               |  2 ++
 4 files changed, 40 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index f03b54e0aa53..abefcc86e2d1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -577,6 +577,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 
 			MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET);
 			dev_cap->supported_port_types[i] = field & 3;
+			dev_cap->suggested_type[i] = (field >> 3) & 1;
+			dev_cap->default_sense[i] = (field >> 4) & 1;
 			MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET);
 			dev_cap->ib_mtu[i]	   = field & 0xf;
 			MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 3368363a8ec5..119e0cc9fab3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -111,6 +111,8 @@ struct mlx4_dev_cap {
 	u64 max_icm_sz;
 	int max_gso_sz;
 	u8  supported_port_types[MLX4_MAX_PORTS + 1];
+	u8  suggested_type[MLX4_MAX_PORTS + 1];
+	u8  default_sense[MLX4_MAX_PORTS + 1];
 	u8  log_max_macs[MLX4_MAX_PORTS + 1];
 	u8  log_max_vlans[MLX4_MAX_PORTS + 1];
 	u32 max_counters;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 8f7314394cc2..e984ded2249f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -130,10 +130,11 @@ int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
 MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");
 
-static int port_type_array[2] = {1, 1};
+static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
 static int arr_argc = 2;
 module_param_array(port_type_array, int, &arr_argc, 0444);
-MODULE_PARM_DESC(port_type_array, "Array of port types: IB by default");
+MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default "
+				"1 for IB, 2 for Ethernet");
 
 struct mlx4_port_config {
 	struct list_head list;
@@ -225,6 +226,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev->caps.eth_mtu_cap[i]    = dev_cap->eth_mtu[i];
 		dev->caps.def_mac[i]        = dev_cap->def_mac[i];
 		dev->caps.supported_type[i] = dev_cap->supported_port_types[i];
+		dev->caps.suggested_type[i] = dev_cap->suggested_type[i];
+		dev->caps.default_sense[i] = dev_cap->default_sense[i];
 		dev->caps.trans_type[i]	    = dev_cap->trans_type[i];
 		dev->caps.vendor_oui[i]     = dev_cap->vendor_oui[i];
 		dev->caps.wavelength[i]     = dev_cap->wavelength[i];
@@ -302,22 +305,43 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 			 * first of all check if SRIOV is on */
 			} else if (dev->flags & MLX4_FLAG_SRIOV)
 				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
-			/* if IB and ETH are supported and SRIOV is off
-			 * use module parameters */
 			else {
-				if (port_type_array[i-1])
-					dev->caps.port_type[i] =
-						MLX4_PORT_TYPE_IB;
+				/* In non-SRIOV mode, we set the port type
+				 * according to user selection of port type,
+				 * if usere selected none, take the FW hint */
+				if (port_type_array[i-1] == MLX4_PORT_TYPE_NONE)
+					dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
+						MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
 				else
-					dev->caps.port_type[i] =
-						MLX4_PORT_TYPE_ETH;
+					dev->caps.port_type[i] = port_type_array[i-1];
 			}
 		}
-		dev->caps.possible_type[i] = dev->caps.port_type[i];
+		/*
+		 * Link sensing is allowed on the port if 3 conditions are true:
+		 * 1. Both protocols are supported on the port.
+		 * 2. Different types are supported on the port
+		 * 3. FW declared that it supports link sensing
+		 */
 		mlx4_priv(dev)->sense.sense_allowed[i] =
 			((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
+			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
 			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));
 
+		/*
+		 * If "default_sense" bit is set, we move the port to "AUTO" mode
+		 * and perform sense_port FW command to try and set the correct
+		 * port type from beginning
+		 */
+		if (mlx4_priv(dev)->sense.sense_allowed && dev->caps.default_sense[i]) {
+			enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
+			dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
+			mlx4_SENSE_PORT(dev, i, &sensed_port);
+			if (sensed_port != MLX4_PORT_TYPE_NONE)
+				dev->caps.port_type[i] = sensed_port;
+		} else {
+			dev->caps.possible_type[i] = dev->caps.port_type[i];
+		}
+
 		if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
 			dev->caps.log_num_macs = dev_cap->log_max_macs[i];
 			mlx4_warn(dev, "Requested number of MACs is too much "
@@ -1329,12 +1353,6 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
 
 	if (!mlx4_is_slave(dev)) {
 		for (port = 1; port <= dev->caps.num_ports; port++) {
-			if (!mlx4_is_mfunc(dev)) {
-				enum mlx4_port_type port_type = 0;
-				mlx4_SENSE_PORT(dev, port, &port_type);
-				if (port_type)
-					dev->caps.port_type[port] = port_type;
-			}
 			ib_port_default_caps = 0;
 			err = mlx4_get_port_ib_caps(dev, port,
 						    &ib_port_default_caps);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b06a44ba1565..5c4fe8e5bfe5 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -303,6 +303,8 @@ struct mlx4_caps {
 	int                     log_num_prios;
 	enum mlx4_port_type	port_type[MLX4_MAX_PORTS + 1];
 	u8			supported_type[MLX4_MAX_PORTS + 1];
+	u8                      suggested_type[MLX4_MAX_PORTS + 1];
+	u8                      default_sense[MLX4_MAX_PORTS + 1];
 	u32			port_mask[MLX4_MAX_PORTS + 1];
 	enum mlx4_port_type	possible_type[MLX4_MAX_PORTS + 1];
 	u32			max_counters;
-- 
cgit v1.2.3


From 447f219190bf0368b8b36cf60155744cb43510df Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 19 Dec 2011 15:04:41 -0500
Subject: Revert "net: Remove unused neighbour layer ops."

This reverts commit 5c3ddec73d01a1fae9409c197078cb02c42238c3.

S390 qeth driver actually still uses the setup ops.

Reported-by: Frank Blaschka <blaschka@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 include/net/neighbour.h   |  1 +
 net/core/neighbour.c      | 10 ++++++++++
 3 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6b9d4edb7c26..603730804da5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -974,6 +974,7 @@ struct net_device_ops {
 	int			(*ndo_set_features)(struct net_device *dev,
 						    netdev_features_t features);
 	int			(*ndo_neigh_construct)(struct neighbour *n);
+	void			(*ndo_neigh_destroy)(struct neighbour *n);
 };
 
 /*
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 6814c4d61c1c..e31f0a86f9b7 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -43,6 +43,7 @@ struct neigh_parms {
 #endif
 	struct net_device *dev;
 	struct neigh_parms *next;
+	int	(*neigh_setup)(struct neighbour *);
 	void	(*neigh_cleanup)(struct neighbour *);
 	struct neigh_table *tbl;
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d57a40a2598c..4af151e1bf5d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -497,6 +497,13 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 		}
 	}
 
+	/* Device specific setup. */
+	if (n->parms->neigh_setup &&
+	    (error = n->parms->neigh_setup(n)) < 0) {
+		rc = ERR_PTR(error);
+		goto out_neigh_release;
+	}
+
 	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
 
 	write_lock_bh(&tbl->lock);
@@ -710,6 +717,9 @@ void neigh_destroy(struct neighbour *neigh)
 	skb_queue_purge(&neigh->arp_queue);
 	neigh->arp_queue_len_bytes = 0;
 
+	if (dev->netdev_ops->ndo_neigh_destroy)
+		dev->netdev_ops->ndo_neigh_destroy(neigh);
+
 	dev_put(dev);
 	neigh_parms_put(neigh->parms);
 
-- 
cgit v1.2.3


From fb21c2f42879c05c76ea9e249b6905fc729f8529 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Dec 2011 14:02:26 +0100
Subject: fbdev: Add FOURCC-based format configuration API

This API will be used to support YUV frame buffer formats in a standard
way.

Last but not least, create a much needed fbdev API documentation and
document the format setting APIs.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
---
 Documentation/fb/api.txt | 306 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/video/fbmem.c    |  14 +++
 include/linux/fb.h       |  14 ++-
 3 files changed, 330 insertions(+), 4 deletions(-)
 create mode 100644 Documentation/fb/api.txt

(limited to 'include/linux')

diff --git a/Documentation/fb/api.txt b/Documentation/fb/api.txt
new file mode 100644
index 000000000000..d4ff7de85700
--- /dev/null
+++ b/Documentation/fb/api.txt
@@ -0,0 +1,306 @@
+			The Frame Buffer Device API
+			---------------------------
+
+Last revised: June 21, 2011
+
+
+0. Introduction
+---------------
+
+This document describes the frame buffer API used by applications to interact
+with frame buffer devices. In-kernel APIs between device drivers and the frame
+buffer core are not described.
+
+Due to a lack of documentation in the original frame buffer API, drivers
+behaviours differ in subtle (and not so subtle) ways. This document describes
+the recommended API implementation, but applications should be prepared to
+deal with different behaviours.
+
+
+1. Capabilities
+---------------
+
+Device and driver capabilities are reported in the fixed screen information
+capabilities field.
+
+struct fb_fix_screeninfo {
+	...
+	__u16 capabilities;		/* see FB_CAP_*			*/
+	...
+};
+
+Application should use those capabilities to find out what features they can
+expect from the device and driver.
+
+- FB_CAP_FOURCC
+
+The driver supports the four character code (FOURCC) based format setting API.
+When supported, formats are configured using a FOURCC instead of manually
+specifying color components layout.
+
+
+2. Types and visuals
+--------------------
+
+Pixels are stored in memory in hardware-dependent formats. Applications need
+to be aware of the pixel storage format in order to write image data to the
+frame buffer memory in the format expected by the hardware.
+
+Formats are described by frame buffer types and visuals. Some visuals require
+additional information, which are stored in the variable screen information
+bits_per_pixel, grayscale, red, green, blue and transp fields.
+
+Visuals describe how color information is encoded and assembled to create
+macropixels. Types describe how macropixels are stored in memory. The following
+types and visuals are supported.
+
+- FB_TYPE_PACKED_PIXELS
+
+Macropixels are stored contiguously in a single plane. If the number of bits
+per macropixel is not a multiple of 8, whether macropixels are padded to the
+next multiple of 8 bits or packed together into bytes depends on the visual.
+
+Padding at end of lines may be present and is then reported through the fixed
+screen information line_length field.
+
+- FB_TYPE_PLANES
+
+Macropixels are split across multiple planes. The number of planes is equal to
+the number of bits per macropixel, with plane i'th storing i'th bit from all
+macropixels.
+
+Planes are located contiguously in memory.
+
+- FB_TYPE_INTERLEAVED_PLANES
+
+Macropixels are split across multiple planes. The number of planes is equal to
+the number of bits per macropixel, with plane i'th storing i'th bit from all
+macropixels.
+
+Planes are interleaved in memory. The interleave factor, defined as the
+distance in bytes between the beginning of two consecutive interleaved blocks
+belonging to different planes, is stored in the fixed screen information
+type_aux field.
+
+- FB_TYPE_FOURCC
+
+Macropixels are stored in memory as described by the format FOURCC identifier
+stored in the variable screen information grayscale field.
+
+- FB_VISUAL_MONO01
+
+Pixels are black or white and stored on a number of bits (typically one)
+specified by the variable screen information bpp field.
+
+Black pixels are represented by all bits set to 1 and white pixels by all bits
+set to 0. When the number of bits per pixel is smaller than 8, several pixels
+are packed together in a byte.
+
+FB_VISUAL_MONO01 is currently used with FB_TYPE_PACKED_PIXELS only.
+
+- FB_VISUAL_MONO10
+
+Pixels are black or white and stored on a number of bits (typically one)
+specified by the variable screen information bpp field.
+
+Black pixels are represented by all bits set to 0 and white pixels by all bits
+set to 1. When the number of bits per pixel is smaller than 8, several pixels
+are packed together in a byte.
+
+FB_VISUAL_MONO01 is currently used with FB_TYPE_PACKED_PIXELS only.
+
+- FB_VISUAL_TRUECOLOR
+
+Pixels are broken into red, green and blue components, and each component
+indexes a read-only lookup table for the corresponding value. Lookup tables
+are device-dependent, and provide linear or non-linear ramps.
+
+Each component is stored in a macropixel according to the variable screen
+information red, green, blue and transp fields.
+
+- FB_VISUAL_PSEUDOCOLOR and FB_VISUAL_STATIC_PSEUDOCOLOR
+
+Pixel values are encoded as indices into a colormap that stores red, green and
+blue components. The colormap is read-only for FB_VISUAL_STATIC_PSEUDOCOLOR
+and read-write for FB_VISUAL_PSEUDOCOLOR.
+
+Each pixel value is stored in the number of bits reported by the variable
+screen information bits_per_pixel field.
+
+- FB_VISUAL_DIRECTCOLOR
+
+Pixels are broken into red, green and blue components, and each component
+indexes a programmable lookup table for the corresponding value.
+
+Each component is stored in a macropixel according to the variable screen
+information red, green, blue and transp fields.
+
+- FB_VISUAL_FOURCC
+
+Pixels are encoded and  interpreted as described by the format FOURCC
+identifier stored in the variable screen information grayscale field.
+
+
+3. Screen information
+---------------------
+
+Screen information are queried by applications using the FBIOGET_FSCREENINFO
+and FBIOGET_VSCREENINFO ioctls. Those ioctls take a pointer to a
+fb_fix_screeninfo and fb_var_screeninfo structure respectively.
+
+struct fb_fix_screeninfo stores device independent unchangeable information
+about the frame buffer device and the current format. Those information can't
+be directly modified by applications, but can be changed by the driver when an
+application modifies the format.
+
+struct fb_fix_screeninfo {
+	char id[16];			/* identification string eg "TT Builtin" */
+	unsigned long smem_start;	/* Start of frame buffer mem */
+					/* (physical address) */
+	__u32 smem_len;			/* Length of frame buffer mem */
+	__u32 type;			/* see FB_TYPE_*		*/
+	__u32 type_aux;			/* Interleave for interleaved Planes */
+	__u32 visual;			/* see FB_VISUAL_*		*/
+	__u16 xpanstep;			/* zero if no hardware panning  */
+	__u16 ypanstep;			/* zero if no hardware panning  */
+	__u16 ywrapstep;		/* zero if no hardware ywrap    */
+	__u32 line_length;		/* length of a line in bytes    */
+	unsigned long mmio_start;	/* Start of Memory Mapped I/O   */
+					/* (physical address) */
+	__u32 mmio_len;			/* Length of Memory Mapped I/O  */
+	__u32 accel;			/* Indicate to driver which	*/
+					/*  specific chip/card we have	*/
+	__u16 capabilities;		/* see FB_CAP_*			*/
+	__u16 reserved[2];		/* Reserved for future compatibility */
+};
+
+struct fb_var_screeninfo stores device independent changeable information
+about a frame buffer device, its current format and video mode, as well as
+other miscellaneous parameters.
+
+struct fb_var_screeninfo {
+	__u32 xres;			/* visible resolution		*/
+	__u32 yres;
+	__u32 xres_virtual;		/* virtual resolution		*/
+	__u32 yres_virtual;
+	__u32 xoffset;			/* offset from virtual to visible */
+	__u32 yoffset;			/* resolution			*/
+
+	__u32 bits_per_pixel;		/* guess what			*/
+	__u32 grayscale;		/* 0 = color, 1 = grayscale,	*/
+					/* >1 = FOURCC			*/
+	struct fb_bitfield red;		/* bitfield in fb mem if true color, */
+	struct fb_bitfield green;	/* else only length is significant */
+	struct fb_bitfield blue;
+	struct fb_bitfield transp;	/* transparency			*/
+
+	__u32 nonstd;			/* != 0 Non standard pixel format */
+
+	__u32 activate;			/* see FB_ACTIVATE_*		*/
+
+	__u32 height;			/* height of picture in mm    */
+	__u32 width;			/* width of picture in mm     */
+
+	__u32 accel_flags;		/* (OBSOLETE) see fb_info.flags */
+
+	/* Timing: All values in pixclocks, except pixclock (of course) */
+	__u32 pixclock;			/* pixel clock in ps (pico seconds) */
+	__u32 left_margin;		/* time from sync to picture	*/
+	__u32 right_margin;		/* time from picture to sync	*/
+	__u32 upper_margin;		/* time from sync to picture	*/
+	__u32 lower_margin;
+	__u32 hsync_len;		/* length of horizontal sync	*/
+	__u32 vsync_len;		/* length of vertical sync	*/
+	__u32 sync;			/* see FB_SYNC_*		*/
+	__u32 vmode;			/* see FB_VMODE_*		*/
+	__u32 rotate;			/* angle we rotate counter clockwise */
+	__u32 colorspace;		/* colorspace for FOURCC-based modes */
+	__u32 reserved[4];		/* Reserved for future compatibility */
+};
+
+To modify variable information, applications call the FBIOPUT_VSCREENINFO
+ioctl with a pointer to a fb_var_screeninfo structure. If the call is
+successful, the driver will update the fixed screen information accordingly.
+
+Instead of filling the complete fb_var_screeninfo structure manually,
+applications should call the FBIOGET_VSCREENINFO ioctl and modify only the
+fields they care about.
+
+
+4. Format configuration
+-----------------------
+
+Frame buffer devices offer two ways to configure the frame buffer format: the
+legacy API and the FOURCC-based API.
+
+
+The legacy API has been the only frame buffer format configuration API for a
+long time and is thus widely used by application. It is the recommended API
+for applications when using RGB and grayscale formats, as well as legacy
+non-standard formats.
+
+To select a format, applications set the fb_var_screeninfo bits_per_pixel field
+to the desired frame buffer depth. Values up to 8 will usually map to
+monochrome, grayscale or pseudocolor visuals, although this is not required.
+
+- For grayscale formats, applications set the grayscale field to one. The red,
+  blue, green and transp fields must be set to 0 by applications and ignored by
+  drivers. Drivers must fill the red, blue and green offsets to 0 and lengths
+  to the bits_per_pixel value.
+
+- For pseudocolor formats, applications set the grayscale field to zero. The
+  red, blue, green and transp fields must be set to 0 by applications and
+  ignored by drivers. Drivers must fill the red, blue and green offsets to 0
+  and lengths to the bits_per_pixel value.
+
+- For truecolor and directcolor formats, applications set the grayscale field
+  to zero, and the red, blue, green and transp fields to describe the layout of
+  color components in memory.
+
+struct fb_bitfield {
+	__u32 offset;			/* beginning of bitfield	*/
+	__u32 length;			/* length of bitfield		*/
+	__u32 msb_right;		/* != 0 : Most significant bit is */
+					/* right */
+};
+
+  Pixel values are bits_per_pixel wide and are split in non-overlapping red,
+  green, blue and alpha (transparency) components. Location and size of each
+  component in the pixel value are described by the fb_bitfield offset and
+  length fields. Offset are computed from the right.
+
+  Pixels are always stored in an integer number of bytes. If the number of
+  bits per pixel is not a multiple of 8, pixel values are padded to the next
+  multiple of 8 bits.
+
+Upon successful format configuration, drivers update the fb_fix_screeninfo
+type, visual and line_length fields depending on the selected format.
+
+
+The FOURCC-based API replaces format descriptions by four character codes
+(FOURCC). FOURCCs are abstract identifiers that uniquely define a format
+without explicitly describing it. This is the only API that supports YUV
+formats. Drivers are also encouraged to implement the FOURCC-based API for RGB
+and grayscale formats.
+
+Drivers that support the FOURCC-based API report this capability by setting
+the FB_CAP_FOURCC bit in the fb_fix_screeninfo capabilities field.
+
+FOURCC definitions are located in the linux/videodev2.h header. However, and
+despite starting with the V4L2_PIX_FMT_prefix, they are not restricted to V4L2
+and don't require usage of the V4L2 subsystem. FOURCC documentation is
+available in Documentation/DocBook/v4l/pixfmt.xml.
+
+To select a format, applications set the grayscale field to the desired FOURCC.
+For YUV formats, they should also select the appropriate colorspace by setting
+the colorspace field to one of the colorspaces listed in linux/videodev2.h and
+documented in Documentation/DocBook/v4l/colorspaces.xml.
+
+The red, green, blue and transp fields are not used with the FOURCC-based API.
+For forward compatibility reasons applications must zero those fields, and
+drivers must ignore them. Values other than 0 may get a meaning in future
+extensions.
+
+Upon successful format configuration, drivers update the fb_fix_screeninfo
+type, visual and line_length fields depending on the selected format. The type
+and visual fields are set to FB_TYPE_FOURCC and FB_VISUAL_FOURCC respectively.
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index ad936295d8f4..ac9141b85356 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -967,6 +967,20 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
 	    memcmp(&info->var, var, sizeof(struct fb_var_screeninfo))) {
 		u32 activate = var->activate;
 
+		/* When using FOURCC mode, make sure the red, green, blue and
+		 * transp fields are set to 0.
+		 */
+		if ((info->fix.capabilities & FB_CAP_FOURCC) &&
+		    var->grayscale > 1) {
+			if (var->red.offset     || var->green.offset    ||
+			    var->blue.offset    || var->transp.offset   ||
+			    var->red.length     || var->green.length    ||
+			    var->blue.length    || var->transp.length   ||
+			    var->red.msb_right  || var->green.msb_right ||
+			    var->blue.msb_right || var->transp.msb_right)
+				return -EINVAL;
+		}
+
 		if (!info->fbops->fb_check_var) {
 			*var = info->var;
 			goto done;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 1d6836c498dd..c18122f40543 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -45,6 +45,7 @@
 #define FB_TYPE_INTERLEAVED_PLANES	2	/* Interleaved planes	*/
 #define FB_TYPE_TEXT			3	/* Text/attributes	*/
 #define FB_TYPE_VGA_PLANES		4	/* EGA/VGA planes	*/
+#define FB_TYPE_FOURCC			5	/* Type identified by a V4L2 FOURCC */
 
 #define FB_AUX_TEXT_MDA		0	/* Monochrome text */
 #define FB_AUX_TEXT_CGA		1	/* CGA/EGA/VGA Color text */
@@ -69,6 +70,7 @@
 #define FB_VISUAL_PSEUDOCOLOR		3	/* Pseudo color (like atari) */
 #define FB_VISUAL_DIRECTCOLOR		4	/* Direct color */
 #define FB_VISUAL_STATIC_PSEUDOCOLOR	5	/* Pseudo color readonly */
+#define FB_VISUAL_FOURCC		6	/* Visual identified by a V4L2 FOURCC */
 
 #define FB_ACCEL_NONE		0	/* no hardware accelerator	*/
 #define FB_ACCEL_ATARIBLITT	1	/* Atari Blitter		*/
@@ -154,6 +156,8 @@
 
 #define FB_ACCEL_PUV3_UNIGFX	0xa0	/* PKUnity-v3 Unigfx		*/
 
+#define FB_CAP_FOURCC		1	/* Device supports FOURCC-based formats */
+
 struct fb_fix_screeninfo {
 	char id[16];			/* identification string eg "TT Builtin" */
 	unsigned long smem_start;	/* Start of frame buffer mem */
@@ -171,7 +175,8 @@ struct fb_fix_screeninfo {
 	__u32 mmio_len;			/* Length of Memory Mapped I/O  */
 	__u32 accel;			/* Indicate to driver which	*/
 					/*  specific chip/card we have	*/
-	__u16 reserved[3];		/* Reserved for future compatibility */
+	__u16 capabilities;		/* see FB_CAP_*			*/
+	__u16 reserved[2];		/* Reserved for future compatibility */
 };
 
 /* Interpretation of offset for color fields: All offsets are from the right,
@@ -246,8 +251,8 @@ struct fb_var_screeninfo {
 	__u32 yoffset;			/* resolution			*/
 
 	__u32 bits_per_pixel;		/* guess what			*/
-	__u32 grayscale;		/* != 0 Graylevels instead of colors */
-
+	__u32 grayscale;		/* 0 = color, 1 = grayscale,	*/
+					/* >1 = FOURCC			*/
 	struct fb_bitfield red;		/* bitfield in fb mem if true color, */
 	struct fb_bitfield green;	/* else only length is significant */
 	struct fb_bitfield blue;
@@ -273,7 +278,8 @@ struct fb_var_screeninfo {
 	__u32 sync;			/* see FB_SYNC_*		*/
 	__u32 vmode;			/* see FB_VMODE_*		*/
 	__u32 rotate;			/* angle we rotate counter clockwise */
-	__u32 reserved[5];		/* Reserved for future compatibility */
+	__u32 colorspace;		/* colorspace for FOURCC-based modes */
+	__u32 reserved[4];		/* Reserved for future compatibility */
 };
 
 struct fb_cmap {
-- 
cgit v1.2.3


From 0b9eabd77f4867232a9ac6ca54fa39607b0c9bc7 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Dec 2011 14:02:27 +0100
Subject: v4l: Add V4L2_PIX_FMT_NV24 and V4L2_PIX_FMT_NV42 formats

NV24 and NV42 are planar YCbCr 4:4:4 and YCrCb 4:4:4 formats with a
luma plane followed by an interleaved chroma plane.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
---
 Documentation/DocBook/media/v4l/pixfmt-nv24.xml | 121 ++++++++++++++++++++++++
 Documentation/DocBook/media/v4l/pixfmt.xml      |   1 +
 include/linux/videodev2.h                       |   2 +
 3 files changed, 124 insertions(+)
 create mode 100644 Documentation/DocBook/media/v4l/pixfmt-nv24.xml

(limited to 'include/linux')

diff --git a/Documentation/DocBook/media/v4l/pixfmt-nv24.xml b/Documentation/DocBook/media/v4l/pixfmt-nv24.xml
new file mode 100644
index 000000000000..fb255f2ca9dd
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-nv24.xml
@@ -0,0 +1,121 @@
+    <refentry>
+      <refmeta>
+	<refentrytitle>V4L2_PIX_FMT_NV24 ('NV24'), V4L2_PIX_FMT_NV42 ('NV42')</refentrytitle>
+	&manvol;
+      </refmeta>
+      <refnamediv>
+	<refname id="V4L2-PIX-FMT-NV24"><constant>V4L2_PIX_FMT_NV24</constant></refname>
+	<refname id="V4L2-PIX-FMT-NV42"><constant>V4L2_PIX_FMT_NV42</constant></refname>
+	<refpurpose>Formats with full horizontal and vertical
+chroma resolutions, also known as YUV 4:4:4. One luminance and one
+chrominance plane with alternating chroma samples as opposed to
+<constant>V4L2_PIX_FMT_YVU420</constant></refpurpose>
+      </refnamediv>
+      <refsect1>
+	<title>Description</title>
+
+	<para>These are two-plane versions of the YUV 4:4:4 format. The three
+	components are separated into two sub-images or planes. The Y plane is
+	first, with each Y sample stored in one byte per pixel. For
+	<constant>V4L2_PIX_FMT_NV24</constant>, a combined CbCr plane
+	immediately follows the Y plane in memory. The CbCr plane has the same
+	width and height, in pixels, as the Y plane (and the image). Each line
+	contains one CbCr pair per pixel, with each Cb and Cr sample stored in
+	one byte. <constant>V4L2_PIX_FMT_NV42</constant> is the same except that
+	the Cb and Cr samples are swapped, the CrCb plane starts with a Cr
+	sample.</para>
+
+	<para>If the Y plane has pad bytes after each row, then the CbCr plane
+	has twice as many pad bytes after its rows.</para>
+
+	<example>
+	  <title><constant>V4L2_PIX_FMT_NV24</constant> 4 &times; 4
+pixel image</title>
+
+	  <formalpara>
+	    <title>Byte Order.</title>
+	    <para>Each cell is one byte.
+		<informaltable frame="none">
+		<tgroup cols="9" align="center">
+		  <colspec align="left" colwidth="2*" />
+		  <tbody valign="top">
+		    <row>
+		      <entry>start&nbsp;+&nbsp;0:</entry>
+		      <entry>Y'<subscript>00</subscript></entry>
+		      <entry>Y'<subscript>01</subscript></entry>
+		      <entry>Y'<subscript>02</subscript></entry>
+		      <entry>Y'<subscript>03</subscript></entry>
+		    </row>
+		    <row>
+		      <entry>start&nbsp;+&nbsp;4:</entry>
+		      <entry>Y'<subscript>10</subscript></entry>
+		      <entry>Y'<subscript>11</subscript></entry>
+		      <entry>Y'<subscript>12</subscript></entry>
+		      <entry>Y'<subscript>13</subscript></entry>
+		    </row>
+		    <row>
+		      <entry>start&nbsp;+&nbsp;8:</entry>
+		      <entry>Y'<subscript>20</subscript></entry>
+		      <entry>Y'<subscript>21</subscript></entry>
+		      <entry>Y'<subscript>22</subscript></entry>
+		      <entry>Y'<subscript>23</subscript></entry>
+		    </row>
+		    <row>
+		      <entry>start&nbsp;+&nbsp;12:</entry>
+		      <entry>Y'<subscript>30</subscript></entry>
+		      <entry>Y'<subscript>31</subscript></entry>
+		      <entry>Y'<subscript>32</subscript></entry>
+		      <entry>Y'<subscript>33</subscript></entry>
+		    </row>
+		    <row>
+		      <entry>start&nbsp;+&nbsp;16:</entry>
+		      <entry>Cb<subscript>00</subscript></entry>
+		      <entry>Cr<subscript>00</subscript></entry>
+		      <entry>Cb<subscript>01</subscript></entry>
+		      <entry>Cr<subscript>01</subscript></entry>
+		      <entry>Cb<subscript>02</subscript></entry>
+		      <entry>Cr<subscript>02</subscript></entry>
+		      <entry>Cb<subscript>03</subscript></entry>
+		      <entry>Cr<subscript>03</subscript></entry>
+		    </row>
+		    <row>
+		      <entry>start&nbsp;+&nbsp;24:</entry>
+		      <entry>Cb<subscript>10</subscript></entry>
+		      <entry>Cr<subscript>10</subscript></entry>
+		      <entry>Cb<subscript>11</subscript></entry>
+		      <entry>Cr<subscript>11</subscript></entry>
+		      <entry>Cb<subscript>12</subscript></entry>
+		      <entry>Cr<subscript>12</subscript></entry>
+		      <entry>Cb<subscript>13</subscript></entry>
+		      <entry>Cr<subscript>13</subscript></entry>
+		    </row>
+		    <row>
+		      <entry>start&nbsp;+&nbsp;32:</entry>
+		      <entry>Cb<subscript>20</subscript></entry>
+		      <entry>Cr<subscript>20</subscript></entry>
+		      <entry>Cb<subscript>21</subscript></entry>
+		      <entry>Cr<subscript>21</subscript></entry>
+		      <entry>Cb<subscript>22</subscript></entry>
+		      <entry>Cr<subscript>22</subscript></entry>
+		      <entry>Cb<subscript>23</subscript></entry>
+		      <entry>Cr<subscript>23</subscript></entry>
+		    </row>
+		    <row>
+		      <entry>start&nbsp;+&nbsp;40:</entry>
+		      <entry>Cb<subscript>30</subscript></entry>
+		      <entry>Cr<subscript>30</subscript></entry>
+		      <entry>Cb<subscript>31</subscript></entry>
+		      <entry>Cr<subscript>31</subscript></entry>
+		      <entry>Cb<subscript>32</subscript></entry>
+		      <entry>Cr<subscript>32</subscript></entry>
+		      <entry>Cb<subscript>33</subscript></entry>
+		      <entry>Cr<subscript>33</subscript></entry>
+		    </row>
+		  </tbody>
+		</tgroup>
+		</informaltable>
+	      </para>
+	  </formalpara>
+	</example>
+      </refsect1>
+    </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml
index 2ff6b7776d7f..aef4615fb07b 100644
--- a/Documentation/DocBook/media/v4l/pixfmt.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt.xml
@@ -714,6 +714,7 @@ information.</para>
     &sub-nv12m;
     &sub-nv12mt;
     &sub-nv16;
+    &sub-nv24;
     &sub-m420;
   </section>
 
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 4b752d5ee80e..d2f74f8e3fe3 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -343,6 +343,8 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_NV21    v4l2_fourcc('N', 'V', '2', '1') /* 12  Y/CrCb 4:2:0  */
 #define V4L2_PIX_FMT_NV16    v4l2_fourcc('N', 'V', '1', '6') /* 16  Y/CbCr 4:2:2  */
 #define V4L2_PIX_FMT_NV61    v4l2_fourcc('N', 'V', '6', '1') /* 16  Y/CrCb 4:2:2  */
+#define V4L2_PIX_FMT_NV24    v4l2_fourcc('N', 'V', '2', '4') /* 24  Y/CbCr 4:4:4  */
+#define V4L2_PIX_FMT_NV42    v4l2_fourcc('N', 'V', '4', '2') /* 24  Y/CrCb 4:4:4  */
 
 /* two non contiguous planes - one Y, one Cr + Cb interleaved  */
 #define V4L2_PIX_FMT_NV12M   v4l2_fourcc('N', 'M', '1', '2') /* 12  Y/CbCr 4:2:0  */
-- 
cgit v1.2.3


From 114d6e9c103736487c967060d0a7aec9a7fce967 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 19 Dec 2011 11:32:56 -0800
Subject: security: update security_file_mmap() docs

This documents the fields added to security_file_mmap() that were
introduced in ed0321895182ffb6ecf210e066d87911b270d587.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 051d4e96cb1f..16bb52a65fa3 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -590,6 +590,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@reqprot contains the protection requested by the application.
  *	@prot contains the protection that will be applied by the kernel.
  *	@flags contains the operational flags.
+ *	@addr contains virtual address that will be used for the operation.
+ *	@addr_only contains a boolean: 0 if file-backed VMA, otherwise 1.
  *	Return 0 if permission is granted.
  * @file_mprotect:
  *	Check permissions before changing memory access permissions.
-- 
cgit v1.2.3


From 5e6c86b017691230b6b47f19b7d5449997e8a0b8 Mon Sep 17 00:00:00 2001
From: Neil Zhang <zhangwm@marvell.com>
Date: Tue, 20 Dec 2011 13:20:21 +0800
Subject: usb: gadget: mv_udc: drop ARCH dependency

This patch do the following things:
1. Change the Kconfig information.
2. Rename the driver name.
3. Don't do any type cast to io memory.
4. Add dummy stub for clk framework.

Signed-off-by: Neil Zhang <zhangwm@marvell.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/Kconfig           | 10 +++++-----
 drivers/usb/gadget/Makefile          |  2 +-
 drivers/usb/gadget/mv_udc.h          |  2 +-
 drivers/usb/gadget/mv_udc_core.c     | 17 ++++++++---------
 include/linux/platform_data/mv_usb.h | 12 ++++++++++--
 5 files changed, 25 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 953b00cd59ff..f1a5409e99f7 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -309,13 +309,13 @@ config USB_S3C_HSUDC
 
 	  This driver has been tested on S3C2416 and S3C2450 processors.
 
-config USB_PXA_U2O
-	tristate "PXA9xx Processor USB2.0 controller"
-	depends on ARCH_MMP
+config USB_MV_UDC
+	tristate "Marvell USB2.0 Device Controller"
 	select USB_GADGET_DUALSPEED
 	help
-	  PXA9xx Processor series include a high speed USB2.0 device
-	  controller, which support high speed and full speed USB peripheral.
+	  Marvell Socs (including PXA and MMP series) include a high speed
+	  USB2.0 OTG controller, which can be configured as high speed or
+	  full speed USB peripheral.
 
 config USB_GADGET_DWC3
 	tristate "DesignWare USB3.0 (DRD) Controller"
diff --git a/drivers/usb/gadget/Makefile b/drivers/usb/gadget/Makefile
index b54ac6190890..b7f6eefc3927 100644
--- a/drivers/usb/gadget/Makefile
+++ b/drivers/usb/gadget/Makefile
@@ -27,7 +27,7 @@ obj-$(CONFIG_USB_S3C_HSOTG)	+= s3c-hsotg.o
 obj-$(CONFIG_USB_S3C_HSUDC)	+= s3c-hsudc.o
 obj-$(CONFIG_USB_LANGWELL)	+= langwell_udc.o
 obj-$(CONFIG_USB_EG20T)		+= pch_udc.o
-obj-$(CONFIG_USB_PXA_U2O)	+= mv_udc.o
+obj-$(CONFIG_USB_MV_UDC)	+= mv_udc.o
 mv_udc-y			:= mv_udc_core.o
 obj-$(CONFIG_USB_CI13XXX_MSM)	+= ci13xxx_msm.o
 obj-$(CONFIG_USB_FUSB300)	+= fusb300_udc.o
diff --git a/drivers/usb/gadget/mv_udc.h b/drivers/usb/gadget/mv_udc.h
index 3d8404484613..34aadfae723d 100644
--- a/drivers/usb/gadget/mv_udc.h
+++ b/drivers/usb/gadget/mv_udc.h
@@ -180,7 +180,7 @@ struct mv_udc {
 
 	struct mv_cap_regs __iomem	*cap_regs;
 	struct mv_op_regs __iomem	*op_regs;
-	unsigned int			phy_regs;
+	void __iomem                    *phy_regs;
 	unsigned int			max_eps;
 	struct mv_dqh			*ep_dqh;
 	size_t				ep_dqh_size;
diff --git a/drivers/usb/gadget/mv_udc_core.c b/drivers/usb/gadget/mv_udc_core.c
index a3a7664add19..f0596ac533e0 100644
--- a/drivers/usb/gadget/mv_udc_core.c
+++ b/drivers/usb/gadget/mv_udc_core.c
@@ -2128,11 +2128,9 @@ static int __devexit mv_udc_remove(struct platform_device *dev)
 
 	if (udc->cap_regs)
 		iounmap(udc->cap_regs);
-	udc->cap_regs = NULL;
 
 	if (udc->phy_regs)
-		iounmap((void *)udc->phy_regs);
-	udc->phy_regs = 0;
+		iounmap(udc->phy_regs);
 
 	if (udc->status_req) {
 		kfree(udc->status_req->req.buf);
@@ -2217,8 +2215,8 @@ static int __devinit mv_udc_probe(struct platform_device *dev)
 		goto err_iounmap_capreg;
 	}
 
-	udc->phy_regs = (unsigned int)ioremap(r->start, resource_size(r));
-	if (udc->phy_regs == 0) {
+	udc->phy_regs = ioremap(r->start, resource_size(r));
+	if (udc->phy_regs == NULL) {
 		dev_err(&dev->dev, "failed to map phy I/O memory\n");
 		retval = -EBUSY;
 		goto err_iounmap_capreg;
@@ -2229,7 +2227,8 @@ static int __devinit mv_udc_probe(struct platform_device *dev)
 	if (retval)
 		goto err_iounmap_phyreg;
 
-	udc->op_regs = (struct mv_op_regs __iomem *)((u32)udc->cap_regs
+	udc->op_regs =
+		(struct mv_op_regs __iomem *)((unsigned long)udc->cap_regs
 		+ (readl(&udc->cap_regs->caplength_hciversion)
 			& CAPLENGTH_MASK));
 	udc->max_eps = readl(&udc->cap_regs->dccparams) & DCCPARAMS_DEN_MASK;
@@ -2389,7 +2388,7 @@ err_free_dma:
 err_disable_clock:
 	mv_udc_disable_internal(udc);
 err_iounmap_phyreg:
-	iounmap((void *)udc->phy_regs);
+	iounmap(udc->phy_regs);
 err_iounmap_capreg:
 	iounmap(udc->cap_regs);
 err_put_clk:
@@ -2480,13 +2479,13 @@ static struct platform_driver udc_driver = {
 	.shutdown	= mv_udc_shutdown,
 	.driver		= {
 		.owner	= THIS_MODULE,
-		.name	= "pxa-u2o",
+		.name	= "mv-udc",
 #ifdef CONFIG_PM
 		.pm	= &mv_udc_pm_ops,
 #endif
 	},
 };
-MODULE_ALIAS("platform:pxa-u2o");
+MODULE_ALIAS("platform:mv-udc");
 
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_AUTHOR("Chao Xie <chao.xie@marvell.com>");
diff --git a/include/linux/platform_data/mv_usb.h b/include/linux/platform_data/mv_usb.h
index e9d9149ddf38..a642cf20ad6e 100644
--- a/include/linux/platform_data/mv_usb.h
+++ b/include/linux/platform_data/mv_usb.h
@@ -42,9 +42,17 @@ struct mv_usb_platform_data {
 	/* only valid for HCD. OTG or Host only*/
 	unsigned int		mode;
 
-	int     (*phy_init)(unsigned int regbase);
-	void    (*phy_deinit)(unsigned int regbase);
+	int	(*phy_init)(void __iomem *regbase);
+	void	(*phy_deinit)(void __iomem *regbase);
 	int	(*set_vbus)(unsigned int vbus);
 };
 
+#ifndef CONFIG_HAVE_CLK
+/* Dummy stub for clk framework */
+#define clk_get(dev, id)       NULL
+#define clk_put(clock)         do {} while (0)
+#define clk_enable(clock)      do {} while (0)
+#define clk_disable(clock)     do {} while (0)
+#endif
+
 #endif
-- 
cgit v1.2.3


From 277164f03f466b7a1ea0d0c3dac8b8a0599ce0dc Mon Sep 17 00:00:00 2001
From: Neil Zhang <zhangwm@marvell.com>
Date: Tue, 20 Dec 2011 13:20:22 +0800
Subject: USB: OTG: add Marvell usb OTG driver support

This driver is for ChipIdea USB OTG controller on Marvell Socs.
PXA9xx/MMP2/MMP3/MGx all have this USB OTG controller.

Signed-off-by: Neil Zhang <zhangwm@marvell.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/otg/Kconfig              |  12 +
 drivers/usb/otg/Makefile             |   1 +
 drivers/usb/otg/mv_otg.c             | 957 +++++++++++++++++++++++++++++++++++
 drivers/usb/otg/mv_otg.h             | 165 ++++++
 include/linux/platform_data/mv_usb.h |   5 +
 5 files changed, 1140 insertions(+)
 create mode 100644 drivers/usb/otg/mv_otg.c
 create mode 100644 drivers/usb/otg/mv_otg.h

(limited to 'include/linux')

diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig
index c66481ad98d7..c4e2cb760c66 100644
--- a/drivers/usb/otg/Kconfig
+++ b/drivers/usb/otg/Kconfig
@@ -130,4 +130,16 @@ config FSL_USB2_OTG
 	help
 	  Enable this to support Freescale USB OTG transceiver.
 
+config USB_MV_OTG
+	tristate "Marvell USB OTG support"
+	depends on USB_MV_UDC
+	select USB_OTG
+	select USB_OTG_UTILS
+	help
+	  Say Y here if you want to build Marvell USB OTG transciever
+	  driver in kernel (including PXA and MMP series). This driver
+	  implements role switch between EHCI host driver and gadget driver.
+
+	  To compile this driver as a module, choose M here.
+
 endif # USB || OTG
diff --git a/drivers/usb/otg/Makefile b/drivers/usb/otg/Makefile
index 566655c53331..b2c5a9598637 100644
--- a/drivers/usb/otg/Makefile
+++ b/drivers/usb/otg/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_USB_MSM_OTG)	+= msm_otg.o
 obj-$(CONFIG_AB8500_USB)	+= ab8500-usb.o
 fsl_usb2_otg-objs		:= fsl_otg.o otg_fsm.o
 obj-$(CONFIG_FSL_USB2_OTG)	+= fsl_usb2_otg.o
+obj-$(CONFIG_USB_MV_OTG)	+= mv_otg.o
diff --git a/drivers/usb/otg/mv_otg.c b/drivers/usb/otg/mv_otg.c
new file mode 100644
index 000000000000..db0d4fcdc8e2
--- /dev/null
+++ b/drivers/usb/otg/mv_otg.c
@@ -0,0 +1,957 @@
+/*
+ * Copyright (C) 2011 Marvell International Ltd. All rights reserved.
+ * Author: Chao Xie <chao.xie@marvell.com>
+ *	   Neil Zhang <zhangwm@marvell.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/device.h>
+#include <linux/proc_fs.h>
+#include <linux/clk.h>
+#include <linux/workqueue.h>
+#include <linux/platform_device.h>
+
+#include <linux/usb.h>
+#include <linux/usb/ch9.h>
+#include <linux/usb/otg.h>
+#include <linux/usb/gadget.h>
+#include <linux/usb/hcd.h>
+#include <linux/platform_data/mv_usb.h>
+
+#include "mv_otg.h"
+
+#define	DRIVER_DESC	"Marvell USB OTG transceiver driver"
+#define	DRIVER_VERSION	"Jan 20, 2010"
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+
+static const char driver_name[] = "mv-otg";
+
+static char *state_string[] = {
+	"undefined",
+	"b_idle",
+	"b_srp_init",
+	"b_peripheral",
+	"b_wait_acon",
+	"b_host",
+	"a_idle",
+	"a_wait_vrise",
+	"a_wait_bcon",
+	"a_host",
+	"a_suspend",
+	"a_peripheral",
+	"a_wait_vfall",
+	"a_vbus_err"
+};
+
+static int mv_otg_set_vbus(struct otg_transceiver *otg, bool on)
+{
+	struct mv_otg *mvotg = container_of(otg, struct mv_otg, otg);
+	if (mvotg->pdata->set_vbus == NULL)
+		return -ENODEV;
+
+	return mvotg->pdata->set_vbus(on);
+}
+
+static int mv_otg_set_host(struct otg_transceiver *otg,
+			   struct usb_bus *host)
+{
+	otg->host = host;
+
+	return 0;
+}
+
+static int mv_otg_set_peripheral(struct otg_transceiver *otg,
+				 struct usb_gadget *gadget)
+{
+	otg->gadget = gadget;
+
+	return 0;
+}
+
+static void mv_otg_run_state_machine(struct mv_otg *mvotg,
+				     unsigned long delay)
+{
+	dev_dbg(&mvotg->pdev->dev, "transceiver is updated\n");
+	if (!mvotg->qwork)
+		return;
+
+	queue_delayed_work(mvotg->qwork, &mvotg->work, delay);
+}
+
+static void mv_otg_timer_await_bcon(unsigned long data)
+{
+	struct mv_otg *mvotg = (struct mv_otg *) data;
+
+	mvotg->otg_ctrl.a_wait_bcon_timeout = 1;
+
+	dev_info(&mvotg->pdev->dev, "B Device No Response!\n");
+
+	if (spin_trylock(&mvotg->wq_lock)) {
+		mv_otg_run_state_machine(mvotg, 0);
+		spin_unlock(&mvotg->wq_lock);
+	}
+}
+
+static int mv_otg_cancel_timer(struct mv_otg *mvotg, unsigned int id)
+{
+	struct timer_list *timer;
+
+	if (id >= OTG_TIMER_NUM)
+		return -EINVAL;
+
+	timer = &mvotg->otg_ctrl.timer[id];
+
+	if (timer_pending(timer))
+		del_timer(timer);
+
+	return 0;
+}
+
+static int mv_otg_set_timer(struct mv_otg *mvotg, unsigned int id,
+			    unsigned long interval,
+			    void (*callback) (unsigned long))
+{
+	struct timer_list *timer;
+
+	if (id >= OTG_TIMER_NUM)
+		return -EINVAL;
+
+	timer = &mvotg->otg_ctrl.timer[id];
+	if (timer_pending(timer)) {
+		dev_err(&mvotg->pdev->dev, "Timer%d is already running\n", id);
+		return -EBUSY;
+	}
+
+	init_timer(timer);
+	timer->data = (unsigned long) mvotg;
+	timer->function = callback;
+	timer->expires = jiffies + interval;
+	add_timer(timer);
+
+	return 0;
+}
+
+static int mv_otg_reset(struct mv_otg *mvotg)
+{
+	unsigned int loops;
+	u32 tmp;
+
+	/* Stop the controller */
+	tmp = readl(&mvotg->op_regs->usbcmd);
+	tmp &= ~USBCMD_RUN_STOP;
+	writel(tmp, &mvotg->op_regs->usbcmd);
+
+	/* Reset the controller to get default values */
+	writel(USBCMD_CTRL_RESET, &mvotg->op_regs->usbcmd);
+
+	loops = 500;
+	while (readl(&mvotg->op_regs->usbcmd) & USBCMD_CTRL_RESET) {
+		if (loops == 0) {
+			dev_err(&mvotg->pdev->dev,
+				"Wait for RESET completed TIMEOUT\n");
+			return -ETIMEDOUT;
+		}
+		loops--;
+		udelay(20);
+	}
+
+	writel(0x0, &mvotg->op_regs->usbintr);
+	tmp = readl(&mvotg->op_regs->usbsts);
+	writel(tmp, &mvotg->op_regs->usbsts);
+
+	return 0;
+}
+
+static void mv_otg_init_irq(struct mv_otg *mvotg)
+{
+	u32 otgsc;
+
+	mvotg->irq_en = OTGSC_INTR_A_SESSION_VALID
+	    | OTGSC_INTR_A_VBUS_VALID;
+	mvotg->irq_status = OTGSC_INTSTS_A_SESSION_VALID
+	    | OTGSC_INTSTS_A_VBUS_VALID;
+
+	if (mvotg->pdata->vbus == NULL) {
+		mvotg->irq_en |= OTGSC_INTR_B_SESSION_VALID
+		    | OTGSC_INTR_B_SESSION_END;
+		mvotg->irq_status |= OTGSC_INTSTS_B_SESSION_VALID
+		    | OTGSC_INTSTS_B_SESSION_END;
+	}
+
+	if (mvotg->pdata->id == NULL) {
+		mvotg->irq_en |= OTGSC_INTR_USB_ID;
+		mvotg->irq_status |= OTGSC_INTSTS_USB_ID;
+	}
+
+	otgsc = readl(&mvotg->op_regs->otgsc);
+	otgsc |= mvotg->irq_en;
+	writel(otgsc, &mvotg->op_regs->otgsc);
+}
+
+static void mv_otg_start_host(struct mv_otg *mvotg, int on)
+{
+	struct otg_transceiver *otg = &mvotg->otg;
+	struct usb_hcd *hcd;
+
+	if (!otg->host)
+		return;
+
+	dev_info(&mvotg->pdev->dev, "%s host\n", on ? "start" : "stop");
+
+	hcd = bus_to_hcd(otg->host);
+
+	if (on)
+		usb_add_hcd(hcd, hcd->irq, IRQF_SHARED);
+	else
+		usb_remove_hcd(hcd);
+}
+
+static void mv_otg_start_periphrals(struct mv_otg *mvotg, int on)
+{
+	struct otg_transceiver *otg = &mvotg->otg;
+
+	if (!otg->gadget)
+		return;
+
+	dev_info(otg->dev, "gadget %s\n", on ? "on" : "off");
+
+	if (on)
+		usb_gadget_vbus_connect(otg->gadget);
+	else
+		usb_gadget_vbus_disconnect(otg->gadget);
+}
+
+static void otg_clock_enable(struct mv_otg *mvotg)
+{
+	unsigned int i;
+
+	for (i = 0; i < mvotg->clknum; i++)
+		clk_enable(mvotg->clk[i]);
+}
+
+static void otg_clock_disable(struct mv_otg *mvotg)
+{
+	unsigned int i;
+
+	for (i = 0; i < mvotg->clknum; i++)
+		clk_disable(mvotg->clk[i]);
+}
+
+static int mv_otg_enable_internal(struct mv_otg *mvotg)
+{
+	int retval = 0;
+
+	if (mvotg->active)
+		return 0;
+
+	dev_dbg(&mvotg->pdev->dev, "otg enabled\n");
+
+	otg_clock_enable(mvotg);
+	if (mvotg->pdata->phy_init) {
+		retval = mvotg->pdata->phy_init(mvotg->phy_regs);
+		if (retval) {
+			dev_err(&mvotg->pdev->dev,
+				"init phy error %d\n", retval);
+			otg_clock_disable(mvotg);
+			return retval;
+		}
+	}
+	mvotg->active = 1;
+
+	return 0;
+
+}
+
+static int mv_otg_enable(struct mv_otg *mvotg)
+{
+	if (mvotg->clock_gating)
+		return mv_otg_enable_internal(mvotg);
+
+	return 0;
+}
+
+static void mv_otg_disable_internal(struct mv_otg *mvotg)
+{
+	if (mvotg->active) {
+		dev_dbg(&mvotg->pdev->dev, "otg disabled\n");
+		if (mvotg->pdata->phy_deinit)
+			mvotg->pdata->phy_deinit(mvotg->phy_regs);
+		otg_clock_disable(mvotg);
+		mvotg->active = 0;
+	}
+}
+
+static void mv_otg_disable(struct mv_otg *mvotg)
+{
+	if (mvotg->clock_gating)
+		mv_otg_disable_internal(mvotg);
+}
+
+static void mv_otg_update_inputs(struct mv_otg *mvotg)
+{
+	struct mv_otg_ctrl *otg_ctrl = &mvotg->otg_ctrl;
+	u32 otgsc;
+
+	otgsc = readl(&mvotg->op_regs->otgsc);
+
+	if (mvotg->pdata->vbus) {
+		if (mvotg->pdata->vbus->poll() == VBUS_HIGH) {
+			otg_ctrl->b_sess_vld = 1;
+			otg_ctrl->b_sess_end = 0;
+		} else {
+			otg_ctrl->b_sess_vld = 0;
+			otg_ctrl->b_sess_end = 1;
+		}
+	} else {
+		otg_ctrl->b_sess_vld = !!(otgsc & OTGSC_STS_B_SESSION_VALID);
+		otg_ctrl->b_sess_end = !!(otgsc & OTGSC_STS_B_SESSION_END);
+	}
+
+	if (mvotg->pdata->id)
+		otg_ctrl->id = !!mvotg->pdata->id->poll();
+	else
+		otg_ctrl->id = !!(otgsc & OTGSC_STS_USB_ID);
+
+	if (mvotg->pdata->otg_force_a_bus_req && !otg_ctrl->id)
+		otg_ctrl->a_bus_req = 1;
+
+	otg_ctrl->a_sess_vld = !!(otgsc & OTGSC_STS_A_SESSION_VALID);
+	otg_ctrl->a_vbus_vld = !!(otgsc & OTGSC_STS_A_VBUS_VALID);
+
+	dev_dbg(&mvotg->pdev->dev, "%s: ", __func__);
+	dev_dbg(&mvotg->pdev->dev, "id %d\n", otg_ctrl->id);
+	dev_dbg(&mvotg->pdev->dev, "b_sess_vld %d\n", otg_ctrl->b_sess_vld);
+	dev_dbg(&mvotg->pdev->dev, "b_sess_end %d\n", otg_ctrl->b_sess_end);
+	dev_dbg(&mvotg->pdev->dev, "a_vbus_vld %d\n", otg_ctrl->a_vbus_vld);
+	dev_dbg(&mvotg->pdev->dev, "a_sess_vld %d\n", otg_ctrl->a_sess_vld);
+}
+
+static void mv_otg_update_state(struct mv_otg *mvotg)
+{
+	struct mv_otg_ctrl *otg_ctrl = &mvotg->otg_ctrl;
+	struct otg_transceiver *otg = &mvotg->otg;
+	int old_state = otg->state;
+
+	switch (old_state) {
+	case OTG_STATE_UNDEFINED:
+		otg->state = OTG_STATE_B_IDLE;
+		/* FALL THROUGH */
+	case OTG_STATE_B_IDLE:
+		if (otg_ctrl->id == 0)
+			otg->state = OTG_STATE_A_IDLE;
+		else if (otg_ctrl->b_sess_vld)
+			otg->state = OTG_STATE_B_PERIPHERAL;
+		break;
+	case OTG_STATE_B_PERIPHERAL:
+		if (!otg_ctrl->b_sess_vld || otg_ctrl->id == 0)
+			otg->state = OTG_STATE_B_IDLE;
+		break;
+	case OTG_STATE_A_IDLE:
+		if (otg_ctrl->id)
+			otg->state = OTG_STATE_B_IDLE;
+		else if (!(otg_ctrl->a_bus_drop) &&
+			 (otg_ctrl->a_bus_req || otg_ctrl->a_srp_det))
+			otg->state = OTG_STATE_A_WAIT_VRISE;
+		break;
+	case OTG_STATE_A_WAIT_VRISE:
+		if (otg_ctrl->a_vbus_vld)
+			otg->state = OTG_STATE_A_WAIT_BCON;
+		break;
+	case OTG_STATE_A_WAIT_BCON:
+		if (otg_ctrl->id || otg_ctrl->a_bus_drop
+		    || otg_ctrl->a_wait_bcon_timeout) {
+			mv_otg_cancel_timer(mvotg, A_WAIT_BCON_TIMER);
+			mvotg->otg_ctrl.a_wait_bcon_timeout = 0;
+			otg->state = OTG_STATE_A_WAIT_VFALL;
+			otg_ctrl->a_bus_req = 0;
+		} else if (!otg_ctrl->a_vbus_vld) {
+			mv_otg_cancel_timer(mvotg, A_WAIT_BCON_TIMER);
+			mvotg->otg_ctrl.a_wait_bcon_timeout = 0;
+			otg->state = OTG_STATE_A_VBUS_ERR;
+		} else if (otg_ctrl->b_conn) {
+			mv_otg_cancel_timer(mvotg, A_WAIT_BCON_TIMER);
+			mvotg->otg_ctrl.a_wait_bcon_timeout = 0;
+			otg->state = OTG_STATE_A_HOST;
+		}
+		break;
+	case OTG_STATE_A_HOST:
+		if (otg_ctrl->id || !otg_ctrl->b_conn
+		    || otg_ctrl->a_bus_drop)
+			otg->state = OTG_STATE_A_WAIT_BCON;
+		else if (!otg_ctrl->a_vbus_vld)
+			otg->state = OTG_STATE_A_VBUS_ERR;
+		break;
+	case OTG_STATE_A_WAIT_VFALL:
+		if (otg_ctrl->id
+		    || (!otg_ctrl->b_conn && otg_ctrl->a_sess_vld)
+		    || otg_ctrl->a_bus_req)
+			otg->state = OTG_STATE_A_IDLE;
+		break;
+	case OTG_STATE_A_VBUS_ERR:
+		if (otg_ctrl->id || otg_ctrl->a_clr_err
+		    || otg_ctrl->a_bus_drop) {
+			otg_ctrl->a_clr_err = 0;
+			otg->state = OTG_STATE_A_WAIT_VFALL;
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+static void mv_otg_work(struct work_struct *work)
+{
+	struct mv_otg *mvotg;
+	struct otg_transceiver *otg;
+	int old_state;
+
+	mvotg = container_of((struct delayed_work *)work, struct mv_otg, work);
+
+run:
+	/* work queue is single thread, or we need spin_lock to protect */
+	otg = &mvotg->otg;
+	old_state = otg->state;
+
+	if (!mvotg->active)
+		return;
+
+	mv_otg_update_inputs(mvotg);
+	mv_otg_update_state(mvotg);
+
+	if (old_state != otg->state) {
+		dev_info(&mvotg->pdev->dev, "change from state %s to %s\n",
+			 state_string[old_state],
+			 state_string[otg->state]);
+
+		switch (otg->state) {
+		case OTG_STATE_B_IDLE:
+			mvotg->otg.default_a = 0;
+			if (old_state == OTG_STATE_B_PERIPHERAL)
+				mv_otg_start_periphrals(mvotg, 0);
+			mv_otg_reset(mvotg);
+			mv_otg_disable(mvotg);
+			break;
+		case OTG_STATE_B_PERIPHERAL:
+			mv_otg_enable(mvotg);
+			mv_otg_start_periphrals(mvotg, 1);
+			break;
+		case OTG_STATE_A_IDLE:
+			mvotg->otg.default_a = 1;
+			mv_otg_enable(mvotg);
+			if (old_state == OTG_STATE_A_WAIT_VFALL)
+				mv_otg_start_host(mvotg, 0);
+			mv_otg_reset(mvotg);
+			break;
+		case OTG_STATE_A_WAIT_VRISE:
+			mv_otg_set_vbus(&mvotg->otg, 1);
+			break;
+		case OTG_STATE_A_WAIT_BCON:
+			if (old_state != OTG_STATE_A_HOST)
+				mv_otg_start_host(mvotg, 1);
+			mv_otg_set_timer(mvotg, A_WAIT_BCON_TIMER,
+					 T_A_WAIT_BCON,
+					 mv_otg_timer_await_bcon);
+			/*
+			 * Now, we directly enter A_HOST. So set b_conn = 1
+			 * here. In fact, it need host driver to notify us.
+			 */
+			mvotg->otg_ctrl.b_conn = 1;
+			break;
+		case OTG_STATE_A_HOST:
+			break;
+		case OTG_STATE_A_WAIT_VFALL:
+			/*
+			 * Now, we has exited A_HOST. So set b_conn = 0
+			 * here. In fact, it need host driver to notify us.
+			 */
+			mvotg->otg_ctrl.b_conn = 0;
+			mv_otg_set_vbus(&mvotg->otg, 0);
+			break;
+		case OTG_STATE_A_VBUS_ERR:
+			break;
+		default:
+			break;
+		}
+		goto run;
+	}
+}
+
+static irqreturn_t mv_otg_irq(int irq, void *dev)
+{
+	struct mv_otg *mvotg = dev;
+	u32 otgsc;
+
+	otgsc = readl(&mvotg->op_regs->otgsc);
+	writel(otgsc, &mvotg->op_regs->otgsc);
+
+	/*
+	 * if we have vbus, then the vbus detection for B-device
+	 * will be done by mv_otg_inputs_irq().
+	 */
+	if (mvotg->pdata->vbus)
+		if ((otgsc & OTGSC_STS_USB_ID) &&
+		    !(otgsc & OTGSC_INTSTS_USB_ID))
+			return IRQ_NONE;
+
+	if ((otgsc & mvotg->irq_status) == 0)
+		return IRQ_NONE;
+
+	mv_otg_run_state_machine(mvotg, 0);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t mv_otg_inputs_irq(int irq, void *dev)
+{
+	struct mv_otg *mvotg = dev;
+
+	/* The clock may disabled at this time */
+	if (!mvotg->active) {
+		mv_otg_enable(mvotg);
+		mv_otg_init_irq(mvotg);
+	}
+
+	mv_otg_run_state_machine(mvotg, 0);
+
+	return IRQ_HANDLED;
+}
+
+static ssize_t
+get_a_bus_req(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct mv_otg *mvotg = dev_get_drvdata(dev);
+	return scnprintf(buf, PAGE_SIZE, "%d\n",
+			 mvotg->otg_ctrl.a_bus_req);
+}
+
+static ssize_t
+set_a_bus_req(struct device *dev, struct device_attribute *attr,
+	      const char *buf, size_t count)
+{
+	struct mv_otg *mvotg = dev_get_drvdata(dev);
+
+	if (count > 2)
+		return -1;
+
+	/* We will use this interface to change to A device */
+	if (mvotg->otg.state != OTG_STATE_B_IDLE
+	    && mvotg->otg.state != OTG_STATE_A_IDLE)
+		return -1;
+
+	/* The clock may disabled and we need to set irq for ID detected */
+	mv_otg_enable(mvotg);
+	mv_otg_init_irq(mvotg);
+
+	if (buf[0] == '1') {
+		mvotg->otg_ctrl.a_bus_req = 1;
+		mvotg->otg_ctrl.a_bus_drop = 0;
+		dev_dbg(&mvotg->pdev->dev,
+			"User request: a_bus_req = 1\n");
+
+		if (spin_trylock(&mvotg->wq_lock)) {
+			mv_otg_run_state_machine(mvotg, 0);
+			spin_unlock(&mvotg->wq_lock);
+		}
+	}
+
+	return count;
+}
+
+static DEVICE_ATTR(a_bus_req, S_IRUGO | S_IWUSR, get_a_bus_req,
+		   set_a_bus_req);
+
+static ssize_t
+set_a_clr_err(struct device *dev, struct device_attribute *attr,
+	      const char *buf, size_t count)
+{
+	struct mv_otg *mvotg = dev_get_drvdata(dev);
+	if (!mvotg->otg.default_a)
+		return -1;
+
+	if (count > 2)
+		return -1;
+
+	if (buf[0] == '1') {
+		mvotg->otg_ctrl.a_clr_err = 1;
+		dev_dbg(&mvotg->pdev->dev,
+			"User request: a_clr_err = 1\n");
+	}
+
+	if (spin_trylock(&mvotg->wq_lock)) {
+		mv_otg_run_state_machine(mvotg, 0);
+		spin_unlock(&mvotg->wq_lock);
+	}
+
+	return count;
+}
+
+static DEVICE_ATTR(a_clr_err, S_IWUSR, NULL, set_a_clr_err);
+
+static ssize_t
+get_a_bus_drop(struct device *dev, struct device_attribute *attr,
+	       char *buf)
+{
+	struct mv_otg *mvotg = dev_get_drvdata(dev);
+	return scnprintf(buf, PAGE_SIZE, "%d\n",
+			 mvotg->otg_ctrl.a_bus_drop);
+}
+
+static ssize_t
+set_a_bus_drop(struct device *dev, struct device_attribute *attr,
+	       const char *buf, size_t count)
+{
+	struct mv_otg *mvotg = dev_get_drvdata(dev);
+	if (!mvotg->otg.default_a)
+		return -1;
+
+	if (count > 2)
+		return -1;
+
+	if (buf[0] == '0') {
+		mvotg->otg_ctrl.a_bus_drop = 0;
+		dev_dbg(&mvotg->pdev->dev,
+			"User request: a_bus_drop = 0\n");
+	} else if (buf[0] == '1') {
+		mvotg->otg_ctrl.a_bus_drop = 1;
+		mvotg->otg_ctrl.a_bus_req = 0;
+		dev_dbg(&mvotg->pdev->dev,
+			"User request: a_bus_drop = 1\n");
+		dev_dbg(&mvotg->pdev->dev,
+			"User request: and a_bus_req = 0\n");
+	}
+
+	if (spin_trylock(&mvotg->wq_lock)) {
+		mv_otg_run_state_machine(mvotg, 0);
+		spin_unlock(&mvotg->wq_lock);
+	}
+
+	return count;
+}
+
+static DEVICE_ATTR(a_bus_drop, S_IRUGO | S_IWUSR,
+		   get_a_bus_drop, set_a_bus_drop);
+
+static struct attribute *inputs_attrs[] = {
+	&dev_attr_a_bus_req.attr,
+	&dev_attr_a_clr_err.attr,
+	&dev_attr_a_bus_drop.attr,
+	NULL,
+};
+
+static struct attribute_group inputs_attr_group = {
+	.name = "inputs",
+	.attrs = inputs_attrs,
+};
+
+int mv_otg_remove(struct platform_device *pdev)
+{
+	struct mv_otg *mvotg = platform_get_drvdata(pdev);
+	int clk_i;
+
+	sysfs_remove_group(&mvotg->pdev->dev.kobj, &inputs_attr_group);
+
+	if (mvotg->irq)
+		free_irq(mvotg->irq, mvotg);
+
+	if (mvotg->pdata->vbus)
+		free_irq(mvotg->pdata->vbus->irq, mvotg);
+	if (mvotg->pdata->id)
+		free_irq(mvotg->pdata->id->irq, mvotg);
+
+	if (mvotg->qwork) {
+		flush_workqueue(mvotg->qwork);
+		destroy_workqueue(mvotg->qwork);
+	}
+
+	mv_otg_disable(mvotg);
+
+	if (mvotg->cap_regs)
+		iounmap(mvotg->cap_regs);
+
+	if (mvotg->phy_regs)
+		iounmap(mvotg->phy_regs);
+
+	for (clk_i = 0; clk_i <= mvotg->clknum; clk_i++)
+		clk_put(mvotg->clk[clk_i]);
+
+	otg_set_transceiver(NULL);
+	platform_set_drvdata(pdev, NULL);
+
+	kfree(mvotg);
+
+	return 0;
+}
+
+static int mv_otg_probe(struct platform_device *pdev)
+{
+	struct mv_usb_platform_data *pdata = pdev->dev.platform_data;
+	struct mv_otg *mvotg;
+	struct resource *r;
+	int retval = 0, clk_i, i;
+	size_t size;
+
+	if (pdata == NULL) {
+		dev_err(&pdev->dev, "failed to get platform data\n");
+		return -ENODEV;
+	}
+
+	size = sizeof(*mvotg) + sizeof(struct clk *) * pdata->clknum;
+	mvotg = kzalloc(size, GFP_KERNEL);
+	if (!mvotg) {
+		dev_err(&pdev->dev, "failed to allocate memory!\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(pdev, mvotg);
+
+	mvotg->pdev = pdev;
+	mvotg->pdata = pdata;
+
+	mvotg->clknum = pdata->clknum;
+	for (clk_i = 0; clk_i < mvotg->clknum; clk_i++) {
+		mvotg->clk[clk_i] = clk_get(&pdev->dev, pdata->clkname[clk_i]);
+		if (IS_ERR(mvotg->clk[clk_i])) {
+			retval = PTR_ERR(mvotg->clk[clk_i]);
+			goto err_put_clk;
+		}
+	}
+
+	mvotg->qwork = create_singlethread_workqueue("mv_otg_queue");
+	if (!mvotg->qwork) {
+		dev_dbg(&pdev->dev, "cannot create workqueue for OTG\n");
+		retval = -ENOMEM;
+		goto err_put_clk;
+	}
+
+	INIT_DELAYED_WORK(&mvotg->work, mv_otg_work);
+
+	/* OTG common part */
+	mvotg->pdev = pdev;
+	mvotg->otg.dev = &pdev->dev;
+	mvotg->otg.label = driver_name;
+	mvotg->otg.set_host = mv_otg_set_host;
+	mvotg->otg.set_peripheral = mv_otg_set_peripheral;
+	mvotg->otg.set_vbus = mv_otg_set_vbus;
+	mvotg->otg.state = OTG_STATE_UNDEFINED;
+
+	for (i = 0; i < OTG_TIMER_NUM; i++)
+		init_timer(&mvotg->otg_ctrl.timer[i]);
+
+	r = platform_get_resource_byname(mvotg->pdev,
+					 IORESOURCE_MEM, "phyregs");
+	if (r == NULL) {
+		dev_err(&pdev->dev, "no phy I/O memory resource defined\n");
+		retval = -ENODEV;
+		goto err_destroy_workqueue;
+	}
+
+	mvotg->phy_regs = ioremap(r->start, resource_size(r));
+	if (mvotg->phy_regs == NULL) {
+		dev_err(&pdev->dev, "failed to map phy I/O memory\n");
+		retval = -EFAULT;
+		goto err_destroy_workqueue;
+	}
+
+	r = platform_get_resource_byname(mvotg->pdev,
+					 IORESOURCE_MEM, "capregs");
+	if (r == NULL) {
+		dev_err(&pdev->dev, "no I/O memory resource defined\n");
+		retval = -ENODEV;
+		goto err_unmap_phyreg;
+	}
+
+	mvotg->cap_regs = ioremap(r->start, resource_size(r));
+	if (mvotg->cap_regs == NULL) {
+		dev_err(&pdev->dev, "failed to map I/O memory\n");
+		retval = -EFAULT;
+		goto err_unmap_phyreg;
+	}
+
+	/* we will acces controller register, so enable the udc controller */
+	retval = mv_otg_enable_internal(mvotg);
+	if (retval) {
+		dev_err(&pdev->dev, "mv otg enable error %d\n", retval);
+		goto err_unmap_capreg;
+	}
+
+	mvotg->op_regs =
+		(struct mv_otg_regs __iomem *) ((unsigned long) mvotg->cap_regs
+			+ (readl(mvotg->cap_regs) & CAPLENGTH_MASK));
+
+	if (pdata->id) {
+		retval = request_threaded_irq(pdata->id->irq, NULL,
+					      mv_otg_inputs_irq,
+					      IRQF_ONESHOT, "id", mvotg);
+		if (retval) {
+			dev_info(&pdev->dev,
+				 "Failed to request irq for ID\n");
+			pdata->id = NULL;
+		}
+	}
+
+	if (pdata->vbus) {
+		mvotg->clock_gating = 1;
+		retval = request_threaded_irq(pdata->vbus->irq, NULL,
+					      mv_otg_inputs_irq,
+					      IRQF_ONESHOT, "vbus", mvotg);
+		if (retval) {
+			dev_info(&pdev->dev,
+				 "Failed to request irq for VBUS, "
+				 "disable clock gating\n");
+			mvotg->clock_gating = 0;
+			pdata->vbus = NULL;
+		}
+	}
+
+	if (pdata->disable_otg_clock_gating)
+		mvotg->clock_gating = 0;
+
+	mv_otg_reset(mvotg);
+	mv_otg_init_irq(mvotg);
+
+	r = platform_get_resource(mvotg->pdev, IORESOURCE_IRQ, 0);
+	if (r == NULL) {
+		dev_err(&pdev->dev, "no IRQ resource defined\n");
+		retval = -ENODEV;
+		goto err_disable_clk;
+	}
+
+	mvotg->irq = r->start;
+	if (request_irq(mvotg->irq, mv_otg_irq, IRQF_SHARED,
+			driver_name, mvotg)) {
+		dev_err(&pdev->dev, "Request irq %d for OTG failed\n",
+			mvotg->irq);
+		mvotg->irq = 0;
+		retval = -ENODEV;
+		goto err_disable_clk;
+	}
+
+	retval = otg_set_transceiver(&mvotg->otg);
+	if (retval < 0) {
+		dev_err(&pdev->dev, "can't register transceiver, %d\n",
+			retval);
+		goto err_free_irq;
+	}
+
+	retval = sysfs_create_group(&pdev->dev.kobj, &inputs_attr_group);
+	if (retval < 0) {
+		dev_dbg(&pdev->dev,
+			"Can't register sysfs attr group: %d\n", retval);
+		goto err_set_transceiver;
+	}
+
+	spin_lock_init(&mvotg->wq_lock);
+	if (spin_trylock(&mvotg->wq_lock)) {
+		mv_otg_run_state_machine(mvotg, 2 * HZ);
+		spin_unlock(&mvotg->wq_lock);
+	}
+
+	dev_info(&pdev->dev,
+		 "successful probe OTG device %s clock gating.\n",
+		 mvotg->clock_gating ? "with" : "without");
+
+	return 0;
+
+err_set_transceiver:
+	otg_set_transceiver(NULL);
+err_free_irq:
+	free_irq(mvotg->irq, mvotg);
+err_disable_clk:
+	if (pdata->vbus)
+		free_irq(pdata->vbus->irq, mvotg);
+	if (pdata->id)
+		free_irq(pdata->id->irq, mvotg);
+	mv_otg_disable_internal(mvotg);
+err_unmap_capreg:
+	iounmap(mvotg->cap_regs);
+err_unmap_phyreg:
+	iounmap(mvotg->phy_regs);
+err_destroy_workqueue:
+	flush_workqueue(mvotg->qwork);
+	destroy_workqueue(mvotg->qwork);
+err_put_clk:
+	for (clk_i--; clk_i >= 0; clk_i--)
+		clk_put(mvotg->clk[clk_i]);
+
+	platform_set_drvdata(pdev, NULL);
+	kfree(mvotg);
+
+	return retval;
+}
+
+#ifdef CONFIG_PM
+static int mv_otg_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct mv_otg *mvotg = platform_get_drvdata(pdev);
+
+	if (mvotg->otg.state != OTG_STATE_B_IDLE) {
+		dev_info(&pdev->dev,
+			 "OTG state is not B_IDLE, it is %d!\n",
+			 mvotg->otg.state);
+		return -EAGAIN;
+	}
+
+	if (!mvotg->clock_gating)
+		mv_otg_disable_internal(mvotg);
+
+	return 0;
+}
+
+static int mv_otg_resume(struct platform_device *pdev)
+{
+	struct mv_otg *mvotg = platform_get_drvdata(pdev);
+	u32 otgsc;
+
+	if (!mvotg->clock_gating) {
+		mv_otg_enable_internal(mvotg);
+
+		otgsc = readl(&mvotg->op_regs->otgsc);
+		otgsc |= mvotg->irq_en;
+		writel(otgsc, &mvotg->op_regs->otgsc);
+
+		if (spin_trylock(&mvotg->wq_lock)) {
+			mv_otg_run_state_machine(mvotg, 0);
+			spin_unlock(&mvotg->wq_lock);
+		}
+	}
+	return 0;
+}
+#endif
+
+static struct platform_driver mv_otg_driver = {
+	.probe = mv_otg_probe,
+	.remove = __exit_p(mv_otg_remove),
+	.driver = {
+		   .owner = THIS_MODULE,
+		   .name = driver_name,
+		   },
+#ifdef CONFIG_PM
+	.suspend = mv_otg_suspend,
+	.resume = mv_otg_resume,
+#endif
+};
+
+static int __init mv_otg_init(void)
+{
+	return platform_driver_register(&mv_otg_driver);
+}
+
+static void __exit mv_otg_exit(void)
+{
+	platform_driver_unregister(&mv_otg_driver);
+}
+
+module_init(mv_otg_init);
+module_exit(mv_otg_exit);
diff --git a/drivers/usb/otg/mv_otg.h b/drivers/usb/otg/mv_otg.h
new file mode 100644
index 000000000000..be6ca1437645
--- /dev/null
+++ b/drivers/usb/otg/mv_otg.h
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2011 Marvell International Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef	__MV_USB_OTG_CONTROLLER__
+#define	__MV_USB_OTG_CONTROLLER__
+
+#include <linux/types.h>
+
+/* Command Register Bit Masks */
+#define USBCMD_RUN_STOP			(0x00000001)
+#define USBCMD_CTRL_RESET		(0x00000002)
+
+/* otgsc Register Bit Masks */
+#define OTGSC_CTRL_VUSB_DISCHARGE		0x00000001
+#define OTGSC_CTRL_VUSB_CHARGE			0x00000002
+#define OTGSC_CTRL_OTG_TERM			0x00000008
+#define OTGSC_CTRL_DATA_PULSING			0x00000010
+#define OTGSC_STS_USB_ID			0x00000100
+#define OTGSC_STS_A_VBUS_VALID			0x00000200
+#define OTGSC_STS_A_SESSION_VALID		0x00000400
+#define OTGSC_STS_B_SESSION_VALID		0x00000800
+#define OTGSC_STS_B_SESSION_END			0x00001000
+#define OTGSC_STS_1MS_TOGGLE			0x00002000
+#define OTGSC_STS_DATA_PULSING			0x00004000
+#define OTGSC_INTSTS_USB_ID			0x00010000
+#define OTGSC_INTSTS_A_VBUS_VALID		0x00020000
+#define OTGSC_INTSTS_A_SESSION_VALID		0x00040000
+#define OTGSC_INTSTS_B_SESSION_VALID		0x00080000
+#define OTGSC_INTSTS_B_SESSION_END		0x00100000
+#define OTGSC_INTSTS_1MS			0x00200000
+#define OTGSC_INTSTS_DATA_PULSING		0x00400000
+#define OTGSC_INTR_USB_ID			0x01000000
+#define OTGSC_INTR_A_VBUS_VALID			0x02000000
+#define OTGSC_INTR_A_SESSION_VALID		0x04000000
+#define OTGSC_INTR_B_SESSION_VALID		0x08000000
+#define OTGSC_INTR_B_SESSION_END		0x10000000
+#define OTGSC_INTR_1MS_TIMER			0x20000000
+#define OTGSC_INTR_DATA_PULSING			0x40000000
+
+#define CAPLENGTH_MASK		(0xff)
+
+/* Timer's interval, unit 10ms */
+#define T_A_WAIT_VRISE		100
+#define T_A_WAIT_BCON		2000
+#define T_A_AIDL_BDIS		100
+#define T_A_BIDL_ADIS		20
+#define T_B_ASE0_BRST		400
+#define T_B_SE0_SRP		300
+#define T_B_SRP_FAIL		2000
+#define T_B_DATA_PLS		10
+#define T_B_SRP_INIT		100
+#define T_A_SRP_RSPNS		10
+#define T_A_DRV_RSM		5
+
+enum otg_function {
+	OTG_B_DEVICE = 0,
+	OTG_A_DEVICE
+};
+
+enum mv_otg_timer {
+	A_WAIT_BCON_TIMER = 0,
+	OTG_TIMER_NUM
+};
+
+/* PXA OTG state machine */
+struct mv_otg_ctrl {
+	/* internal variables */
+	u8 a_set_b_hnp_en;	/* A-Device set b_hnp_en */
+	u8 b_srp_done;
+	u8 b_hnp_en;
+
+	/* OTG inputs */
+	u8 a_bus_drop;
+	u8 a_bus_req;
+	u8 a_clr_err;
+	u8 a_bus_resume;
+	u8 a_bus_suspend;
+	u8 a_conn;
+	u8 a_sess_vld;
+	u8 a_srp_det;
+	u8 a_vbus_vld;
+	u8 b_bus_req;		/* B-Device Require Bus */
+	u8 b_bus_resume;
+	u8 b_bus_suspend;
+	u8 b_conn;
+	u8 b_se0_srp;
+	u8 b_sess_end;
+	u8 b_sess_vld;
+	u8 id;
+	u8 a_suspend_req;
+
+	/*Timer event */
+	u8 a_aidl_bdis_timeout;
+	u8 b_ase0_brst_timeout;
+	u8 a_bidl_adis_timeout;
+	u8 a_wait_bcon_timeout;
+
+	struct timer_list timer[OTG_TIMER_NUM];
+};
+
+#define VUSBHS_MAX_PORTS	8
+
+struct mv_otg_regs {
+	u32 usbcmd;		/* Command register */
+	u32 usbsts;		/* Status register */
+	u32 usbintr;		/* Interrupt enable */
+	u32 frindex;		/* Frame index */
+	u32 reserved1[1];
+	u32 deviceaddr;		/* Device Address */
+	u32 eplistaddr;		/* Endpoint List Address */
+	u32 ttctrl;		/* HOST TT status and control */
+	u32 burstsize;		/* Programmable Burst Size */
+	u32 txfilltuning;	/* Host Transmit Pre-Buffer Packet Tuning */
+	u32 reserved[4];
+	u32 epnak;		/* Endpoint NAK */
+	u32 epnaken;		/* Endpoint NAK Enable */
+	u32 configflag;		/* Configured Flag register */
+	u32 portsc[VUSBHS_MAX_PORTS];	/* Port Status/Control x, x = 1..8 */
+	u32 otgsc;
+	u32 usbmode;		/* USB Host/Device mode */
+	u32 epsetupstat;	/* Endpoint Setup Status */
+	u32 epprime;		/* Endpoint Initialize */
+	u32 epflush;		/* Endpoint De-initialize */
+	u32 epstatus;		/* Endpoint Status */
+	u32 epcomplete;		/* Endpoint Interrupt On Complete */
+	u32 epctrlx[16];	/* Endpoint Control, where x = 0.. 15 */
+	u32 mcr;		/* Mux Control */
+	u32 isr;		/* Interrupt Status */
+	u32 ier;		/* Interrupt Enable */
+};
+
+struct mv_otg {
+	struct otg_transceiver otg;
+	struct mv_otg_ctrl otg_ctrl;
+
+	/* base address */
+	void __iomem *phy_regs;
+	void __iomem *cap_regs;
+	struct mv_otg_regs __iomem *op_regs;
+
+	struct platform_device *pdev;
+	int irq;
+	u32 irq_status;
+	u32 irq_en;
+
+	struct delayed_work work;
+	struct workqueue_struct *qwork;
+
+	spinlock_t wq_lock;
+
+	struct mv_usb_platform_data *pdata;
+
+	unsigned int active;
+	unsigned int clock_gating;
+	unsigned int clknum;
+	struct clk *clk[0];
+};
+
+#endif
diff --git a/include/linux/platform_data/mv_usb.h b/include/linux/platform_data/mv_usb.h
index a642cf20ad6e..f4cb0ec373c3 100644
--- a/include/linux/platform_data/mv_usb.h
+++ b/include/linux/platform_data/mv_usb.h
@@ -42,6 +42,11 @@ struct mv_usb_platform_data {
 	/* only valid for HCD. OTG or Host only*/
 	unsigned int		mode;
 
+	/* This flag is used for that needs id pin checked by otg */
+	unsigned int    disable_otg_clock_gating:1;
+	/* Force a_bus_req to be asserted */
+	 unsigned int    otg_force_a_bus_req:1;
+
 	int	(*phy_init)(void __iomem *regbase);
 	void	(*phy_deinit)(void __iomem *regbase);
 	int	(*set_vbus)(unsigned int vbus);
-- 
cgit v1.2.3


From 3a082ec9b2f544a81e977cfa259e3f990a995dc8 Mon Sep 17 00:00:00 2001
From: Neil Zhang <zhangwm@marvell.com>
Date: Tue, 20 Dec 2011 13:20:23 +0800
Subject: USB: EHCI: Add Marvell Host Controller driver

This patch adds support for EHCI compliant HSUSB Host controller found
on Marvell Socs.

It fits both OTG and SPH controller on marvell Socs, including
PXA9xx/MMP2/MMP3/MGx.

Signed-off-by: Neil Zhang <zhangwm@marvell.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/host/Kconfig             |   9 +
 drivers/usb/host/ehci-hcd.c          |   5 +
 drivers/usb/host/ehci-mv.c           | 391 +++++++++++++++++++++++++++++++++++
 include/linux/platform_data/mv_usb.h |   1 +
 4 files changed, 406 insertions(+)
 create mode 100644 drivers/usb/host/ehci-mv.c

(limited to 'include/linux')

diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 060e0e2b1ae6..a52769b5c904 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -194,6 +194,15 @@ config USB_EHCI_S5P
        help
 	 Enable support for the S5P SOC's on-chip EHCI controller.
 
+config USB_EHCI_MV
+	bool "EHCI support for Marvell on-chip controller"
+	depends on USB_EHCI_HCD
+	select USB_EHCI_ROOT_HUB_TT
+	---help---
+	  Enables support for Marvell (including PXA and MMP series) on-chip
+	  USB SPH and OTG controller. SPH is a single port host, and it can
+	  only be EHCI host. OTG is controller that can switch to host mode.
+
 config USB_W90X900_EHCI
 	bool "W90X900(W90P910) EHCI support"
 	depends on USB_EHCI_HCD && ARCH_W90X900
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 3ff9f82f7263..b05e7533d08f 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -1329,6 +1329,11 @@ MODULE_LICENSE ("GPL");
 #define PLATFORM_DRIVER		ehci_xls_driver
 #endif
 
+#ifdef CONFIG_USB_EHCI_MV
+#include "ehci-mv.c"
+#define        PLATFORM_DRIVER         ehci_mv_driver
+#endif
+
 #if !defined(PCI_DRIVER) && !defined(PLATFORM_DRIVER) && \
     !defined(PS3_SYSTEM_BUS_DRIVER) && !defined(OF_PLATFORM_DRIVER) && \
     !defined(XILINX_OF_PLATFORM_DRIVER)
diff --git a/drivers/usb/host/ehci-mv.c b/drivers/usb/host/ehci-mv.c
new file mode 100644
index 000000000000..52a604fb9321
--- /dev/null
+++ b/drivers/usb/host/ehci-mv.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (C) 2011 Marvell International Ltd. All rights reserved.
+ * Author: Chao Xie <chao.xie@marvell.com>
+ *        Neil Zhang <zhangwm@marvell.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/usb/otg.h>
+#include <linux/platform_data/mv_usb.h>
+
+#define CAPLENGTH_MASK         (0xff)
+
+struct ehci_hcd_mv {
+	struct usb_hcd *hcd;
+
+	/* Which mode does this ehci running OTG/Host ? */
+	int mode;
+
+	void __iomem *phy_regs;
+	void __iomem *cap_regs;
+	void __iomem *op_regs;
+
+	struct otg_transceiver *otg;
+
+	struct mv_usb_platform_data *pdata;
+
+	/* clock source and total clock number */
+	unsigned int clknum;
+	struct clk *clk[0];
+};
+
+static void ehci_clock_enable(struct ehci_hcd_mv *ehci_mv)
+{
+	unsigned int i;
+
+	for (i = 0; i < ehci_mv->clknum; i++)
+		clk_enable(ehci_mv->clk[i]);
+}
+
+static void ehci_clock_disable(struct ehci_hcd_mv *ehci_mv)
+{
+	unsigned int i;
+
+	for (i = 0; i < ehci_mv->clknum; i++)
+		clk_disable(ehci_mv->clk[i]);
+}
+
+static int mv_ehci_enable(struct ehci_hcd_mv *ehci_mv)
+{
+	int retval;
+
+	ehci_clock_enable(ehci_mv);
+	if (ehci_mv->pdata->phy_init) {
+		retval = ehci_mv->pdata->phy_init(ehci_mv->phy_regs);
+		if (retval)
+			return retval;
+	}
+
+	return 0;
+}
+
+static void mv_ehci_disable(struct ehci_hcd_mv *ehci_mv)
+{
+	if (ehci_mv->pdata->phy_deinit)
+		ehci_mv->pdata->phy_deinit(ehci_mv->phy_regs);
+	ehci_clock_disable(ehci_mv);
+}
+
+static int mv_ehci_reset(struct usb_hcd *hcd)
+{
+	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
+	struct device *dev = hcd->self.controller;
+	struct ehci_hcd_mv *ehci_mv = dev_get_drvdata(dev);
+	int retval;
+
+	if (ehci_mv == NULL) {
+		dev_err(dev, "Can not find private ehci data\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * data structure init
+	 */
+	retval = ehci_init(hcd);
+	if (retval) {
+		dev_err(dev, "ehci_init failed %d\n", retval);
+		return retval;
+	}
+
+	hcd->has_tt = 1;
+	ehci->sbrn = 0x20;
+
+	retval = ehci_reset(ehci);
+	if (retval) {
+		dev_err(dev, "ehci_reset failed %d\n", retval);
+		return retval;
+	}
+
+	return 0;
+}
+
+static const struct hc_driver mv_ehci_hc_driver = {
+	.description = hcd_name,
+	.product_desc = "Marvell EHCI",
+	.hcd_priv_size = sizeof(struct ehci_hcd),
+
+	/*
+	 * generic hardware linkage
+	 */
+	.irq = ehci_irq,
+	.flags = HCD_MEMORY | HCD_USB2,
+
+	/*
+	 * basic lifecycle operations
+	 */
+	.reset = mv_ehci_reset,
+	.start = ehci_run,
+	.stop = ehci_stop,
+	.shutdown = ehci_shutdown,
+
+	/*
+	 * managing i/o requests and associated device resources
+	 */
+	.urb_enqueue = ehci_urb_enqueue,
+	.urb_dequeue = ehci_urb_dequeue,
+	.endpoint_disable = ehci_endpoint_disable,
+	.endpoint_reset = ehci_endpoint_reset,
+	.clear_tt_buffer_complete = ehci_clear_tt_buffer_complete,
+
+	/*
+	 * scheduling support
+	 */
+	.get_frame_number = ehci_get_frame,
+
+	/*
+	 * root hub support
+	 */
+	.hub_status_data = ehci_hub_status_data,
+	.hub_control = ehci_hub_control,
+	.bus_suspend = ehci_bus_suspend,
+	.bus_resume = ehci_bus_resume,
+};
+
+static int mv_ehci_probe(struct platform_device *pdev)
+{
+	struct mv_usb_platform_data *pdata = pdev->dev.platform_data;
+	struct usb_hcd *hcd;
+	struct ehci_hcd *ehci;
+	struct ehci_hcd_mv *ehci_mv;
+	struct resource *r;
+	int clk_i, retval = -ENODEV;
+	u32 offset;
+	size_t size;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "missing platform_data\n");
+		return -ENODEV;
+	}
+
+	if (usb_disabled())
+		return -ENODEV;
+
+	hcd = usb_create_hcd(&mv_ehci_hc_driver, &pdev->dev, "mv ehci");
+	if (!hcd)
+		return -ENOMEM;
+
+	size = sizeof(*ehci_mv) + sizeof(struct clk *) * pdata->clknum;
+	ehci_mv = kzalloc(size, GFP_KERNEL);
+	if (ehci_mv == NULL) {
+		dev_err(&pdev->dev, "cannot allocate ehci_hcd_mv\n");
+		retval = -ENOMEM;
+		goto err_put_hcd;
+	}
+
+	platform_set_drvdata(pdev, ehci_mv);
+	ehci_mv->pdata = pdata;
+	ehci_mv->hcd = hcd;
+
+	ehci_mv->clknum = pdata->clknum;
+	for (clk_i = 0; clk_i < ehci_mv->clknum; clk_i++) {
+		ehci_mv->clk[clk_i] =
+		    clk_get(&pdev->dev, pdata->clkname[clk_i]);
+		if (IS_ERR(ehci_mv->clk[clk_i])) {
+			dev_err(&pdev->dev, "error get clck \"%s\"\n",
+				pdata->clkname[clk_i]);
+			retval = PTR_ERR(ehci_mv->clk[clk_i]);
+			goto err_put_clk;
+		}
+	}
+
+	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "phyregs");
+	if (r == NULL) {
+		dev_err(&pdev->dev, "no phy I/O memory resource defined\n");
+		retval = -ENODEV;
+		goto err_put_clk;
+	}
+
+	ehci_mv->phy_regs = ioremap(r->start, resource_size(r));
+	if (ehci_mv->phy_regs == 0) {
+		dev_err(&pdev->dev, "failed to map phy I/O memory\n");
+		retval = -EFAULT;
+		goto err_put_clk;
+	}
+
+	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "capregs");
+	if (!r) {
+		dev_err(&pdev->dev, "no I/O memory resource defined\n");
+		retval = -ENODEV;
+		goto err_iounmap_phyreg;
+	}
+
+	ehci_mv->cap_regs = ioremap(r->start, resource_size(r));
+	if (ehci_mv->cap_regs == NULL) {
+		dev_err(&pdev->dev, "failed to map I/O memory\n");
+		retval = -EFAULT;
+		goto err_iounmap_phyreg;
+	}
+
+	retval = mv_ehci_enable(ehci_mv);
+	if (retval) {
+		dev_err(&pdev->dev, "init phy error %d\n", retval);
+		goto err_iounmap_capreg;
+	}
+
+	offset = readl(ehci_mv->cap_regs) & CAPLENGTH_MASK;
+	ehci_mv->op_regs =
+		(void __iomem *) ((unsigned long) ehci_mv->cap_regs + offset);
+
+	hcd->rsrc_start = r->start;
+	hcd->rsrc_len = r->end - r->start + 1;
+	hcd->regs = ehci_mv->op_regs;
+
+	hcd->irq = platform_get_irq(pdev, 0);
+	if (!hcd->irq) {
+		dev_err(&pdev->dev, "Cannot get irq.");
+		retval = -ENODEV;
+		goto err_disable_clk;
+	}
+
+	ehci = hcd_to_ehci(hcd);
+	ehci->caps = (struct ehci_caps *) ehci_mv->cap_regs;
+	ehci->regs = (struct ehci_regs *) ehci_mv->op_regs;
+	ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params);
+
+	ehci_mv->mode = pdata->mode;
+	if (ehci_mv->mode == MV_USB_MODE_OTG) {
+#ifdef CONFIG_USB_OTG_UTILS
+		ehci_mv->otg = otg_get_transceiver();
+		if (!ehci_mv->otg) {
+			dev_err(&pdev->dev,
+				"unable to find transceiver\n");
+			retval = -ENODEV;
+			goto err_disable_clk;
+		}
+
+		retval = otg_set_host(ehci_mv->otg, &hcd->self);
+		if (retval < 0) {
+			dev_err(&pdev->dev,
+				"unable to register with transceiver\n");
+			retval = -ENODEV;
+			goto err_put_transceiver;
+		}
+		/* otg will enable clock before use as host */
+		mv_ehci_disable(ehci_mv);
+#else
+		dev_info(&pdev->dev, "MV_USB_MODE_OTG "
+			 "must have CONFIG_USB_OTG_UTILS enabled\n");
+		goto err_disable_clk;
+#endif
+	} else {
+		if (pdata->set_vbus)
+			pdata->set_vbus(1);
+
+		retval = usb_add_hcd(hcd, hcd->irq, IRQF_SHARED);
+		if (retval) {
+			dev_err(&pdev->dev,
+				"failed to add hcd with err %d\n", retval);
+			goto err_set_vbus;
+		}
+	}
+
+	if (pdata->private_init)
+		pdata->private_init(ehci_mv->op_regs, ehci_mv->phy_regs);
+
+	dev_info(&pdev->dev,
+		 "successful find EHCI device with regs 0x%p irq %d"
+		 " working in %s mode\n", hcd->regs, hcd->irq,
+		 ehci_mv->mode == MV_USB_MODE_OTG ? "OTG" : "Host");
+
+	return 0;
+
+err_set_vbus:
+	if (pdata->set_vbus)
+		pdata->set_vbus(0);
+#ifdef CONFIG_USB_OTG_UTILS
+err_put_transceiver:
+	if (ehci_mv->otg)
+		otg_put_transceiver(ehci_mv->otg);
+#endif
+err_disable_clk:
+	mv_ehci_disable(ehci_mv);
+err_iounmap_capreg:
+	iounmap(ehci_mv->cap_regs);
+err_iounmap_phyreg:
+	iounmap(ehci_mv->phy_regs);
+err_put_clk:
+	for (clk_i--; clk_i >= 0; clk_i--)
+		clk_put(ehci_mv->clk[clk_i]);
+	platform_set_drvdata(pdev, NULL);
+	kfree(ehci_mv);
+err_put_hcd:
+	usb_put_hcd(hcd);
+
+	return retval;
+}
+
+static int mv_ehci_remove(struct platform_device *pdev)
+{
+	struct ehci_hcd_mv *ehci_mv = platform_get_drvdata(pdev);
+	struct usb_hcd *hcd = ehci_mv->hcd;
+	int clk_i;
+
+	if (hcd->rh_registered)
+		usb_remove_hcd(hcd);
+
+	if (ehci_mv->otg) {
+		otg_set_host(ehci_mv->otg, NULL);
+		otg_put_transceiver(ehci_mv->otg);
+	}
+
+	if (ehci_mv->mode == MV_USB_MODE_HOST) {
+		if (ehci_mv->pdata->set_vbus)
+			ehci_mv->pdata->set_vbus(0);
+
+		mv_ehci_disable(ehci_mv);
+	}
+
+	iounmap(ehci_mv->cap_regs);
+	iounmap(ehci_mv->phy_regs);
+
+	for (clk_i = 0; clk_i < ehci_mv->clknum; clk_i++)
+		clk_put(ehci_mv->clk[clk_i]);
+
+	platform_set_drvdata(pdev, NULL);
+
+	kfree(ehci_mv);
+	usb_put_hcd(hcd);
+
+	return 0;
+}
+
+MODULE_ALIAS("mv-ehci");
+
+static const struct platform_device_id ehci_id_table[] = {
+	{"pxa-u2oehci", PXA_U2OEHCI},
+	{"pxa-sph", PXA_SPH},
+	{"mmp3-hsic", MMP3_HSIC},
+	{"mmp3-fsic", MMP3_FSIC},
+	{},
+};
+
+static void mv_ehci_shutdown(struct platform_device *pdev)
+{
+	struct ehci_hcd_mv *ehci_mv = platform_get_drvdata(pdev);
+	struct usb_hcd *hcd = ehci_mv->hcd;
+
+	if (!hcd->rh_registered)
+		return;
+
+	if (hcd->driver->shutdown)
+		hcd->driver->shutdown(hcd);
+}
+
+static struct platform_driver ehci_mv_driver = {
+	.probe = mv_ehci_probe,
+	.remove = mv_ehci_remove,
+	.shutdown = mv_ehci_shutdown,
+	.driver = {
+		   .name = "mv-ehci",
+		   .bus = &platform_bus_type,
+		   },
+	.id_table = ehci_id_table,
+};
diff --git a/include/linux/platform_data/mv_usb.h b/include/linux/platform_data/mv_usb.h
index f4cb0ec373c3..d94804aca764 100644
--- a/include/linux/platform_data/mv_usb.h
+++ b/include/linux/platform_data/mv_usb.h
@@ -50,6 +50,7 @@ struct mv_usb_platform_data {
 	int	(*phy_init)(void __iomem *regbase);
 	void	(*phy_deinit)(void __iomem *regbase);
 	int	(*set_vbus)(unsigned int vbus);
+	int     (*private_init)(void __iomem *opregs, void __iomem *phyregs);
 };
 
 #ifndef CONFIG_HAVE_CLK
-- 
cgit v1.2.3


From a597fa78d374c57dcf9a9bac02472a530cd7e60a Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Fri, 10 Jun 2011 12:23:30 -0300
Subject: [media] v4l: Add over-current and indicator flash fault bits

Flash controllers can report over-current and indicator fault
conditions. Define flash fault control bits for them.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Sakari Ailus <sakari.ailus@maxwell.research.nokia.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/v4l/controls.xml | 10 ++++++++++
 include/linux/videodev2.h                    |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml
index 9e72f077329a..c0422c622337 100644
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -3329,6 +3329,16 @@ interface and may change in the future.</para>
 		  <entry>The short circuit protection of the flash
 		  controller has been triggered.</entry>
 		</row>
+		<row>
+		  <entry><constant>V4L2_FLASH_FAULT_OVER_CURRENT</constant></entry>
+		  <entry>Current in the LED power supply has exceeded the limit
+		  specific to the flash controller.</entry>
+		</row>
+		<row>
+		  <entry><constant>V4L2_FLASH_FAULT_INDICATOR</constant></entry>
+		  <entry>The flash controller has detected a short or open
+		  circuit condition on the indicator LED.</entry>
+		</row>
 	      </tbody>
 	    </entrytbl>
 	  </row>
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 4b752d5ee80e..3d62631839bc 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1682,6 +1682,8 @@ enum v4l2_flash_strobe_source {
 #define V4L2_FLASH_FAULT_TIMEOUT		(1 << 1)
 #define V4L2_FLASH_FAULT_OVER_TEMPERATURE	(1 << 2)
 #define V4L2_FLASH_FAULT_SHORT_CIRCUIT		(1 << 3)
+#define V4L2_FLASH_FAULT_OVER_CURRENT		(1 << 4)
+#define V4L2_FLASH_FAULT_INDICATOR		(1 << 5)
 
 #define V4L2_CID_FLASH_CHARGE			(V4L2_CID_FLASH_CLASS_BASE + 11)
 #define V4L2_CID_FLASH_READY			(V4L2_CID_FLASH_CLASS_BASE + 12)
-- 
cgit v1.2.3


From bf3b84006e22ae241ec3d53dbe6c6d1f6ceddb56 Mon Sep 17 00:00:00 2001
From: Manu Abraham <abraham.manu@gmail.com>
Date: Sat, 17 Dec 2011 20:36:55 -0300
Subject: [media] DVB: Use a unique delivery system identifier for DVBC_ANNEX_C

Use a unique delivery system identifier for DVBC_ANNEX_C, just like any
other.

DVBC_ANNEX_A and DVBC_ANNEX_C have slightly different parameters
and are used in 2 geographically different locations.

Signed-off-by: Manu Abraham <abraham.manu@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/dvb/frontend.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index cb114f52ccf7..b2a939f8f1e2 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -337,7 +337,7 @@ typedef enum fe_rolloff {
 
 typedef enum fe_delivery_system {
 	SYS_UNDEFINED,
-	SYS_DVBC_ANNEX_AC,
+	SYS_DVBC_ANNEX_A,
 	SYS_DVBC_ANNEX_B,
 	SYS_DVBT,
 	SYS_DSS,
@@ -354,8 +354,13 @@ typedef enum fe_delivery_system {
 	SYS_DAB,
 	SYS_DVBT2,
 	SYS_TURBO,
+	SYS_DVBC_ANNEX_C,
 } fe_delivery_system_t;
 
+
+#define SYS_DVBC_ANNEX_AC	SYS_DVBC_ANNEX_A
+
+
 struct dtv_cmds_h {
 	char	*name;		/* A display name for debugging purposes */
 
-- 
cgit v1.2.3


From fd66c45dd51000ff444231a94ac15ccab8cffd3d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Sat, 17 Dec 2011 20:36:57 -0300
Subject: [media] Remove Annex A/C selection via roll-off factor

Instead of using a roll-off factor, change DRX-K & friends to select
the bandwidth filter and the Nyquist half roll-off via delivery system.

This provides a cleaner support for Annex A/C switch.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/common/tuners/xc5000.c       | 137 ++++++++++++-----------------
 drivers/media/dvb/dvb-core/dvb_frontend.c  |  25 +++++-
 drivers/media/dvb/frontends/drxk_hard.c    |  15 ++--
 drivers/media/dvb/frontends/tda18271c2dd.c |  44 ++++-----
 include/linux/dvb/frontend.h               |   2 -
 5 files changed, 106 insertions(+), 117 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/common/tuners/xc5000.c b/drivers/media/common/tuners/xc5000.c
index 97ad33896343..5c56d3cc030b 100644
--- a/drivers/media/common/tuners/xc5000.c
+++ b/drivers/media/common/tuners/xc5000.c
@@ -629,11 +629,13 @@ static void xc_debug_dump(struct xc5000_priv *priv)
 }
 
 static int xc5000_set_params(struct dvb_frontend *fe,
-	struct dvb_frontend_parameters *params)
+			     struct dvb_frontend_parameters *params)
 {
+	int ret, b;
 	struct xc5000_priv *priv = fe->tuner_priv;
-	int ret;
-	u32 bw;
+	u32 bw = fe->dtv_property_cache.bandwidth_hz;
+	u32 freq = fe->dtv_property_cache.frequency;
+	u32 delsys  = fe->dtv_property_cache.delivery_system;
 
 	if (xc5000_is_firmware_loaded(fe) != XC_RESULT_SUCCESS) {
 		if (xc_load_fw_and_init_tuner(fe) != XC_RESULT_SUCCESS) {
@@ -642,104 +644,77 @@ static int xc5000_set_params(struct dvb_frontend *fe,
 		}
 	}
 
-	dprintk(1, "%s() frequency=%d (Hz)\n", __func__, params->frequency);
+	dprintk(1, "%s() frequency=%d (Hz)\n", __func__, freq);
 
-	if (fe->ops.info.type == FE_ATSC) {
-		dprintk(1, "%s() ATSC\n", __func__);
-		switch (params->u.vsb.modulation) {
-		case VSB_8:
-		case VSB_16:
-			dprintk(1, "%s() VSB modulation\n", __func__);
-			priv->rf_mode = XC_RF_MODE_AIR;
-			priv->freq_hz = params->frequency - 1750000;
-			priv->bandwidth = BANDWIDTH_6_MHZ;
-			priv->video_standard = DTV6;
-			break;
-		case QAM_64:
-		case QAM_256:
-		case QAM_AUTO:
-			dprintk(1, "%s() QAM modulation\n", __func__);
-			priv->rf_mode = XC_RF_MODE_CABLE;
-			priv->freq_hz = params->frequency - 1750000;
-			priv->bandwidth = BANDWIDTH_6_MHZ;
-			priv->video_standard = DTV6;
-			break;
-		default:
-			return -EINVAL;
-		}
-	} else if (fe->ops.info.type == FE_OFDM) {
+	switch (delsys) {
+	case SYS_ATSC:
+		dprintk(1, "%s() VSB modulation\n", __func__);
+		priv->rf_mode = XC_RF_MODE_AIR;
+		priv->freq_hz = freq - 1750000;
+		priv->bandwidth = BANDWIDTH_6_MHZ;
+		priv->video_standard = DTV6;
+		break;
+	case SYS_DVBC_ANNEX_B:
+		dprintk(1, "%s() QAM modulation\n", __func__);
+		priv->rf_mode = XC_RF_MODE_CABLE;
+		priv->freq_hz = freq - 1750000;
+		priv->bandwidth = BANDWIDTH_6_MHZ;
+		priv->video_standard = DTV6;
+		break;
+	case SYS_DVBT:
+	case SYS_DVBT2:
 		dprintk(1, "%s() OFDM\n", __func__);
-		switch (params->u.ofdm.bandwidth) {
-		case BANDWIDTH_6_MHZ:
+		switch (bw) {
+		case 6000000:
 			priv->bandwidth = BANDWIDTH_6_MHZ;
 			priv->video_standard = DTV6;
-			priv->freq_hz = params->frequency - 1750000;
+			priv->freq_hz = freq - 1750000;
 			break;
-		case BANDWIDTH_7_MHZ:
+		case 7000000:
 			priv->bandwidth = BANDWIDTH_7_MHZ;
 			priv->video_standard = DTV7;
-			priv->freq_hz = params->frequency - 2250000;
+			priv->freq_hz = freq - 2250000;
 			break;
-		case BANDWIDTH_8_MHZ:
+		case 8000000:
 			priv->bandwidth = BANDWIDTH_8_MHZ;
 			priv->video_standard = DTV8;
-			priv->freq_hz = params->frequency - 2750000;
+			priv->freq_hz = freq - 2750000;
 			break;
 		default:
 			printk(KERN_ERR "xc5000 bandwidth not set!\n");
 			return -EINVAL;
 		}
 		priv->rf_mode = XC_RF_MODE_AIR;
-	} else if (fe->ops.info.type == FE_QAM) {
-		switch (params->u.qam.modulation) {
-		case QAM_256:
-		case QAM_AUTO:
-		case QAM_16:
-		case QAM_32:
-		case QAM_64:
-		case QAM_128:
-			dprintk(1, "%s() QAM modulation\n", __func__);
-			priv->rf_mode = XC_RF_MODE_CABLE;
-			/*
-			 * Using a higher bandwidth at the tuner filter may
-			 * allow inter-carrier interference.
-			 * So, determine the minimal channel spacing, in order
-			 * to better adjust the tuner filter.
-			 * According with ITU-T J.83, the bandwidth is given by:
-			 * bw = Simbol Rate * (1 + roll_off), where the roll_off
-			 * is equal to 0.15 for Annex A, and 0.13 for annex C
-			 */
-			if (fe->dtv_property_cache.rolloff == ROLLOFF_13)
-				bw = (params->u.qam.symbol_rate * 113) / 100;
-			else
-				bw = (params->u.qam.symbol_rate * 115) / 100;
-			if (bw <= 6000000) {
-				priv->bandwidth = BANDWIDTH_6_MHZ;
-				priv->video_standard = DTV6;
-				priv->freq_hz = params->frequency - 1750000;
-			} else if (bw <= 7000000) {
-				priv->bandwidth = BANDWIDTH_7_MHZ;
-				priv->video_standard = DTV7;
-				priv->freq_hz = params->frequency - 2250000;
-			} else {
-				priv->bandwidth = BANDWIDTH_8_MHZ;
-				priv->video_standard = DTV7_8;
-				priv->freq_hz = params->frequency - 2750000;
-			}
-			dprintk(1, "%s() Bandwidth %dMHz (%d)\n", __func__,
-				BANDWIDTH_6_MHZ ? 6: 8, bw);
-			break;
-		default:
-			dprintk(1, "%s() Unsupported QAM type\n", __func__);
-			return -EINVAL;
+	case SYS_DVBC_ANNEX_A:
+	case SYS_DVBC_ANNEX_C:
+		dprintk(1, "%s() QAM modulation\n", __func__);
+		priv->rf_mode = XC_RF_MODE_CABLE;
+		if (bw <= 6000000) {
+			priv->bandwidth = BANDWIDTH_6_MHZ;
+			priv->video_standard = DTV6;
+			priv->freq_hz = freq - 1750000;
+			b = 6;
+		} else if (bw <= 7000000) {
+			priv->bandwidth = BANDWIDTH_7_MHZ;
+			priv->video_standard = DTV7;
+			priv->freq_hz = freq - 2250000;
+			b = 7;
+		} else {
+			priv->bandwidth = BANDWIDTH_8_MHZ;
+			priv->video_standard = DTV7_8;
+			priv->freq_hz = freq - 2750000;
+			b = 8;
 		}
-	} else {
-		printk(KERN_ERR "xc5000 modulation type not supported!\n");
+		dprintk(1, "%s() Bandwidth %dMHz (%d)\n", __func__,
+			b, bw);
+		break;
+	default:
+		printk(KERN_ERR "xc5000: delivery system is not supported!\n");
 		return -EINVAL;
 	}
 
-	dprintk(1, "%s() frequency=%d (compensated)\n",
-		__func__, priv->freq_hz);
+	dprintk(1, "%s() frequency=%d (compensated to %d)\n",
+		__func__, freq, priv->freq_hz);
 
 	ret = xc_SetSignalSource(priv, priv->rf_mode);
 	if (ret != XC_RESULT_SUCCESS) {
diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c
index 821b2250ec70..66537b10132c 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.c
@@ -1011,7 +1011,7 @@ static void dtv_property_dump(struct dtv_property *tvp)
 
 static int is_legacy_delivery_system(fe_delivery_system_t s)
 {
-	if((s == SYS_UNDEFINED) || (s == SYS_DVBC_ANNEX_AC) ||
+	if((s == SYS_UNDEFINED) || (s == SYS_DVBC_ANNEX_A) ||
 	   (s == SYS_DVBC_ANNEX_B) || (s == SYS_DVBT) || (s == SYS_DVBS) ||
 	   (s == SYS_ATSC))
 		return 1;
@@ -1032,8 +1032,7 @@ static void dtv_property_cache_init(struct dvb_frontend *fe,
 		c->delivery_system = SYS_DVBS;
 		break;
 	case FE_QAM:
-		c->delivery_system = SYS_DVBC_ANNEX_AC;
-		c->rolloff = ROLLOFF_15; /* implied for Annex A */
+		c->delivery_system = SYS_DVBC_ANNEX_A;
 		break;
 	case FE_OFDM:
 		c->delivery_system = SYS_DVBT;
@@ -1144,9 +1143,10 @@ static void dtv_property_legacy_params_sync(struct dvb_frontend *fe)
  */
 static void dtv_property_adv_params_sync(struct dvb_frontend *fe)
 {
-	const struct dtv_frontend_properties *c = &fe->dtv_property_cache;
+	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
 	struct dvb_frontend_private *fepriv = fe->frontend_priv;
 	struct dvb_frontend_parameters *p = &fepriv->parameters_in;
+	u32 rolloff = 0;
 
 	p->frequency = c->frequency;
 	p->inversion = c->inversion;
@@ -1178,6 +1178,23 @@ static void dtv_property_adv_params_sync(struct dvb_frontend *fe)
 		else
 			p->u.ofdm.bandwidth = BANDWIDTH_AUTO;
 	}
+
+	/*
+	 * On DVB-C, the bandwidth is a function of roll-off and symbol rate.
+	 * The bandwidth is required for DVB-C tuners, in order to avoid
+	 * inter-channel noise. Instead of estimating the minimal required
+	 * bandwidth on every single driver, calculates it here and fills
+	 * it at the cache bandwidth parameter.
+	 * While not officially supported, a side effect of handling it at
+	 * the cache level is that a program could retrieve the bandwidth
+	 * via DTV_BANDWIDTH_HZ, wich may be useful for test programs.
+	 */
+	if (c->delivery_system == SYS_DVBC_ANNEX_A)
+		rolloff = 115;
+	if (c->delivery_system == SYS_DVBC_ANNEX_C)
+		rolloff = 113;
+	if (rolloff)
+		c->bandwidth_hz = (c->symbol_rate * rolloff) / 100;
 }
 
 static void dtv_property_cache_submit(struct dvb_frontend *fe)
diff --git a/drivers/media/dvb/frontends/drxk_hard.c b/drivers/media/dvb/frontends/drxk_hard.c
index 038e470bf039..a2c819651933 100644
--- a/drivers/media/dvb/frontends/drxk_hard.c
+++ b/drivers/media/dvb/frontends/drxk_hard.c
@@ -6215,6 +6215,7 @@ static int drxk_set_parameters(struct dvb_frontend *fe,
 			       struct dvb_frontend_parameters *p)
 {
 	struct drxk_state *state = fe->demodulator_priv;
+	u32 delsys  = fe->dtv_property_cache.delivery_system;
 	u32 IF;
 
 	dprintk(1, "\n");
@@ -6225,11 +6226,15 @@ static int drxk_set_parameters(struct dvb_frontend *fe,
 		return -EINVAL;
 	}
 
-	if (fe->ops.info.type == FE_QAM) {
-		if (fe->dtv_property_cache.rolloff == ROLLOFF_13)
-			state->m_itut_annex_c = true;
-		else
-			state->m_itut_annex_c = false;
+	switch (delsys) {
+	case SYS_DVBC_ANNEX_A:
+		state->m_itut_annex_c = false;
+		break;
+	case SYS_DVBC_ANNEX_C:
+		state->m_itut_annex_c = true;
+		break;
+	default:
+		return -EINVAL;
 	}
 
 	if (fe->ops.i2c_gate_ctrl)
diff --git a/drivers/media/dvb/frontends/tda18271c2dd.c b/drivers/media/dvb/frontends/tda18271c2dd.c
index b66ca29704fc..0f8e9622bc96 100644
--- a/drivers/media/dvb/frontends/tda18271c2dd.c
+++ b/drivers/media/dvb/frontends/tda18271c2dd.c
@@ -1130,50 +1130,44 @@ static int set_params(struct dvb_frontend *fe,
 	struct tda_state *state = fe->tuner_priv;
 	int status = 0;
 	int Standard;
-	u32 bw;
+	u32 bw = fe->dtv_property_cache.bandwidth_hz;
+	u32 delsys  = fe->dtv_property_cache.delivery_system;
 
-	state->m_Frequency = params->frequency;
+	state->m_Frequency = fe->dtv_property_cache.frequency;
 
-	if (fe->ops.info.type == FE_OFDM)
-		switch (params->u.ofdm.bandwidth) {
-		case BANDWIDTH_6_MHZ:
+	switch (delsys) {
+	case  SYS_DVBT:
+	case  SYS_DVBT2:
+		switch (bw) {
+		case 6000000:
 			Standard = HF_DVBT_6MHZ;
 			break;
-		case BANDWIDTH_7_MHZ:
+		case 7000000:
 			Standard = HF_DVBT_7MHZ;
 			break;
-		default:
-		case BANDWIDTH_8_MHZ:
+		case 8000000:
 			Standard = HF_DVBT_8MHZ;
 			break;
+		default:
+			return -EINVAL;
 		}
-	else if (fe->ops.info.type == FE_QAM) {
-		/*
-		 * Using a higher bandwidth at the tuner filter may
-		 * allow inter-carrier interference.
-		 * So, determine the minimal channel spacing, in order
-		 * to better adjust the tuner filter.
-		 * According with ITU-T J.83, the bandwidth is given by:
-		 * bw = Simbol Rate * (1 + roll_off), where the roll_off
-		 * is equal to 0.15 for Annex A, and 0.13 for annex C
-		 */
-		if (fe->dtv_property_cache.rolloff == ROLLOFF_13)
-			bw = (params->u.qam.symbol_rate * 113) / 100;
-		else
-			bw = (params->u.qam.symbol_rate * 115) / 100;
+	case SYS_DVBC_ANNEX_A:
+	case SYS_DVBC_ANNEX_C:
 		if (bw <= 6000000)
 			Standard = HF_DVBC_6MHZ;
 		else if (bw <= 7000000)
 			Standard = HF_DVBC_7MHZ;
 		else
 			Standard = HF_DVBC_8MHZ;
-	} else
+	default:
 		return -EINVAL;
+	}
 	do {
-		status = RFTrackingFiltersCorrection(state, params->frequency);
+		status = RFTrackingFiltersCorrection(state, state->m_Frequency);
 		if (status < 0)
 			break;
-		status = ChannelConfiguration(state, params->frequency, Standard);
+		status = ChannelConfiguration(state, state->m_Frequency,
+					      Standard);
 		if (status < 0)
 			break;
 
diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index b2a939f8f1e2..a3c762383f88 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -331,8 +331,6 @@ typedef enum fe_rolloff {
 	ROLLOFF_20,
 	ROLLOFF_25,
 	ROLLOFF_AUTO,
-	ROLLOFF_15,	/* DVB-C Annex A */
-	ROLLOFF_13,	/* DVB-C Annex C */
 } fe_rolloff_t;
 
 typedef enum fe_delivery_system {
-- 
cgit v1.2.3


From ab56222a32b9dbaae19c1d37f07b0ac4fc3c27ec Mon Sep 17 00:00:00 2001
From: Vijay Subramanian <subramanian.vijay@gmail.com>
Date: Tue, 20 Dec 2011 13:23:24 +0000
Subject: tcp: Replace constants with #define macros

to record the state of SACK/FACK and DSACK for better readability and maintenance.

Signed-off-by: Vijay Subramanian <subramanian.vijay@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   | 5 +++++
 include/net/tcp.h     | 4 ++--
 net/ipv4/syncookies.c | 2 +-
 net/ipv4/tcp_input.c  | 6 +++---
 4 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7f59ee946983..46a85c9e1f25 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -238,6 +238,11 @@ struct tcp_sack_block {
 	u32	end_seq;
 };
 
+/*These are used to set the sack_ok field in struct tcp_options_received */
+#define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
+#define TCP_FACK_ENABLED  (1 << 1)   /*1 = FACK is enabled locally*/
+#define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from peer*/
+
 struct tcp_options_received {
 /*	PAWS/RTTM data	*/
 	long	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a4f52e154843..0118ea999f67 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -773,12 +773,12 @@ static inline int tcp_is_reno(const struct tcp_sock *tp)
 
 static inline int tcp_is_fack(const struct tcp_sock *tp)
 {
-	return tp->rx_opt.sack_ok & 2;
+	return tp->rx_opt.sack_ok & TCP_FACK_ENABLED;
 }
 
 static inline void tcp_enable_fack(struct tcp_sock *tp)
 {
-	tp->rx_opt.sack_ok |= 2;
+	tp->rx_opt.sack_ok |= TCP_FACK_ENABLED;
 }
 
 static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 90f6544c13e2..51fdbb490437 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -245,7 +245,7 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
 	if (!sysctl_tcp_timestamps)
 		return false;
 
-	tcp_opt->sack_ok = (options >> 4) & 0x1;
+	tcp_opt->sack_ok = (options & (1 << 4)) ? TCP_SACK_SEEN : 0;
 	*ecn_ok = (options >> 5) & 1;
 	if (*ecn_ok && !sysctl_tcp_ecn)
 		return false;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f131d92d25ee..2877c3e09587 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -865,13 +865,13 @@ static void tcp_disable_fack(struct tcp_sock *tp)
 	/* RFC3517 uses different metric in lost marker => reset on change */
 	if (tcp_is_fack(tp))
 		tp->lost_skb_hint = NULL;
-	tp->rx_opt.sack_ok &= ~2;
+	tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
 }
 
 /* Take a notice that peer is sending D-SACKs */
 static void tcp_dsack_seen(struct tcp_sock *tp)
 {
-	tp->rx_opt.sack_ok |= 4;
+	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
 }
 
 /* Initialize metrics on socket. */
@@ -3878,7 +3878,7 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
 			case TCPOPT_SACK_PERM:
 				if (opsize == TCPOLEN_SACK_PERM && th->syn &&
 				    !estab && sysctl_tcp_sack) {
-					opt_rx->sack_ok = 1;
+					opt_rx->sack_ok = TCP_SACK_SEEN;
 					tcp_sack_reset(opt_rx);
 				}
 				break;
-- 
cgit v1.2.3


From f07fdec50a13f134ea9608c8fb3f6408c58ef55e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 13 Dec 2011 13:20:54 +0100
Subject: lockdep/waitqueues: Add better annotation

 -> #2 (&tty->write_wait){-.-...}:

is a lot more informative than:

 -> #2 (key#19){-.....}:

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-8zpopbny51023rdb0qq67eye@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/wait.h | 4 ++--
 kernel/wait.c        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 3efc9f3f43a0..a9ce45e8501c 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -77,13 +77,13 @@ struct task_struct;
 #define __WAIT_BIT_KEY_INITIALIZER(word, bit)				\
 	{ .flags = word, .bit_nr = bit, }
 
-extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *);
+extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *);
 
 #define init_waitqueue_head(q)				\
 	do {						\
 		static struct lock_class_key __key;	\
 							\
-		__init_waitqueue_head((q), &__key);	\
+		__init_waitqueue_head((q), #q, &__key);	\
 	} while (0)
 
 #ifdef CONFIG_LOCKDEP
diff --git a/kernel/wait.c b/kernel/wait.c
index 26fa7797f90f..7fdd9eaca2c3 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -10,10 +10,10 @@
 #include <linux/wait.h>
 #include <linux/hash.h>
 
-void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
+void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
 {
 	spin_lock_init(&q->lock);
-	lockdep_set_class(&q->lock, key);
+	lockdep_set_class_and_name(&q->lock, key, name);
 	INIT_LIST_HEAD(&q->task_list);
 }
 
-- 
cgit v1.2.3


From c37e17497e01fc0f5d2d6feb5723b210b3ab8890 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Sun, 11 Dec 2011 00:28:52 +0100
Subject: perf events: Add PERF_COUNT_HW_REF_CPU_CYCLES generic PMU event

This event counts the number of reference core cpu cycles.
Reference means that the event increments at a constant rate which
is not subject to core CPU frequency adjustments. The event may
not count when the processor is in halted (low power) state.
As such, it may not be equivalent to wall clock time. However,
when the processor is not halted state, the event keeps
a constant correlation with wall clock time.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1323559734-3488-3-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 564769cdb473..08855613ceb3 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -54,6 +54,7 @@ enum perf_hw_id {
 	PERF_COUNT_HW_BUS_CYCLES		= 6,
 	PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	= 7,
 	PERF_COUNT_HW_STALLED_CYCLES_BACKEND	= 8,
+	PERF_COUNT_HW_REF_CPU_CYCLES		= 9,
 
 	PERF_COUNT_HW_MAX,			/* non-ABI */
 };
-- 
cgit v1.2.3


From 4371ea8202e98c8ef77ca887de3b19affbb3498f Mon Sep 17 00:00:00 2001
From: Daniel Kurtz <djkurtz@chromium.org>
Date: Thu, 17 Nov 2011 19:23:50 +0800
Subject: HID: usbhid: defer LED setting to a workqueue

Defer LED setting action to a workqueue.
This is more likely to send all LED change events in a single URB.

Signed-off-by: Daniel Kurtz <djkurtz@chromium.org>
Acked-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c       | 42 ++++++++++++++++++++++++++++++++++++++
 drivers/hid/usbhid/hid-core.c | 47 +++++++++++++++++++++++++++++++++----------
 drivers/hid/usbhid/usbhid.h   |  2 ++
 include/linux/hid.h           |  2 ++
 4 files changed, 82 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index b9b8c75a6f9a..c6ee632bfd68 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -976,6 +976,48 @@ int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int
 }
 EXPORT_SYMBOL_GPL(hidinput_find_field);
 
+struct hid_field *hidinput_get_led_field(struct hid_device *hid)
+{
+	struct hid_report *report;
+	struct hid_field *field;
+	int i, j;
+
+	list_for_each_entry(report,
+			    &hid->report_enum[HID_OUTPUT_REPORT].report_list,
+			    list) {
+		for (i = 0; i < report->maxfield; i++) {
+			field = report->field[i];
+			for (j = 0; j < field->maxusage; j++)
+				if (field->usage[j].type == EV_LED)
+					return field;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(hidinput_get_led_field);
+
+unsigned int hidinput_count_leds(struct hid_device *hid)
+{
+	struct hid_report *report;
+	struct hid_field *field;
+	int i, j;
+	unsigned int count = 0;
+
+	list_for_each_entry(report,
+			    &hid->report_enum[HID_OUTPUT_REPORT].report_list,
+			    list) {
+		for (i = 0; i < report->maxfield; i++) {
+			field = report->field[i];
+			for (j = 0; j < field->maxusage; j++)
+				if (field->usage[j].type == EV_LED &&
+				    field->value[j])
+					count += 1;
+		}
+	}
+	return count;
+}
+EXPORT_SYMBOL_GPL(hidinput_count_leds);
+
 static int hidinput_open(struct input_dev *dev)
 {
 	struct hid_device *hid = input_get_drvdata(dev);
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 719f6b02fab1..5bf91dbad59d 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -602,6 +602,30 @@ void usbhid_submit_report(struct hid_device *hid, struct hid_report *report, uns
 }
 EXPORT_SYMBOL_GPL(usbhid_submit_report);
 
+/* Workqueue routine to send requests to change LEDs */
+static void hid_led(struct work_struct *work)
+{
+	struct usbhid_device *usbhid =
+		container_of(work, struct usbhid_device, led_work);
+	struct hid_device *hid = usbhid->hid;
+	struct hid_field *field;
+	unsigned long flags;
+
+	field = hidinput_get_led_field(hid);
+	if (!field) {
+		hid_warn(hid, "LED event field not found\n");
+		return;
+	}
+
+	spin_lock_irqsave(&usbhid->lock, flags);
+	if (!test_bit(HID_DISCONNECTED, &usbhid->iofl)) {
+		usbhid->ledcount = hidinput_count_leds(hid);
+		hid_dbg(usbhid->hid, "New ledcount = %u\n", usbhid->ledcount);
+		__usbhid_submit_report(hid, field->report, USB_DIR_OUT);
+	}
+	spin_unlock_irqrestore(&usbhid->lock, flags);
+}
+
 static int usb_hidinput_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value)
 {
 	struct hid_device *hid = input_get_drvdata(dev);
@@ -621,17 +645,15 @@ static int usb_hidinput_input_event(struct input_dev *dev, unsigned int type, un
 		return -1;
 	}
 
+	spin_lock_irqsave(&usbhid->lock, flags);
 	hid_set_field(field, offset, value);
-	if (value) {
-		spin_lock_irqsave(&usbhid->lock, flags);
-		usbhid->ledcount++;
-		spin_unlock_irqrestore(&usbhid->lock, flags);
-	} else {
-		spin_lock_irqsave(&usbhid->lock, flags);
-		usbhid->ledcount--;
-		spin_unlock_irqrestore(&usbhid->lock, flags);
-	}
-	usbhid_submit_report(hid, field->report, USB_DIR_OUT);
+	spin_unlock_irqrestore(&usbhid->lock, flags);
+
+	/*
+	 * Defer performing requested LED action.
+	 * This is more likely gather all LED changes into a single URB.
+	 */
+	schedule_work(&usbhid->led_work);
 
 	return 0;
 }
@@ -1126,7 +1148,7 @@ static void usbhid_stop(struct hid_device *hid)
 		return;
 
 	clear_bit(HID_STARTED, &usbhid->iofl);
-	spin_lock_irq(&usbhid->lock);	/* Sync with error handler */
+	spin_lock_irq(&usbhid->lock);	/* Sync with error and led handlers */
 	set_bit(HID_DISCONNECTED, &usbhid->iofl);
 	spin_unlock_irq(&usbhid->lock);
 	usb_kill_urb(usbhid->urbin);
@@ -1260,6 +1282,8 @@ static int usbhid_probe(struct usb_interface *intf, const struct usb_device_id *
 	setup_timer(&usbhid->io_retry, hid_retry_timeout, (unsigned long) hid);
 	spin_lock_init(&usbhid->lock);
 
+	INIT_WORK(&usbhid->led_work, hid_led);
+
 	ret = hid_add_device(hid);
 	if (ret) {
 		if (ret != -ENODEV)
@@ -1292,6 +1316,7 @@ static void hid_cancel_delayed_stuff(struct usbhid_device *usbhid)
 {
 	del_timer_sync(&usbhid->io_retry);
 	cancel_work_sync(&usbhid->reset_work);
+	cancel_work_sync(&usbhid->led_work);
 }
 
 static void hid_cease_io(struct usbhid_device *usbhid)
diff --git a/drivers/hid/usbhid/usbhid.h b/drivers/hid/usbhid/usbhid.h
index 2d8957c11d2d..cb8f703efde5 100644
--- a/drivers/hid/usbhid/usbhid.h
+++ b/drivers/hid/usbhid/usbhid.h
@@ -96,6 +96,8 @@ struct usbhid_device {
 	struct work_struct reset_work;                                  /* Task context for resets */
 	wait_queue_head_t wait;						/* For sleeping */
 	int ledcount;							/* counting the number of active leds */
+
+	struct work_struct led_work;					/* Task context for setting LEDs */
 };
 
 #define	hid_to_usb_dev(hid_dev) \
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 7f344c3da767..999a54c72b20 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -727,6 +727,8 @@ extern void hidinput_disconnect(struct hid_device *);
 int hid_set_field(struct hid_field *, unsigned, __s32);
 int hid_input_report(struct hid_device *, int type, u8 *, int, int);
 int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int code, struct hid_field **field);
+struct hid_field *hidinput_get_led_field(struct hid_device *hid);
+unsigned int hidinput_count_leds(struct hid_device *hid);
 void hid_output_report(struct hid_report *report, __u8 *data);
 struct hid_device *hid_allocate_device(void);
 struct hid_report *hid_register_report(struct hid_device *device, unsigned type, unsigned id);
-- 
cgit v1.2.3


From 715a3e41e78a40e1e711298667435d5a2cef1972 Mon Sep 17 00:00:00 2001
From: Heiko Stübner <heiko@sntech.de>
Date: Mon, 19 Dec 2011 19:39:15 +0100
Subject: usb: gadget: s3c-hsudc: move platform_data struct to global header

Gadget drivers should be compilable on all architectures.
This patch removes one dependency on architecture-specific code.

Acked-by: Kukjin Kim <kgene.kim@samsung.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 arch/arm/mach-s3c2416/mach-smdk2416.c    |  1 +
 arch/arm/plat-samsung/devs.c             |  1 +
 arch/arm/plat-samsung/include/plat/udc.h | 15 +-------------
 drivers/usb/gadget/s3c-hsudc.c           |  2 +-
 include/linux/platform_data/s3c-hsudc.h  | 34 ++++++++++++++++++++++++++++++++
 5 files changed, 38 insertions(+), 15 deletions(-)
 create mode 100644 include/linux/platform_data/s3c-hsudc.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-s3c2416/mach-smdk2416.c b/arch/arm/mach-s3c2416/mach-smdk2416.c
index a9eee531ca76..6345bcbf5c73 100644
--- a/arch/arm/mach-s3c2416/mach-smdk2416.c
+++ b/arch/arm/mach-s3c2416/mach-smdk2416.c
@@ -50,6 +50,7 @@
 #include <plat/nand.h>
 #include <plat/sdhci.h>
 #include <plat/udc.h>
+#include <linux/platform_data/s3c-hsudc.h>
 
 #include <plat/regs-fb-v4.h>
 #include <plat/fb.h>
diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c
index 4ca8b571f971..92b4c025d37a 100644
--- a/arch/arm/plat-samsung/devs.c
+++ b/arch/arm/plat-samsung/devs.c
@@ -29,6 +29,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/mmc/host.h>
 #include <linux/ioport.h>
+#include <linux/platform_data/s3c-hsudc.h>
 
 #include <asm/irq.h>
 #include <asm/pmu.h>
diff --git a/arch/arm/plat-samsung/include/plat/udc.h b/arch/arm/plat-samsung/include/plat/udc.h
index 8c22d586befb..de8e2288a509 100644
--- a/arch/arm/plat-samsung/include/plat/udc.h
+++ b/arch/arm/plat-samsung/include/plat/udc.h
@@ -37,20 +37,7 @@ struct s3c2410_udc_mach_info {
 
 extern void __init s3c24xx_udc_set_platdata(struct s3c2410_udc_mach_info *);
 
-/**
- * s3c24xx_hsudc_platdata - Platform data for USB High-Speed gadget controller.
- * @epnum: Number of endpoints to be instantiated by the controller driver.
- * @gpio_init: Platform specific USB related GPIO initialization.
- * @gpio_uninit: Platform specific USB releted GPIO uninitialzation.
- *
- * Representation of platform data for the S3C24XX USB 2.0 High Speed gadget
- * controllers.
- */
-struct s3c24xx_hsudc_platdata {
-	unsigned int	epnum;
-	void		(*gpio_init)(void);
-	void		(*gpio_uninit)(void);
-};
+struct s3c24xx_hsudc_platdata;
 
 extern void __init s3c24xx_hsudc_set_platdata(struct s3c24xx_hsudc_platdata *pd);
 
diff --git a/drivers/usb/gadget/s3c-hsudc.c b/drivers/usb/gadget/s3c-hsudc.c
index f398b8590f9c..2f9f8409031b 100644
--- a/drivers/usb/gadget/s3c-hsudc.c
+++ b/drivers/usb/gadget/s3c-hsudc.c
@@ -28,9 +28,9 @@
 #include <linux/usb/gadget.h>
 #include <linux/usb/otg.h>
 #include <linux/prefetch.h>
+#include <linux/platform_data/s3c-hsudc.h>
 
 #include <mach/regs-s3c2443-clock.h>
-#include <plat/udc.h>
 
 #define S3C_HSUDC_REG(x)	(x)
 
diff --git a/include/linux/platform_data/s3c-hsudc.h b/include/linux/platform_data/s3c-hsudc.h
new file mode 100644
index 000000000000..6fa109339bf9
--- /dev/null
+++ b/include/linux/platform_data/s3c-hsudc.h
@@ -0,0 +1,34 @@
+/*
+ * S3C24XX USB 2.0 High-speed USB controller gadget driver
+ *
+ * Copyright (c) 2010 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com/
+ *
+ * The S3C24XX USB 2.0 high-speed USB controller supports upto 9 endpoints.
+ * Each endpoint can be configured as either in or out endpoint. Endpoints
+ * can be configured for Bulk or Interrupt transfer mode.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#ifndef __LINUX_USB_S3C_HSUDC_H
+#define __LINUX_USB_S3C_HSUDC_H
+
+/**
+ * s3c24xx_hsudc_platdata - Platform data for USB High-Speed gadget controller.
+ * @epnum: Number of endpoints to be instantiated by the controller driver.
+ * @gpio_init: Platform specific USB related GPIO initialization.
+ * @gpio_uninit: Platform specific USB releted GPIO uninitialzation.
+ *
+ * Representation of platform data for the S3C24XX USB 2.0 High Speed gadget
+ * controllers.
+ */
+struct s3c24xx_hsudc_platdata {
+	unsigned int	epnum;
+	void		(*gpio_init)(void);
+	void		(*gpio_uninit)(void);
+};
+
+#endif	/* __LINUX_USB_S3C_HSUDC_H */
-- 
cgit v1.2.3


From 898c60867827796f0f6f84e5de446098d776c866 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Tue, 22 Nov 2011 11:11:50 +0200
Subject: usb: gadget: introduce support for sg lists

Some controllers support scatter/gather transfers
and that might be very useful for some gadget drivers.

This means that we can make use of larger buffer
allocations which means we will have less completion
IRQs overtime, thus improving the perceived performance.

Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/gadget.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 4d99805bcbb7..da653b5c7134 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <linux/scatterlist.h>
 #include <linux/types.h>
 #include <linux/usb/ch9.h>
 
@@ -32,6 +33,9 @@ struct usb_ep;
  * @dma: DMA address corresponding to 'buf'.  If you don't set this
  *	field, and the usb controller needs one, it is responsible
  *	for mapping and unmapping the buffer.
+ * @sg: a scatterlist for SG-capable controllers.
+ * @num_sgs: number of SG entries
+ * @num_mapped_sgs: number of SG entries mapped to DMA (internal)
  * @length: Length of that data
  * @stream_id: The stream id, when USB3.0 bulk streams are being used
  * @no_interrupt: If true, hints that no completion irq is needed.
@@ -88,6 +92,10 @@ struct usb_request {
 	unsigned		length;
 	dma_addr_t		dma;
 
+	struct scatterlist	*sg;
+	unsigned		num_sgs;
+	unsigned		num_mapped_sgs;
+
 	unsigned		stream_id:16;
 	unsigned		no_interrupt:1;
 	unsigned		zero:1;
@@ -479,6 +487,7 @@ struct usb_gadget_ops {
  * @speed: Speed of current connection to USB host.
  * @max_speed: Maximal speed the UDC can handle.  UDC must support this
  *      and all slower speeds.
+ * @sg_supported: true if we can handle scatter-gather
  * @is_otg: True if the USB device port uses a Mini-AB jack, so that the
  *	gadget driver must provide a USB OTG descriptor.
  * @is_a_peripheral: False unless is_otg, the "A" end of a USB cable
@@ -519,6 +528,7 @@ struct usb_gadget {
 	struct list_head		ep_list;	/* of usb_ep */
 	enum usb_device_speed		speed;
 	enum usb_device_speed		max_speed;
+	unsigned			sg_supported:1;
 	unsigned			is_otg:1;
 	unsigned			is_a_peripheral:1;
 	unsigned			b_hnp_enable:1;
-- 
cgit v1.2.3


From 45959ee7aa645815a5ce303a0ea1e48a21e67c6a Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 12 Dec 2011 15:22:41 -0500
Subject: ftrace: Do not function trace inlined functions

When gcc inlines a function, it does not mark it with the mcount
prologue, which in turn means that inlined functions are not traced
by the function tracer. But if CONFIG_OPTIMIZE_INLINING is set, then
gcc is allowed not to inline a function that is marked inline.

Depending on the options and the compiler, a function may or may
not be traced by the function tracer, depending on whether gcc
decides to inline a function or not. This has caused several
problems in the pass becaues gcc is not always consistent with
what it decides to inline between different gcc versions.

Some places should not be traced (like paravirt native_* functions)
and these are mostly marked as inline. When gcc decides not to
inline the function, and if that function should not be traced, then
the ftrace function tracer will suddenly break when it use to work
fine. This becomes even harder to debug when different versions of
gcc will not inline that function, making the same kernel and config
work for some gcc versions and not work for others.

By making all functions marked inline to not be traced will remove
the ambiguity that gcc adds when it comes to tracing functions marked
inline. All gcc versions will be consistent with what functions are
traced and having volatile working code will be removed.

Note, only the inline macro when CONFIG_OPTIMIZE_INLINING is set needs
to have notrace added, as the attribute __always_inline will force
the function to be inlined and then not traced.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/compiler-gcc.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 59e4028e833d..3fd17c249221 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -50,6 +50,11 @@
 # define inline		inline		__attribute__((always_inline))
 # define __inline__	__inline__	__attribute__((always_inline))
 # define __inline	__inline	__attribute__((always_inline))
+#else
+/* A lot of inline functions can cause havoc with function tracing */
+# define inline		inline		notrace
+# define __inline__	__inline__	notrace
+# define __inline	__inline	notrace
 #endif
 
 #define __deprecated			__attribute__((deprecated))
-- 
cgit v1.2.3


From c88fd8634ea68e74c7d19fd2621b4078fd22864c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 16 Aug 2011 09:53:39 -0400
Subject: ftrace: Allow archs to modify code without stop machine

The stop machine method to modify all functions in the kernel
(some 20,000 of them) is the safest way to do so across all archs.
But some archs may not need this big hammer approach to modify code
on SMP machines, and can simply just update the code it needs.

Adding a weak function arch_ftrace_update_code() that now does the
stop machine, will also let any arch override this method.

If the arch needs to check the system and then decide if it can
avoid stop machine, it can still call ftrace_run_stop_machine() to
use the old method.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  31 ++++++
 kernel/trace/ftrace.c  | 253 +++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 246 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 26eafcef75be..4f0b6fec379d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -133,6 +133,8 @@ struct ftrace_func_command {
 int ftrace_arch_code_modify_prepare(void);
 int ftrace_arch_code_modify_post_process(void);
 
+void ftrace_bug(int err, unsigned long ip);
+
 struct seq_file;
 
 struct ftrace_probe_ops {
@@ -190,6 +192,35 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
 int register_ftrace_command(struct ftrace_func_command *cmd);
 int unregister_ftrace_command(struct ftrace_func_command *cmd);
 
+enum {
+	FTRACE_UPDATE_CALLS		= (1 << 0),
+	FTRACE_DISABLE_CALLS		= (1 << 1),
+	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
+	FTRACE_START_FUNC_RET		= (1 << 3),
+	FTRACE_STOP_FUNC_RET		= (1 << 4),
+};
+
+enum {
+	FTRACE_UPDATE_IGNORE,
+	FTRACE_UPDATE_MAKE_CALL,
+	FTRACE_UPDATE_MAKE_NOP,
+};
+
+void arch_ftrace_update_code(int command);
+
+struct ftrace_rec_iter;
+
+struct ftrace_rec_iter *ftrace_rec_iter_start(void);
+struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter);
+struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
+
+int ftrace_update_record(struct dyn_ftrace *rec, int enable);
+int ftrace_test_record(struct dyn_ftrace *rec, int enable);
+void ftrace_run_stop_machine(int command);
+int ftrace_location(unsigned long ip);
+
+extern ftrace_func_t ftrace_trace_function;
+
 /* defined in arch */
 extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_dyn_arch_init(void *data);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 25b4f4da0fe8..655b432fb890 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -947,13 +947,6 @@ struct ftrace_func_probe {
 	struct rcu_head		rcu;
 };
 
-enum {
-	FTRACE_UPDATE_CALLS		= (1 << 0),
-	FTRACE_DISABLE_CALLS		= (1 << 1),
-	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
-	FTRACE_START_FUNC_RET		= (1 << 3),
-	FTRACE_STOP_FUNC_RET		= (1 << 4),
-};
 struct ftrace_func_entry {
 	struct hlist_node hlist;
 	unsigned long ip;
@@ -1307,6 +1300,28 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
 		}				\
 	}
 
+/**
+ * ftrace_location - return true if the ip giving is a traced location
+ * @ip: the instruction pointer to check
+ *
+ * Returns 1 if @ip given is a pointer to a ftrace location.
+ * That is, the instruction that is either a NOP or call to
+ * the function tracer. It checks the ftrace internal tables to
+ * determine if the address belongs or not.
+ */
+int ftrace_location(unsigned long ip)
+{
+	struct ftrace_page *pg;
+	struct dyn_ftrace *rec;
+
+	do_for_each_ftrace_rec(pg, rec) {
+		if (rec->ip == ip)
+			return 1;
+	} while_for_each_ftrace_rec();
+
+	return 0;
+}
+
 static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 				     int filter_hash,
 				     bool inc)
@@ -1475,7 +1490,19 @@ static void print_ip_ins(const char *fmt, unsigned char *p)
 		printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
 }
 
-static void ftrace_bug(int failed, unsigned long ip)
+/**
+ * ftrace_bug - report and shutdown function tracer
+ * @failed: The failed type (EFAULT, EINVAL, EPERM)
+ * @ip: The address that failed
+ *
+ * The arch code that enables or disables the function tracing
+ * can call ftrace_bug() when it has detected a problem in
+ * modifying the code. @failed should be one of either:
+ * EFAULT - if the problem happens on reading the @ip address
+ * EINVAL - if what is read at @ip is not what was expected
+ * EPERM - if the problem happens on writting to the @ip address
+ */
+void ftrace_bug(int failed, unsigned long ip)
 {
 	switch (failed) {
 	case -EFAULT:
@@ -1517,15 +1544,10 @@ int ftrace_text_reserved(void *start, void *end)
 	return 0;
 }
 
-
-static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int update)
+static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
 {
-	unsigned long ftrace_addr;
 	unsigned long flag = 0UL;
 
-	ftrace_addr = (unsigned long)FTRACE_ADDR;
-
 	/*
 	 * If we are updating calls:
 	 *
@@ -1537,20 +1559,74 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int update)
 	 * If we are disabling calls, then disable all records that
 	 * are enabled.
 	 */
-	if (update && (rec->flags & ~FTRACE_FL_MASK))
+	if (enable && (rec->flags & ~FTRACE_FL_MASK))
 		flag = FTRACE_FL_ENABLED;
 
 	/* If the state of this record hasn't changed, then do nothing */
 	if ((rec->flags & FTRACE_FL_ENABLED) == flag)
-		return 0;
+		return FTRACE_UPDATE_IGNORE;
 
 	if (flag) {
-		rec->flags |= FTRACE_FL_ENABLED;
+		if (update)
+			rec->flags |= FTRACE_FL_ENABLED;
+		return FTRACE_UPDATE_MAKE_CALL;
+	}
+
+	if (update)
+		rec->flags &= ~FTRACE_FL_ENABLED;
+
+	return FTRACE_UPDATE_MAKE_NOP;
+}
+
+/**
+ * ftrace_update_record, set a record that now is tracing or not
+ * @rec: the record to update
+ * @enable: set to 1 if the record is tracing, zero to force disable
+ *
+ * The records that represent all functions that can be traced need
+ * to be updated when tracing has been enabled.
+ */
+int ftrace_update_record(struct dyn_ftrace *rec, int enable)
+{
+	return ftrace_check_record(rec, enable, 1);
+}
+
+/**
+ * ftrace_test_record, check if the record has been enabled or not
+ * @rec: the record to test
+ * @enable: set to 1 to check if enabled, 0 if it is disabled
+ *
+ * The arch code may need to test if a record is already set to
+ * tracing to determine how to modify the function code that it
+ * represents.
+ */
+int ftrace_test_record(struct dyn_ftrace *rec, int enable)
+{
+	return ftrace_check_record(rec, enable, 0);
+}
+
+static int
+__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+{
+	unsigned long ftrace_addr;
+	int ret;
+
+	ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+	ret = ftrace_update_record(rec, enable);
+
+	switch (ret) {
+	case FTRACE_UPDATE_IGNORE:
+		return 0;
+
+	case FTRACE_UPDATE_MAKE_CALL:
 		return ftrace_make_call(rec, ftrace_addr);
+
+	case FTRACE_UPDATE_MAKE_NOP:
+		return ftrace_make_nop(NULL, rec, ftrace_addr);
 	}
 
-	rec->flags &= ~FTRACE_FL_ENABLED;
-	return ftrace_make_nop(NULL, rec, ftrace_addr);
+	return -1; /* unknow ftrace bug */
 }
 
 static void ftrace_replace_code(int update)
@@ -1576,6 +1652,78 @@ static void ftrace_replace_code(int update)
 	} while_for_each_ftrace_rec();
 }
 
+struct ftrace_rec_iter {
+	struct ftrace_page	*pg;
+	int			index;
+};
+
+/**
+ * ftrace_rec_iter_start, start up iterating over traced functions
+ *
+ * Returns an iterator handle that is used to iterate over all
+ * the records that represent address locations where functions
+ * are traced.
+ *
+ * May return NULL if no records are available.
+ */
+struct ftrace_rec_iter *ftrace_rec_iter_start(void)
+{
+	/*
+	 * We only use a single iterator.
+	 * Protected by the ftrace_lock mutex.
+	 */
+	static struct ftrace_rec_iter ftrace_rec_iter;
+	struct ftrace_rec_iter *iter = &ftrace_rec_iter;
+
+	iter->pg = ftrace_pages_start;
+	iter->index = 0;
+
+	/* Could have empty pages */
+	while (iter->pg && !iter->pg->index)
+		iter->pg = iter->pg->next;
+
+	if (!iter->pg)
+		return NULL;
+
+	return iter;
+}
+
+/**
+ * ftrace_rec_iter_next, get the next record to process.
+ * @iter: The handle to the iterator.
+ *
+ * Returns the next iterator after the given iterator @iter.
+ */
+struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter)
+{
+	iter->index++;
+
+	if (iter->index >= iter->pg->index) {
+		iter->pg = iter->pg->next;
+		iter->index = 0;
+
+		/* Could have empty pages */
+		while (iter->pg && !iter->pg->index)
+			iter->pg = iter->pg->next;
+	}
+
+	if (!iter->pg)
+		return NULL;
+
+	return iter;
+}
+
+/**
+ * ftrace_rec_iter_record, get the record at the iterator location
+ * @iter: The current iterator location
+ *
+ * Returns the record that the current @iter is at.
+ */
+struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter)
+{
+	return &iter->pg->records[iter->index];
+}
+
 static int
 ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
 {
@@ -1617,12 +1765,6 @@ static int __ftrace_modify_code(void *data)
 {
 	int *command = data;
 
-	/*
-	 * Do not call function tracer while we update the code.
-	 * We are in stop machine, no worrying about races.
-	 */
-	function_trace_stop++;
-
 	if (*command & FTRACE_UPDATE_CALLS)
 		ftrace_replace_code(1);
 	else if (*command & FTRACE_DISABLE_CALLS)
@@ -1636,21 +1778,33 @@ static int __ftrace_modify_code(void *data)
 	else if (*command & FTRACE_STOP_FUNC_RET)
 		ftrace_disable_ftrace_graph_caller();
 
-#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
-	/*
-	 * For archs that call ftrace_test_stop_func(), we must
-	 * wait till after we update all the function callers
-	 * before we update the callback. This keeps different
-	 * ops that record different functions from corrupting
-	 * each other.
-	 */
-	__ftrace_trace_function = __ftrace_trace_function_delay;
-#endif
-	function_trace_stop--;
-
 	return 0;
 }
 
+/**
+ * ftrace_run_stop_machine, go back to the stop machine method
+ * @command: The command to tell ftrace what to do
+ *
+ * If an arch needs to fall back to the stop machine method, the
+ * it can call this function.
+ */
+void ftrace_run_stop_machine(int command)
+{
+	stop_machine(__ftrace_modify_code, &command, NULL);
+}
+
+/**
+ * arch_ftrace_update_code, modify the code to trace or not trace
+ * @command: The command that needs to be done
+ *
+ * Archs can override this function if it does not need to
+ * run stop_machine() to modify code.
+ */
+void __weak arch_ftrace_update_code(int command)
+{
+	ftrace_run_stop_machine(command);
+}
+
 static void ftrace_run_update_code(int command)
 {
 	int ret;
@@ -1659,8 +1813,31 @@ static void ftrace_run_update_code(int command)
 	FTRACE_WARN_ON(ret);
 	if (ret)
 		return;
+	/*
+	 * Do not call function tracer while we update the code.
+	 * We are in stop machine.
+	 */
+	function_trace_stop++;
 
-	stop_machine(__ftrace_modify_code, &command, NULL);
+	/*
+	 * By default we use stop_machine() to modify the code.
+	 * But archs can do what ever they want as long as it
+	 * is safe. The stop_machine() is the safest, but also
+	 * produces the most overhead.
+	 */
+	arch_ftrace_update_code(command);
+
+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	/*
+	 * For archs that call ftrace_test_stop_func(), we must
+	 * wait till after we update all the function callers
+	 * before we update the callback. This keeps different
+	 * ops that record different functions from corrupting
+	 * each other.
+	 */
+	__ftrace_trace_function = __ftrace_trace_function_delay;
+#endif
+	function_trace_stop--;
 
 	ret = ftrace_arch_code_modify_post_process();
 	FTRACE_WARN_ON(ret);
-- 
cgit v1.2.3


From 3208230983a0ee3d95be22d463257e530c684956 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 16 Dec 2011 14:42:37 -0500
Subject: ftrace: Remove usage of "freed" records

Records that are added to the function trace table are
permanently there, except for modules. By separating out the
modules to their own pages that can be freed in one shot
we can remove the "freed" flag and simplify some of the record
management.

Another benefit of doing this is that we can also move the
records around; sort them.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |   1 -
 kernel/trace/ftrace.c  | 100 ++++++++++++++++++++++++-------------------------
 2 files changed, 49 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 4f0b6fec379d..3f79bc458bff 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -163,7 +163,6 @@ extern int ftrace_text_reserved(void *start, void *end);
 
 enum {
 	FTRACE_FL_ENABLED	= (1 << 30),
-	FTRACE_FL_FREE		= (1 << 31),
 };
 
 #define FTRACE_FL_MASK		(0x3UL << 30)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 655b432fb890..be6888f40d2b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -996,8 +996,6 @@ struct ftrace_page {
 static struct ftrace_page	*ftrace_pages_start;
 static struct ftrace_page	*ftrace_pages;
 
-static struct dyn_ftrace *ftrace_free_records;
-
 static struct ftrace_func_entry *
 ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
 {
@@ -1421,32 +1419,8 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
 	__ftrace_hash_rec_update(ops, filter_hash, 1);
 }
 
-static void ftrace_free_rec(struct dyn_ftrace *rec)
-{
-	rec->freelist = ftrace_free_records;
-	ftrace_free_records = rec;
-	rec->flags |= FTRACE_FL_FREE;
-}
-
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
-	struct dyn_ftrace *rec;
-
-	/* First check for freed records */
-	if (ftrace_free_records) {
-		rec = ftrace_free_records;
-
-		if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
-			FTRACE_WARN_ON_ONCE(1);
-			ftrace_free_records = NULL;
-			return NULL;
-		}
-
-		ftrace_free_records = rec->freelist;
-		memset(rec, 0, sizeof(*rec));
-		return rec;
-	}
-
 	if (ftrace_pages->index == ENTRIES_PER_PAGE) {
 		if (!ftrace_pages->next) {
 			/* allocate another page */
@@ -1639,10 +1613,6 @@ static void ftrace_replace_code(int update)
 		return;
 
 	do_for_each_ftrace_rec(pg, rec) {
-		/* Skip over free records */
-		if (rec->flags & FTRACE_FL_FREE)
-			continue;
-
 		failed = __ftrace_replace_code(rec, update);
 		if (failed) {
 			ftrace_bug(failed, rec->ip);
@@ -2007,11 +1977,8 @@ static int ftrace_update_code(struct module *mod)
 		 * Do the initial record conversion from mcount jump
 		 * to the NOP instructions.
 		 */
-		if (!ftrace_code_disable(mod, p)) {
-			ftrace_free_rec(p);
-			/* Game over */
+		if (!ftrace_code_disable(mod, p))
 			break;
-		}
 
 		ftrace_update_cnt++;
 
@@ -2026,10 +1993,8 @@ static int ftrace_update_code(struct module *mod)
 		 */
 		if (ftrace_start_up && ref) {
 			int failed = __ftrace_replace_code(p, 1);
-			if (failed) {
+			if (failed)
 				ftrace_bug(failed, p->ip);
-				ftrace_free_rec(p);
-			}
 		}
 	}
 
@@ -2223,9 +2188,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 		}
 	} else {
 		rec = &iter->pg->records[iter->idx++];
-		if ((rec->flags & FTRACE_FL_FREE) ||
-
-		    ((iter->flags & FTRACE_ITER_FILTER) &&
+		if (((iter->flags & FTRACE_ITER_FILTER) &&
 		     !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) ||
 
 		    ((iter->flags & FTRACE_ITER_NOTRACE) &&
@@ -2602,7 +2565,6 @@ match_records(struct ftrace_hash *hash, char *buff,
 		goto out_unlock;
 
 	do_for_each_ftrace_rec(pg, rec) {
-
 		if (ftrace_match_record(rec, mod, search, search_len, type)) {
 			ret = enter_record(hash, rec, not);
 			if (ret < 0) {
@@ -3446,9 +3408,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 
 	do_for_each_ftrace_rec(pg, rec) {
 
-		if (rec->flags & FTRACE_FL_FREE)
-			continue;
-
 		if (ftrace_match_record(rec, NULL, search, search_len, type)) {
 			/* if it is in the array */
 			exists = false;
@@ -3566,6 +3525,27 @@ static int ftrace_process_locs(struct module *mod,
 	unsigned long flags = 0; /* Shut up gcc */
 
 	mutex_lock(&ftrace_lock);
+	/*
+	 * Core and each module needs their own pages, as
+	 * modules will free them when they are removed.
+	 * Force a new page to be allocated for modules.
+	 */
+	if (mod) {
+		if (!ftrace_pages)
+			return -ENOMEM;
+
+		/*
+		 * If the last page was full, it will be
+		 * allocated anyway.
+		 */
+		if (ftrace_pages->index != ENTRIES_PER_PAGE) {
+			ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
+			if (!ftrace_pages->next)
+				return -ENOMEM;
+			ftrace_pages = ftrace_pages->next;
+		}
+	}
+
 	p = start;
 	while (p < end) {
 		addr = ftrace_call_adjust(*p++);
@@ -3599,9 +3579,13 @@ static int ftrace_process_locs(struct module *mod,
 }
 
 #ifdef CONFIG_MODULES
+
+#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
+
 void ftrace_release_mod(struct module *mod)
 {
 	struct dyn_ftrace *rec;
+	struct ftrace_page **last_pg;
 	struct ftrace_page *pg;
 
 	mutex_lock(&ftrace_lock);
@@ -3609,16 +3593,30 @@ void ftrace_release_mod(struct module *mod)
 	if (ftrace_disabled)
 		goto out_unlock;
 
-	do_for_each_ftrace_rec(pg, rec) {
+	/*
+	 * Each module has its own ftrace_pages, remove
+	 * them from the list.
+	 */
+	last_pg = &ftrace_pages_start;
+	for (pg = ftrace_pages_start; pg; pg = *last_pg) {
+		rec = &pg->records[0];
 		if (within_module_core(rec->ip, mod)) {
 			/*
-			 * rec->ip is changed in ftrace_free_rec()
-			 * It should not between s and e if record was freed.
+			 * As core pages are first, the first
+			 * page should never be a module page.
 			 */
-			FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
-			ftrace_free_rec(rec);
-		}
-	} while_for_each_ftrace_rec();
+			if (WARN_ON(pg == ftrace_pages_start))
+				goto out_unlock;
+
+			/* Check if we are deleting the last page */
+			if (pg == ftrace_pages)
+				ftrace_pages = next_to_ftrace_page(last_pg);
+
+			*last_pg = pg->next;
+			free_page((unsigned long)pg);
+		} else
+			last_pg = &pg->next;
+	}
  out_unlock:
 	mutex_unlock(&ftrace_lock);
 }
-- 
cgit v1.2.3


From 85ae32ae019bc1c2cc22e5f51fe0c9f2812ef68c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 16 Dec 2011 16:30:31 -0500
Subject: ftrace: Replace record newlist with record page list

As new functions come in to be initalized from mcount to nop,
they are done by groups of pages. Whether it is the core kernel
or a module. There's no need to keep track of these on a per record
basis.

At startup, and as any module is loaded, the functions to be
traced are stored in a group of pages and added to the function
list at the end. We just need to keep a pointer to the first
page of the list that was added, and use that to know where to
start on the list for initializing functions.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  5 +---
 kernel/trace/ftrace.c  | 68 +++++++++++++++++++++++++++-----------------------
 2 files changed, 38 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 3f79bc458bff..31b9fd7aedcd 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -173,10 +173,7 @@ struct dyn_ftrace {
 		unsigned long		ip; /* address of mcount call-site */
 		struct dyn_ftrace	*freelist;
 	};
-	union {
-		unsigned long		flags;
-		struct dyn_ftrace	*newlist;
-	};
+	unsigned long		flags;
 	struct dyn_arch_ftrace		arch;
 };
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2e7218869fe9..366d7881f188 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -977,8 +977,6 @@ static struct ftrace_ops global_ops = {
 	.filter_hash		= EMPTY_HASH,
 };
 
-static struct dyn_ftrace *ftrace_new_addrs;
-
 static DEFINE_MUTEX(ftrace_regex_lock);
 
 struct ftrace_page {
@@ -988,6 +986,8 @@ struct ftrace_page {
 	int			size;
 };
 
+static struct ftrace_page *ftrace_new_pgs;
+
 #define ENTRY_SIZE sizeof(struct dyn_ftrace)
 #define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)
 
@@ -1445,8 +1445,6 @@ ftrace_record_ip(unsigned long ip)
 		return NULL;
 
 	rec->ip = ip;
-	rec->newlist = ftrace_new_addrs;
-	ftrace_new_addrs = rec;
 
 	return rec;
 }
@@ -1936,9 +1934,11 @@ static int ops_traces_mod(struct ftrace_ops *ops)
 
 static int ftrace_update_code(struct module *mod)
 {
+	struct ftrace_page *pg;
 	struct dyn_ftrace *p;
 	cycle_t start, stop;
 	unsigned long ref = 0;
+	int i;
 
 	/*
 	 * When adding a module, we need to check if tracers are
@@ -1960,41 +1960,44 @@ static int ftrace_update_code(struct module *mod)
 	start = ftrace_now(raw_smp_processor_id());
 	ftrace_update_cnt = 0;
 
-	while (ftrace_new_addrs) {
+	for (pg = ftrace_new_pgs; pg; pg = pg->next) {
 
-		/* If something went wrong, bail without enabling anything */
-		if (unlikely(ftrace_disabled))
-			return -1;
+		for (i = 0; i < pg->index; i++) {
+			/* If something went wrong, bail without enabling anything */
+			if (unlikely(ftrace_disabled))
+				return -1;
 
-		p = ftrace_new_addrs;
-		ftrace_new_addrs = p->newlist;
-		p->flags = ref;
+			p = &pg->records[i];
+			p->flags = ref;
 
-		/*
-		 * Do the initial record conversion from mcount jump
-		 * to the NOP instructions.
-		 */
-		if (!ftrace_code_disable(mod, p))
-			break;
+			/*
+			 * Do the initial record conversion from mcount jump
+			 * to the NOP instructions.
+			 */
+			if (!ftrace_code_disable(mod, p))
+				break;
 
-		ftrace_update_cnt++;
+			ftrace_update_cnt++;
 
-		/*
-		 * If the tracing is enabled, go ahead and enable the record.
-		 *
-		 * The reason not to enable the record immediatelly is the
-		 * inherent check of ftrace_make_nop/ftrace_make_call for
-		 * correct previous instructions.  Making first the NOP
-		 * conversion puts the module to the correct state, thus
-		 * passing the ftrace_make_call check.
-		 */
-		if (ftrace_start_up && ref) {
-			int failed = __ftrace_replace_code(p, 1);
-			if (failed)
-				ftrace_bug(failed, p->ip);
+			/*
+			 * If the tracing is enabled, go ahead and enable the record.
+			 *
+			 * The reason not to enable the record immediatelly is the
+			 * inherent check of ftrace_make_nop/ftrace_make_call for
+			 * correct previous instructions.  Making first the NOP
+			 * conversion puts the module to the correct state, thus
+			 * passing the ftrace_make_call check.
+			 */
+			if (ftrace_start_up && ref) {
+				int failed = __ftrace_replace_code(p, 1);
+				if (failed)
+					ftrace_bug(failed, p->ip);
+			}
 		}
 	}
 
+	ftrace_new_pgs = NULL;
+
 	stop = ftrace_now(raw_smp_processor_id());
 	ftrace_update_time = stop - start;
 	ftrace_update_tot_cnt += ftrace_update_cnt;
@@ -3632,6 +3635,9 @@ static int ftrace_process_locs(struct module *mod,
 			break;
 	}
 
+	/* These new locations need to be initialized */
+	ftrace_new_pgs = pg;
+
 	/*
 	 * We only need to disable interrupts on start up
 	 * because we are modifying code that an interrupt
-- 
cgit v1.2.3


From fc13cb0ce45296f331263a6034aa1814203e1ac3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 19 Dec 2011 14:41:25 -0500
Subject: ftrace: Allow other users of function tracing to use the output
 listing

The function tracer is set up to allow any other subsystem (like perf)
to use it. Ftrace already has a way to list what functions are enabled
by the global_ops. It would be very helpful to let other users of
the function tracer to be able to use the same code.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 35 +++++++++++++++++++++++++++++++++++
 kernel/trace/ftrace.c  | 41 +++++++++++++++++++++++++----------------
 2 files changed, 60 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 31b9fd7aedcd..aa7559f0a224 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -202,6 +202,14 @@ enum {
 	FTRACE_UPDATE_MAKE_NOP,
 };
 
+enum {
+	FTRACE_ITER_FILTER	= (1 << 0),
+	FTRACE_ITER_NOTRACE	= (1 << 1),
+	FTRACE_ITER_PRINTALL	= (1 << 2),
+	FTRACE_ITER_HASH	= (1 << 3),
+	FTRACE_ITER_ENABLED	= (1 << 4),
+};
+
 void arch_ftrace_update_code(int command);
 
 struct ftrace_rec_iter;
@@ -217,6 +225,15 @@ int ftrace_location(unsigned long ip);
 
 extern ftrace_func_t ftrace_trace_function;
 
+int ftrace_regex_open(struct ftrace_ops *ops, int flag,
+		  struct inode *inode, struct file *file);
+ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
+			    size_t cnt, loff_t *ppos);
+ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
+			     size_t cnt, loff_t *ppos);
+loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin);
+int ftrace_regex_release(struct inode *inode, struct file *file);
+
 /* defined in arch */
 extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_dyn_arch_init(void *data);
@@ -311,6 +328,24 @@ static inline int ftrace_text_reserved(void *start, void *end)
 {
 	return 0;
 }
+
+/*
+ * Again users of functions that have ftrace_ops may not
+ * have them defined when ftrace is not enabled, but these
+ * functions may still be called. Use a macro instead of inline.
+ */
+#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
+
+static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
+			    size_t cnt, loff_t *ppos) { return -ENODEV; }
+static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
+			     size_t cnt, loff_t *ppos) { return -ENODEV; }
+static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
+{
+	return -ENODEV;
+}
+static inline int
+ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /* totally disable ftrace - can not re-enable after this */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e1ee07f81ca2..5b105c5ddc0c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2134,14 +2134,6 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
 	return 0;
 }
 
-enum {
-	FTRACE_ITER_FILTER	= (1 << 0),
-	FTRACE_ITER_NOTRACE	= (1 << 1),
-	FTRACE_ITER_PRINTALL	= (1 << 2),
-	FTRACE_ITER_HASH	= (1 << 3),
-	FTRACE_ITER_ENABLED	= (1 << 4),
-};
-
 #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
 
 struct ftrace_iterator {
@@ -2249,7 +2241,7 @@ static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
-	struct ftrace_ops *ops = &global_ops;
+	struct ftrace_ops *ops = iter->ops;
 	struct dyn_ftrace *rec = NULL;
 
 	if (unlikely(ftrace_disabled))
@@ -2305,7 +2297,7 @@ static void reset_iter_read(struct ftrace_iterator *iter)
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
-	struct ftrace_ops *ops = &global_ops;
+	struct ftrace_ops *ops = iter->ops;
 	void *p = NULL;
 	loff_t l;
 
@@ -2414,6 +2406,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
 		return -ENOMEM;
 
 	iter->pg = ftrace_pages_start;
+	iter->ops = &global_ops;
 
 	ret = seq_open(file, &show_ftrace_seq_ops);
 	if (!ret) {
@@ -2442,6 +2435,7 @@ ftrace_enabled_open(struct inode *inode, struct file *file)
 
 	iter->pg = ftrace_pages_start;
 	iter->flags = FTRACE_ITER_ENABLED;
+	iter->ops = &global_ops;
 
 	ret = seq_open(file, &show_ftrace_seq_ops);
 	if (!ret) {
@@ -2462,7 +2456,23 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
 	mutex_unlock(&ftrace_lock);
 }
 
-static int
+/**
+ * ftrace_regex_open - initialize function tracer filter files
+ * @ops: The ftrace_ops that hold the hash filters
+ * @flag: The type of filter to process
+ * @inode: The inode, usually passed in to your open routine
+ * @file: The file, usually passed in to your open routine
+ *
+ * ftrace_regex_open() initializes the filter files for the
+ * @ops. Depending on @flag it may process the filter hash or
+ * the notrace hash of @ops. With this called from the open
+ * routine, you can use ftrace_filter_write() for the write
+ * routine if @flag has FTRACE_ITER_FILTER set, or
+ * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
+ * ftrace_regex_lseek() should be used as the lseek routine, and
+ * release must call ftrace_regex_release().
+ */
+int
 ftrace_regex_open(struct ftrace_ops *ops, int flag,
 		  struct inode *inode, struct file *file)
 {
@@ -2542,7 +2552,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
 				 inode, file);
 }
 
-static loff_t
+loff_t
 ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
 {
 	loff_t ret;
@@ -3095,14 +3105,14 @@ out_unlock:
 	return ret;
 }
 
-static ssize_t
+ssize_t
 ftrace_filter_write(struct file *file, const char __user *ubuf,
 		    size_t cnt, loff_t *ppos)
 {
 	return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
 }
 
-static ssize_t
+ssize_t
 ftrace_notrace_write(struct file *file, const char __user *ubuf,
 		     size_t cnt, loff_t *ppos)
 {
@@ -3292,8 +3302,7 @@ static void __init set_ftrace_early_filters(void)
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 }
 
-static int
-ftrace_regex_release(struct inode *inode, struct file *file)
+int ftrace_regex_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = (struct seq_file *)file->private_data;
 	struct ftrace_iterator *iter;
-- 
cgit v1.2.3


From 69a3083c4a7df0322d97bb2b43a33cb12af8131a Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 19 Dec 2011 15:21:16 -0500
Subject: ftrace: Decouple hash items from showing filtered functions

The set_ftrace_filter shows "hashed" functions, which are functions
that are added with operations to them (like traceon and traceoff).

As other subsystems may be able to show what functions they are
using for function tracing, the hash items should no longer
be shown just because the FILTER flag is set. As they have nothing
to do with other subsystems filters.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  5 +++--
 kernel/trace/ftrace.c  | 16 ++++++++--------
 2 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index aa7559f0a224..d1ff0de18970 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -206,8 +206,9 @@ enum {
 	FTRACE_ITER_FILTER	= (1 << 0),
 	FTRACE_ITER_NOTRACE	= (1 << 1),
 	FTRACE_ITER_PRINTALL	= (1 << 2),
-	FTRACE_ITER_HASH	= (1 << 3),
-	FTRACE_ITER_ENABLED	= (1 << 4),
+	FTRACE_ITER_DO_HASH	= (1 << 3),
+	FTRACE_ITER_HASH	= (1 << 4),
+	FTRACE_ITER_ENABLED	= (1 << 5),
 };
 
 void arch_ftrace_update_code(int command);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5b105c5ddc0c..5728d9aa632e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2198,6 +2198,9 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
 	void *p = NULL;
 	loff_t l;
 
+	if (!(iter->flags & FTRACE_ITER_DO_HASH))
+		return NULL;
+
 	if (iter->func_pos > *pos)
 		return NULL;
 
@@ -2343,12 +2346,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 			break;
 	}
 
-	if (!p) {
-		if (iter->flags & FTRACE_ITER_FILTER)
-			return t_hash_start(m, pos);
-
-		return NULL;
-	}
+	if (!p)
+		return t_hash_start(m, pos);
 
 	return iter;
 }
@@ -2541,8 +2540,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
 static int
 ftrace_filter_open(struct inode *inode, struct file *file)
 {
-	return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER,
-				 inode, file);
+	return ftrace_regex_open(&global_ops,
+			FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH,
+			inode, file);
 }
 
 static int
-- 
cgit v1.2.3


From 2a85a37f168d2b4d74d493b578af4dc9032be92e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 19 Dec 2011 21:57:44 -0500
Subject: ftrace: Allow access to the boot time function enabling

Change set_ftrace_early_filter() to ftrace_set_early_filter()
and make it a global function. This will allow other subsystems
in the kernel to be able to enable function tracing at start
up and reuse the ftrace function parsing code.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 3 +++
 kernel/trace/ftrace.c  | 8 ++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index d1ff0de18970..41df6f501656 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -235,6 +235,9 @@ ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
 loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin);
 int ftrace_regex_release(struct inode *inode, struct file *file);
 
+void __init
+ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
+
 /* defined in arch */
 extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_dyn_arch_init(void *data);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5728d9aa632e..683d559a0eef 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3279,8 +3279,8 @@ static void __init set_ftrace_early_graph(char *buf)
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
-static void __init
-set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
+void __init
+ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable)
 {
 	char *func;
 
@@ -3293,9 +3293,9 @@ set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
 static void __init set_ftrace_early_filters(void)
 {
 	if (ftrace_filter_buf[0])
-		set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1);
+		ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1);
 	if (ftrace_notrace_buf[0])
-		set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0);
+		ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0);
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	if (ftrace_graph_buf[0])
 		set_ftrace_early_graph(ftrace_graph_buf);
-- 
cgit v1.2.3


From 9b39e73d0c2b265a7f8748b0e9a9f09be84079a8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 18 Dec 2011 00:34:24 +0100
Subject: PM / Sleep: Remove forward-only callbacks from platform bus type

The forward-only PM callbacks provided by the platform bus type are
not necessary any more, because the PM core executes driver callbacks
when the corresponding subsystem callbacks are not present, so drop
them.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/platform.c         | 115 ----------------------------------------
 include/linux/platform_device.h |  30 +----------
 2 files changed, 1 insertion(+), 144 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 7a24895543e7..7d912d5675d8 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -700,25 +700,6 @@ static int platform_legacy_resume(struct device *dev)
 	return ret;
 }
 
-int platform_pm_prepare(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (drv && drv->pm && drv->pm->prepare)
-		ret = drv->pm->prepare(dev);
-
-	return ret;
-}
-
-void platform_pm_complete(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-
-	if (drv && drv->pm && drv->pm->complete)
-		drv->pm->complete(dev);
-}
-
 #endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_SUSPEND
@@ -741,22 +722,6 @@ int platform_pm_suspend(struct device *dev)
 	return ret;
 }
 
-int platform_pm_suspend_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->suspend_noirq)
-			ret = drv->pm->suspend_noirq(dev);
-	}
-
-	return ret;
-}
-
 int platform_pm_resume(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
@@ -775,22 +740,6 @@ int platform_pm_resume(struct device *dev)
 	return ret;
 }
 
-int platform_pm_resume_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->resume_noirq)
-			ret = drv->pm->resume_noirq(dev);
-	}
-
-	return ret;
-}
-
 #endif /* CONFIG_SUSPEND */
 
 #ifdef CONFIG_HIBERNATE_CALLBACKS
@@ -813,22 +762,6 @@ int platform_pm_freeze(struct device *dev)
 	return ret;
 }
 
-int platform_pm_freeze_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->freeze_noirq)
-			ret = drv->pm->freeze_noirq(dev);
-	}
-
-	return ret;
-}
-
 int platform_pm_thaw(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
@@ -847,22 +780,6 @@ int platform_pm_thaw(struct device *dev)
 	return ret;
 }
 
-int platform_pm_thaw_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->thaw_noirq)
-			ret = drv->pm->thaw_noirq(dev);
-	}
-
-	return ret;
-}
-
 int platform_pm_poweroff(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
@@ -881,22 +798,6 @@ int platform_pm_poweroff(struct device *dev)
 	return ret;
 }
 
-int platform_pm_poweroff_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->poweroff_noirq)
-			ret = drv->pm->poweroff_noirq(dev);
-	}
-
-	return ret;
-}
-
 int platform_pm_restore(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
@@ -915,22 +816,6 @@ int platform_pm_restore(struct device *dev)
 	return ret;
 }
 
-int platform_pm_restore_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->restore_noirq)
-			ret = drv->pm->restore_noirq(dev);
-	}
-
-	return ret;
-}
-
 #endif /* CONFIG_HIBERNATE_CALLBACKS */
 
 static const struct dev_pm_ops platform_dev_pm_ops = {
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 2a23f7d1a825..b5267c951161 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -264,62 +264,34 @@ static inline char *early_platform_driver_setup_func(void)		\
 }
 #endif /* MODULE */
 
-#ifdef CONFIG_PM_SLEEP
-extern int platform_pm_prepare(struct device *dev);
-extern void platform_pm_complete(struct device *dev);
-#else
-#define platform_pm_prepare	NULL
-#define platform_pm_complete	NULL
-#endif
-
 #ifdef CONFIG_SUSPEND
 extern int platform_pm_suspend(struct device *dev);
-extern int platform_pm_suspend_noirq(struct device *dev);
 extern int platform_pm_resume(struct device *dev);
-extern int platform_pm_resume_noirq(struct device *dev);
 #else
 #define platform_pm_suspend		NULL
 #define platform_pm_resume		NULL
-#define platform_pm_suspend_noirq	NULL
-#define platform_pm_resume_noirq	NULL
 #endif
 
 #ifdef CONFIG_HIBERNATE_CALLBACKS
 extern int platform_pm_freeze(struct device *dev);
-extern int platform_pm_freeze_noirq(struct device *dev);
 extern int platform_pm_thaw(struct device *dev);
-extern int platform_pm_thaw_noirq(struct device *dev);
 extern int platform_pm_poweroff(struct device *dev);
-extern int platform_pm_poweroff_noirq(struct device *dev);
 extern int platform_pm_restore(struct device *dev);
-extern int platform_pm_restore_noirq(struct device *dev);
 #else
 #define platform_pm_freeze		NULL
 #define platform_pm_thaw		NULL
 #define platform_pm_poweroff		NULL
 #define platform_pm_restore		NULL
-#define platform_pm_freeze_noirq	NULL
-#define platform_pm_thaw_noirq		NULL
-#define platform_pm_poweroff_noirq	NULL
-#define platform_pm_restore_noirq	NULL
 #endif
 
 #ifdef CONFIG_PM_SLEEP
 #define USE_PLATFORM_PM_SLEEP_OPS \
-	.prepare = platform_pm_prepare, \
-	.complete = platform_pm_complete, \
 	.suspend = platform_pm_suspend, \
 	.resume = platform_pm_resume, \
 	.freeze = platform_pm_freeze, \
 	.thaw = platform_pm_thaw, \
 	.poweroff = platform_pm_poweroff, \
-	.restore = platform_pm_restore, \
-	.suspend_noirq = platform_pm_suspend_noirq, \
-	.resume_noirq = platform_pm_resume_noirq, \
-	.freeze_noirq = platform_pm_freeze_noirq, \
-	.thaw_noirq = platform_pm_thaw_noirq, \
-	.poweroff_noirq = platform_pm_poweroff_noirq, \
-	.restore_noirq = platform_pm_restore_noirq,
+	.restore = platform_pm_restore,
 #else
 #define USE_PLATFORM_PM_SLEEP_OPS
 #endif
-- 
cgit v1.2.3


From 90363ddf0a1a4dccfbb8d0c10b8f488bc7fa69f8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 18 Dec 2011 00:34:42 +0100
Subject: PM: Drop generic_subsys_pm_ops

Since the PM core is now going to execute driver callbacks directly
if the corresponding subsystem callbacks are not present,
forward-only subsystem callbacks (i.e. such that only execute the
corresponding driver callbacks) are not necessary any more.  Thus
it is possible to remove generic_subsys_pm_ops, because the only
callback in there that is not forward-only, .runtime_idle, is not
really used by the only user of generic_subsys_pm_ops, which is
vio_bus_type.

However, the generic callback routines themselves cannot be removed
from generic_ops.c, because they are used individually by a number
of subsystems.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/powerpc/kernel/vio.c        |  1 -
 drivers/base/power/generic_ops.c | 25 -------------------------
 include/linux/pm.h               | 13 -------------
 3 files changed, 39 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index f65af61996bd..8b086299ba25 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1406,7 +1406,6 @@ static struct bus_type vio_bus_type = {
 	.match = vio_bus_match,
 	.probe = vio_bus_probe,
 	.remove = vio_bus_remove,
-	.pm = GENERIC_SUBSYS_PM_OPS,
 };
 
 /**
diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c
index 5a5b154bc1e9..10bdd793f0bd 100644
--- a/drivers/base/power/generic_ops.c
+++ b/drivers/base/power/generic_ops.c
@@ -276,28 +276,3 @@ void pm_generic_complete(struct device *dev)
 	pm_runtime_idle(dev);
 }
 #endif /* CONFIG_PM_SLEEP */
-
-struct dev_pm_ops generic_subsys_pm_ops = {
-#ifdef CONFIG_PM_SLEEP
-	.prepare = pm_generic_prepare,
-	.suspend = pm_generic_suspend,
-	.suspend_noirq = pm_generic_suspend_noirq,
-	.resume = pm_generic_resume,
-	.resume_noirq = pm_generic_resume_noirq,
-	.freeze = pm_generic_freeze,
-	.freeze_noirq = pm_generic_freeze_noirq,
-	.thaw = pm_generic_thaw,
-	.thaw_noirq = pm_generic_thaw_noirq,
-	.poweroff = pm_generic_poweroff,
-	.poweroff_noirq = pm_generic_poweroff_noirq,
-	.restore = pm_generic_restore,
-	.restore_noirq = pm_generic_restore_noirq,
-	.complete = pm_generic_complete,
-#endif
-#ifdef CONFIG_PM_RUNTIME
-	.runtime_suspend = pm_generic_runtime_suspend,
-	.runtime_resume = pm_generic_runtime_resume,
-	.runtime_idle = pm_generic_runtime_idle,
-#endif
-};
-EXPORT_SYMBOL_GPL(generic_subsys_pm_ops);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 3f3ed83a9aa5..21e04dd72a84 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -300,19 +300,6 @@ const struct dev_pm_ops name = { \
 	SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
 }
 
-/*
- * Use this for subsystems (bus types, device types, device classes) that don't
- * need any special suspend/resume handling in addition to invoking the PM
- * callbacks provided by device drivers supporting both the system sleep PM and
- * runtime PM, make the pm member point to generic_subsys_pm_ops.
- */
-#ifdef CONFIG_PM
-extern struct dev_pm_ops generic_subsys_pm_ops;
-#define GENERIC_SUBSYS_PM_OPS	(&generic_subsys_pm_ops)
-#else
-#define GENERIC_SUBSYS_PM_OPS	NULL
-#endif
-
 /**
  * PM_EVENT_ messages
  *
-- 
cgit v1.2.3


From 8a25a2fd126c621f44f3aeaef80d51f00fc11639 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Wed, 21 Dec 2011 14:29:42 -0800
Subject: cpu: convert 'cpu' and 'machinecheck' sysdev_class to a regular
 subsystem

This moves the 'cpu sysdev_class' over to a regular 'cpu' subsystem
and converts the devices to regular devices. The sysdev drivers are
implemented as subsystem interfaces now.

After all sysdev classes are ported to regular driver core entities, the
sysdev implementation will be entirely removed from the kernel.

Userspace relies on events and generic sysfs subsystem infrastructure
from sysdev devices, which are made available with this conversion.

Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Borislav Petkov <bp@amd64.org>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Cc: Len Brown <lenb@kernel.org>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/avr32/kernel/cpu.c                         |  74 +++----
 arch/ia64/kernel/err_inject.c                   |  52 ++---
 arch/ia64/kernel/topology.c                     |  10 +-
 arch/powerpc/include/asm/spu.h                  |  12 +-
 arch/powerpc/include/asm/topology.h             |  10 +-
 arch/powerpc/kernel/cacheinfo.c                 |  10 +-
 arch/powerpc/kernel/smp.c                       |   2 +-
 arch/powerpc/kernel/sysfs.c                     | 257 ++++++++++++------------
 arch/powerpc/mm/numa.c                          |   8 +-
 arch/powerpc/platforms/cell/cbe_thermal.c       | 144 ++++++-------
 arch/powerpc/platforms/cell/spu_base.c          |  61 +++---
 arch/powerpc/platforms/pseries/pseries_energy.c |  71 ++++---
 arch/powerpc/sysdev/ppc4xx_cpm.c                |   6 +-
 arch/s390/kernel/smp.c                          |  76 +++----
 arch/s390/kernel/topology.c                     |   6 +-
 arch/sh/kernel/cpu/sh4/sq.c                     |  24 ++-
 arch/sparc/kernel/sysfs.c                       | 122 +++++------
 arch/tile/kernel/sysfs.c                        |  61 +++---
 arch/x86/include/asm/mce.h                      |   2 +-
 arch/x86/kernel/cpu/intel_cacheinfo.c           |  25 ++-
 arch/x86/kernel/cpu/mcheck/mce-internal.h       |   4 +-
 arch/x86/kernel/cpu/mcheck/mce.c                | 128 ++++++------
 arch/x86/kernel/cpu/mcheck/mce_amd.c            |  11 +-
 arch/x86/kernel/cpu/mcheck/therm_throt.c        |  63 +++---
 arch/x86/kernel/microcode_core.c                |  58 +++---
 drivers/acpi/processor_driver.c                 |   6 +-
 drivers/acpi/processor_thermal.c                |   1 -
 drivers/base/cpu.c                              | 146 +++++++-------
 drivers/base/node.c                             |   8 +-
 drivers/base/topology.c                         |  51 +++--
 drivers/cpufreq/cpufreq.c                       |  79 ++++----
 drivers/cpufreq/cpufreq_stats.c                 |   1 -
 drivers/cpuidle/cpuidle.c                       |  12 +-
 drivers/cpuidle/cpuidle.h                       |  10 +-
 drivers/cpuidle/sysfs.c                         |  74 ++++---
 drivers/s390/char/sclp_config.c                 |   8 +-
 include/linux/cpu.h                             |  18 +-
 kernel/sched.c                                  |  40 ++--
 38 files changed, 874 insertions(+), 877 deletions(-)

(limited to 'include/linux')

diff --git a/arch/avr32/kernel/cpu.c b/arch/avr32/kernel/cpu.c
index e84faffbbeca..2233be71e2e8 100644
--- a/arch/avr32/kernel/cpu.c
+++ b/arch/avr32/kernel/cpu.c
@@ -6,7 +6,7 @@
  * published by the Free Software Foundation.
  */
 #include <linux/init.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/seq_file.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
@@ -26,16 +26,16 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices);
  * XXX: If/when a SMP-capable implementation of AVR32 will ever be
  * made, we must make sure that the code executes on the correct CPU.
  */
-static ssize_t show_pc0event(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t show_pc0event(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
 	unsigned long pccr;
 
 	pccr = sysreg_read(PCCR);
 	return sprintf(buf, "0x%lx\n", (pccr >> 12) & 0x3f);
 }
-static ssize_t store_pc0event(struct sys_device *dev,
-			struct sysdev_attribute *attr, const char *buf,
+static ssize_t store_pc0event(struct device *dev,
+			struct device_attribute *attr, const char *buf,
 			      size_t count)
 {
 	unsigned long val;
@@ -48,16 +48,16 @@ static ssize_t store_pc0event(struct sys_device *dev,
 	sysreg_write(PCCR, val);
 	return count;
 }
-static ssize_t show_pc0count(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t show_pc0count(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
 	unsigned long pcnt0;
 
 	pcnt0 = sysreg_read(PCNT0);
 	return sprintf(buf, "%lu\n", pcnt0);
 }
-static ssize_t store_pc0count(struct sys_device *dev,
-				struct sysdev_attribute *attr,
+static ssize_t store_pc0count(struct device *dev,
+				struct device_attribute *attr,
 				const char *buf, size_t count)
 {
 	unsigned long val;
@@ -71,16 +71,16 @@ static ssize_t store_pc0count(struct sys_device *dev,
 	return count;
 }
 
-static ssize_t show_pc1event(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t show_pc1event(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	unsigned long pccr;
 
 	pccr = sysreg_read(PCCR);
 	return sprintf(buf, "0x%lx\n", (pccr >> 18) & 0x3f);
 }
-static ssize_t store_pc1event(struct sys_device *dev,
-			      struct sysdev_attribute *attr, const char *buf,
+static ssize_t store_pc1event(struct device *dev,
+			      struct device_attribute *attr, const char *buf,
 			      size_t count)
 {
 	unsigned long val;
@@ -93,16 +93,16 @@ static ssize_t store_pc1event(struct sys_device *dev,
 	sysreg_write(PCCR, val);
 	return count;
 }
-static ssize_t show_pc1count(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t show_pc1count(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	unsigned long pcnt1;
 
 	pcnt1 = sysreg_read(PCNT1);
 	return sprintf(buf, "%lu\n", pcnt1);
 }
-static ssize_t store_pc1count(struct sys_device *dev,
-				struct sysdev_attribute *attr, const char *buf,
+static ssize_t store_pc1count(struct device *dev,
+				struct device_attribute *attr, const char *buf,
 			      size_t count)
 {
 	unsigned long val;
@@ -116,16 +116,16 @@ static ssize_t store_pc1count(struct sys_device *dev,
 	return count;
 }
 
-static ssize_t show_pccycles(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t show_pccycles(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	unsigned long pccnt;
 
 	pccnt = sysreg_read(PCCNT);
 	return sprintf(buf, "%lu\n", pccnt);
 }
-static ssize_t store_pccycles(struct sys_device *dev,
-				struct sysdev_attribute *attr, const char *buf,
+static ssize_t store_pccycles(struct device *dev,
+				struct device_attribute *attr, const char *buf,
 			      size_t count)
 {
 	unsigned long val;
@@ -139,16 +139,16 @@ static ssize_t store_pccycles(struct sys_device *dev,
 	return count;
 }
 
-static ssize_t show_pcenable(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t show_pcenable(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
 	unsigned long pccr;
 
 	pccr = sysreg_read(PCCR);
 	return sprintf(buf, "%c\n", (pccr & 1)?'1':'0');
 }
-static ssize_t store_pcenable(struct sys_device *dev,
-			      struct sysdev_attribute *attr, const char *buf,
+static ssize_t store_pcenable(struct device *dev,
+			      struct device_attribute *attr, const char *buf,
 			      size_t count)
 {
 	unsigned long pccr, val;
@@ -167,12 +167,12 @@ static ssize_t store_pcenable(struct sys_device *dev,
 	return count;
 }
 
-static SYSDEV_ATTR(pc0event, 0600, show_pc0event, store_pc0event);
-static SYSDEV_ATTR(pc0count, 0600, show_pc0count, store_pc0count);
-static SYSDEV_ATTR(pc1event, 0600, show_pc1event, store_pc1event);
-static SYSDEV_ATTR(pc1count, 0600, show_pc1count, store_pc1count);
-static SYSDEV_ATTR(pccycles, 0600, show_pccycles, store_pccycles);
-static SYSDEV_ATTR(pcenable, 0600, show_pcenable, store_pcenable);
+static DEVICE_ATTR(pc0event, 0600, show_pc0event, store_pc0event);
+static DEVICE_ATTR(pc0count, 0600, show_pc0count, store_pc0count);
+static DEVICE_ATTR(pc1event, 0600, show_pc1event, store_pc1event);
+static DEVICE_ATTR(pc1count, 0600, show_pc1count, store_pc1count);
+static DEVICE_ATTR(pccycles, 0600, show_pccycles, store_pccycles);
+static DEVICE_ATTR(pcenable, 0600, show_pcenable, store_pcenable);
 
 #endif /* CONFIG_PERFORMANCE_COUNTERS */
 
@@ -186,12 +186,12 @@ static int __init topology_init(void)
 		register_cpu(c, cpu);
 
 #ifdef CONFIG_PERFORMANCE_COUNTERS
-		sysdev_create_file(&c->sysdev, &attr_pc0event);
-		sysdev_create_file(&c->sysdev, &attr_pc0count);
-		sysdev_create_file(&c->sysdev, &attr_pc1event);
-		sysdev_create_file(&c->sysdev, &attr_pc1count);
-		sysdev_create_file(&c->sysdev, &attr_pccycles);
-		sysdev_create_file(&c->sysdev, &attr_pcenable);
+		device_create_file(&c->dev, &dev_attr_pc0event);
+		device_create_file(&c->dev, &dev_attr_pc0count);
+		device_create_file(&c->dev, &dev_attr_pc1event);
+		device_create_file(&c->dev, &dev_attr_pc1count);
+		device_create_file(&c->dev, &dev_attr_pccycles);
+		device_create_file(&c->dev, &dev_attr_pcenable);
 #endif
 	}
 
diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c
index c539c689493b..2d67317a1ec2 100644
--- a/arch/ia64/kernel/err_inject.c
+++ b/arch/ia64/kernel/err_inject.c
@@ -24,7 +24,7 @@
  * Copyright (C) 2006, Intel Corp.  All rights reserved.
  *
  */
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/cpu.h>
@@ -35,10 +35,10 @@
 #define ERR_DATA_BUFFER_SIZE 3 		// Three 8-byte;
 
 #define define_one_ro(name) 						\
-static SYSDEV_ATTR(name, 0444, show_##name, NULL)
+static DEVICE_ATTR(name, 0444, show_##name, NULL)
 
 #define define_one_rw(name) 						\
-static SYSDEV_ATTR(name, 0644, show_##name, store_##name)
+static DEVICE_ATTR(name, 0644, show_##name, store_##name)
 
 static u64 call_start[NR_CPUS];
 static u64 phys_addr[NR_CPUS];
@@ -55,7 +55,7 @@ static u64 resources[NR_CPUS];
 
 #define show(name) 							\
 static ssize_t 								\
-show_##name(struct sys_device *dev, struct sysdev_attribute *attr,	\
+show_##name(struct device *dev, struct device_attribute *attr,	\
 		char *buf)						\
 {									\
 	u32 cpu=dev->id;						\
@@ -64,7 +64,7 @@ show_##name(struct sys_device *dev, struct sysdev_attribute *attr,	\
 
 #define store(name)							\
 static ssize_t 								\
-store_##name(struct sys_device *dev, struct sysdev_attribute *attr,	\
+store_##name(struct device *dev, struct device_attribute *attr,	\
 					const char *buf, size_t size)	\
 {									\
 	unsigned int cpu=dev->id;					\
@@ -78,7 +78,7 @@ show(call_start)
  * processor. The cpu number in driver is only used for storing data.
  */
 static ssize_t
-store_call_start(struct sys_device *dev, struct sysdev_attribute *attr,
+store_call_start(struct device *dev, struct device_attribute *attr,
 		const char *buf, size_t size)
 {
 	unsigned int cpu=dev->id;
@@ -127,7 +127,7 @@ show(err_type_info)
 store(err_type_info)
 
 static ssize_t
-show_virtual_to_phys(struct sys_device *dev, struct sysdev_attribute *attr,
+show_virtual_to_phys(struct device *dev, struct device_attribute *attr,
 			char *buf)
 {
 	unsigned int cpu=dev->id;
@@ -135,7 +135,7 @@ show_virtual_to_phys(struct sys_device *dev, struct sysdev_attribute *attr,
 }
 
 static ssize_t
-store_virtual_to_phys(struct sys_device *dev, struct sysdev_attribute *attr,
+store_virtual_to_phys(struct device *dev, struct device_attribute *attr,
 			const char *buf, size_t size)
 {
 	unsigned int cpu=dev->id;
@@ -159,8 +159,8 @@ show(err_struct_info)
 store(err_struct_info)
 
 static ssize_t
-show_err_data_buffer(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
+show_err_data_buffer(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
 	unsigned int cpu=dev->id;
 
@@ -171,8 +171,8 @@ show_err_data_buffer(struct sys_device *dev,
 }
 
 static ssize_t
-store_err_data_buffer(struct sys_device *dev,
-			struct sysdev_attribute *attr,
+store_err_data_buffer(struct device *dev,
+			struct device_attribute *attr,
 			const char *buf, size_t size)
 {
 	unsigned int cpu=dev->id;
@@ -209,14 +209,14 @@ define_one_ro(capabilities);
 define_one_ro(resources);
 
 static struct attribute *default_attrs[] = {
-	&attr_call_start.attr,
-	&attr_virtual_to_phys.attr,
-	&attr_err_type_info.attr,
-	&attr_err_struct_info.attr,
-	&attr_err_data_buffer.attr,
-	&attr_status.attr,
-	&attr_capabilities.attr,
-	&attr_resources.attr,
+	&dev_attr_call_start.attr,
+	&dev_attr_virtual_to_phys.attr,
+	&dev_attr_err_type_info.attr,
+	&dev_attr_err_struct_info.attr,
+	&dev_attr_err_data_buffer.attr,
+	&dev_attr_status.attr,
+	&dev_attr_capabilities.attr,
+	&dev_attr_resources.attr,
 	NULL
 };
 
@@ -225,12 +225,12 @@ static struct attribute_group err_inject_attr_group = {
 	.name = "err_inject"
 };
 /* Add/Remove err_inject interface for CPU device */
-static int __cpuinit err_inject_add_dev(struct sys_device * sys_dev)
+static int __cpuinit err_inject_add_dev(struct device * sys_dev)
 {
 	return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group);
 }
 
-static int __cpuinit err_inject_remove_dev(struct sys_device * sys_dev)
+static int __cpuinit err_inject_remove_dev(struct device * sys_dev)
 {
 	sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
 	return 0;
@@ -239,9 +239,9 @@ static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb,
 		unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
-	struct sys_device *sys_dev;
+	struct device *sys_dev;
 
-	sys_dev = get_cpu_sysdev(cpu);
+	sys_dev = get_cpu_device(cpu);
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
@@ -283,13 +283,13 @@ static void __exit
 err_inject_exit(void)
 {
 	int i;
-	struct sys_device *sys_dev;
+	struct device *sys_dev;
 
 #ifdef ERR_INJ_DEBUG
 	printk(KERN_INFO "Exit error injection driver.\n");
 #endif
 	for_each_online_cpu(i) {
-		sys_dev = get_cpu_sysdev(i);
+		sys_dev = get_cpu_device(i);
 		sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
 	}
 	unregister_hotcpu_notifier(&err_inject_cpu_notifier);
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 9be1f11a01d9..9deb21dbf629 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -350,7 +350,7 @@ static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu)
 }
 
 /* Add cache interface for CPU device */
-static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
+static int __cpuinit cache_add_dev(struct device * sys_dev)
 {
 	unsigned int cpu = sys_dev->id;
 	unsigned long i, j;
@@ -400,7 +400,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 }
 
 /* Remove cache interface for CPU device */
-static int __cpuinit cache_remove_dev(struct sys_device * sys_dev)
+static int __cpuinit cache_remove_dev(struct device * sys_dev)
 {
 	unsigned int cpu = sys_dev->id;
 	unsigned long i;
@@ -428,9 +428,9 @@ static int __cpuinit cache_cpu_callback(struct notifier_block *nfb,
 		unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
-	struct sys_device *sys_dev;
+	struct device *sys_dev;
 
-	sys_dev = get_cpu_sysdev(cpu);
+	sys_dev = get_cpu_device(cpu);
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
@@ -454,7 +454,7 @@ static int __init cache_sysfs_init(void)
 	int i;
 
 	for_each_online_cpu(i) {
-		struct sys_device *sys_dev = get_cpu_sysdev((unsigned int)i);
+		struct device *sys_dev = get_cpu_device((unsigned int)i);
 		cache_add_dev(sys_dev);
 	}
 
diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h
index 4e360bd4a35a..fff921345ddc 100644
--- a/arch/powerpc/include/asm/spu.h
+++ b/arch/powerpc/include/asm/spu.h
@@ -25,7 +25,7 @@
 #ifdef __KERNEL__
 
 #include <linux/workqueue.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/mutex.h>
 
 #define LS_SIZE (256 * 1024)
@@ -166,7 +166,7 @@ struct spu {
 	/* beat only */
 	u64 shadow_int_mask_RW[3];
 
-	struct sys_device sysdev;
+	struct device dev;
 
 	int has_mem_affinity;
 	struct list_head aff_list;
@@ -270,11 +270,11 @@ struct spufs_calls {
 int register_spu_syscalls(struct spufs_calls *calls);
 void unregister_spu_syscalls(struct spufs_calls *calls);
 
-int spu_add_sysdev_attr(struct sysdev_attribute *attr);
-void spu_remove_sysdev_attr(struct sysdev_attribute *attr);
+int spu_add_dev_attr(struct device_attribute *attr);
+void spu_remove_dev_attr(struct device_attribute *attr);
 
-int spu_add_sysdev_attr_group(struct attribute_group *attrs);
-void spu_remove_sysdev_attr_group(struct attribute_group *attrs);
+int spu_add_dev_attr_group(struct attribute_group *attrs);
+void spu_remove_dev_attr_group(struct attribute_group *attrs);
 
 int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
 		unsigned long dsisr, unsigned *flt);
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 1e104af08483..c97185885c6d 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -3,7 +3,7 @@
 #ifdef __KERNEL__
 
 
-struct sys_device;
+struct device;
 struct device_node;
 
 #ifdef CONFIG_NUMA
@@ -86,19 +86,19 @@ extern int __node_distance(int, int);
 
 extern void __init dump_numa_cpu_topology(void);
 
-extern int sysfs_add_device_to_node(struct sys_device *dev, int nid);
-extern void sysfs_remove_device_from_node(struct sys_device *dev, int nid);
+extern int sysfs_add_device_to_node(struct device *dev, int nid);
+extern void sysfs_remove_device_from_node(struct device *dev, int nid);
 
 #else
 
 static inline void dump_numa_cpu_topology(void) {}
 
-static inline int sysfs_add_device_to_node(struct sys_device *dev, int nid)
+static inline int sysfs_add_device_to_node(struct device *dev, int nid)
 {
 	return 0;
 }
 
-static inline void sysfs_remove_device_from_node(struct sys_device *dev,
+static inline void sysfs_remove_device_from_node(struct device *dev,
 						int nid)
 {
 }
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index a3c684b4c862..92c6b008dd2b 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -451,15 +451,15 @@ out:
 static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id)
 {
 	struct cache_dir *cache_dir;
-	struct sys_device *sysdev;
+	struct device *dev;
 	struct kobject *kobj = NULL;
 
-	sysdev = get_cpu_sysdev(cpu_id);
-	WARN_ONCE(!sysdev, "no sysdev for CPU %i\n", cpu_id);
-	if (!sysdev)
+	dev = get_cpu_device(cpu_id);
+	WARN_ONCE(!dev, "no dev for CPU %i\n", cpu_id);
+	if (!dev)
 		goto err;
 
-	kobj = kobject_create_and_add("cache", &sysdev->kobj);
+	kobj = kobject_create_and_add("cache", &dev->kobj);
 	if (!kobj)
 		goto err;
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 25ddbfc7dd36..da08240353fa 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -27,7 +27,7 @@
 #include <linux/spinlock.h>
 #include <linux/cache.h>
 #include <linux/err.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/topology.h>
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index ce035c1905f0..f396ef27916b 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -1,4 +1,4 @@
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -37,12 +37,12 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices);
 /* Time in microseconds we delay before sleeping in the idle loop */
 DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
 
-static ssize_t store_smt_snooze_delay(struct sys_device *dev,
-				      struct sysdev_attribute *attr,
+static ssize_t store_smt_snooze_delay(struct device *dev,
+				      struct device_attribute *attr,
 				      const char *buf,
 				      size_t count)
 {
-	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
 	ssize_t ret;
 	long snooze;
 
@@ -50,21 +50,21 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev,
 	if (ret != 1)
 		return -EINVAL;
 
-	per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze;
+	per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
 
 	return count;
 }
 
-static ssize_t show_smt_snooze_delay(struct sys_device *dev,
-				     struct sysdev_attribute *attr,
+static ssize_t show_smt_snooze_delay(struct device *dev,
+				     struct device_attribute *attr,
 				     char *buf)
 {
-	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
 
-	return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->sysdev.id));
+	return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id));
 }
 
-static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
+static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
 		   store_smt_snooze_delay);
 
 static int __init setup_smt_snooze_delay(char *str)
@@ -117,25 +117,25 @@ static void write_##NAME(void *val) \
 	ppc_enable_pmcs(); \
 	mtspr(ADDRESS, *(unsigned long *)val);	\
 } \
-static ssize_t show_##NAME(struct sys_device *dev, \
-			struct sysdev_attribute *attr, \
+static ssize_t show_##NAME(struct device *dev, \
+			struct device_attribute *attr, \
 			char *buf) \
 { \
-	struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
+	struct cpu *cpu = container_of(dev, struct cpu, dev); \
 	unsigned long val; \
-	smp_call_function_single(cpu->sysdev.id, read_##NAME, &val, 1);	\
+	smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1);	\
 	return sprintf(buf, "%lx\n", val); \
 } \
 static ssize_t __used \
-	store_##NAME(struct sys_device *dev, struct sysdev_attribute *attr, \
+	store_##NAME(struct device *dev, struct device_attribute *attr, \
 			const char *buf, size_t count) \
 { \
-	struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
+	struct cpu *cpu = container_of(dev, struct cpu, dev); \
 	unsigned long val; \
 	int ret = sscanf(buf, "%lx", &val); \
 	if (ret != 1) \
 		return -EINVAL; \
-	smp_call_function_single(cpu->sysdev.id, write_##NAME, &val, 1); \
+	smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \
 	return count; \
 }
 
@@ -178,22 +178,22 @@ SYSFS_PMCSETUP(purr, SPRN_PURR);
 SYSFS_PMCSETUP(spurr, SPRN_SPURR);
 SYSFS_PMCSETUP(dscr, SPRN_DSCR);
 
-static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
-static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL);
-static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr);
-static SYSDEV_ATTR(purr, 0600, show_purr, store_purr);
+static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static DEVICE_ATTR(spurr, 0600, show_spurr, NULL);
+static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
+static DEVICE_ATTR(purr, 0600, show_purr, store_purr);
 
 unsigned long dscr_default = 0;
 EXPORT_SYMBOL(dscr_default);
 
-static ssize_t show_dscr_default(struct sysdev_class *class,
-		struct sysdev_class_attribute *attr, char *buf)
+static ssize_t show_dscr_default(struct device *dev,
+		struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%lx\n", dscr_default);
 }
 
-static ssize_t __used store_dscr_default(struct sysdev_class *class,
-		struct sysdev_class_attribute *attr, const char *buf,
+static ssize_t __used store_dscr_default(struct device *dev,
+		struct device_attribute *attr, const char *buf,
 		size_t count)
 {
 	unsigned long val;
@@ -207,15 +207,14 @@ static ssize_t __used store_dscr_default(struct sysdev_class *class,
 	return count;
 }
 
-static SYSDEV_CLASS_ATTR(dscr_default, 0600,
+static DEVICE_ATTR(dscr_default, 0600,
 		show_dscr_default, store_dscr_default);
 
 static void sysfs_create_dscr_default(void)
 {
 	int err = 0;
 	if (cpu_has_feature(CPU_FTR_DSCR))
-		err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
-			&attr_dscr_default.attr);
+		err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
 }
 #endif /* CONFIG_PPC64 */
 
@@ -259,72 +258,72 @@ SYSFS_PMCSETUP(tsr3, SPRN_PA6T_TSR3);
 #endif /* HAS_PPC_PMC_PA6T */
 
 #ifdef HAS_PPC_PMC_IBM
-static struct sysdev_attribute ibm_common_attrs[] = {
-	_SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
-	_SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
+static struct device_attribute ibm_common_attrs[] = {
+	__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
+	__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
 };
 #endif /* HAS_PPC_PMC_G4 */
 
 #ifdef HAS_PPC_PMC_G4
-static struct sysdev_attribute g4_common_attrs[] = {
-	_SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
-	_SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
-	_SYSDEV_ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2),
+static struct device_attribute g4_common_attrs[] = {
+	__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
+	__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
+	__ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2),
 };
 #endif /* HAS_PPC_PMC_G4 */
 
-static struct sysdev_attribute classic_pmc_attrs[] = {
-	_SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1),
-	_SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2),
-	_SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3),
-	_SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4),
-	_SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5),
-	_SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6),
+static struct device_attribute classic_pmc_attrs[] = {
+	__ATTR(pmc1, 0600, show_pmc1, store_pmc1),
+	__ATTR(pmc2, 0600, show_pmc2, store_pmc2),
+	__ATTR(pmc3, 0600, show_pmc3, store_pmc3),
+	__ATTR(pmc4, 0600, show_pmc4, store_pmc4),
+	__ATTR(pmc5, 0600, show_pmc5, store_pmc5),
+	__ATTR(pmc6, 0600, show_pmc6, store_pmc6),
 #ifdef CONFIG_PPC64
-	_SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7),
-	_SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8),
+	__ATTR(pmc7, 0600, show_pmc7, store_pmc7),
+	__ATTR(pmc8, 0600, show_pmc8, store_pmc8),
 #endif
 };
 
 #ifdef HAS_PPC_PMC_PA6T
-static struct sysdev_attribute pa6t_attrs[] = {
-	_SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
-	_SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
-	_SYSDEV_ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0),
-	_SYSDEV_ATTR(pmc1, 0600, show_pa6t_pmc1, store_pa6t_pmc1),
-	_SYSDEV_ATTR(pmc2, 0600, show_pa6t_pmc2, store_pa6t_pmc2),
-	_SYSDEV_ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3),
-	_SYSDEV_ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4),
-	_SYSDEV_ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5),
+static struct device_attribute pa6t_attrs[] = {
+	__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
+	__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
+	__ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0),
+	__ATTR(pmc1, 0600, show_pa6t_pmc1, store_pa6t_pmc1),
+	__ATTR(pmc2, 0600, show_pa6t_pmc2, store_pa6t_pmc2),
+	__ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3),
+	__ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4),
+	__ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5),
 #ifdef CONFIG_DEBUG_KERNEL
-	_SYSDEV_ATTR(hid0, 0600, show_hid0, store_hid0),
-	_SYSDEV_ATTR(hid1, 0600, show_hid1, store_hid1),
-	_SYSDEV_ATTR(hid4, 0600, show_hid4, store_hid4),
-	_SYSDEV_ATTR(hid5, 0600, show_hid5, store_hid5),
-	_SYSDEV_ATTR(ima0, 0600, show_ima0, store_ima0),
-	_SYSDEV_ATTR(ima1, 0600, show_ima1, store_ima1),
-	_SYSDEV_ATTR(ima2, 0600, show_ima2, store_ima2),
-	_SYSDEV_ATTR(ima3, 0600, show_ima3, store_ima3),
-	_SYSDEV_ATTR(ima4, 0600, show_ima4, store_ima4),
-	_SYSDEV_ATTR(ima5, 0600, show_ima5, store_ima5),
-	_SYSDEV_ATTR(ima6, 0600, show_ima6, store_ima6),
-	_SYSDEV_ATTR(ima7, 0600, show_ima7, store_ima7),
-	_SYSDEV_ATTR(ima8, 0600, show_ima8, store_ima8),
-	_SYSDEV_ATTR(ima9, 0600, show_ima9, store_ima9),
-	_SYSDEV_ATTR(imaat, 0600, show_imaat, store_imaat),
-	_SYSDEV_ATTR(btcr, 0600, show_btcr, store_btcr),
-	_SYSDEV_ATTR(pccr, 0600, show_pccr, store_pccr),
-	_SYSDEV_ATTR(rpccr, 0600, show_rpccr, store_rpccr),
-	_SYSDEV_ATTR(der, 0600, show_der, store_der),
-	_SYSDEV_ATTR(mer, 0600, show_mer, store_mer),
-	_SYSDEV_ATTR(ber, 0600, show_ber, store_ber),
-	_SYSDEV_ATTR(ier, 0600, show_ier, store_ier),
-	_SYSDEV_ATTR(sier, 0600, show_sier, store_sier),
-	_SYSDEV_ATTR(siar, 0600, show_siar, store_siar),
-	_SYSDEV_ATTR(tsr0, 0600, show_tsr0, store_tsr0),
-	_SYSDEV_ATTR(tsr1, 0600, show_tsr1, store_tsr1),
-	_SYSDEV_ATTR(tsr2, 0600, show_tsr2, store_tsr2),
-	_SYSDEV_ATTR(tsr3, 0600, show_tsr3, store_tsr3),
+	__ATTR(hid0, 0600, show_hid0, store_hid0),
+	__ATTR(hid1, 0600, show_hid1, store_hid1),
+	__ATTR(hid4, 0600, show_hid4, store_hid4),
+	__ATTR(hid5, 0600, show_hid5, store_hid5),
+	__ATTR(ima0, 0600, show_ima0, store_ima0),
+	__ATTR(ima1, 0600, show_ima1, store_ima1),
+	__ATTR(ima2, 0600, show_ima2, store_ima2),
+	__ATTR(ima3, 0600, show_ima3, store_ima3),
+	__ATTR(ima4, 0600, show_ima4, store_ima4),
+	__ATTR(ima5, 0600, show_ima5, store_ima5),
+	__ATTR(ima6, 0600, show_ima6, store_ima6),
+	__ATTR(ima7, 0600, show_ima7, store_ima7),
+	__ATTR(ima8, 0600, show_ima8, store_ima8),
+	__ATTR(ima9, 0600, show_ima9, store_ima9),
+	__ATTR(imaat, 0600, show_imaat, store_imaat),
+	__ATTR(btcr, 0600, show_btcr, store_btcr),
+	__ATTR(pccr, 0600, show_pccr, store_pccr),
+	__ATTR(rpccr, 0600, show_rpccr, store_rpccr),
+	__ATTR(der, 0600, show_der, store_der),
+	__ATTR(mer, 0600, show_mer, store_mer),
+	__ATTR(ber, 0600, show_ber, store_ber),
+	__ATTR(ier, 0600, show_ier, store_ier),
+	__ATTR(sier, 0600, show_sier, store_sier),
+	__ATTR(siar, 0600, show_siar, store_siar),
+	__ATTR(tsr0, 0600, show_tsr0, store_tsr0),
+	__ATTR(tsr1, 0600, show_tsr1, store_tsr1),
+	__ATTR(tsr2, 0600, show_tsr2, store_tsr2),
+	__ATTR(tsr3, 0600, show_tsr3, store_tsr3),
 #endif /* CONFIG_DEBUG_KERNEL */
 };
 #endif /* HAS_PPC_PMC_PA6T */
@@ -333,14 +332,14 @@ static struct sysdev_attribute pa6t_attrs[] = {
 static void __cpuinit register_cpu_online(unsigned int cpu)
 {
 	struct cpu *c = &per_cpu(cpu_devices, cpu);
-	struct sys_device *s = &c->sysdev;
-	struct sysdev_attribute *attrs, *pmc_attrs;
+	struct device *s = &c->dev;
+	struct device_attribute *attrs, *pmc_attrs;
 	int i, nattrs;
 
 #ifdef CONFIG_PPC64
 	if (!firmware_has_feature(FW_FEATURE_ISERIES) &&
 			cpu_has_feature(CPU_FTR_SMT))
-		sysdev_create_file(s, &attr_smt_snooze_delay);
+		device_create_file(s, &dev_attr_smt_snooze_delay);
 #endif
 
 	/* PMC stuff */
@@ -348,14 +347,14 @@ static void __cpuinit register_cpu_online(unsigned int cpu)
 #ifdef HAS_PPC_PMC_IBM
 	case PPC_PMC_IBM:
 		attrs = ibm_common_attrs;
-		nattrs = sizeof(ibm_common_attrs) / sizeof(struct sysdev_attribute);
+		nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute);
 		pmc_attrs = classic_pmc_attrs;
 		break;
 #endif /* HAS_PPC_PMC_IBM */
 #ifdef HAS_PPC_PMC_G4
 	case PPC_PMC_G4:
 		attrs = g4_common_attrs;
-		nattrs = sizeof(g4_common_attrs) / sizeof(struct sysdev_attribute);
+		nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute);
 		pmc_attrs = classic_pmc_attrs;
 		break;
 #endif /* HAS_PPC_PMC_G4 */
@@ -363,7 +362,7 @@ static void __cpuinit register_cpu_online(unsigned int cpu)
 	case PPC_PMC_PA6T:
 		/* PA Semi starts counting at PMC0 */
 		attrs = pa6t_attrs;
-		nattrs = sizeof(pa6t_attrs) / sizeof(struct sysdev_attribute);
+		nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
 		pmc_attrs = NULL;
 		break;
 #endif /* HAS_PPC_PMC_PA6T */
@@ -374,24 +373,24 @@ static void __cpuinit register_cpu_online(unsigned int cpu)
 	}
 
 	for (i = 0; i < nattrs; i++)
-		sysdev_create_file(s, &attrs[i]);
+		device_create_file(s, &attrs[i]);
 
 	if (pmc_attrs)
 		for (i = 0; i < cur_cpu_spec->num_pmcs; i++)
-			sysdev_create_file(s, &pmc_attrs[i]);
+			device_create_file(s, &pmc_attrs[i]);
 
 #ifdef CONFIG_PPC64
 	if (cpu_has_feature(CPU_FTR_MMCRA))
-		sysdev_create_file(s, &attr_mmcra);
+		device_create_file(s, &dev_attr_mmcra);
 
 	if (cpu_has_feature(CPU_FTR_PURR))
-		sysdev_create_file(s, &attr_purr);
+		device_create_file(s, &dev_attr_purr);
 
 	if (cpu_has_feature(CPU_FTR_SPURR))
-		sysdev_create_file(s, &attr_spurr);
+		device_create_file(s, &dev_attr_spurr);
 
 	if (cpu_has_feature(CPU_FTR_DSCR))
-		sysdev_create_file(s, &attr_dscr);
+		device_create_file(s, &dev_attr_dscr);
 #endif /* CONFIG_PPC64 */
 
 	cacheinfo_cpu_online(cpu);
@@ -401,8 +400,8 @@ static void __cpuinit register_cpu_online(unsigned int cpu)
 static void unregister_cpu_online(unsigned int cpu)
 {
 	struct cpu *c = &per_cpu(cpu_devices, cpu);
-	struct sys_device *s = &c->sysdev;
-	struct sysdev_attribute *attrs, *pmc_attrs;
+	struct device *s = &c->dev;
+	struct device_attribute *attrs, *pmc_attrs;
 	int i, nattrs;
 
 	BUG_ON(!c->hotpluggable);
@@ -410,7 +409,7 @@ static void unregister_cpu_online(unsigned int cpu)
 #ifdef CONFIG_PPC64
 	if (!firmware_has_feature(FW_FEATURE_ISERIES) &&
 			cpu_has_feature(CPU_FTR_SMT))
-		sysdev_remove_file(s, &attr_smt_snooze_delay);
+		device_remove_file(s, &dev_attr_smt_snooze_delay);
 #endif
 
 	/* PMC stuff */
@@ -418,14 +417,14 @@ static void unregister_cpu_online(unsigned int cpu)
 #ifdef HAS_PPC_PMC_IBM
 	case PPC_PMC_IBM:
 		attrs = ibm_common_attrs;
-		nattrs = sizeof(ibm_common_attrs) / sizeof(struct sysdev_attribute);
+		nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute);
 		pmc_attrs = classic_pmc_attrs;
 		break;
 #endif /* HAS_PPC_PMC_IBM */
 #ifdef HAS_PPC_PMC_G4
 	case PPC_PMC_G4:
 		attrs = g4_common_attrs;
-		nattrs = sizeof(g4_common_attrs) / sizeof(struct sysdev_attribute);
+		nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute);
 		pmc_attrs = classic_pmc_attrs;
 		break;
 #endif /* HAS_PPC_PMC_G4 */
@@ -433,7 +432,7 @@ static void unregister_cpu_online(unsigned int cpu)
 	case PPC_PMC_PA6T:
 		/* PA Semi starts counting at PMC0 */
 		attrs = pa6t_attrs;
-		nattrs = sizeof(pa6t_attrs) / sizeof(struct sysdev_attribute);
+		nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
 		pmc_attrs = NULL;
 		break;
 #endif /* HAS_PPC_PMC_PA6T */
@@ -444,24 +443,24 @@ static void unregister_cpu_online(unsigned int cpu)
 	}
 
 	for (i = 0; i < nattrs; i++)
-		sysdev_remove_file(s, &attrs[i]);
+		device_remove_file(s, &attrs[i]);
 
 	if (pmc_attrs)
 		for (i = 0; i < cur_cpu_spec->num_pmcs; i++)
-			sysdev_remove_file(s, &pmc_attrs[i]);
+			device_remove_file(s, &pmc_attrs[i]);
 
 #ifdef CONFIG_PPC64
 	if (cpu_has_feature(CPU_FTR_MMCRA))
-		sysdev_remove_file(s, &attr_mmcra);
+		device_remove_file(s, &dev_attr_mmcra);
 
 	if (cpu_has_feature(CPU_FTR_PURR))
-		sysdev_remove_file(s, &attr_purr);
+		device_remove_file(s, &dev_attr_purr);
 
 	if (cpu_has_feature(CPU_FTR_SPURR))
-		sysdev_remove_file(s, &attr_spurr);
+		device_remove_file(s, &dev_attr_spurr);
 
 	if (cpu_has_feature(CPU_FTR_DSCR))
-		sysdev_remove_file(s, &attr_dscr);
+		device_remove_file(s, &dev_attr_dscr);
 #endif /* CONFIG_PPC64 */
 
 	cacheinfo_cpu_offline(cpu);
@@ -513,70 +512,70 @@ static struct notifier_block __cpuinitdata sysfs_cpu_nb = {
 
 static DEFINE_MUTEX(cpu_mutex);
 
-int cpu_add_sysdev_attr(struct sysdev_attribute *attr)
+int cpu_add_dev_attr(struct device_attribute *attr)
 {
 	int cpu;
 
 	mutex_lock(&cpu_mutex);
 
 	for_each_possible_cpu(cpu) {
-		sysdev_create_file(get_cpu_sysdev(cpu), attr);
+		device_create_file(get_cpu_device(cpu), attr);
 	}
 
 	mutex_unlock(&cpu_mutex);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpu_add_sysdev_attr);
+EXPORT_SYMBOL_GPL(cpu_add_dev_attr);
 
-int cpu_add_sysdev_attr_group(struct attribute_group *attrs)
+int cpu_add_dev_attr_group(struct attribute_group *attrs)
 {
 	int cpu;
-	struct sys_device *sysdev;
+	struct device *dev;
 	int ret;
 
 	mutex_lock(&cpu_mutex);
 
 	for_each_possible_cpu(cpu) {
-		sysdev = get_cpu_sysdev(cpu);
-		ret = sysfs_create_group(&sysdev->kobj, attrs);
+		dev = get_cpu_device(cpu);
+		ret = sysfs_create_group(&dev->kobj, attrs);
 		WARN_ON(ret != 0);
 	}
 
 	mutex_unlock(&cpu_mutex);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cpu_add_sysdev_attr_group);
+EXPORT_SYMBOL_GPL(cpu_add_dev_attr_group);
 
 
-void cpu_remove_sysdev_attr(struct sysdev_attribute *attr)
+void cpu_remove_dev_attr(struct device_attribute *attr)
 {
 	int cpu;
 
 	mutex_lock(&cpu_mutex);
 
 	for_each_possible_cpu(cpu) {
-		sysdev_remove_file(get_cpu_sysdev(cpu), attr);
+		device_remove_file(get_cpu_device(cpu), attr);
 	}
 
 	mutex_unlock(&cpu_mutex);
 }
-EXPORT_SYMBOL_GPL(cpu_remove_sysdev_attr);
+EXPORT_SYMBOL_GPL(cpu_remove_dev_attr);
 
-void cpu_remove_sysdev_attr_group(struct attribute_group *attrs)
+void cpu_remove_dev_attr_group(struct attribute_group *attrs)
 {
 	int cpu;
-	struct sys_device *sysdev;
+	struct device *dev;
 
 	mutex_lock(&cpu_mutex);
 
 	for_each_possible_cpu(cpu) {
-		sysdev = get_cpu_sysdev(cpu);
-		sysfs_remove_group(&sysdev->kobj, attrs);
+		dev = get_cpu_device(cpu);
+		sysfs_remove_group(&dev->kobj, attrs);
 	}
 
 	mutex_unlock(&cpu_mutex);
 }
-EXPORT_SYMBOL_GPL(cpu_remove_sysdev_attr_group);
+EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group);
 
 
 /* NUMA stuff */
@@ -590,7 +589,7 @@ static void register_nodes(void)
 		register_one_node(i);
 }
 
-int sysfs_add_device_to_node(struct sys_device *dev, int nid)
+int sysfs_add_device_to_node(struct device *dev, int nid)
 {
 	struct node *node = &node_devices[nid];
 	return sysfs_create_link(&node->sysdev.kobj, &dev->kobj,
@@ -598,7 +597,7 @@ int sysfs_add_device_to_node(struct sys_device *dev, int nid)
 }
 EXPORT_SYMBOL_GPL(sysfs_add_device_to_node);
 
-void sysfs_remove_device_from_node(struct sys_device *dev, int nid)
+void sysfs_remove_device_from_node(struct device *dev, int nid)
 {
 	struct node *node = &node_devices[nid];
 	sysfs_remove_link(&node->sysdev.kobj, kobject_name(&dev->kobj));
@@ -614,14 +613,14 @@ static void register_nodes(void)
 #endif
 
 /* Only valid if CPU is present. */
-static ssize_t show_physical_id(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t show_physical_id(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
-	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
 
-	return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id));
+	return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->dev.id));
 }
-static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL);
+static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
 
 static int __init topology_init(void)
 {
@@ -646,7 +645,7 @@ static int __init topology_init(void)
 		if (cpu_online(cpu) || c->hotpluggable) {
 			register_cpu(c, cpu);
 
-			sysdev_create_file(&c->sysdev, &attr_physical_id);
+			device_create_file(&c->dev, &dev_attr_physical_id);
 		}
 
 		if (cpu_online(cpu))
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index c7dd4dec4df8..f2b03a863430 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1452,7 +1452,7 @@ int arch_update_cpu_topology(void)
 {
 	int cpu, nid, old_nid;
 	unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
-	struct sys_device *sysdev;
+	struct device *dev;
 
 	for_each_cpu(cpu,&cpu_associativity_changes_mask) {
 		vphn_get_associativity(cpu, associativity);
@@ -1473,9 +1473,9 @@ int arch_update_cpu_topology(void)
 		register_cpu_under_node(cpu, nid);
 		put_online_cpus();
 
-		sysdev = get_cpu_sysdev(cpu);
-		if (sysdev)
-			kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
+		dev = get_cpu_device(cpu);
+		if (dev)
+			kobject_uevent(&dev->kobj, KOBJ_CHANGE);
 	}
 
 	return 1;
diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c
index 4d4c8c169124..94560db788bf 100644
--- a/arch/powerpc/platforms/cell/cbe_thermal.c
+++ b/arch/powerpc/platforms/cell/cbe_thermal.c
@@ -46,7 +46,7 @@
  */
 
 #include <linux/module.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/cpu.h>
 #include <asm/spu.h>
@@ -59,8 +59,8 @@
 #define TEMP_MIN 65
 #define TEMP_MAX 125
 
-#define SYSDEV_PREFIX_ATTR(_prefix,_name,_mode)			\
-struct sysdev_attribute attr_ ## _prefix ## _ ## _name = {	\
+#define DEVICE_PREFIX_ATTR(_prefix,_name,_mode)			\
+struct device_attribute attr_ ## _prefix ## _ ## _name = {	\
 	.attr = { .name = __stringify(_name), .mode = _mode },	\
 	.show	= _prefix ## _show_ ## _name,			\
 	.store	= _prefix ## _store_ ## _name,			\
@@ -76,36 +76,36 @@ static inline u8 temp_to_reg(u8 temp)
 	return ((temp - TEMP_MIN) >> 1) & 0x3f;
 }
 
-static struct cbe_pmd_regs __iomem *get_pmd_regs(struct sys_device *sysdev)
+static struct cbe_pmd_regs __iomem *get_pmd_regs(struct device *dev)
 {
 	struct spu *spu;
 
-	spu = container_of(sysdev, struct spu, sysdev);
+	spu = container_of(dev, struct spu, dev);
 
 	return cbe_get_pmd_regs(spu_devnode(spu));
 }
 
 /* returns the value for a given spu in a given register */
-static u8 spu_read_register_value(struct sys_device *sysdev, union spe_reg __iomem *reg)
+static u8 spu_read_register_value(struct device *dev, union spe_reg __iomem *reg)
 {
 	union spe_reg value;
 	struct spu *spu;
 
-	spu = container_of(sysdev, struct spu, sysdev);
+	spu = container_of(dev, struct spu, dev);
 	value.val = in_be64(&reg->val);
 
 	return value.spe[spu->spe_id];
 }
 
-static ssize_t spu_show_temp(struct sys_device *sysdev, struct sysdev_attribute *attr,
+static ssize_t spu_show_temp(struct device *dev, struct device_attribute *attr,
 			char *buf)
 {
 	u8 value;
 	struct cbe_pmd_regs __iomem *pmd_regs;
 
-	pmd_regs = get_pmd_regs(sysdev);
+	pmd_regs = get_pmd_regs(dev);
 
-	value = spu_read_register_value(sysdev, &pmd_regs->ts_ctsr1);
+	value = spu_read_register_value(dev, &pmd_regs->ts_ctsr1);
 
 	return sprintf(buf, "%d\n", reg_to_temp(value));
 }
@@ -147,48 +147,48 @@ static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char
 	return size;
 }
 
-static ssize_t spu_show_throttle_end(struct sys_device *sysdev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t spu_show_throttle_end(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
-	return show_throttle(get_pmd_regs(sysdev), buf, 0);
+	return show_throttle(get_pmd_regs(dev), buf, 0);
 }
 
-static ssize_t spu_show_throttle_begin(struct sys_device *sysdev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t spu_show_throttle_begin(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
-	return show_throttle(get_pmd_regs(sysdev), buf, 8);
+	return show_throttle(get_pmd_regs(dev), buf, 8);
 }
 
-static ssize_t spu_show_throttle_full_stop(struct sys_device *sysdev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t spu_show_throttle_full_stop(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
-	return show_throttle(get_pmd_regs(sysdev), buf, 16);
+	return show_throttle(get_pmd_regs(dev), buf, 16);
 }
 
-static ssize_t spu_store_throttle_end(struct sys_device *sysdev,
-			struct sysdev_attribute *attr, const char *buf, size_t size)
+static ssize_t spu_store_throttle_end(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
 {
-	return store_throttle(get_pmd_regs(sysdev), buf, size, 0);
+	return store_throttle(get_pmd_regs(dev), buf, size, 0);
 }
 
-static ssize_t spu_store_throttle_begin(struct sys_device *sysdev,
-			struct sysdev_attribute *attr, const char *buf, size_t size)
+static ssize_t spu_store_throttle_begin(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
 {
-	return store_throttle(get_pmd_regs(sysdev), buf, size, 8);
+	return store_throttle(get_pmd_regs(dev), buf, size, 8);
 }
 
-static ssize_t spu_store_throttle_full_stop(struct sys_device *sysdev,
-			struct sysdev_attribute *attr, const char *buf, size_t size)
+static ssize_t spu_store_throttle_full_stop(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
 {
-	return store_throttle(get_pmd_regs(sysdev), buf, size, 16);
+	return store_throttle(get_pmd_regs(dev), buf, size, 16);
 }
 
-static ssize_t ppe_show_temp(struct sys_device *sysdev, char *buf, int pos)
+static ssize_t ppe_show_temp(struct device *dev, char *buf, int pos)
 {
 	struct cbe_pmd_regs __iomem *pmd_regs;
 	u64 value;
 
-	pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id);
+	pmd_regs = cbe_get_cpu_pmd_regs(dev->id);
 	value = in_be64(&pmd_regs->ts_ctsr2);
 
 	value = (value >> pos) & 0x3f;
@@ -199,64 +199,64 @@ static ssize_t ppe_show_temp(struct sys_device *sysdev, char *buf, int pos)
 
 /* shows the temperature of the DTS on the PPE,
  * located near the linear thermal sensor */
-static ssize_t ppe_show_temp0(struct sys_device *sysdev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t ppe_show_temp0(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
-	return ppe_show_temp(sysdev, buf, 32);
+	return ppe_show_temp(dev, buf, 32);
 }
 
 /* shows the temperature of the second DTS on the PPE */
-static ssize_t ppe_show_temp1(struct sys_device *sysdev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t ppe_show_temp1(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
-	return ppe_show_temp(sysdev, buf, 0);
+	return ppe_show_temp(dev, buf, 0);
 }
 
-static ssize_t ppe_show_throttle_end(struct sys_device *sysdev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t ppe_show_throttle_end(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
-	return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 32);
+	return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 32);
 }
 
-static ssize_t ppe_show_throttle_begin(struct sys_device *sysdev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t ppe_show_throttle_begin(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
-	return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 40);
+	return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 40);
 }
 
-static ssize_t ppe_show_throttle_full_stop(struct sys_device *sysdev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t ppe_show_throttle_full_stop(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
-	return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 48);
+	return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 48);
 }
 
-static ssize_t ppe_store_throttle_end(struct sys_device *sysdev,
-			struct sysdev_attribute *attr, const char *buf, size_t size)
+static ssize_t ppe_store_throttle_end(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
 {
-	return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 32);
+	return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 32);
 }
 
-static ssize_t ppe_store_throttle_begin(struct sys_device *sysdev,
-			struct sysdev_attribute *attr, const char *buf, size_t size)
+static ssize_t ppe_store_throttle_begin(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
 {
-	return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 40);
+	return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 40);
 }
 
-static ssize_t ppe_store_throttle_full_stop(struct sys_device *sysdev,
-			struct sysdev_attribute *attr, const char *buf, size_t size)
+static ssize_t ppe_store_throttle_full_stop(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
 {
-	return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 48);
+	return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 48);
 }
 
 
-static struct sysdev_attribute attr_spu_temperature = {
+static struct device_attribute attr_spu_temperature = {
 	.attr = {.name = "temperature", .mode = 0400 },
 	.show = spu_show_temp,
 };
 
-static SYSDEV_PREFIX_ATTR(spu, throttle_end, 0600);
-static SYSDEV_PREFIX_ATTR(spu, throttle_begin, 0600);
-static SYSDEV_PREFIX_ATTR(spu, throttle_full_stop, 0600);
+static DEVICE_PREFIX_ATTR(spu, throttle_end, 0600);
+static DEVICE_PREFIX_ATTR(spu, throttle_begin, 0600);
+static DEVICE_PREFIX_ATTR(spu, throttle_full_stop, 0600);
 
 
 static struct attribute *spu_attributes[] = {
@@ -272,19 +272,19 @@ static struct attribute_group spu_attribute_group = {
 	.attrs	= spu_attributes,
 };
 
-static struct sysdev_attribute attr_ppe_temperature0 = {
+static struct device_attribute attr_ppe_temperature0 = {
 	.attr = {.name = "temperature0", .mode = 0400 },
 	.show = ppe_show_temp0,
 };
 
-static struct sysdev_attribute attr_ppe_temperature1 = {
+static struct device_attribute attr_ppe_temperature1 = {
 	.attr = {.name = "temperature1", .mode = 0400 },
 	.show = ppe_show_temp1,
 };
 
-static SYSDEV_PREFIX_ATTR(ppe, throttle_end, 0600);
-static SYSDEV_PREFIX_ATTR(ppe, throttle_begin, 0600);
-static SYSDEV_PREFIX_ATTR(ppe, throttle_full_stop, 0600);
+static DEVICE_PREFIX_ATTR(ppe, throttle_end, 0600);
+static DEVICE_PREFIX_ATTR(ppe, throttle_begin, 0600);
+static DEVICE_PREFIX_ATTR(ppe, throttle_full_stop, 0600);
 
 static struct attribute *ppe_attributes[] = {
 	&attr_ppe_temperature0.attr,
@@ -307,7 +307,7 @@ static int __init init_default_values(void)
 {
 	int cpu;
 	struct cbe_pmd_regs __iomem *pmd_regs;
-	struct sys_device *sysdev;
+	struct device *dev;
 	union ppe_spe_reg tpr;
 	union spe_reg str1;
 	u64 str2;
@@ -349,14 +349,14 @@ static int __init init_default_values(void)
 
 	for_each_possible_cpu (cpu) {
 		pr_debug("processing cpu %d\n", cpu);
-		sysdev = get_cpu_sysdev(cpu);
+		dev = get_cpu_device(cpu);
 
-		if (!sysdev) {
-			pr_info("invalid sysdev pointer for cbe_thermal\n");
+		if (!dev) {
+			pr_info("invalid dev pointer for cbe_thermal\n");
 			return -EINVAL;
 		}
 
-		pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id);
+		pmd_regs = cbe_get_cpu_pmd_regs(dev->id);
 
 		if (!pmd_regs) {
 			pr_info("invalid CBE regs pointer for cbe_thermal\n");
@@ -379,8 +379,8 @@ static int __init thermal_init(void)
 	int rc = init_default_values();
 
 	if (rc == 0) {
-		spu_add_sysdev_attr_group(&spu_attribute_group);
-		cpu_add_sysdev_attr_group(&ppe_attribute_group);
+		spu_add_dev_attr_group(&spu_attribute_group);
+		cpu_add_dev_attr_group(&ppe_attribute_group);
 	}
 
 	return rc;
@@ -389,8 +389,8 @@ module_init(thermal_init);
 
 static void __exit thermal_exit(void)
 {
-	spu_remove_sysdev_attr_group(&spu_attribute_group);
-	cpu_remove_sysdev_attr_group(&ppe_attribute_group);
+	spu_remove_dev_attr_group(&spu_attribute_group);
+	cpu_remove_dev_attr_group(&ppe_attribute_group);
 }
 module_exit(thermal_exit);
 
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 3675da73623f..1708fb7aba35 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -522,31 +522,32 @@ void spu_init_channels(struct spu *spu)
 }
 EXPORT_SYMBOL_GPL(spu_init_channels);
 
-static struct sysdev_class spu_sysdev_class = {
+static struct bus_type spu_subsys = {
 	.name = "spu",
+	.dev_name = "spu",
 };
 
-int spu_add_sysdev_attr(struct sysdev_attribute *attr)
+int spu_add_dev_attr(struct device_attribute *attr)
 {
 	struct spu *spu;
 
 	mutex_lock(&spu_full_list_mutex);
 	list_for_each_entry(spu, &spu_full_list, full_list)
-		sysdev_create_file(&spu->sysdev, attr);
+		device_create_file(&spu->dev, attr);
 	mutex_unlock(&spu_full_list_mutex);
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(spu_add_sysdev_attr);
+EXPORT_SYMBOL_GPL(spu_add_dev_attr);
 
-int spu_add_sysdev_attr_group(struct attribute_group *attrs)
+int spu_add_dev_attr_group(struct attribute_group *attrs)
 {
 	struct spu *spu;
 	int rc = 0;
 
 	mutex_lock(&spu_full_list_mutex);
 	list_for_each_entry(spu, &spu_full_list, full_list) {
-		rc = sysfs_create_group(&spu->sysdev.kobj, attrs);
+		rc = sysfs_create_group(&spu->dev.kobj, attrs);
 
 		/* we're in trouble here, but try unwinding anyway */
 		if (rc) {
@@ -555,7 +556,7 @@ int spu_add_sysdev_attr_group(struct attribute_group *attrs)
 
 			list_for_each_entry_continue_reverse(spu,
 					&spu_full_list, full_list)
-				sysfs_remove_group(&spu->sysdev.kobj, attrs);
+				sysfs_remove_group(&spu->dev.kobj, attrs);
 			break;
 		}
 	}
@@ -564,45 +565,45 @@ int spu_add_sysdev_attr_group(struct attribute_group *attrs)
 
 	return rc;
 }
-EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group);
+EXPORT_SYMBOL_GPL(spu_add_dev_attr_group);
 
 
-void spu_remove_sysdev_attr(struct sysdev_attribute *attr)
+void spu_remove_dev_attr(struct device_attribute *attr)
 {
 	struct spu *spu;
 
 	mutex_lock(&spu_full_list_mutex);
 	list_for_each_entry(spu, &spu_full_list, full_list)
-		sysdev_remove_file(&spu->sysdev, attr);
+		device_remove_file(&spu->dev, attr);
 	mutex_unlock(&spu_full_list_mutex);
 }
-EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr);
+EXPORT_SYMBOL_GPL(spu_remove_dev_attr);
 
-void spu_remove_sysdev_attr_group(struct attribute_group *attrs)
+void spu_remove_dev_attr_group(struct attribute_group *attrs)
 {
 	struct spu *spu;
 
 	mutex_lock(&spu_full_list_mutex);
 	list_for_each_entry(spu, &spu_full_list, full_list)
-		sysfs_remove_group(&spu->sysdev.kobj, attrs);
+		sysfs_remove_group(&spu->dev.kobj, attrs);
 	mutex_unlock(&spu_full_list_mutex);
 }
-EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group);
+EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group);
 
-static int spu_create_sysdev(struct spu *spu)
+static int spu_create_dev(struct spu *spu)
 {
 	int ret;
 
-	spu->sysdev.id = spu->number;
-	spu->sysdev.cls = &spu_sysdev_class;
-	ret = sysdev_register(&spu->sysdev);
+	spu->dev.id = spu->number;
+	spu->dev.bus = &spu_subsys;
+	ret = device_register(&spu->dev);
 	if (ret) {
 		printk(KERN_ERR "Can't register SPU %d with sysfs\n",
 				spu->number);
 		return ret;
 	}
 
-	sysfs_add_device_to_node(&spu->sysdev, spu->node);
+	sysfs_add_device_to_node(&spu->dev, spu->node);
 
 	return 0;
 }
@@ -638,7 +639,7 @@ static int __init create_spu(void *data)
 	if (ret)
 		goto out_destroy;
 
-	ret = spu_create_sysdev(spu);
+	ret = spu_create_dev(spu);
 	if (ret)
 		goto out_free_irqs;
 
@@ -695,10 +696,10 @@ static unsigned long long spu_acct_time(struct spu *spu,
 }
 
 
-static ssize_t spu_stat_show(struct sys_device *sysdev,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t spu_stat_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
-	struct spu *spu = container_of(sysdev, struct spu, sysdev);
+	struct spu *spu = container_of(dev, struct spu, dev);
 
 	return sprintf(buf, "%s %llu %llu %llu %llu "
 		      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
@@ -717,7 +718,7 @@ static ssize_t spu_stat_show(struct sys_device *sysdev,
 		spu->stats.libassist);
 }
 
-static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL);
+static DEVICE_ATTR(stat, 0644, spu_stat_show, NULL);
 
 #ifdef CONFIG_KEXEC
 
@@ -816,8 +817,8 @@ static int __init init_spu_base(void)
 	if (!spu_management_ops)
 		goto out;
 
-	/* create sysdev class for spus */
-	ret = sysdev_class_register(&spu_sysdev_class);
+	/* create system subsystem for spus */
+	ret = subsys_system_register(&spu_subsys, NULL);
 	if (ret)
 		goto out;
 
@@ -826,7 +827,7 @@ static int __init init_spu_base(void)
 	if (ret < 0) {
 		printk(KERN_WARNING "%s: Error initializing spus\n",
 			__func__);
-		goto out_unregister_sysdev_class;
+		goto out_unregister_subsys;
 	}
 
 	if (ret > 0)
@@ -836,15 +837,15 @@ static int __init init_spu_base(void)
 	xmon_register_spus(&spu_full_list);
 	crash_register_spus(&spu_full_list);
 	mutex_unlock(&spu_full_list_mutex);
-	spu_add_sysdev_attr(&attr_stat);
+	spu_add_dev_attr(&dev_attr_stat);
 	register_syscore_ops(&spu_syscore_ops);
 
 	spu_init_affinity();
 
 	return 0;
 
- out_unregister_sysdev_class:
-	sysdev_class_unregister(&spu_sysdev_class);
+ out_unregister_subsys:
+	bus_unregister(&spu_subsys);
  out:
 	return ret;
 }
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index c8b3c69fe891..af281dce510a 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -15,7 +15,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/seq_file.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/cpu.h>
 #include <linux/of.h>
 #include <asm/cputhreads.h>
@@ -184,7 +184,7 @@ static ssize_t get_best_energy_list(char *page, int activate)
 	return s-page;
 }
 
-static ssize_t get_best_energy_data(struct sys_device *dev,
+static ssize_t get_best_energy_data(struct device *dev,
 					char *page, int activate)
 {
 	int rc;
@@ -207,26 +207,26 @@ static ssize_t get_best_energy_data(struct sys_device *dev,
 
 /* Wrapper functions */
 
-static ssize_t cpu_activate_hint_list_show(struct sysdev_class *class,
-			struct sysdev_class_attribute *attr, char *page)
+static ssize_t cpu_activate_hint_list_show(struct device *dev,
+			struct device_attribute *attr, char *page)
 {
 	return get_best_energy_list(page, 1);
 }
 
-static ssize_t cpu_deactivate_hint_list_show(struct sysdev_class *class,
-			struct sysdev_class_attribute *attr, char *page)
+static ssize_t cpu_deactivate_hint_list_show(struct device *dev,
+			struct device_attribute *attr, char *page)
 {
 	return get_best_energy_list(page, 0);
 }
 
-static ssize_t percpu_activate_hint_show(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *page)
+static ssize_t percpu_activate_hint_show(struct device *dev,
+			struct device_attribute *attr, char *page)
 {
 	return get_best_energy_data(dev, page, 1);
 }
 
-static ssize_t percpu_deactivate_hint_show(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *page)
+static ssize_t percpu_deactivate_hint_show(struct device *dev,
+			struct device_attribute *attr, char *page)
 {
 	return get_best_energy_data(dev, page, 0);
 }
@@ -241,48 +241,48 @@ static ssize_t percpu_deactivate_hint_show(struct sys_device *dev,
  *	Per-cpu value of the hint
  */
 
-struct sysdev_class_attribute attr_cpu_activate_hint_list =
-		_SYSDEV_CLASS_ATTR(pseries_activate_hint_list, 0444,
+struct device_attribute attr_cpu_activate_hint_list =
+		__ATTR(pseries_activate_hint_list, 0444,
 		cpu_activate_hint_list_show, NULL);
 
-struct sysdev_class_attribute attr_cpu_deactivate_hint_list =
-		_SYSDEV_CLASS_ATTR(pseries_deactivate_hint_list, 0444,
+struct device_attribute attr_cpu_deactivate_hint_list =
+		__ATTR(pseries_deactivate_hint_list, 0444,
 		cpu_deactivate_hint_list_show, NULL);
 
-struct sysdev_attribute attr_percpu_activate_hint =
-		_SYSDEV_ATTR(pseries_activate_hint, 0444,
+struct device_attribute attr_percpu_activate_hint =
+		__ATTR(pseries_activate_hint, 0444,
 		percpu_activate_hint_show, NULL);
 
-struct sysdev_attribute attr_percpu_deactivate_hint =
-		_SYSDEV_ATTR(pseries_deactivate_hint, 0444,
+struct device_attribute attr_percpu_deactivate_hint =
+		__ATTR(pseries_deactivate_hint, 0444,
 		percpu_deactivate_hint_show, NULL);
 
 static int __init pseries_energy_init(void)
 {
 	int cpu, err;
-	struct sys_device *cpu_sys_dev;
+	struct device *cpu_dev;
 
 	if (!check_for_h_best_energy()) {
 		printk(KERN_INFO "Hypercall H_BEST_ENERGY not supported\n");
 		return 0;
 	}
 	/* Create the sysfs files */
-	err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
-				&attr_cpu_activate_hint_list.attr);
+	err = device_create_file(cpu_subsys.dev_root,
+				&attr_cpu_activate_hint_list);
 	if (!err)
-		err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
-				&attr_cpu_deactivate_hint_list.attr);
+		err = device_create_file(cpu_subsys.dev_root,
+				&attr_cpu_deactivate_hint_list);
 
 	if (err)
 		return err;
 	for_each_possible_cpu(cpu) {
-		cpu_sys_dev = get_cpu_sysdev(cpu);
-		err = sysfs_create_file(&cpu_sys_dev->kobj,
-				&attr_percpu_activate_hint.attr);
+		cpu_dev = get_cpu_device(cpu);
+		err = device_create_file(cpu_dev,
+				&attr_percpu_activate_hint);
 		if (err)
 			break;
-		err = sysfs_create_file(&cpu_sys_dev->kobj,
-				&attr_percpu_deactivate_hint.attr);
+		err = device_create_file(cpu_dev,
+				&attr_percpu_deactivate_hint);
 		if (err)
 			break;
 	}
@@ -298,23 +298,20 @@ static int __init pseries_energy_init(void)
 static void __exit pseries_energy_cleanup(void)
 {
 	int cpu;
-	struct sys_device *cpu_sys_dev;
+	struct device *cpu_dev;
 
 	if (!sysfs_entries)
 		return;
 
 	/* Remove the sysfs files */
-	sysfs_remove_file(&cpu_sysdev_class.kset.kobj,
-				&attr_cpu_activate_hint_list.attr);
-
-	sysfs_remove_file(&cpu_sysdev_class.kset.kobj,
-				&attr_cpu_deactivate_hint_list.attr);
+	device_remove_file(cpu_subsys.dev_root, &attr_cpu_activate_hint_list);
+	device_remove_file(cpu_subsys.dev_root, &attr_cpu_deactivate_hint_list);
 
 	for_each_possible_cpu(cpu) {
-		cpu_sys_dev = get_cpu_sysdev(cpu);
-		sysfs_remove_file(&cpu_sys_dev->kobj,
+		cpu_dev = get_cpu_device(cpu);
+		sysfs_remove_file(&cpu_dev->kobj,
 				&attr_percpu_activate_hint.attr);
-		sysfs_remove_file(&cpu_sys_dev->kobj,
+		sysfs_remove_file(&cpu_dev->kobj,
 				&attr_percpu_deactivate_hint.attr);
 	}
 }
diff --git a/arch/powerpc/sysdev/ppc4xx_cpm.c b/arch/powerpc/sysdev/ppc4xx_cpm.c
index 73b86cc5ea74..82e2cfe35c62 100644
--- a/arch/powerpc/sysdev/ppc4xx_cpm.c
+++ b/arch/powerpc/sysdev/ppc4xx_cpm.c
@@ -179,12 +179,12 @@ static struct kobj_attribute cpm_idle_attr =
 
 static void cpm_idle_config_sysfs(void)
 {
-	struct sys_device *sys_dev;
+	struct device *dev;
 	unsigned long ret;
 
-	sys_dev = get_cpu_sysdev(0);
+	dev = get_cpu_device(0);
 
-	ret = sysfs_create_file(&sys_dev->kobj,
+	ret = sysfs_create_file(&dev->kobj,
 				&cpm_idle_attr.attr);
 	if (ret)
 		printk(KERN_WARNING
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3ea872890da2..66cca03c0282 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -831,8 +831,8 @@ int setup_profiling_timer(unsigned int multiplier)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static ssize_t cpu_configure_show(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t cpu_configure_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	ssize_t count;
 
@@ -842,8 +842,8 @@ static ssize_t cpu_configure_show(struct sys_device *dev,
 	return count;
 }
 
-static ssize_t cpu_configure_store(struct sys_device *dev,
-				  struct sysdev_attribute *attr,
+static ssize_t cpu_configure_store(struct device *dev,
+				  struct device_attribute *attr,
 				  const char *buf, size_t count)
 {
 	int cpu = dev->id;
@@ -889,11 +889,11 @@ out:
 	put_online_cpus();
 	return rc ? rc : count;
 }
-static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
+static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static ssize_t cpu_polarization_show(struct sys_device *dev,
-				     struct sysdev_attribute *attr, char *buf)
+static ssize_t cpu_polarization_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
 {
 	int cpu = dev->id;
 	ssize_t count;
@@ -919,22 +919,22 @@ static ssize_t cpu_polarization_show(struct sys_device *dev,
 	mutex_unlock(&smp_cpu_state_mutex);
 	return count;
 }
-static SYSDEV_ATTR(polarization, 0444, cpu_polarization_show, NULL);
+static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);
 
-static ssize_t show_cpu_address(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t show_cpu_address(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]);
 }
-static SYSDEV_ATTR(address, 0444, show_cpu_address, NULL);
+static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
 
 
 static struct attribute *cpu_common_attrs[] = {
 #ifdef CONFIG_HOTPLUG_CPU
-	&attr_configure.attr,
+	&dev_attr_configure.attr,
 #endif
-	&attr_address.attr,
-	&attr_polarization.attr,
+	&dev_attr_address.attr,
+	&dev_attr_polarization.attr,
 	NULL,
 };
 
@@ -942,8 +942,8 @@ static struct attribute_group cpu_common_attr_group = {
 	.attrs = cpu_common_attrs,
 };
 
-static ssize_t show_capability(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t show_capability(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	unsigned int capability;
 	int rc;
@@ -953,10 +953,10 @@ static ssize_t show_capability(struct sys_device *dev,
 		return rc;
 	return sprintf(buf, "%u\n", capability);
 }
-static SYSDEV_ATTR(capability, 0444, show_capability, NULL);
+static DEVICE_ATTR(capability, 0444, show_capability, NULL);
 
-static ssize_t show_idle_count(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t show_idle_count(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	struct s390_idle_data *idle;
 	unsigned long long idle_count;
@@ -976,10 +976,10 @@ repeat:
 		goto repeat;
 	return sprintf(buf, "%llu\n", idle_count);
 }
-static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL);
+static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
 
-static ssize_t show_idle_time(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t show_idle_time(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	struct s390_idle_data *idle;
 	unsigned long long now, idle_time, idle_enter;
@@ -1001,12 +1001,12 @@ repeat:
 		goto repeat;
 	return sprintf(buf, "%llu\n", idle_time >> 12);
 }
-static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL);
+static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
 
 static struct attribute *cpu_online_attrs[] = {
-	&attr_capability.attr,
-	&attr_idle_count.attr,
-	&attr_idle_time_us.attr,
+	&dev_attr_capability.attr,
+	&dev_attr_idle_count.attr,
+	&dev_attr_idle_time_us.attr,
 	NULL,
 };
 
@@ -1019,7 +1019,7 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self,
 {
 	unsigned int cpu = (unsigned int)(long)hcpu;
 	struct cpu *c = &per_cpu(cpu_devices, cpu);
-	struct sys_device *s = &c->sysdev;
+	struct device *s = &c->dev;
 	struct s390_idle_data *idle;
 	int err = 0;
 
@@ -1045,7 +1045,7 @@ static struct notifier_block __cpuinitdata smp_cpu_nb = {
 static int __devinit smp_add_present_cpu(int cpu)
 {
 	struct cpu *c = &per_cpu(cpu_devices, cpu);
-	struct sys_device *s = &c->sysdev;
+	struct device *s = &c->dev;
 	int rc;
 
 	c->hotpluggable = 1;
@@ -1098,8 +1098,8 @@ out:
 	return rc;
 }
 
-static ssize_t __ref rescan_store(struct sysdev_class *class,
-				  struct sysdev_class_attribute *attr,
+static ssize_t __ref rescan_store(struct device *dev,
+				  struct device_attribute *attr,
 				  const char *buf,
 				  size_t count)
 {
@@ -1108,11 +1108,11 @@ static ssize_t __ref rescan_store(struct sysdev_class *class,
 	rc = smp_rescan_cpus();
 	return rc ? rc : count;
 }
-static SYSDEV_CLASS_ATTR(rescan, 0200, NULL, rescan_store);
+static DEVICE_ATTR(rescan, 0200, NULL, rescan_store);
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static ssize_t dispatching_show(struct sysdev_class *class,
-				struct sysdev_class_attribute *attr,
+static ssize_t dispatching_show(struct device *dev,
+				struct device_attribute *attr,
 				char *buf)
 {
 	ssize_t count;
@@ -1123,8 +1123,8 @@ static ssize_t dispatching_show(struct sysdev_class *class,
 	return count;
 }
 
-static ssize_t dispatching_store(struct sysdev_class *dev,
-				 struct sysdev_class_attribute *attr,
+static ssize_t dispatching_store(struct device *dev,
+				 struct device_attribute *attr,
 				 const char *buf,
 				 size_t count)
 {
@@ -1148,7 +1148,7 @@ out:
 	put_online_cpus();
 	return rc ? rc : count;
 }
-static SYSDEV_CLASS_ATTR(dispatching, 0644, dispatching_show,
+static DEVICE_ATTR(dispatching, 0644, dispatching_show,
 			 dispatching_store);
 
 static int __init topology_init(void)
@@ -1159,11 +1159,11 @@ static int __init topology_init(void)
 	register_cpu_notifier(&smp_cpu_nb);
 
 #ifdef CONFIG_HOTPLUG_CPU
-	rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_rescan);
+	rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
 	if (rc)
 		return rc;
 #endif
-	rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_dispatching);
+	rc = device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
 	if (rc)
 		return rc;
 	for_each_present_cpu(cpu) {
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 77b8942b9a15..6dfc524c31aa 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -230,7 +230,7 @@ void store_topology(struct sysinfo_15_1_x *info)
 int arch_update_cpu_topology(void)
 {
 	struct sysinfo_15_1_x *info = tl_info;
-	struct sys_device *sysdev;
+	struct device *dev;
 	int cpu;
 
 	if (!MACHINE_HAS_TOPOLOGY) {
@@ -242,8 +242,8 @@ int arch_update_cpu_topology(void)
 	tl_to_cores(info);
 	update_cpu_core_map();
 	for_each_online_cpu(cpu) {
-		sysdev = get_cpu_sysdev(cpu);
-		kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
+		dev = get_cpu_device(cpu);
+		kobject_uevent(&dev->kobj, KOBJ_CHANGE);
 	}
 	return 1;
 }
diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c
index f0907995b4c9..a8140f0bbf6c 100644
--- a/arch/sh/kernel/cpu/sh4/sq.c
+++ b/arch/sh/kernel/cpu/sh4/sq.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/bitmap.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -337,9 +337,9 @@ static struct kobj_type ktype_percpu_entry = {
 	.default_attrs	= sq_sysfs_attrs,
 };
 
-static int __devinit sq_sysdev_add(struct sys_device *sysdev)
+static int __devinit sq_dev_add(struct device *dev)
 {
-	unsigned int cpu = sysdev->id;
+	unsigned int cpu = dev->id;
 	struct kobject *kobj;
 	int error;
 
@@ -348,25 +348,27 @@ static int __devinit sq_sysdev_add(struct sys_device *sysdev)
 		return -ENOMEM;
 
 	kobj = sq_kobject[cpu];
-	error = kobject_init_and_add(kobj, &ktype_percpu_entry, &sysdev->kobj,
+	error = kobject_init_and_add(kobj, &ktype_percpu_entry, &dev->kobj,
 				     "%s", "sq");
 	if (!error)
 		kobject_uevent(kobj, KOBJ_ADD);
 	return error;
 }
 
-static int __devexit sq_sysdev_remove(struct sys_device *sysdev)
+static int __devexit sq_dev_remove(struct device *dev)
 {
-	unsigned int cpu = sysdev->id;
+	unsigned int cpu = dev->id;
 	struct kobject *kobj = sq_kobject[cpu];
 
 	kobject_put(kobj);
 	return 0;
 }
 
-static struct sysdev_driver sq_sysdev_driver = {
-	.add		= sq_sysdev_add,
-	.remove		= __devexit_p(sq_sysdev_remove),
+static struct subsys_interface sq_interface = {
+	.name		= "sq"
+	.subsys		= &cpu_subsys,
+	.add_dev	= sq_dev_add,
+	.remove_dev	= __devexit_p(sq_dev_remove),
 };
 
 static int __init sq_api_init(void)
@@ -386,7 +388,7 @@ static int __init sq_api_init(void)
 	if (unlikely(!sq_bitmap))
 		goto out;
 
-	ret = sysdev_driver_register(&cpu_sysdev_class, &sq_sysdev_driver);
+	ret = subsys_interface_register(&sq_interface);
 	if (unlikely(ret != 0))
 		goto out;
 
@@ -401,7 +403,7 @@ out:
 
 static void __exit sq_api_exit(void)
 {
-	sysdev_driver_unregister(&cpu_sysdev_class, &sq_sysdev_driver);
+	subsys_interface_unregister(&sq_interface);
 	kfree(sq_bitmap);
 	kmem_cache_destroy(sq_cache);
 }
diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c
index 7408201d7efb..654e8aad3bbe 100644
--- a/arch/sparc/kernel/sysfs.c
+++ b/arch/sparc/kernel/sysfs.c
@@ -3,7 +3,7 @@
  * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
  */
 #include <linux/sched.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -16,13 +16,13 @@
 static DEFINE_PER_CPU(struct hv_mmu_statistics, mmu_stats) __attribute__((aligned(64)));
 
 #define SHOW_MMUSTAT_ULONG(NAME) \
-static ssize_t show_##NAME(struct sys_device *dev, \
-			struct sysdev_attribute *attr, char *buf) \
+static ssize_t show_##NAME(struct device *dev, \
+			struct device_attribute *attr, char *buf) \
 { \
 	struct hv_mmu_statistics *p = &per_cpu(mmu_stats, dev->id); \
 	return sprintf(buf, "%lu\n", p->NAME); \
 } \
-static SYSDEV_ATTR(NAME, 0444, show_##NAME, NULL)
+static DEVICE_ATTR(NAME, 0444, show_##NAME, NULL)
 
 SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctx0_8k_tte);
 SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctx0_8k_tte);
@@ -58,38 +58,38 @@ SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctxnon0_256mb_tte);
 SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctxnon0_256mb_tte);
 
 static struct attribute *mmu_stat_attrs[] = {
-	&attr_immu_tsb_hits_ctx0_8k_tte.attr,
-	&attr_immu_tsb_ticks_ctx0_8k_tte.attr,
-	&attr_immu_tsb_hits_ctx0_64k_tte.attr,
-	&attr_immu_tsb_ticks_ctx0_64k_tte.attr,
-	&attr_immu_tsb_hits_ctx0_4mb_tte.attr,
-	&attr_immu_tsb_ticks_ctx0_4mb_tte.attr,
-	&attr_immu_tsb_hits_ctx0_256mb_tte.attr,
-	&attr_immu_tsb_ticks_ctx0_256mb_tte.attr,
-	&attr_immu_tsb_hits_ctxnon0_8k_tte.attr,
-	&attr_immu_tsb_ticks_ctxnon0_8k_tte.attr,
-	&attr_immu_tsb_hits_ctxnon0_64k_tte.attr,
-	&attr_immu_tsb_ticks_ctxnon0_64k_tte.attr,
-	&attr_immu_tsb_hits_ctxnon0_4mb_tte.attr,
-	&attr_immu_tsb_ticks_ctxnon0_4mb_tte.attr,
-	&attr_immu_tsb_hits_ctxnon0_256mb_tte.attr,
-	&attr_immu_tsb_ticks_ctxnon0_256mb_tte.attr,
-	&attr_dmmu_tsb_hits_ctx0_8k_tte.attr,
-	&attr_dmmu_tsb_ticks_ctx0_8k_tte.attr,
-	&attr_dmmu_tsb_hits_ctx0_64k_tte.attr,
-	&attr_dmmu_tsb_ticks_ctx0_64k_tte.attr,
-	&attr_dmmu_tsb_hits_ctx0_4mb_tte.attr,
-	&attr_dmmu_tsb_ticks_ctx0_4mb_tte.attr,
-	&attr_dmmu_tsb_hits_ctx0_256mb_tte.attr,
-	&attr_dmmu_tsb_ticks_ctx0_256mb_tte.attr,
-	&attr_dmmu_tsb_hits_ctxnon0_8k_tte.attr,
-	&attr_dmmu_tsb_ticks_ctxnon0_8k_tte.attr,
-	&attr_dmmu_tsb_hits_ctxnon0_64k_tte.attr,
-	&attr_dmmu_tsb_ticks_ctxnon0_64k_tte.attr,
-	&attr_dmmu_tsb_hits_ctxnon0_4mb_tte.attr,
-	&attr_dmmu_tsb_ticks_ctxnon0_4mb_tte.attr,
-	&attr_dmmu_tsb_hits_ctxnon0_256mb_tte.attr,
-	&attr_dmmu_tsb_ticks_ctxnon0_256mb_tte.attr,
+	&dev_attr_immu_tsb_hits_ctx0_8k_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctx0_8k_tte.attr,
+	&dev_attr_immu_tsb_hits_ctx0_64k_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctx0_64k_tte.attr,
+	&dev_attr_immu_tsb_hits_ctx0_4mb_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctx0_4mb_tte.attr,
+	&dev_attr_immu_tsb_hits_ctx0_256mb_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctx0_256mb_tte.attr,
+	&dev_attr_immu_tsb_hits_ctxnon0_8k_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctxnon0_8k_tte.attr,
+	&dev_attr_immu_tsb_hits_ctxnon0_64k_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctxnon0_64k_tte.attr,
+	&dev_attr_immu_tsb_hits_ctxnon0_4mb_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctxnon0_4mb_tte.attr,
+	&dev_attr_immu_tsb_hits_ctxnon0_256mb_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctxnon0_256mb_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctx0_8k_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctx0_8k_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctx0_64k_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctx0_64k_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctx0_4mb_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctx0_4mb_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctx0_256mb_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctx0_256mb_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctxnon0_8k_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctxnon0_8k_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctxnon0_64k_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctxnon0_64k_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctxnon0_4mb_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctxnon0_4mb_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctxnon0_256mb_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctxnon0_256mb_tte.attr,
 	NULL,
 };
 
@@ -139,15 +139,15 @@ static unsigned long write_mmustat_enable(unsigned long val)
 	return sun4v_mmustat_conf(ra, &orig_ra);
 }
 
-static ssize_t show_mmustat_enable(struct sys_device *s,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t show_mmustat_enable(struct device *s,
+				struct device_attribute *attr, char *buf)
 {
 	unsigned long val = run_on_cpu(s->id, read_mmustat_enable, 0);
 	return sprintf(buf, "%lx\n", val);
 }
 
-static ssize_t store_mmustat_enable(struct sys_device *s,
-			struct sysdev_attribute *attr, const char *buf,
+static ssize_t store_mmustat_enable(struct device *s,
+			struct device_attribute *attr, const char *buf,
 			size_t count)
 {
 	unsigned long val, err;
@@ -163,39 +163,39 @@ static ssize_t store_mmustat_enable(struct sys_device *s,
 	return count;
 }
 
-static SYSDEV_ATTR(mmustat_enable, 0644, show_mmustat_enable, store_mmustat_enable);
+static DEVICE_ATTR(mmustat_enable, 0644, show_mmustat_enable, store_mmustat_enable);
 
 static int mmu_stats_supported;
 
-static int register_mmu_stats(struct sys_device *s)
+static int register_mmu_stats(struct device *s)
 {
 	if (!mmu_stats_supported)
 		return 0;
-	sysdev_create_file(s, &attr_mmustat_enable);
+	device_create_file(s, &dev_attr_mmustat_enable);
 	return sysfs_create_group(&s->kobj, &mmu_stat_group);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void unregister_mmu_stats(struct sys_device *s)
+static void unregister_mmu_stats(struct device *s)
 {
 	if (!mmu_stats_supported)
 		return;
 	sysfs_remove_group(&s->kobj, &mmu_stat_group);
-	sysdev_remove_file(s, &attr_mmustat_enable);
+	device_remove_file(s, &dev_attr_mmustat_enable);
 }
 #endif
 
 #define SHOW_CPUDATA_ULONG_NAME(NAME, MEMBER) \
-static ssize_t show_##NAME(struct sys_device *dev, \
-		struct sysdev_attribute *attr, char *buf) \
+static ssize_t show_##NAME(struct device *dev, \
+		struct device_attribute *attr, char *buf) \
 { \
 	cpuinfo_sparc *c = &cpu_data(dev->id); \
 	return sprintf(buf, "%lu\n", c->MEMBER); \
 }
 
 #define SHOW_CPUDATA_UINT_NAME(NAME, MEMBER) \
-static ssize_t show_##NAME(struct sys_device *dev, \
-		struct sysdev_attribute *attr, char *buf) \
+static ssize_t show_##NAME(struct device *dev, \
+		struct device_attribute *attr, char *buf) \
 { \
 	cpuinfo_sparc *c = &cpu_data(dev->id); \
 	return sprintf(buf, "%u\n", c->MEMBER); \
@@ -209,14 +209,14 @@ SHOW_CPUDATA_UINT_NAME(l1_icache_line_size, icache_line_size);
 SHOW_CPUDATA_UINT_NAME(l2_cache_size, ecache_size);
 SHOW_CPUDATA_UINT_NAME(l2_cache_line_size, ecache_line_size);
 
-static struct sysdev_attribute cpu_core_attrs[] = {
-	_SYSDEV_ATTR(clock_tick,          0444, show_clock_tick, NULL),
-	_SYSDEV_ATTR(l1_dcache_size,      0444, show_l1_dcache_size, NULL),
-	_SYSDEV_ATTR(l1_dcache_line_size, 0444, show_l1_dcache_line_size, NULL),
-	_SYSDEV_ATTR(l1_icache_size,      0444, show_l1_icache_size, NULL),
-	_SYSDEV_ATTR(l1_icache_line_size, 0444, show_l1_icache_line_size, NULL),
-	_SYSDEV_ATTR(l2_cache_size,       0444, show_l2_cache_size, NULL),
-	_SYSDEV_ATTR(l2_cache_line_size,  0444, show_l2_cache_line_size, NULL),
+static struct device_attribute cpu_core_attrs[] = {
+	__ATTR(clock_tick,          0444, show_clock_tick, NULL),
+	__ATTR(l1_dcache_size,      0444, show_l1_dcache_size, NULL),
+	__ATTR(l1_dcache_line_size, 0444, show_l1_dcache_line_size, NULL),
+	__ATTR(l1_icache_size,      0444, show_l1_icache_size, NULL),
+	__ATTR(l1_icache_line_size, 0444, show_l1_icache_line_size, NULL),
+	__ATTR(l2_cache_size,       0444, show_l2_cache_size, NULL),
+	__ATTR(l2_cache_line_size,  0444, show_l2_cache_line_size, NULL),
 };
 
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
@@ -224,11 +224,11 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices);
 static void register_cpu_online(unsigned int cpu)
 {
 	struct cpu *c = &per_cpu(cpu_devices, cpu);
-	struct sys_device *s = &c->sysdev;
+	struct device *s = &c->dev;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(cpu_core_attrs); i++)
-		sysdev_create_file(s, &cpu_core_attrs[i]);
+		device_create_file(s, &cpu_core_attrs[i]);
 
 	register_mmu_stats(s);
 }
@@ -237,12 +237,12 @@ static void register_cpu_online(unsigned int cpu)
 static void unregister_cpu_online(unsigned int cpu)
 {
 	struct cpu *c = &per_cpu(cpu_devices, cpu);
-	struct sys_device *s = &c->sysdev;
+	struct device *s = &c->dev;
 	int i;
 
 	unregister_mmu_stats(s);
 	for (i = 0; i < ARRAY_SIZE(cpu_core_attrs); i++)
-		sysdev_remove_file(s, &cpu_core_attrs[i]);
+		device_remove_file(s, &cpu_core_attrs[i]);
 }
 #endif
 
diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c
index b671a86f4515..e7ce2a5161b8 100644
--- a/arch/tile/kernel/sysfs.c
+++ b/arch/tile/kernel/sysfs.c
@@ -14,7 +14,7 @@
  * /sys entry support.
  */
 
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
@@ -31,55 +31,55 @@ static ssize_t get_hv_confstr(char *page, int query)
 	return n;
 }
 
-static ssize_t chip_width_show(struct sysdev_class *dev,
-			       struct sysdev_class_attribute *attr,
+static ssize_t chip_width_show(struct device *dev,
+			       struct device_attribute *attr,
 			       char *page)
 {
 	return sprintf(page, "%u\n", smp_width);
 }
-static SYSDEV_CLASS_ATTR(chip_width, 0444, chip_width_show, NULL);
+static DEVICE_ATTR(chip_width, 0444, chip_width_show, NULL);
 
-static ssize_t chip_height_show(struct sysdev_class *dev,
-				struct sysdev_class_attribute *attr,
+static ssize_t chip_height_show(struct device *dev,
+				struct device_attribute *attr,
 				char *page)
 {
 	return sprintf(page, "%u\n", smp_height);
 }
-static SYSDEV_CLASS_ATTR(chip_height, 0444, chip_height_show, NULL);
+static DEVICE_ATTR(chip_height, 0444, chip_height_show, NULL);
 
-static ssize_t chip_serial_show(struct sysdev_class *dev,
-				struct sysdev_class_attribute *attr,
+static ssize_t chip_serial_show(struct device *dev,
+				struct device_attribute *attr,
 				char *page)
 {
 	return get_hv_confstr(page, HV_CONFSTR_CHIP_SERIAL_NUM);
 }
-static SYSDEV_CLASS_ATTR(chip_serial, 0444, chip_serial_show, NULL);
+static DEVICE_ATTR(chip_serial, 0444, chip_serial_show, NULL);
 
-static ssize_t chip_revision_show(struct sysdev_class *dev,
-				  struct sysdev_class_attribute *attr,
+static ssize_t chip_revision_show(struct device *dev,
+				  struct device_attribute *attr,
 				  char *page)
 {
 	return get_hv_confstr(page, HV_CONFSTR_CHIP_REV);
 }
-static SYSDEV_CLASS_ATTR(chip_revision, 0444, chip_revision_show, NULL);
+static DEVICE_ATTR(chip_revision, 0444, chip_revision_show, NULL);
 
 
-static ssize_t type_show(struct sysdev_class *dev,
-			    struct sysdev_class_attribute *attr,
+static ssize_t type_show(struct device *dev,
+			    struct device_attribute *attr,
 			    char *page)
 {
 	return sprintf(page, "tilera\n");
 }
-static SYSDEV_CLASS_ATTR(type, 0444, type_show, NULL);
+static DEVICE_ATTR(type, 0444, type_show, NULL);
 
 #define HV_CONF_ATTR(name, conf)					\
-	static ssize_t name ## _show(struct sysdev_class *dev,		\
-				     struct sysdev_class_attribute *attr, \
+	static ssize_t name ## _show(struct device *dev,		\
+				     struct device_attribute *attr, \
 				     char *page)			\
 	{								\
 		return get_hv_confstr(page, conf);			\
 	}								\
-	static SYSDEV_CLASS_ATTR(name, 0444, name ## _show, NULL);
+	static DEVICE_ATTR(name, 0444, name ## _show, NULL);
 
 HV_CONF_ATTR(version,		HV_CONFSTR_HV_SW_VER)
 HV_CONF_ATTR(config_version,	HV_CONFSTR_HV_CONFIG_VER)
@@ -95,15 +95,15 @@ HV_CONF_ATTR(mezz_description,	HV_CONFSTR_MEZZ_DESC)
 HV_CONF_ATTR(switch_control,	HV_CONFSTR_SWITCH_CONTROL)
 
 static struct attribute *board_attrs[] = {
-	&attr_board_part.attr,
-	&attr_board_serial.attr,
-	&attr_board_revision.attr,
-	&attr_board_description.attr,
-	&attr_mezz_part.attr,
-	&attr_mezz_serial.attr,
-	&attr_mezz_revision.attr,
-	&attr_mezz_description.attr,
-	&attr_switch_control.attr,
+	&dev_attr_board_part.attr,
+	&dev_attr_board_serial.attr,
+	&dev_attr_board_revision.attr,
+	&dev_attr_board_description.attr,
+	&dev_attr_mezz_part.attr,
+	&dev_attr_mezz_serial.attr,
+	&dev_attr_mezz_revision.attr,
+	&dev_attr_mezz_description.attr,
+	&dev_attr_switch_control.attr,
 	NULL
 };
 
@@ -150,12 +150,11 @@ hvconfig_bin_read(struct file *filp, struct kobject *kobj,
 
 static int __init create_sysfs_entries(void)
 {
-	struct sysdev_class *cls = &cpu_sysdev_class;
 	int err = 0;
 
 #define create_cpu_attr(name)						\
 	if (!err)							\
-		err = sysfs_create_file(&cls->kset.kobj, &attr_##name.attr);
+		err = device_create_file(cpu_subsys.dev_root, &dev_attr_##name);
 	create_cpu_attr(chip_width);
 	create_cpu_attr(chip_height);
 	create_cpu_attr(chip_serial);
@@ -163,7 +162,7 @@ static int __init create_sysfs_entries(void)
 
 #define create_hv_attr(name)						\
 	if (!err)							\
-		err = sysfs_create_file(hypervisor_kobj, &attr_##name.attr);
+		err = sysfs_create_file(hypervisor_kobj, &dev_attr_##name);
 	create_hv_attr(type);
 	create_hv_attr(version);
 	create_hv_attr(config_version);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c9321f34e55b..0b05fb49c560 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -149,7 +149,7 @@ static inline void enable_p5_mce(void) {}
 
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
-DECLARE_PER_CPU(struct sys_device, mce_sysdev);
+DECLARE_PER_CPU(struct device, mce_device);
 
 /*
  * Maximum banks number.
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index a3b0811693c9..6b45e5e7a901 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -844,8 +844,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 
 #include <linux/kobject.h>
 #include <linux/sysfs.h>
-
-extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
+#include <linux/cpu.h>
 
 /* pointer to kobject for cpuX/cache */
 static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
@@ -1073,9 +1072,9 @@ err_out:
 static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
 
 /* Add/Remove cache interface for CPU device */
-static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
+static int __cpuinit cache_add_dev(struct device *dev)
 {
-	unsigned int cpu = sys_dev->id;
+	unsigned int cpu = dev->id;
 	unsigned long i, j;
 	struct _index_kobject *this_object;
 	struct _cpuid4_info   *this_leaf;
@@ -1087,7 +1086,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 
 	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
 				      &ktype_percpu_entry,
-				      &sys_dev->kobj, "%s", "cache");
+				      &dev->kobj, "%s", "cache");
 	if (retval < 0) {
 		cpuid4_cache_sysfs_exit(cpu);
 		return retval;
@@ -1124,9 +1123,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 	return 0;
 }
 
-static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
+static void __cpuinit cache_remove_dev(struct device *dev)
 {
-	unsigned int cpu = sys_dev->id;
+	unsigned int cpu = dev->id;
 	unsigned long i;
 
 	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
@@ -1145,17 +1144,17 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
 					unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
-	struct sys_device *sys_dev;
+	struct device *dev;
 
-	sys_dev = get_cpu_sysdev(cpu);
+	dev = get_cpu_device(cpu);
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		cache_add_dev(sys_dev);
+		cache_add_dev(dev);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-		cache_remove_dev(sys_dev);
+		cache_remove_dev(dev);
 		break;
 	}
 	return NOTIFY_OK;
@@ -1174,9 +1173,9 @@ static int __cpuinit cache_sysfs_init(void)
 
 	for_each_online_cpu(i) {
 		int err;
-		struct sys_device *sys_dev = get_cpu_sysdev(i);
+		struct device *dev = get_cpu_device(i);
 
-		err = cache_add_dev(sys_dev);
+		err = cache_add_dev(dev);
 		if (err)
 			return err;
 	}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index fefcc69ee8b5..ed44c8a65858 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -1,4 +1,4 @@
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <asm/mce.h>
 
 enum severity_level {
@@ -17,7 +17,7 @@ enum severity_level {
 struct mce_bank {
 	u64			ctl;			/* subevents to enable */
 	unsigned char init;				/* initialise bank? */
-	struct sysdev_attribute attr;			/* sysdev attribute */
+	struct device_attribute attr;			/* device attribute */
 	char			attrname[ATTR_LEN];	/* attribute name */
 };
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 362056aefeb4..0156c6f85d7b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -19,7 +19,7 @@
 #include <linux/kernel.h>
 #include <linux/percpu.h>
 #include <linux/string.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/syscore_ops.h>
 #include <linux/delay.h>
 #include <linux/ctype.h>
@@ -1751,7 +1751,7 @@ static struct syscore_ops mce_syscore_ops = {
 };
 
 /*
- * mce_sysdev: Sysfs support
+ * mce_device: Sysfs support
  */
 
 static void mce_cpu_restart(void *data)
@@ -1787,27 +1787,28 @@ static void mce_enable_ce(void *all)
 		__mcheck_cpu_init_timer();
 }
 
-static struct sysdev_class mce_sysdev_class = {
+static struct bus_type mce_subsys = {
 	.name		= "machinecheck",
+	.dev_name	= "machinecheck",
 };
 
-DEFINE_PER_CPU(struct sys_device, mce_sysdev);
+DEFINE_PER_CPU(struct device, mce_device);
 
 __cpuinitdata
 void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
-static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr)
+static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
 {
 	return container_of(attr, struct mce_bank, attr);
 }
 
-static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
+static ssize_t show_bank(struct device *s, struct device_attribute *attr,
 			 char *buf)
 {
 	return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
 }
 
-static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
+static ssize_t set_bank(struct device *s, struct device_attribute *attr,
 			const char *buf, size_t size)
 {
 	u64 new;
@@ -1822,14 +1823,14 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
 }
 
 static ssize_t
-show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
+show_trigger(struct device *s, struct device_attribute *attr, char *buf)
 {
 	strcpy(buf, mce_helper);
 	strcat(buf, "\n");
 	return strlen(mce_helper) + 1;
 }
 
-static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
+static ssize_t set_trigger(struct device *s, struct device_attribute *attr,
 				const char *buf, size_t siz)
 {
 	char *p;
@@ -1844,8 +1845,8 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
 	return strlen(mce_helper) + !!p;
 }
 
-static ssize_t set_ignore_ce(struct sys_device *s,
-			     struct sysdev_attribute *attr,
+static ssize_t set_ignore_ce(struct device *s,
+			     struct device_attribute *attr,
 			     const char *buf, size_t size)
 {
 	u64 new;
@@ -1868,8 +1869,8 @@ static ssize_t set_ignore_ce(struct sys_device *s,
 	return size;
 }
 
-static ssize_t set_cmci_disabled(struct sys_device *s,
-				 struct sysdev_attribute *attr,
+static ssize_t set_cmci_disabled(struct device *s,
+				 struct device_attribute *attr,
 				 const char *buf, size_t size)
 {
 	u64 new;
@@ -1891,108 +1892,107 @@ static ssize_t set_cmci_disabled(struct sys_device *s,
 	return size;
 }
 
-static ssize_t store_int_with_restart(struct sys_device *s,
-				      struct sysdev_attribute *attr,
+static ssize_t store_int_with_restart(struct device *s,
+				      struct device_attribute *attr,
 				      const char *buf, size_t size)
 {
-	ssize_t ret = sysdev_store_int(s, attr, buf, size);
+	ssize_t ret = device_store_int(s, attr, buf, size);
 	mce_restart();
 	return ret;
 }
 
-static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
-static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
-static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
-static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
+static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
+static DEVICE_INT_ATTR(tolerant, 0644, tolerant);
+static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
+static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
 
-static struct sysdev_ext_attribute attr_check_interval = {
-	_SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
-		     store_int_with_restart),
+static struct dev_ext_attribute dev_attr_check_interval = {
+	__ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
 	&check_interval
 };
 
-static struct sysdev_ext_attribute attr_ignore_ce = {
-	_SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce),
+static struct dev_ext_attribute dev_attr_ignore_ce = {
+	__ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce),
 	&mce_ignore_ce
 };
 
-static struct sysdev_ext_attribute attr_cmci_disabled = {
-	_SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled),
+static struct dev_ext_attribute dev_attr_cmci_disabled = {
+	__ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled),
 	&mce_cmci_disabled
 };
 
-static struct sysdev_attribute *mce_sysdev_attrs[] = {
-	&attr_tolerant.attr,
-	&attr_check_interval.attr,
-	&attr_trigger,
-	&attr_monarch_timeout.attr,
-	&attr_dont_log_ce.attr,
-	&attr_ignore_ce.attr,
-	&attr_cmci_disabled.attr,
+static struct device_attribute *mce_device_attrs[] = {
+	&dev_attr_tolerant.attr,
+	&dev_attr_check_interval.attr,
+	&dev_attr_trigger,
+	&dev_attr_monarch_timeout.attr,
+	&dev_attr_dont_log_ce.attr,
+	&dev_attr_ignore_ce.attr,
+	&dev_attr_cmci_disabled.attr,
 	NULL
 };
 
-static cpumask_var_t mce_sysdev_initialized;
+static cpumask_var_t mce_device_initialized;
 
-/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
-static __cpuinit int mce_sysdev_create(unsigned int cpu)
+/* Per cpu device init. All of the cpus still share the same ctrl bank: */
+static __cpuinit int mce_device_create(unsigned int cpu)
 {
-	struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu);
+	struct device *dev = &per_cpu(mce_device, cpu);
 	int err;
 	int i, j;
 
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
 
-	memset(&sysdev->kobj, 0, sizeof(struct kobject));
-	sysdev->id  = cpu;
-	sysdev->cls = &mce_sysdev_class;
+	memset(&dev->kobj, 0, sizeof(struct kobject));
+	dev->id  = cpu;
+	dev->bus = &mce_subsys;
 
-	err = sysdev_register(sysdev);
+	err = device_register(dev);
 	if (err)
 		return err;
 
-	for (i = 0; mce_sysdev_attrs[i]; i++) {
-		err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]);
+	for (i = 0; mce_device_attrs[i]; i++) {
+		err = device_create_file(dev, mce_device_attrs[i]);
 		if (err)
 			goto error;
 	}
 	for (j = 0; j < banks; j++) {
-		err = sysdev_create_file(sysdev, &mce_banks[j].attr);
+		err = device_create_file(dev, &mce_banks[j].attr);
 		if (err)
 			goto error2;
 	}
-	cpumask_set_cpu(cpu, mce_sysdev_initialized);
+	cpumask_set_cpu(cpu, mce_device_initialized);
 
 	return 0;
 error2:
 	while (--j >= 0)
-		sysdev_remove_file(sysdev, &mce_banks[j].attr);
+		device_remove_file(dev, &mce_banks[j].attr);
 error:
 	while (--i >= 0)
-		sysdev_remove_file(sysdev, mce_sysdev_attrs[i]);
+		device_remove_file(dev, mce_device_attrs[i]);
 
-	sysdev_unregister(sysdev);
+	device_unregister(dev);
 
 	return err;
 }
 
-static __cpuinit void mce_sysdev_remove(unsigned int cpu)
+static __cpuinit void mce_device_remove(unsigned int cpu)
 {
-	struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu);
+	struct device *dev = &per_cpu(mce_device, cpu);
 	int i;
 
-	if (!cpumask_test_cpu(cpu, mce_sysdev_initialized))
+	if (!cpumask_test_cpu(cpu, mce_device_initialized))
 		return;
 
-	for (i = 0; mce_sysdev_attrs[i]; i++)
-		sysdev_remove_file(sysdev, mce_sysdev_attrs[i]);
+	for (i = 0; mce_device_attrs[i]; i++)
+		device_remove_file(dev, mce_device_attrs[i]);
 
 	for (i = 0; i < banks; i++)
-		sysdev_remove_file(sysdev, &mce_banks[i].attr);
+		device_remove_file(dev, &mce_banks[i].attr);
 
-	sysdev_unregister(sysdev);
-	cpumask_clear_cpu(cpu, mce_sysdev_initialized);
+	device_unregister(dev);
+	cpumask_clear_cpu(cpu, mce_device_initialized);
 }
 
 /* Make sure there are no machine checks on offlined CPUs. */
@@ -2042,7 +2042,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		mce_sysdev_create(cpu);
+		mce_device_create(cpu);
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
 		break;
@@ -2050,7 +2050,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_DEAD_FROZEN:
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
-		mce_sysdev_remove(cpu);
+		mce_device_remove(cpu);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
@@ -2084,7 +2084,7 @@ static __init void mce_init_banks(void)
 
 	for (i = 0; i < banks; i++) {
 		struct mce_bank *b = &mce_banks[i];
-		struct sysdev_attribute *a = &b->attr;
+		struct device_attribute *a = &b->attr;
 
 		sysfs_attr_init(&a->attr);
 		a->attr.name	= b->attrname;
@@ -2104,16 +2104,16 @@ static __init int mcheck_init_device(void)
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
 
-	zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL);
+	zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
 
 	mce_init_banks();
 
-	err = sysdev_class_register(&mce_sysdev_class);
+	err = subsys_system_register(&mce_subsys, NULL);
 	if (err)
 		return err;
 
 	for_each_online_cpu(i) {
-		err = mce_sysdev_create(i);
+		err = mce_device_create(i);
 		if (err)
 			return err;
 	}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f5474218cffe..56d2aa1acd55 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -17,7 +17,6 @@
 #include <linux/notifier.h>
 #include <linux/kobject.h>
 #include <linux/percpu.h>
-#include <linux/sysdev.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/sysfs.h>
@@ -548,7 +547,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		if (!b)
 			goto out;
 
-		err = sysfs_create_link(&per_cpu(mce_sysdev, cpu).kobj,
+		err = sysfs_create_link(&per_cpu(mce_device, cpu).kobj,
 					b->kobj, name);
 		if (err)
 			goto out;
@@ -571,7 +570,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		goto out;
 	}
 
-	b->kobj = kobject_create_and_add(name, &per_cpu(mce_sysdev, cpu).kobj);
+	b->kobj = kobject_create_and_add(name, &per_cpu(mce_device, cpu).kobj);
 	if (!b->kobj)
 		goto out_free;
 
@@ -591,7 +590,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		if (i == cpu)
 			continue;
 
-		err = sysfs_create_link(&per_cpu(mce_sysdev, i).kobj,
+		err = sysfs_create_link(&per_cpu(mce_device, i).kobj,
 					b->kobj, name);
 		if (err)
 			goto out;
@@ -669,7 +668,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 #ifdef CONFIG_SMP
 	/* sibling symlink */
 	if (shared_bank[bank] && b->blocks->cpu != cpu) {
-		sysfs_remove_link(&per_cpu(mce_sysdev, cpu).kobj, name);
+		sysfs_remove_link(&per_cpu(mce_device, cpu).kobj, name);
 		per_cpu(threshold_banks, cpu)[bank] = NULL;
 
 		return;
@@ -681,7 +680,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 		if (i == cpu)
 			continue;
 
-		sysfs_remove_link(&per_cpu(mce_sysdev, i).kobj, name);
+		sysfs_remove_link(&per_cpu(mce_device, i).kobj, name);
 		per_cpu(threshold_banks, i)[bank] = NULL;
 	}
 
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 787e06c84ea6..59e3f6ed265f 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -19,7 +19,6 @@
 #include <linux/kernel.h>
 #include <linux/percpu.h>
 #include <linux/export.h>
-#include <linux/sysdev.h>
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/smp.h>
@@ -69,16 +68,16 @@ static atomic_t therm_throt_en	= ATOMIC_INIT(0);
 static u32 lvtthmr_init __read_mostly;
 
 #ifdef CONFIG_SYSFS
-#define define_therm_throt_sysdev_one_ro(_name)				\
-	static SYSDEV_ATTR(_name, 0444,					\
-			   therm_throt_sysdev_show_##_name,		\
+#define define_therm_throt_device_one_ro(_name)				\
+	static DEVICE_ATTR(_name, 0444,					\
+			   therm_throt_device_show_##_name,		\
 				   NULL)				\
 
-#define define_therm_throt_sysdev_show_func(event, name)		\
+#define define_therm_throt_device_show_func(event, name)		\
 									\
-static ssize_t therm_throt_sysdev_show_##event##_##name(		\
-			struct sys_device *dev,				\
-			struct sysdev_attribute *attr,			\
+static ssize_t therm_throt_device_show_##event##_##name(		\
+			struct device *dev,				\
+			struct device_attribute *attr,			\
 			char *buf)					\
 {									\
 	unsigned int cpu = dev->id;					\
@@ -95,20 +94,20 @@ static ssize_t therm_throt_sysdev_show_##event##_##name(		\
 	return ret;							\
 }
 
-define_therm_throt_sysdev_show_func(core_throttle, count);
-define_therm_throt_sysdev_one_ro(core_throttle_count);
+define_therm_throt_device_show_func(core_throttle, count);
+define_therm_throt_device_one_ro(core_throttle_count);
 
-define_therm_throt_sysdev_show_func(core_power_limit, count);
-define_therm_throt_sysdev_one_ro(core_power_limit_count);
+define_therm_throt_device_show_func(core_power_limit, count);
+define_therm_throt_device_one_ro(core_power_limit_count);
 
-define_therm_throt_sysdev_show_func(package_throttle, count);
-define_therm_throt_sysdev_one_ro(package_throttle_count);
+define_therm_throt_device_show_func(package_throttle, count);
+define_therm_throt_device_one_ro(package_throttle_count);
 
-define_therm_throt_sysdev_show_func(package_power_limit, count);
-define_therm_throt_sysdev_one_ro(package_power_limit_count);
+define_therm_throt_device_show_func(package_power_limit, count);
+define_therm_throt_device_one_ro(package_power_limit_count);
 
 static struct attribute *thermal_throttle_attrs[] = {
-	&attr_core_throttle_count.attr,
+	&dev_attr_core_throttle_count.attr,
 	NULL
 };
 
@@ -223,36 +222,36 @@ static int thresh_event_valid(int event)
 
 #ifdef CONFIG_SYSFS
 /* Add/Remove thermal_throttle interface for CPU device: */
-static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
+static __cpuinit int thermal_throttle_add_dev(struct device *dev,
 				unsigned int cpu)
 {
 	int err;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group);
+	err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
 	if (err)
 		return err;
 
 	if (cpu_has(c, X86_FEATURE_PLN))
-		err = sysfs_add_file_to_group(&sys_dev->kobj,
-					      &attr_core_power_limit_count.attr,
+		err = sysfs_add_file_to_group(&dev->kobj,
+					      &dev_attr_core_power_limit_count.attr,
 					      thermal_attr_group.name);
 	if (cpu_has(c, X86_FEATURE_PTS)) {
-		err = sysfs_add_file_to_group(&sys_dev->kobj,
-					      &attr_package_throttle_count.attr,
+		err = sysfs_add_file_to_group(&dev->kobj,
+					      &dev_attr_package_throttle_count.attr,
 					      thermal_attr_group.name);
 		if (cpu_has(c, X86_FEATURE_PLN))
-			err = sysfs_add_file_to_group(&sys_dev->kobj,
-					&attr_package_power_limit_count.attr,
+			err = sysfs_add_file_to_group(&dev->kobj,
+					&dev_attr_package_power_limit_count.attr,
 					thermal_attr_group.name);
 	}
 
 	return err;
 }
 
-static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
+static __cpuinit void thermal_throttle_remove_dev(struct device *dev)
 {
-	sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group);
+	sysfs_remove_group(&dev->kobj, &thermal_attr_group);
 }
 
 /* Mutex protecting device creation against CPU hotplug: */
@@ -265,16 +264,16 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
 			      void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
-	struct sys_device *sys_dev;
+	struct device *dev;
 	int err = 0;
 
-	sys_dev = get_cpu_sysdev(cpu);
+	dev = get_cpu_device(cpu);
 
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
 		mutex_lock(&therm_cpu_lock);
-		err = thermal_throttle_add_dev(sys_dev, cpu);
+		err = thermal_throttle_add_dev(dev, cpu);
 		mutex_unlock(&therm_cpu_lock);
 		WARN_ON(err);
 		break;
@@ -283,7 +282,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		mutex_lock(&therm_cpu_lock);
-		thermal_throttle_remove_dev(sys_dev);
+		thermal_throttle_remove_dev(dev);
 		mutex_unlock(&therm_cpu_lock);
 		break;
 	}
@@ -310,7 +309,7 @@ static __init int thermal_throttle_init_device(void)
 #endif
 	/* connect live CPUs to sysfs */
 	for_each_online_cpu(cpu) {
-		err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu);
+		err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
 		WARN_ON(err);
 	}
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index f2d2a664e797..cf88f2a16473 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -292,8 +292,8 @@ static int reload_for_cpu(int cpu)
 	return err;
 }
 
-static ssize_t reload_store(struct sys_device *dev,
-			    struct sysdev_attribute *attr,
+static ssize_t reload_store(struct device *dev,
+			    struct device_attribute *attr,
 			    const char *buf, size_t size)
 {
 	unsigned long val;
@@ -318,30 +318,30 @@ static ssize_t reload_store(struct sys_device *dev,
 	return ret;
 }
 
-static ssize_t version_show(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t version_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
 
 	return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
 }
 
-static ssize_t pf_show(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t pf_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
 
 	return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
 }
 
-static SYSDEV_ATTR(reload, 0200, NULL, reload_store);
-static SYSDEV_ATTR(version, 0400, version_show, NULL);
-static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL);
+static DEVICE_ATTR(reload, 0200, NULL, reload_store);
+static DEVICE_ATTR(version, 0400, version_show, NULL);
+static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);
 
 static struct attribute *mc_default_attrs[] = {
-	&attr_reload.attr,
-	&attr_version.attr,
-	&attr_processor_flags.attr,
+	&dev_attr_reload.attr,
+	&dev_attr_version.attr,
+	&dev_attr_processor_flags.attr,
 	NULL
 };
 
@@ -405,43 +405,45 @@ static enum ucode_state microcode_update_cpu(int cpu)
 	return ustate;
 }
 
-static int mc_sysdev_add(struct sys_device *sys_dev)
+static int mc_device_add(struct device *dev, struct subsys_interface *sif)
 {
-	int err, cpu = sys_dev->id;
+	int err, cpu = dev->id;
 
 	if (!cpu_online(cpu))
 		return 0;
 
 	pr_debug("CPU%d added\n", cpu);
 
-	err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
+	err = sysfs_create_group(&dev->kobj, &mc_attr_group);
 	if (err)
 		return err;
 
 	if (microcode_init_cpu(cpu) == UCODE_ERROR) {
-		sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
+		sysfs_remove_group(&dev->kobj, &mc_attr_group);
 		return -EINVAL;
 	}
 
 	return err;
 }
 
-static int mc_sysdev_remove(struct sys_device *sys_dev)
+static int mc_device_remove(struct device *dev, struct subsys_interface *sif)
 {
-	int cpu = sys_dev->id;
+	int cpu = dev->id;
 
 	if (!cpu_online(cpu))
 		return 0;
 
 	pr_debug("CPU%d removed\n", cpu);
 	microcode_fini_cpu(cpu);
-	sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
+	sysfs_remove_group(&dev->kobj, &mc_attr_group);
 	return 0;
 }
 
-static struct sysdev_driver mc_sysdev_driver = {
-	.add			= mc_sysdev_add,
-	.remove			= mc_sysdev_remove,
+static struct subsys_interface mc_cpu_interface = {
+	.name			= "microcode",
+	.subsys			= &cpu_subsys,
+	.add_dev		= mc_device_add,
+	.remove_dev		= mc_device_remove,
 };
 
 /**
@@ -464,9 +466,9 @@ static __cpuinit int
 mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
-	struct sys_device *sys_dev;
+	struct device *dev;
 
-	sys_dev = get_cpu_sysdev(cpu);
+	dev = get_cpu_device(cpu);
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
@@ -474,13 +476,13 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
 		pr_debug("CPU%d added\n", cpu);
-		if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
+		if (sysfs_create_group(&dev->kobj, &mc_attr_group))
 			pr_err("Failed to create group for CPU%d\n", cpu);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
 		/* Suspend is in progress, only remove the interface */
-		sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
+		sysfs_remove_group(&dev->kobj, &mc_attr_group);
 		pr_debug("CPU%d removed\n", cpu);
 		break;
 
@@ -527,7 +529,7 @@ static int __init microcode_init(void)
 	get_online_cpus();
 	mutex_lock(&microcode_mutex);
 
-	error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
+	error = subsys_interface_register(&mc_cpu_interface);
 
 	mutex_unlock(&microcode_mutex);
 	put_online_cpus();
@@ -561,7 +563,7 @@ static void __exit microcode_exit(void)
 	get_online_cpus();
 	mutex_lock(&microcode_mutex);
 
-	sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
+	subsys_interface_unregister(&mc_cpu_interface);
 
 	mutex_unlock(&microcode_mutex);
 	put_online_cpus();
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index 9d7bc9f6b6cc..20a68ca386de 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -446,7 +446,7 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device)
 {
 	struct acpi_processor *pr = NULL;
 	int result = 0;
-	struct sys_device *sysdev;
+	struct device *dev;
 
 	pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
 	if (!pr)
@@ -491,8 +491,8 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device)
 
 	per_cpu(processors, pr->id) = pr;
 
-	sysdev = get_cpu_sysdev(pr->id);
-	if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) {
+	dev = get_cpu_device(pr->id);
+	if (sysfs_create_link(&device->dev.kobj, &dev->kobj, "sysdev")) {
 		result = -EFAULT;
 		goto err_free_cpumask;
 	}
diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index 870550d6a4bf..3b599abf2b40 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -30,7 +30,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/cpufreq.h>
-#include <linux/sysdev.h>
 
 #include <asm/uaccess.h>
 
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 251acea3d359..5bb0298fbcc0 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -1,8 +1,7 @@
 /*
- * drivers/base/cpu.c - basic CPU class support
+ * CPU subsystem support
  */
 
-#include <linux/sysdev.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sched.h>
@@ -14,40 +13,40 @@
 
 #include "base.h"
 
-static struct sysdev_class_attribute *cpu_sysdev_class_attrs[];
-
-struct sysdev_class cpu_sysdev_class = {
+struct bus_type cpu_subsys = {
 	.name = "cpu",
-	.attrs = cpu_sysdev_class_attrs,
+	.dev_name = "cpu",
 };
-EXPORT_SYMBOL(cpu_sysdev_class);
+EXPORT_SYMBOL_GPL(cpu_subsys);
 
-static DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
+static DEFINE_PER_CPU(struct device *, cpu_sys_devices);
 
 #ifdef CONFIG_HOTPLUG_CPU
-static ssize_t show_online(struct sys_device *dev, struct sysdev_attribute *attr,
+static ssize_t show_online(struct device *dev,
+			   struct device_attribute *attr,
 			   char *buf)
 {
-	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
 
-	return sprintf(buf, "%u\n", !!cpu_online(cpu->sysdev.id));
+	return sprintf(buf, "%u\n", !!cpu_online(cpu->dev.id));
 }
 
-static ssize_t __ref store_online(struct sys_device *dev, struct sysdev_attribute *attr,
-				 const char *buf, size_t count)
+static ssize_t __ref store_online(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
 {
-	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
 	ssize_t ret;
 
 	cpu_hotplug_driver_lock();
 	switch (buf[0]) {
 	case '0':
-		ret = cpu_down(cpu->sysdev.id);
+		ret = cpu_down(cpu->dev.id);
 		if (!ret)
 			kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
 		break;
 	case '1':
-		ret = cpu_up(cpu->sysdev.id);
+		ret = cpu_up(cpu->dev.id);
 		if (!ret)
 			kobject_uevent(&dev->kobj, KOBJ_ONLINE);
 		break;
@@ -60,44 +59,44 @@ static ssize_t __ref store_online(struct sys_device *dev, struct sysdev_attribut
 		ret = count;
 	return ret;
 }
-static SYSDEV_ATTR(online, 0644, show_online, store_online);
+static DEVICE_ATTR(online, 0644, show_online, store_online);
 
 static void __cpuinit register_cpu_control(struct cpu *cpu)
 {
-	sysdev_create_file(&cpu->sysdev, &attr_online);
+	device_create_file(&cpu->dev, &dev_attr_online);
 }
 void unregister_cpu(struct cpu *cpu)
 {
-	int logical_cpu = cpu->sysdev.id;
+	int logical_cpu = cpu->dev.id;
 
 	unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu));
 
-	sysdev_remove_file(&cpu->sysdev, &attr_online);
+	device_remove_file(&cpu->dev, &dev_attr_online);
 
-	sysdev_unregister(&cpu->sysdev);
+	device_unregister(&cpu->dev);
 	per_cpu(cpu_sys_devices, logical_cpu) = NULL;
 	return;
 }
 
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-static ssize_t cpu_probe_store(struct sysdev_class *class,
-			       struct sysdev_class_attribute *attr,
+static ssize_t cpu_probe_store(struct device *dev,
+			       struct device_attribute *attr,
 			       const char *buf,
 			       size_t count)
 {
 	return arch_cpu_probe(buf, count);
 }
 
-static ssize_t cpu_release_store(struct sysdev_class *class,
-				 struct sysdev_class_attribute *attr,
+static ssize_t cpu_release_store(struct device *dev,
+				 struct device_attribute *attr,
 				 const char *buf,
 				 size_t count)
 {
 	return arch_cpu_release(buf, count);
 }
 
-static SYSDEV_CLASS_ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
-static SYSDEV_CLASS_ATTR(release, S_IWUSR, NULL, cpu_release_store);
+static DEVICE_ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
+static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store);
 #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
 
 #else /* ... !CONFIG_HOTPLUG_CPU */
@@ -109,15 +108,15 @@ static inline void register_cpu_control(struct cpu *cpu)
 #ifdef CONFIG_KEXEC
 #include <linux/kexec.h>
 
-static ssize_t show_crash_notes(struct sys_device *dev, struct sysdev_attribute *attr,
+static ssize_t show_crash_notes(struct device *dev, struct device_attribute *attr,
 				char *buf)
 {
-	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
 	ssize_t rc;
 	unsigned long long addr;
 	int cpunum;
 
-	cpunum = cpu->sysdev.id;
+	cpunum = cpu->dev.id;
 
 	/*
 	 * Might be reading other cpu's data based on which cpu read thread
@@ -129,7 +128,7 @@ static ssize_t show_crash_notes(struct sys_device *dev, struct sysdev_attribute
 	rc = sprintf(buf, "%Lx\n", addr);
 	return rc;
 }
-static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL);
+static DEVICE_ATTR(crash_notes, 0400, show_crash_notes, NULL);
 #endif
 
 /*
@@ -137,12 +136,12 @@ static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL);
  */
 
 struct cpu_attr {
-	struct sysdev_class_attribute attr;
+	struct device_attribute attr;
 	const struct cpumask *const * const map;
 };
 
-static ssize_t show_cpus_attr(struct sysdev_class *class,
-			      struct sysdev_class_attribute *attr,
+static ssize_t show_cpus_attr(struct device *dev,
+			      struct device_attribute *attr,
 			      char *buf)
 {
 	struct cpu_attr *ca = container_of(attr, struct cpu_attr, attr);
@@ -153,10 +152,10 @@ static ssize_t show_cpus_attr(struct sysdev_class *class,
 	return n;
 }
 
-#define _CPU_ATTR(name, map)						\
-	{ _SYSDEV_CLASS_ATTR(name, 0444, show_cpus_attr, NULL), map }
+#define _CPU_ATTR(name, map) \
+	{ __ATTR(name, 0444, show_cpus_attr, NULL), map }
 
-/* Keep in sync with cpu_sysdev_class_attrs */
+/* Keep in sync with cpu_subsys_attrs */
 static struct cpu_attr cpu_attrs[] = {
 	_CPU_ATTR(online, &cpu_online_mask),
 	_CPU_ATTR(possible, &cpu_possible_mask),
@@ -166,19 +165,19 @@ static struct cpu_attr cpu_attrs[] = {
 /*
  * Print values for NR_CPUS and offlined cpus
  */
-static ssize_t print_cpus_kernel_max(struct sysdev_class *class,
-				     struct sysdev_class_attribute *attr, char *buf)
+static ssize_t print_cpus_kernel_max(struct device *dev,
+				     struct device_attribute *attr, char *buf)
 {
 	int n = snprintf(buf, PAGE_SIZE-2, "%d\n", NR_CPUS - 1);
 	return n;
 }
-static SYSDEV_CLASS_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL);
+static DEVICE_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL);
 
 /* arch-optional setting to enable display of offline cpus >= nr_cpu_ids */
 unsigned int total_cpus;
 
-static ssize_t print_cpus_offline(struct sysdev_class *class,
-				  struct sysdev_class_attribute *attr, char *buf)
+static ssize_t print_cpus_offline(struct device *dev,
+				  struct device_attribute *attr, char *buf)
 {
 	int n = 0, len = PAGE_SIZE-2;
 	cpumask_var_t offline;
@@ -205,7 +204,7 @@ static ssize_t print_cpus_offline(struct sysdev_class *class,
 	n += snprintf(&buf[n], len - n, "\n");
 	return n;
 }
-static SYSDEV_CLASS_ATTR(offline, 0444, print_cpus_offline, NULL);
+static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL);
 
 /*
  * register_cpu - Setup a sysfs device for a CPU.
@@ -218,57 +217,66 @@ static SYSDEV_CLASS_ATTR(offline, 0444, print_cpus_offline, NULL);
 int __cpuinit register_cpu(struct cpu *cpu, int num)
 {
 	int error;
-	cpu->node_id = cpu_to_node(num);
-	cpu->sysdev.id = num;
-	cpu->sysdev.cls = &cpu_sysdev_class;
-
-	error = sysdev_register(&cpu->sysdev);
 
+	cpu->node_id = cpu_to_node(num);
+	cpu->dev.id = num;
+	cpu->dev.bus = &cpu_subsys;
+	error = device_register(&cpu->dev);
 	if (!error && cpu->hotpluggable)
 		register_cpu_control(cpu);
 	if (!error)
-		per_cpu(cpu_sys_devices, num) = &cpu->sysdev;
+		per_cpu(cpu_sys_devices, num) = &cpu->dev;
 	if (!error)
 		register_cpu_under_node(num, cpu_to_node(num));
 
 #ifdef CONFIG_KEXEC
 	if (!error)
-		error = sysdev_create_file(&cpu->sysdev, &attr_crash_notes);
+		error = device_create_file(&cpu->dev, &dev_attr_crash_notes);
 #endif
 	return error;
 }
 
-struct sys_device *get_cpu_sysdev(unsigned cpu)
+struct device *get_cpu_device(unsigned cpu)
 {
 	if (cpu < nr_cpu_ids && cpu_possible(cpu))
 		return per_cpu(cpu_sys_devices, cpu);
 	else
 		return NULL;
 }
-EXPORT_SYMBOL_GPL(get_cpu_sysdev);
+EXPORT_SYMBOL_GPL(get_cpu_device);
+
+static struct attribute *cpu_root_attrs[] = {
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+	&dev_attr_probe.attr,
+	&dev_attr_release.attr,
+#endif
+	&cpu_attrs[0].attr.attr,
+	&cpu_attrs[1].attr.attr,
+	&cpu_attrs[2].attr.attr,
+	&dev_attr_kernel_max.attr,
+	&dev_attr_offline.attr,
+	NULL
+};
+
+static struct attribute_group cpu_root_attr_group = {
+	.attrs = cpu_root_attrs,
+};
+
+static const struct attribute_group *cpu_root_attr_groups[] = {
+	&cpu_root_attr_group,
+	NULL,
+};
 
 int __init cpu_dev_init(void)
 {
 	int err;
 
-	err = sysdev_class_register(&cpu_sysdev_class);
+	err = subsys_system_register(&cpu_subsys, cpu_root_attr_groups);
+	if (err)
+		return err;
+
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-	if (!err)
-		err = sched_create_sysfs_power_savings_entries(&cpu_sysdev_class);
+	err = sched_create_sysfs_power_savings_entries(cpu_subsys.dev_root);
 #endif
-
 	return err;
 }
-
-static struct sysdev_class_attribute *cpu_sysdev_class_attrs[] = {
-#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-	&attr_probe,
-	&attr_release,
-#endif
-	&cpu_attrs[0].attr,
-	&cpu_attrs[1].attr,
-	&cpu_attrs[2].attr,
-	&attr_kernel_max,
-	&attr_offline,
-	NULL
-};
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 793f796c4da3..6ce1501c7de5 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -315,12 +315,12 @@ struct node node_devices[MAX_NUMNODES];
 int register_cpu_under_node(unsigned int cpu, unsigned int nid)
 {
 	int ret;
-	struct sys_device *obj;
+	struct device *obj;
 
 	if (!node_online(nid))
 		return 0;
 
-	obj = get_cpu_sysdev(cpu);
+	obj = get_cpu_device(cpu);
 	if (!obj)
 		return 0;
 
@@ -337,12 +337,12 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid)
 
 int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
 {
-	struct sys_device *obj;
+	struct device *obj;
 
 	if (!node_online(nid))
 		return 0;
 
-	obj = get_cpu_sysdev(cpu);
+	obj = get_cpu_device(cpu);
 	if (!obj)
 		return 0;
 
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index f6f37a05a0c3..ae989c57cd5e 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -23,7 +23,6 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  */
-#include <linux/sysdev.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/cpu.h>
@@ -32,14 +31,14 @@
 #include <linux/topology.h>
 
 #define define_one_ro_named(_name, _func)				\
-static SYSDEV_ATTR(_name, 0444, _func, NULL)
+	static DEVICE_ATTR(_name, 0444, _func, NULL)
 
 #define define_one_ro(_name)				\
-static SYSDEV_ATTR(_name, 0444, show_##_name, NULL)
+	static DEVICE_ATTR(_name, 0444, show_##_name, NULL)
 
 #define define_id_show_func(name)				\
-static ssize_t show_##name(struct sys_device *dev,		\
-		struct sysdev_attribute *attr, char *buf)	\
+static ssize_t show_##name(struct device *dev,			\
+		struct device_attribute *attr, char *buf)	\
 {								\
 	unsigned int cpu = dev->id;				\
 	return sprintf(buf, "%d\n", topology_##name(cpu));	\
@@ -65,16 +64,16 @@ static ssize_t show_cpumap(int type, const struct cpumask *mask, char *buf)
 
 #ifdef arch_provides_topology_pointers
 #define define_siblings_show_map(name)					\
-static ssize_t show_##name(struct sys_device *dev,			\
-			   struct sysdev_attribute *attr, char *buf)	\
+static ssize_t show_##name(struct device *dev,				\
+			   struct device_attribute *attr, char *buf)	\
 {									\
 	unsigned int cpu = dev->id;					\
 	return show_cpumap(0, topology_##name(cpu), buf);		\
 }
 
 #define define_siblings_show_list(name)					\
-static ssize_t show_##name##_list(struct sys_device *dev,		\
-				  struct sysdev_attribute *attr,	\
+static ssize_t show_##name##_list(struct device *dev,			\
+				  struct device_attribute *attr,	\
 				  char *buf)				\
 {									\
 	unsigned int cpu = dev->id;					\
@@ -83,15 +82,15 @@ static ssize_t show_##name##_list(struct sys_device *dev,		\
 
 #else
 #define define_siblings_show_map(name)					\
-static ssize_t show_##name(struct sys_device *dev,			\
-			   struct sysdev_attribute *attr, char *buf)	\
+static ssize_t show_##name(struct device *dev,				\
+			   struct device_attribute *attr, char *buf)	\
 {									\
 	return show_cpumap(0, topology_##name(dev->id), buf);		\
 }
 
 #define define_siblings_show_list(name)					\
-static ssize_t show_##name##_list(struct sys_device *dev,		\
-				  struct sysdev_attribute *attr,	\
+static ssize_t show_##name##_list(struct device *dev,			\
+				  struct device_attribute *attr,	\
 				  char *buf)				\
 {									\
 	return show_cpumap(1, topology_##name(dev->id), buf);		\
@@ -124,16 +123,16 @@ define_one_ro_named(book_siblings_list, show_book_cpumask_list);
 #endif
 
 static struct attribute *default_attrs[] = {
-	&attr_physical_package_id.attr,
-	&attr_core_id.attr,
-	&attr_thread_siblings.attr,
-	&attr_thread_siblings_list.attr,
-	&attr_core_siblings.attr,
-	&attr_core_siblings_list.attr,
+	&dev_attr_physical_package_id.attr,
+	&dev_attr_core_id.attr,
+	&dev_attr_thread_siblings.attr,
+	&dev_attr_thread_siblings_list.attr,
+	&dev_attr_core_siblings.attr,
+	&dev_attr_core_siblings_list.attr,
 #ifdef CONFIG_SCHED_BOOK
-	&attr_book_id.attr,
-	&attr_book_siblings.attr,
-	&attr_book_siblings_list.attr,
+	&dev_attr_book_id.attr,
+	&dev_attr_book_siblings.attr,
+	&dev_attr_book_siblings_list.attr,
 #endif
 	NULL
 };
@@ -146,16 +145,16 @@ static struct attribute_group topology_attr_group = {
 /* Add/Remove cpu_topology interface for CPU device */
 static int __cpuinit topology_add_dev(unsigned int cpu)
 {
-	struct sys_device *sys_dev = get_cpu_sysdev(cpu);
+	struct device *dev = get_cpu_device(cpu);
 
-	return sysfs_create_group(&sys_dev->kobj, &topology_attr_group);
+	return sysfs_create_group(&dev->kobj, &topology_attr_group);
 }
 
 static void __cpuinit topology_remove_dev(unsigned int cpu)
 {
-	struct sys_device *sys_dev = get_cpu_sysdev(cpu);
+	struct device *dev = get_cpu_device(cpu);
 
-	sysfs_remove_group(&sys_dev->kobj, &topology_attr_group);
+	sysfs_remove_group(&dev->kobj, &topology_attr_group);
 }
 
 static int __cpuinit topology_cpu_callback(struct notifier_block *nfb,
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 987a165ede26..8c2df3499da7 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -679,7 +679,7 @@ static struct kobj_type ktype_cpufreq = {
  */
 static int cpufreq_add_dev_policy(unsigned int cpu,
 				  struct cpufreq_policy *policy,
-				  struct sys_device *sys_dev)
+				  struct device *dev)
 {
 	int ret = 0;
 #ifdef CONFIG_SMP
@@ -728,7 +728,7 @@ static int cpufreq_add_dev_policy(unsigned int cpu,
 			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
 			pr_debug("CPU already managed, adding link\n");
-			ret = sysfs_create_link(&sys_dev->kobj,
+			ret = sysfs_create_link(&dev->kobj,
 						&managed_policy->kobj,
 						"cpufreq");
 			if (ret)
@@ -761,7 +761,7 @@ static int cpufreq_add_dev_symlink(unsigned int cpu,
 
 	for_each_cpu(j, policy->cpus) {
 		struct cpufreq_policy *managed_policy;
-		struct sys_device *cpu_sys_dev;
+		struct device *cpu_dev;
 
 		if (j == cpu)
 			continue;
@@ -770,8 +770,8 @@ static int cpufreq_add_dev_symlink(unsigned int cpu,
 
 		pr_debug("CPU %u already managed, adding link\n", j);
 		managed_policy = cpufreq_cpu_get(cpu);
-		cpu_sys_dev = get_cpu_sysdev(j);
-		ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
+		cpu_dev = get_cpu_device(j);
+		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
 					"cpufreq");
 		if (ret) {
 			cpufreq_cpu_put(managed_policy);
@@ -783,7 +783,7 @@ static int cpufreq_add_dev_symlink(unsigned int cpu,
 
 static int cpufreq_add_dev_interface(unsigned int cpu,
 				     struct cpufreq_policy *policy,
-				     struct sys_device *sys_dev)
+				     struct device *dev)
 {
 	struct cpufreq_policy new_policy;
 	struct freq_attr **drv_attr;
@@ -793,7 +793,7 @@ static int cpufreq_add_dev_interface(unsigned int cpu,
 
 	/* prepare interface data */
 	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
-				   &sys_dev->kobj, "cpufreq");
+				   &dev->kobj, "cpufreq");
 	if (ret)
 		return ret;
 
@@ -866,9 +866,9 @@ err_out_kobj_put:
  * with with cpu hotplugging and all hell will break loose. Tried to clean this
  * mess up, but more thorough testing is needed. - Mathieu
  */
-static int cpufreq_add_dev(struct sys_device *sys_dev)
+static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 {
-	unsigned int cpu = sys_dev->id;
+	unsigned int cpu = dev->id;
 	int ret = 0, found = 0;
 	struct cpufreq_policy *policy;
 	unsigned long flags;
@@ -947,7 +947,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 				     CPUFREQ_START, policy);
 
-	ret = cpufreq_add_dev_policy(cpu, policy, sys_dev);
+	ret = cpufreq_add_dev_policy(cpu, policy, dev);
 	if (ret) {
 		if (ret > 0)
 			/* This is a managed cpu, symlink created,
@@ -956,7 +956,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 		goto err_unlock_policy;
 	}
 
-	ret = cpufreq_add_dev_interface(cpu, policy, sys_dev);
+	ret = cpufreq_add_dev_interface(cpu, policy, dev);
 	if (ret)
 		goto err_out_unregister;
 
@@ -999,15 +999,15 @@ module_out:
  * Caller should already have policy_rwsem in write mode for this CPU.
  * This routine frees the rwsem before returning.
  */
-static int __cpufreq_remove_dev(struct sys_device *sys_dev)
+static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 {
-	unsigned int cpu = sys_dev->id;
+	unsigned int cpu = dev->id;
 	unsigned long flags;
 	struct cpufreq_policy *data;
 	struct kobject *kobj;
 	struct completion *cmp;
 #ifdef CONFIG_SMP
-	struct sys_device *cpu_sys_dev;
+	struct device *cpu_dev;
 	unsigned int j;
 #endif
 
@@ -1032,7 +1032,7 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 		pr_debug("removing link\n");
 		cpumask_clear_cpu(cpu, data->cpus);
 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
-		kobj = &sys_dev->kobj;
+		kobj = &dev->kobj;
 		cpufreq_cpu_put(data);
 		unlock_policy_rwsem_write(cpu);
 		sysfs_remove_link(kobj, "cpufreq");
@@ -1071,8 +1071,8 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 			strncpy(per_cpu(cpufreq_cpu_governor, j),
 				data->governor->name, CPUFREQ_NAME_LEN);
 #endif
-			cpu_sys_dev = get_cpu_sysdev(j);
-			kobj = &cpu_sys_dev->kobj;
+			cpu_dev = get_cpu_device(j);
+			kobj = &cpu_dev->kobj;
 			unlock_policy_rwsem_write(cpu);
 			sysfs_remove_link(kobj, "cpufreq");
 			lock_policy_rwsem_write(cpu);
@@ -1112,11 +1112,11 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 	if (unlikely(cpumask_weight(data->cpus) > 1)) {
 		/* first sibling now owns the new sysfs dir */
 		cpumask_clear_cpu(cpu, data->cpus);
-		cpufreq_add_dev(get_cpu_sysdev(cpumask_first(data->cpus)));
+		cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)), NULL);
 
 		/* finally remove our own symlink */
 		lock_policy_rwsem_write(cpu);
-		__cpufreq_remove_dev(sys_dev);
+		__cpufreq_remove_dev(dev, sif);
 	}
 #endif
 
@@ -1128,9 +1128,9 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 }
 
 
-static int cpufreq_remove_dev(struct sys_device *sys_dev)
+static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 {
-	unsigned int cpu = sys_dev->id;
+	unsigned int cpu = dev->id;
 	int retval;
 
 	if (cpu_is_offline(cpu))
@@ -1139,7 +1139,7 @@ static int cpufreq_remove_dev(struct sys_device *sys_dev)
 	if (unlikely(lock_policy_rwsem_write(cpu)))
 		BUG();
 
-	retval = __cpufreq_remove_dev(sys_dev);
+	retval = __cpufreq_remove_dev(dev, sif);
 	return retval;
 }
 
@@ -1271,9 +1271,11 @@ out:
 }
 EXPORT_SYMBOL(cpufreq_get);
 
-static struct sysdev_driver cpufreq_sysdev_driver = {
-	.add		= cpufreq_add_dev,
-	.remove		= cpufreq_remove_dev,
+static struct subsys_interface cpufreq_interface = {
+	.name		= "cpufreq",
+	.subsys		= &cpu_subsys,
+	.add_dev	= cpufreq_add_dev,
+	.remove_dev	= cpufreq_remove_dev,
 };
 
 
@@ -1765,25 +1767,25 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
 					unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
-	struct sys_device *sys_dev;
+	struct device *dev;
 
-	sys_dev = get_cpu_sysdev(cpu);
-	if (sys_dev) {
+	dev = get_cpu_device(cpu);
+	if (dev) {
 		switch (action) {
 		case CPU_ONLINE:
 		case CPU_ONLINE_FROZEN:
-			cpufreq_add_dev(sys_dev);
+			cpufreq_add_dev(dev, NULL);
 			break;
 		case CPU_DOWN_PREPARE:
 		case CPU_DOWN_PREPARE_FROZEN:
 			if (unlikely(lock_policy_rwsem_write(cpu)))
 				BUG();
 
-			__cpufreq_remove_dev(sys_dev);
+			__cpufreq_remove_dev(dev, NULL);
 			break;
 		case CPU_DOWN_FAILED:
 		case CPU_DOWN_FAILED_FROZEN:
-			cpufreq_add_dev(sys_dev);
+			cpufreq_add_dev(dev, NULL);
 			break;
 		}
 	}
@@ -1830,8 +1832,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 	cpufreq_driver = driver_data;
 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-	ret = sysdev_driver_register(&cpu_sysdev_class,
-					&cpufreq_sysdev_driver);
+	ret = subsys_interface_register(&cpufreq_interface);
 	if (ret)
 		goto err_null_driver;
 
@@ -1850,7 +1851,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 		if (ret) {
 			pr_debug("no CPU initialized for driver %s\n",
 							driver_data->name);
-			goto err_sysdev_unreg;
+			goto err_if_unreg;
 		}
 	}
 
@@ -1858,9 +1859,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 	pr_debug("driver %s up and running\n", driver_data->name);
 
 	return 0;
-err_sysdev_unreg:
-	sysdev_driver_unregister(&cpu_sysdev_class,
-			&cpufreq_sysdev_driver);
+err_if_unreg:
+	subsys_interface_unregister(&cpufreq_interface);
 err_null_driver:
 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
 	cpufreq_driver = NULL;
@@ -1887,7 +1887,7 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
 
 	pr_debug("unregistering driver %s\n", driver->name);
 
-	sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
+	subsys_interface_unregister(&cpufreq_interface);
 	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
 
 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
@@ -1907,8 +1907,7 @@ static int __init cpufreq_core_init(void)
 		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
 	}
 
-	cpufreq_global_kobject = kobject_create_and_add("cpufreq",
-						&cpu_sysdev_class.kset.kobj);
+	cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
 	BUG_ON(!cpufreq_global_kobject);
 	register_syscore_ops(&cpufreq_syscore_ops);
 
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index c5072a91e848..390380a8cfc9 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -11,7 +11,6 @@
 
 #include <linux/kernel.h>
 #include <linux/slab.h>
-#include <linux/sysdev.h>
 #include <linux/cpu.h>
 #include <linux/sysfs.h>
 #include <linux/cpufreq.h>
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 06ce2680d00d..59f4261c753a 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -291,10 +291,10 @@ EXPORT_SYMBOL_GPL(cpuidle_disable_device);
 static int __cpuidle_register_device(struct cpuidle_device *dev)
 {
 	int ret;
-	struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
+	struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
 	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
 
-	if (!sys_dev)
+	if (!dev)
 		return -EINVAL;
 	if (!try_module_get(cpuidle_driver->owner))
 		return -EINVAL;
@@ -303,7 +303,7 @@ static int __cpuidle_register_device(struct cpuidle_device *dev)
 
 	per_cpu(cpuidle_devices, dev->cpu) = dev;
 	list_add(&dev->device_list, &cpuidle_detected_devices);
-	if ((ret = cpuidle_add_sysfs(sys_dev))) {
+	if ((ret = cpuidle_add_sysfs(cpu_dev))) {
 		module_put(cpuidle_driver->owner);
 		return ret;
 	}
@@ -344,7 +344,7 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device);
  */
 void cpuidle_unregister_device(struct cpuidle_device *dev)
 {
-	struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
+	struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
 	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
 
 	if (dev->registered == 0)
@@ -354,7 +354,7 @@ void cpuidle_unregister_device(struct cpuidle_device *dev)
 
 	cpuidle_disable_device(dev);
 
-	cpuidle_remove_sysfs(sys_dev);
+	cpuidle_remove_sysfs(cpu_dev);
 	list_del(&dev->device_list);
 	wait_for_completion(&dev->kobj_unregister);
 	per_cpu(cpuidle_devices, dev->cpu) = NULL;
@@ -411,7 +411,7 @@ static int __init cpuidle_init(void)
 	if (cpuidle_disabled())
 		return -ENODEV;
 
-	ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
+	ret = cpuidle_add_interface(cpu_subsys.dev_root);
 	if (ret)
 		return ret;
 
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
index 38c3fd8b9d76..7db186685c27 100644
--- a/drivers/cpuidle/cpuidle.h
+++ b/drivers/cpuidle/cpuidle.h
@@ -5,7 +5,7 @@
 #ifndef __DRIVER_CPUIDLE_H
 #define __DRIVER_CPUIDLE_H
 
-#include <linux/sysdev.h>
+#include <linux/device.h>
 
 /* For internal use only */
 extern struct cpuidle_governor *cpuidle_curr_governor;
@@ -23,11 +23,11 @@ extern void cpuidle_uninstall_idle_handler(void);
 extern int cpuidle_switch_governor(struct cpuidle_governor *gov);
 
 /* sysfs */
-extern int cpuidle_add_class_sysfs(struct sysdev_class *cls);
-extern void cpuidle_remove_class_sysfs(struct sysdev_class *cls);
+extern int cpuidle_add_interface(struct device *dev);
+extern void cpuidle_remove_interface(struct device *dev);
 extern int cpuidle_add_state_sysfs(struct cpuidle_device *device);
 extern void cpuidle_remove_state_sysfs(struct cpuidle_device *device);
-extern int cpuidle_add_sysfs(struct sys_device *sysdev);
-extern void cpuidle_remove_sysfs(struct sys_device *sysdev);
+extern int cpuidle_add_sysfs(struct device *dev);
+extern void cpuidle_remove_sysfs(struct device *dev);
 
 #endif /* __DRIVER_CPUIDLE_H */
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 1e756e160dca..3fe41fe4851a 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -22,8 +22,8 @@ static int __init cpuidle_sysfs_setup(char *unused)
 }
 __setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup);
 
-static ssize_t show_available_governors(struct sysdev_class *class,
-					struct sysdev_class_attribute *attr,
+static ssize_t show_available_governors(struct device *dev,
+					struct device_attribute *attr,
 					char *buf)
 {
 	ssize_t i = 0;
@@ -42,8 +42,8 @@ out:
 	return i;
 }
 
-static ssize_t show_current_driver(struct sysdev_class *class,
-				   struct sysdev_class_attribute *attr,
+static ssize_t show_current_driver(struct device *dev,
+				   struct device_attribute *attr,
 				   char *buf)
 {
 	ssize_t ret;
@@ -59,8 +59,8 @@ static ssize_t show_current_driver(struct sysdev_class *class,
 	return ret;
 }
 
-static ssize_t show_current_governor(struct sysdev_class *class,
-				     struct sysdev_class_attribute *attr,
+static ssize_t show_current_governor(struct device *dev,
+				     struct device_attribute *attr,
 				     char *buf)
 {
 	ssize_t ret;
@@ -75,8 +75,8 @@ static ssize_t show_current_governor(struct sysdev_class *class,
 	return ret;
 }
 
-static ssize_t store_current_governor(struct sysdev_class *class,
-				      struct sysdev_class_attribute *attr,
+static ssize_t store_current_governor(struct device *dev,
+				      struct device_attribute *attr,
 				      const char *buf, size_t count)
 {
 	char gov_name[CPUIDLE_NAME_LEN];
@@ -109,50 +109,48 @@ static ssize_t store_current_governor(struct sysdev_class *class,
 		return count;
 }
 
-static SYSDEV_CLASS_ATTR(current_driver, 0444, show_current_driver, NULL);
-static SYSDEV_CLASS_ATTR(current_governor_ro, 0444, show_current_governor,
-			 NULL);
+static DEVICE_ATTR(current_driver, 0444, show_current_driver, NULL);
+static DEVICE_ATTR(current_governor_ro, 0444, show_current_governor, NULL);
 
-static struct attribute *cpuclass_default_attrs[] = {
-	&attr_current_driver.attr,
-	&attr_current_governor_ro.attr,
+static struct attribute *cpuidle_default_attrs[] = {
+	&dev_attr_current_driver.attr,
+	&dev_attr_current_governor_ro.attr,
 	NULL
 };
 
-static SYSDEV_CLASS_ATTR(available_governors, 0444, show_available_governors,
-			 NULL);
-static SYSDEV_CLASS_ATTR(current_governor, 0644, show_current_governor,
-			 store_current_governor);
+static DEVICE_ATTR(available_governors, 0444, show_available_governors, NULL);
+static DEVICE_ATTR(current_governor, 0644, show_current_governor,
+		   store_current_governor);
 
-static struct attribute *cpuclass_switch_attrs[] = {
-	&attr_available_governors.attr,
-	&attr_current_driver.attr,
-	&attr_current_governor.attr,
+static struct attribute *cpuidle_switch_attrs[] = {
+	&dev_attr_available_governors.attr,
+	&dev_attr_current_driver.attr,
+	&dev_attr_current_governor.attr,
 	NULL
 };
 
-static struct attribute_group cpuclass_attr_group = {
-	.attrs = cpuclass_default_attrs,
+static struct attribute_group cpuidle_attr_group = {
+	.attrs = cpuidle_default_attrs,
 	.name = "cpuidle",
 };
 
 /**
- * cpuidle_add_class_sysfs - add CPU global sysfs attributes
+ * cpuidle_add_interface - add CPU global sysfs attributes
  */
-int cpuidle_add_class_sysfs(struct sysdev_class *cls)
+int cpuidle_add_interface(struct device *dev)
 {
 	if (sysfs_switch)
-		cpuclass_attr_group.attrs = cpuclass_switch_attrs;
+		cpuidle_attr_group.attrs = cpuidle_switch_attrs;
 
-	return sysfs_create_group(&cls->kset.kobj, &cpuclass_attr_group);
+	return sysfs_create_group(&dev->kobj, &cpuidle_attr_group);
 }
 
 /**
- * cpuidle_remove_class_sysfs - remove CPU global sysfs attributes
+ * cpuidle_remove_interface - remove CPU global sysfs attributes
  */
-void cpuidle_remove_class_sysfs(struct sysdev_class *cls)
+void cpuidle_remove_interface(struct device *dev)
 {
-	sysfs_remove_group(&cls->kset.kobj, &cpuclass_attr_group);
+	sysfs_remove_group(&dev->kobj, &cpuidle_attr_group);
 }
 
 struct cpuidle_attr {
@@ -365,16 +363,16 @@ void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
 
 /**
  * cpuidle_add_sysfs - creates a sysfs instance for the target device
- * @sysdev: the target device
+ * @dev: the target device
  */
-int cpuidle_add_sysfs(struct sys_device *sysdev)
+int cpuidle_add_sysfs(struct device *cpu_dev)
 {
-	int cpu = sysdev->id;
+	int cpu = cpu_dev->id;
 	struct cpuidle_device *dev;
 	int error;
 
 	dev = per_cpu(cpuidle_devices, cpu);
-	error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &sysdev->kobj,
+	error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &cpu_dev->kobj,
 				     "cpuidle");
 	if (!error)
 		kobject_uevent(&dev->kobj, KOBJ_ADD);
@@ -383,11 +381,11 @@ int cpuidle_add_sysfs(struct sys_device *sysdev)
 
 /**
  * cpuidle_remove_sysfs - deletes a sysfs instance on the target device
- * @sysdev: the target device
+ * @dev: the target device
  */
-void cpuidle_remove_sysfs(struct sys_device *sysdev)
+void cpuidle_remove_sysfs(struct device *cpu_dev)
 {
-	int cpu = sysdev->id;
+	int cpu = cpu_dev->id;
 	struct cpuidle_device *dev;
 
 	dev = per_cpu(cpuidle_devices, cpu);
diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c
index 95b909ac2b73..3c03c1060be6 100644
--- a/drivers/s390/char/sclp_config.c
+++ b/drivers/s390/char/sclp_config.c
@@ -11,7 +11,7 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/cpu.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/workqueue.h>
 #include <asm/smp.h>
 
@@ -31,14 +31,14 @@ static struct work_struct sclp_cpu_change_work;
 static void sclp_cpu_capability_notify(struct work_struct *work)
 {
 	int cpu;
-	struct sys_device *sysdev;
+	struct device *dev;
 
 	s390_adjust_jiffies();
 	pr_warning("cpu capability changed.\n");
 	get_online_cpus();
 	for_each_online_cpu(cpu) {
-		sysdev = get_cpu_sysdev(cpu);
-		kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
+		dev = get_cpu_device(cpu);
+		kobject_uevent(&dev->kobj, KOBJ_CHANGE);
 	}
 	put_online_cpus();
 }
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 6cb60fd2ea84..fc3da0d70d68 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -14,7 +14,7 @@
 #ifndef _LINUX_CPU_H_
 #define _LINUX_CPU_H_
 
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/node.h>
 #include <linux/compiler.h>
 #include <linux/cpumask.h>
@@ -22,19 +22,19 @@
 struct cpu {
 	int node_id;		/* The node which contains the CPU */
 	int hotpluggable;	/* creates sysfs control file if hotpluggable */
-	struct sys_device sysdev;
+	struct device dev;
 };
 
 extern int register_cpu(struct cpu *cpu, int num);
-extern struct sys_device *get_cpu_sysdev(unsigned cpu);
+extern struct device *get_cpu_device(unsigned cpu);
 
-extern int cpu_add_sysdev_attr(struct sysdev_attribute *attr);
-extern void cpu_remove_sysdev_attr(struct sysdev_attribute *attr);
+extern int cpu_add_dev_attr(struct device_attribute *attr);
+extern void cpu_remove_dev_attr(struct device_attribute *attr);
 
-extern int cpu_add_sysdev_attr_group(struct attribute_group *attrs);
-extern void cpu_remove_sysdev_attr_group(struct attribute_group *attrs);
+extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
+extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
 
-extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls);
+extern int sched_create_sysfs_power_savings_entries(struct device *dev);
 
 #ifdef CONFIG_HOTPLUG_CPU
 extern void unregister_cpu(struct cpu *cpu);
@@ -160,7 +160,7 @@ static inline void cpu_maps_update_done(void)
 }
 
 #endif /* CONFIG_SMP */
-extern struct sysdev_class cpu_sysdev_class;
+extern struct bus_type cpu_subsys;
 
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
diff --git a/kernel/sched.c b/kernel/sched.c
index 0e9344a71be3..530772646443 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7923,54 +7923,52 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
 }
 
 #ifdef CONFIG_SCHED_MC
-static ssize_t sched_mc_power_savings_show(struct sysdev_class *class,
-					   struct sysdev_class_attribute *attr,
-					   char *page)
+static ssize_t sched_mc_power_savings_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
 {
-	return sprintf(page, "%u\n", sched_mc_power_savings);
+	return sprintf(buf, "%u\n", sched_mc_power_savings);
 }
-static ssize_t sched_mc_power_savings_store(struct sysdev_class *class,
-					    struct sysdev_class_attribute *attr,
+static ssize_t sched_mc_power_savings_store(struct device *dev,
+					    struct device_attribute *attr,
 					    const char *buf, size_t count)
 {
 	return sched_power_savings_store(buf, count, 0);
 }
-static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644,
-			 sched_mc_power_savings_show,
-			 sched_mc_power_savings_store);
+static DEVICE_ATTR(sched_mc_power_savings, 0644,
+		   sched_mc_power_savings_show,
+		   sched_mc_power_savings_store);
 #endif
 
 #ifdef CONFIG_SCHED_SMT
-static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev,
-					    struct sysdev_class_attribute *attr,
-					    char *page)
+static ssize_t sched_smt_power_savings_show(struct device *dev,
+					    struct device_attribute *attr,
+					    char *buf)
 {
-	return sprintf(page, "%u\n", sched_smt_power_savings);
+	return sprintf(buf, "%u\n", sched_smt_power_savings);
 }
-static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev,
-					     struct sysdev_class_attribute *attr,
+static ssize_t sched_smt_power_savings_store(struct device *dev,
+					    struct device_attribute *attr,
 					     const char *buf, size_t count)
 {
 	return sched_power_savings_store(buf, count, 1);
 }
-static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644,
+static DEVICE_ATTR(sched_smt_power_savings, 0644,
 		   sched_smt_power_savings_show,
 		   sched_smt_power_savings_store);
 #endif
 
-int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
+int __init sched_create_sysfs_power_savings_entries(struct device *dev)
 {
 	int err = 0;
 
 #ifdef CONFIG_SCHED_SMT
 	if (smt_capable())
-		err = sysfs_create_file(&cls->kset.kobj,
-					&attr_sched_smt_power_savings.attr);
+		err = device_create_file(dev, &dev_attr_sched_smt_power_savings);
 #endif
 #ifdef CONFIG_SCHED_MC
 	if (!err && mc_capable())
-		err = sysfs_create_file(&cls->kset.kobj,
-					&attr_sched_mc_power_savings.attr);
+		err = device_create_file(dev, &dev_attr_sched_mc_power_savings);
 #endif
 	return err;
 }
-- 
cgit v1.2.3


From 10fbcf4c6cb122005cdf36fc24d7683da92c7a27 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Wed, 21 Dec 2011 14:48:43 -0800
Subject: convert 'memory' sysdev_class to a regular subsystem

This moves the 'memory sysdev_class' over to a regular 'memory' subsystem
and converts the devices to regular devices. The sysdev drivers are
implemented as subsystem interfaces now.

After all sysdev classes are ported to regular driver core entities, the
sysdev implementation will be entirely removed from the kernel.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/powerpc/kernel/sysfs.c |   4 +-
 drivers/base/memory.c       | 160 ++++++++++++++++++--------------------------
 drivers/base/node.c         | 146 +++++++++++++++++++++-------------------
 include/linux/memory.h      |   3 +-
 include/linux/node.h        |   6 +-
 mm/compaction.c             |  10 +--
 mm/hugetlb.c                |  34 +++++-----
 mm/vmscan.c                 |  14 ++--
 8 files changed, 177 insertions(+), 200 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index f396ef27916b..5e7c1655f13a 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -592,7 +592,7 @@ static void register_nodes(void)
 int sysfs_add_device_to_node(struct device *dev, int nid)
 {
 	struct node *node = &node_devices[nid];
-	return sysfs_create_link(&node->sysdev.kobj, &dev->kobj,
+	return sysfs_create_link(&node->dev.kobj, &dev->kobj,
 			kobject_name(&dev->kobj));
 }
 EXPORT_SYMBOL_GPL(sysfs_add_device_to_node);
@@ -600,7 +600,7 @@ EXPORT_SYMBOL_GPL(sysfs_add_device_to_node);
 void sysfs_remove_device_from_node(struct device *dev, int nid)
 {
 	struct node *node = &node_devices[nid];
-	sysfs_remove_link(&node->sysdev.kobj, kobject_name(&dev->kobj));
+	sysfs_remove_link(&node->dev.kobj, kobject_name(&dev->kobj));
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);
 
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 8272d92d22c0..f17e3ea041c0 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -1,5 +1,5 @@
 /*
- * drivers/base/memory.c - basic Memory class support
+ * Memory subsystem support
  *
  * Written by Matt Tolentino <matthew.e.tolentino@intel.com>
  *            Dave Hansen <haveblue@us.ibm.com>
@@ -10,7 +10,6 @@
  * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
  */
 
-#include <linux/sysdev.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/topology.h>
@@ -38,26 +37,9 @@ static inline int base_memory_block_id(int section_nr)
 	return section_nr / sections_per_block;
 }
 
-static struct sysdev_class memory_sysdev_class = {
+static struct bus_type memory_subsys = {
 	.name = MEMORY_CLASS_NAME,
-};
-
-static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj)
-{
-	return MEMORY_CLASS_NAME;
-}
-
-static int memory_uevent(struct kset *kset, struct kobject *obj,
-			struct kobj_uevent_env *env)
-{
-	int retval = 0;
-
-	return retval;
-}
-
-static const struct kset_uevent_ops memory_uevent_ops = {
-	.name		= memory_uevent_name,
-	.uevent		= memory_uevent,
+	.dev_name = MEMORY_CLASS_NAME,
 };
 
 static BLOCKING_NOTIFIER_HEAD(memory_chain);
@@ -96,21 +78,21 @@ int register_memory(struct memory_block *memory)
 {
 	int error;
 
-	memory->sysdev.cls = &memory_sysdev_class;
-	memory->sysdev.id = memory->start_section_nr / sections_per_block;
+	memory->dev.bus = &memory_subsys;
+	memory->dev.id = memory->start_section_nr / sections_per_block;
 
-	error = sysdev_register(&memory->sysdev);
+	error = device_register(&memory->dev);
 	return error;
 }
 
 static void
 unregister_memory(struct memory_block *memory)
 {
-	BUG_ON(memory->sysdev.cls != &memory_sysdev_class);
+	BUG_ON(memory->dev.bus != &memory_subsys);
 
 	/* drop the ref. we got in remove_memory_block() */
-	kobject_put(&memory->sysdev.kobj);
-	sysdev_unregister(&memory->sysdev);
+	kobject_put(&memory->dev.kobj);
+	device_unregister(&memory->dev);
 }
 
 unsigned long __weak memory_block_size_bytes(void)
@@ -138,22 +120,22 @@ static unsigned long get_memory_block_size(void)
  * uses.
  */
 
-static ssize_t show_mem_start_phys_index(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t show_mem_start_phys_index(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
 	struct memory_block *mem =
-		container_of(dev, struct memory_block, sysdev);
+		container_of(dev, struct memory_block, dev);
 	unsigned long phys_index;
 
 	phys_index = mem->start_section_nr / sections_per_block;
 	return sprintf(buf, "%08lx\n", phys_index);
 }
 
-static ssize_t show_mem_end_phys_index(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t show_mem_end_phys_index(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
 	struct memory_block *mem =
-		container_of(dev, struct memory_block, sysdev);
+		container_of(dev, struct memory_block, dev);
 	unsigned long phys_index;
 
 	phys_index = mem->end_section_nr / sections_per_block;
@@ -163,13 +145,13 @@ static ssize_t show_mem_end_phys_index(struct sys_device *dev,
 /*
  * Show whether the section of memory is likely to be hot-removable
  */
-static ssize_t show_mem_removable(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t show_mem_removable(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
 	unsigned long i, pfn;
 	int ret = 1;
 	struct memory_block *mem =
-		container_of(dev, struct memory_block, sysdev);
+		container_of(dev, struct memory_block, dev);
 
 	for (i = 0; i < sections_per_block; i++) {
 		pfn = section_nr_to_pfn(mem->start_section_nr + i);
@@ -182,11 +164,11 @@ static ssize_t show_mem_removable(struct sys_device *dev,
 /*
  * online, offline, going offline, etc.
  */
-static ssize_t show_mem_state(struct sys_device *dev,
-			struct sysdev_attribute *attr, char *buf)
+static ssize_t show_mem_state(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
 	struct memory_block *mem =
-		container_of(dev, struct memory_block, sysdev);
+		container_of(dev, struct memory_block, dev);
 	ssize_t len = 0;
 
 	/*
@@ -324,13 +306,13 @@ out:
 }
 
 static ssize_t
-store_mem_state(struct sys_device *dev,
-		struct sysdev_attribute *attr, const char *buf, size_t count)
+store_mem_state(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct memory_block *mem;
 	int ret = -EINVAL;
 
-	mem = container_of(dev, struct memory_block, sysdev);
+	mem = container_of(dev, struct memory_block, dev);
 
 	if (!strncmp(buf, "online", min((int)count, 6)))
 		ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
@@ -351,41 +333,41 @@ store_mem_state(struct sys_device *dev,
  * s.t. if I offline all of these sections I can then
  * remove the physical device?
  */
-static ssize_t show_phys_device(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t show_phys_device(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	struct memory_block *mem =
-		container_of(dev, struct memory_block, sysdev);
+		container_of(dev, struct memory_block, dev);
 	return sprintf(buf, "%d\n", mem->phys_device);
 }
 
-static SYSDEV_ATTR(phys_index, 0444, show_mem_start_phys_index, NULL);
-static SYSDEV_ATTR(end_phys_index, 0444, show_mem_end_phys_index, NULL);
-static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state);
-static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
-static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL);
+static DEVICE_ATTR(phys_index, 0444, show_mem_start_phys_index, NULL);
+static DEVICE_ATTR(end_phys_index, 0444, show_mem_end_phys_index, NULL);
+static DEVICE_ATTR(state, 0644, show_mem_state, store_mem_state);
+static DEVICE_ATTR(phys_device, 0444, show_phys_device, NULL);
+static DEVICE_ATTR(removable, 0444, show_mem_removable, NULL);
 
 #define mem_create_simple_file(mem, attr_name)	\
-	sysdev_create_file(&mem->sysdev, &attr_##attr_name)
+	device_create_file(&mem->dev, &dev_attr_##attr_name)
 #define mem_remove_simple_file(mem, attr_name)	\
-	sysdev_remove_file(&mem->sysdev, &attr_##attr_name)
+	device_remove_file(&mem->dev, &dev_attr_##attr_name)
 
 /*
  * Block size attribute stuff
  */
 static ssize_t
-print_block_size(struct sysdev_class *class, struct sysdev_class_attribute *attr,
+print_block_size(struct device *dev, struct device_attribute *attr,
 		 char *buf)
 {
 	return sprintf(buf, "%lx\n", get_memory_block_size());
 }
 
-static SYSDEV_CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL);
+static DEVICE_ATTR(block_size_bytes, 0444, print_block_size, NULL);
 
 static int block_size_init(void)
 {
-	return sysfs_create_file(&memory_sysdev_class.kset.kobj,
-				&attr_block_size_bytes.attr);
+	return device_create_file(memory_subsys.dev_root,
+				  &dev_attr_block_size_bytes);
 }
 
 /*
@@ -396,7 +378,7 @@ static int block_size_init(void)
  */
 #ifdef CONFIG_ARCH_MEMORY_PROBE
 static ssize_t
-memory_probe_store(struct class *class, struct class_attribute *attr,
+memory_probe_store(struct device *dev, struct device_attribute *attr,
 		   const char *buf, size_t count)
 {
 	u64 phys_addr;
@@ -423,12 +405,11 @@ memory_probe_store(struct class *class, struct class_attribute *attr,
 out:
 	return ret;
 }
-static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
+static DEVICE_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
 
 static int memory_probe_init(void)
 {
-	return sysfs_create_file(&memory_sysdev_class.kset.kobj,
-				&class_attr_probe.attr);
+	return device_create_file(memory_subsys.dev_root, &dev_attr_probe);
 }
 #else
 static inline int memory_probe_init(void)
@@ -444,8 +425,8 @@ static inline int memory_probe_init(void)
 
 /* Soft offline a page */
 static ssize_t
-store_soft_offline_page(struct class *class,
-			struct class_attribute *attr,
+store_soft_offline_page(struct device *dev,
+			struct device_attribute *attr,
 			const char *buf, size_t count)
 {
 	int ret;
@@ -463,8 +444,8 @@ store_soft_offline_page(struct class *class,
 
 /* Forcibly offline a page, including killing processes. */
 static ssize_t
-store_hard_offline_page(struct class *class,
-			struct class_attribute *attr,
+store_hard_offline_page(struct device *dev,
+			struct device_attribute *attr,
 			const char *buf, size_t count)
 {
 	int ret;
@@ -478,18 +459,18 @@ store_hard_offline_page(struct class *class,
 	return ret ? ret : count;
 }
 
-static CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page);
-static CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page);
+static DEVICE_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page);
+static DEVICE_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page);
 
 static __init int memory_fail_init(void)
 {
 	int err;
 
-	err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
-				&class_attr_soft_offline_page.attr);
+	err = device_create_file(memory_subsys.dev_root,
+				&dev_attr_soft_offline_page);
 	if (!err)
-		err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
-				&class_attr_hard_offline_page.attr);
+		err = device_create_file(memory_subsys.dev_root,
+				&dev_attr_hard_offline_page);
 	return err;
 }
 #else
@@ -509,31 +490,23 @@ int __weak arch_get_memory_phys_device(unsigned long start_pfn)
 	return 0;
 }
 
+/*
+ * A reference for the returned object is held and the reference for the
+ * hinted object is released.
+ */
 struct memory_block *find_memory_block_hinted(struct mem_section *section,
 					      struct memory_block *hint)
 {
-	struct kobject *kobj;
-	struct sys_device *sysdev;
-	struct memory_block *mem;
-	char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
 	int block_id = base_memory_block_id(__section_nr(section));
+	struct device *hintdev = hint ? &hint->dev : NULL;
+	struct device *dev;
 
-	kobj = hint ? &hint->sysdev.kobj : NULL;
-
-	/*
-	 * This only works because we know that section == sysdev->id
-	 * slightly redundant with sysdev_register()
-	 */
-	sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, block_id);
-
-	kobj = kset_find_obj_hinted(&memory_sysdev_class.kset, name, kobj);
-	if (!kobj)
+	dev = subsys_find_device_by_id(&memory_subsys, block_id, hintdev);
+	if (hint)
+		put_device(&hint->dev);
+	if (!dev)
 		return NULL;
-
-	sysdev = container_of(kobj, struct sys_device, kobj);
-	mem = container_of(sysdev, struct memory_block, sysdev);
-
-	return mem;
+	return container_of(dev, struct memory_block, dev);
 }
 
 /*
@@ -542,7 +515,7 @@ struct memory_block *find_memory_block_hinted(struct mem_section *section,
  * this gets to be a real problem, we can always use a radix
  * tree or something here.
  *
- * This could be made generic for all sysdev classes.
+ * This could be made generic for all device subsystems.
  */
 struct memory_block *find_memory_block(struct mem_section *section)
 {
@@ -598,7 +571,7 @@ static int add_memory_section(int nid, struct mem_section *section,
 	mem = find_memory_block(section);
 	if (mem) {
 		mem->section_count++;
-		kobject_put(&mem->sysdev.kobj);
+		kobject_put(&mem->dev.kobj);
 	} else
 		ret = init_memory_block(&mem, section, state);
 
@@ -631,7 +604,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
 		unregister_memory(mem);
 		kfree(mem);
 	} else
-		kobject_put(&mem->sysdev.kobj);
+		kobject_put(&mem->dev.kobj);
 
 	mutex_unlock(&mem_sysfs_mutex);
 	return 0;
@@ -664,8 +637,7 @@ int __init memory_dev_init(void)
 	int err;
 	unsigned long block_sz;
 
-	memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops;
-	ret = sysdev_class_register(&memory_sysdev_class);
+	ret = subsys_system_register(&memory_subsys, NULL);
 	if (ret)
 		goto out;
 
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 6ce1501c7de5..996d2189689b 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -1,8 +1,7 @@
 /*
- * drivers/base/node.c - basic Node class support
+ * Basic Node interface support
  */
 
-#include <linux/sysdev.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/mm.h>
@@ -19,18 +18,16 @@
 #include <linux/swap.h>
 #include <linux/slab.h>
 
-static struct sysdev_class_attribute *node_state_attrs[];
-
-static struct sysdev_class node_class = {
+static struct bus_type node_subsys = {
 	.name = "node",
-	.attrs = node_state_attrs,
+	.dev_name = "node",
 };
 
 
-static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf)
+static ssize_t node_read_cpumap(struct device *dev, int type, char *buf)
 {
 	struct node *node_dev = to_node(dev);
-	const struct cpumask *mask = cpumask_of_node(node_dev->sysdev.id);
+	const struct cpumask *mask = cpumask_of_node(node_dev->dev.id);
 	int len;
 
 	/* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
@@ -44,23 +41,23 @@ static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf)
 	return len;
 }
 
-static inline ssize_t node_read_cpumask(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static inline ssize_t node_read_cpumask(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	return node_read_cpumap(dev, 0, buf);
 }
-static inline ssize_t node_read_cpulist(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static inline ssize_t node_read_cpulist(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	return node_read_cpumap(dev, 1, buf);
 }
 
-static SYSDEV_ATTR(cpumap,  S_IRUGO, node_read_cpumask, NULL);
-static SYSDEV_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
+static DEVICE_ATTR(cpumap,  S_IRUGO, node_read_cpumask, NULL);
+static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
 
 #define K(x) ((x) << (PAGE_SHIFT - 10))
-static ssize_t node_read_meminfo(struct sys_device * dev,
-			struct sysdev_attribute *attr, char * buf)
+static ssize_t node_read_meminfo(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
 	int n;
 	int nid = dev->id;
@@ -155,10 +152,10 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 }
 
 #undef K
-static SYSDEV_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL);
+static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL);
 
-static ssize_t node_read_numastat(struct sys_device * dev,
-				struct sysdev_attribute *attr, char * buf)
+static ssize_t node_read_numastat(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf,
 		       "numa_hit %lu\n"
@@ -174,10 +171,10 @@ static ssize_t node_read_numastat(struct sys_device * dev,
 		       node_page_state(dev->id, NUMA_LOCAL),
 		       node_page_state(dev->id, NUMA_OTHER));
 }
-static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
+static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
 
-static ssize_t node_read_vmstat(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+static ssize_t node_read_vmstat(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	int nid = dev->id;
 	int i;
@@ -189,10 +186,10 @@ static ssize_t node_read_vmstat(struct sys_device *dev,
 
 	return n;
 }
-static SYSDEV_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL);
+static DEVICE_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL);
 
-static ssize_t node_read_distance(struct sys_device * dev,
-			struct sysdev_attribute *attr, char * buf)
+static ssize_t node_read_distance(struct device *dev,
+			struct device_attribute *attr, char * buf)
 {
 	int nid = dev->id;
 	int len = 0;
@@ -210,7 +207,7 @@ static ssize_t node_read_distance(struct sys_device * dev,
 	len += sprintf(buf + len, "\n");
 	return len;
 }
-static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL);
+static DEVICE_ATTR(distance, S_IRUGO, node_read_distance, NULL);
 
 #ifdef CONFIG_HUGETLBFS
 /*
@@ -228,7 +225,7 @@ static node_registration_func_t __hugetlb_unregister_node;
 static inline bool hugetlb_register_node(struct node *node)
 {
 	if (__hugetlb_register_node &&
-			node_state(node->sysdev.id, N_HIGH_MEMORY)) {
+			node_state(node->dev.id, N_HIGH_MEMORY)) {
 		__hugetlb_register_node(node);
 		return true;
 	}
@@ -264,17 +261,17 @@ int register_node(struct node *node, int num, struct node *parent)
 {
 	int error;
 
-	node->sysdev.id = num;
-	node->sysdev.cls = &node_class;
-	error = sysdev_register(&node->sysdev);
+	node->dev.id = num;
+	node->dev.bus = &node_subsys;
+	error = device_register(&node->dev);
 
 	if (!error){
-		sysdev_create_file(&node->sysdev, &attr_cpumap);
-		sysdev_create_file(&node->sysdev, &attr_cpulist);
-		sysdev_create_file(&node->sysdev, &attr_meminfo);
-		sysdev_create_file(&node->sysdev, &attr_numastat);
-		sysdev_create_file(&node->sysdev, &attr_distance);
-		sysdev_create_file(&node->sysdev, &attr_vmstat);
+		device_create_file(&node->dev, &dev_attr_cpumap);
+		device_create_file(&node->dev, &dev_attr_cpulist);
+		device_create_file(&node->dev, &dev_attr_meminfo);
+		device_create_file(&node->dev, &dev_attr_numastat);
+		device_create_file(&node->dev, &dev_attr_distance);
+		device_create_file(&node->dev, &dev_attr_vmstat);
 
 		scan_unevictable_register_node(node);
 
@@ -294,17 +291,17 @@ int register_node(struct node *node, int num, struct node *parent)
  */
 void unregister_node(struct node *node)
 {
-	sysdev_remove_file(&node->sysdev, &attr_cpumap);
-	sysdev_remove_file(&node->sysdev, &attr_cpulist);
-	sysdev_remove_file(&node->sysdev, &attr_meminfo);
-	sysdev_remove_file(&node->sysdev, &attr_numastat);
-	sysdev_remove_file(&node->sysdev, &attr_distance);
-	sysdev_remove_file(&node->sysdev, &attr_vmstat);
+	device_remove_file(&node->dev, &dev_attr_cpumap);
+	device_remove_file(&node->dev, &dev_attr_cpulist);
+	device_remove_file(&node->dev, &dev_attr_meminfo);
+	device_remove_file(&node->dev, &dev_attr_numastat);
+	device_remove_file(&node->dev, &dev_attr_distance);
+	device_remove_file(&node->dev, &dev_attr_vmstat);
 
 	scan_unevictable_unregister_node(node);
 	hugetlb_unregister_node(node);		/* no-op, if memoryless node */
 
-	sysdev_unregister(&node->sysdev);
+	device_unregister(&node->dev);
 }
 
 struct node node_devices[MAX_NUMNODES];
@@ -324,15 +321,15 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid)
 	if (!obj)
 		return 0;
 
-	ret = sysfs_create_link(&node_devices[nid].sysdev.kobj,
+	ret = sysfs_create_link(&node_devices[nid].dev.kobj,
 				&obj->kobj,
 				kobject_name(&obj->kobj));
 	if (ret)
 		return ret;
 
 	return sysfs_create_link(&obj->kobj,
-				 &node_devices[nid].sysdev.kobj,
-				 kobject_name(&node_devices[nid].sysdev.kobj));
+				 &node_devices[nid].dev.kobj,
+				 kobject_name(&node_devices[nid].dev.kobj));
 }
 
 int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
@@ -346,10 +343,10 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
 	if (!obj)
 		return 0;
 
-	sysfs_remove_link(&node_devices[nid].sysdev.kobj,
+	sysfs_remove_link(&node_devices[nid].dev.kobj,
 			  kobject_name(&obj->kobj));
 	sysfs_remove_link(&obj->kobj,
-			  kobject_name(&node_devices[nid].sysdev.kobj));
+			  kobject_name(&node_devices[nid].dev.kobj));
 
 	return 0;
 }
@@ -391,15 +388,15 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid)
 			continue;
 		if (page_nid != nid)
 			continue;
-		ret = sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj,
-					&mem_blk->sysdev.kobj,
-					kobject_name(&mem_blk->sysdev.kobj));
+		ret = sysfs_create_link_nowarn(&node_devices[nid].dev.kobj,
+					&mem_blk->dev.kobj,
+					kobject_name(&mem_blk->dev.kobj));
 		if (ret)
 			return ret;
 
-		return sysfs_create_link_nowarn(&mem_blk->sysdev.kobj,
-				&node_devices[nid].sysdev.kobj,
-				kobject_name(&node_devices[nid].sysdev.kobj));
+		return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
+				&node_devices[nid].dev.kobj,
+				kobject_name(&node_devices[nid].dev.kobj));
 	}
 	/* mem section does not span the specified node */
 	return 0;
@@ -432,10 +429,10 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
 			continue;
 		if (node_test_and_set(nid, *unlinked_nodes))
 			continue;
-		sysfs_remove_link(&node_devices[nid].sysdev.kobj,
-			 kobject_name(&mem_blk->sysdev.kobj));
-		sysfs_remove_link(&mem_blk->sysdev.kobj,
-			 kobject_name(&node_devices[nid].sysdev.kobj));
+		sysfs_remove_link(&node_devices[nid].dev.kobj,
+			 kobject_name(&mem_blk->dev.kobj));
+		sysfs_remove_link(&mem_blk->dev.kobj,
+			 kobject_name(&node_devices[nid].dev.kobj));
 	}
 	NODEMASK_FREE(unlinked_nodes);
 	return 0;
@@ -466,7 +463,7 @@ static int link_mem_sections(int nid)
 	}
 
 	if (mem_blk)
-		kobject_put(&mem_blk->sysdev.kobj);
+		kobject_put(&mem_blk->dev.kobj);
 	return err;
 }
 
@@ -594,19 +591,19 @@ static ssize_t print_nodes_state(enum node_states state, char *buf)
 }
 
 struct node_attr {
-	struct sysdev_class_attribute attr;
+	struct device_attribute attr;
 	enum node_states state;
 };
 
-static ssize_t show_node_state(struct sysdev_class *class,
-			       struct sysdev_class_attribute *attr, char *buf)
+static ssize_t show_node_state(struct device *dev,
+			       struct device_attribute *attr, char *buf)
 {
 	struct node_attr *na = container_of(attr, struct node_attr, attr);
 	return print_nodes_state(na->state, buf);
 }
 
 #define _NODE_ATTR(name, state) \
-	{ _SYSDEV_CLASS_ATTR(name, 0444, show_node_state, NULL), state }
+	{ __ATTR(name, 0444, show_node_state, NULL), state }
 
 static struct node_attr node_state_attr[] = {
 	_NODE_ATTR(possible, N_POSSIBLE),
@@ -618,17 +615,26 @@ static struct node_attr node_state_attr[] = {
 #endif
 };
 
-static struct sysdev_class_attribute *node_state_attrs[] = {
-	&node_state_attr[0].attr,
-	&node_state_attr[1].attr,
-	&node_state_attr[2].attr,
-	&node_state_attr[3].attr,
+static struct attribute *node_state_attrs[] = {
+	&node_state_attr[0].attr.attr,
+	&node_state_attr[1].attr.attr,
+	&node_state_attr[2].attr.attr,
+	&node_state_attr[3].attr.attr,
 #ifdef CONFIG_HIGHMEM
-	&node_state_attr[4].attr,
+	&node_state_attr[4].attr.attr,
 #endif
 	NULL
 };
 
+static struct attribute_group memory_root_attr_group = {
+	.attrs = node_state_attrs,
+};
+
+static const struct attribute_group *cpu_root_attr_groups[] = {
+	&memory_root_attr_group,
+	NULL,
+};
+
 #define NODE_CALLBACK_PRI	2	/* lower than SLAB */
 static int __init register_node_type(void)
 {
@@ -637,7 +643,7 @@ static int __init register_node_type(void)
  	BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
  	BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES);
 
-	ret = sysdev_class_register(&node_class);
+	ret = subsys_system_register(&node_subsys, cpu_root_attr_groups);
 	if (!ret) {
 		hotplug_memory_notifier(node_memory_callback,
 					NODE_CALLBACK_PRI);
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 935699b30b7c..1ac7f6e405f9 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -15,7 +15,6 @@
 #ifndef _LINUX_MEMORY_H_
 #define _LINUX_MEMORY_H_
 
-#include <linux/sysdev.h>
 #include <linux/node.h>
 #include <linux/compiler.h>
 #include <linux/mutex.h>
@@ -38,7 +37,7 @@ struct memory_block {
 	int phys_device;		/* to which fru does this belong? */
 	void *hw;			/* optional pointer to fw/hw data */
 	int (*phys_callback)(struct memory_block *);
-	struct sys_device sysdev;
+	struct device dev;
 };
 
 int arch_get_memory_phys_device(unsigned long start_pfn);
diff --git a/include/linux/node.h b/include/linux/node.h
index 92370e22343c..624e53cecc02 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -14,12 +14,12 @@
 #ifndef _LINUX_NODE_H_
 #define _LINUX_NODE_H_
 
-#include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/cpumask.h>
 #include <linux/workqueue.h>
 
 struct node {
-	struct sys_device	sysdev;
+	struct device	dev;
 
 #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS)
 	struct work_struct	node_work;
@@ -80,6 +80,6 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
 }
 #endif
 
-#define to_node(sys_device) container_of(sys_device, struct node, sysdev)
+#define to_node(device) container_of(device, struct node, dev)
 
 #endif /* _LINUX_NODE_H_ */
diff --git a/mm/compaction.c b/mm/compaction.c
index 899d95638586..1253d7ac332b 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -721,23 +721,23 @@ int sysctl_extfrag_handler(struct ctl_table *table, int write,
 }
 
 #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
-ssize_t sysfs_compact_node(struct sys_device *dev,
-			struct sysdev_attribute *attr,
+ssize_t sysfs_compact_node(struct device *dev,
+			struct device_attribute *attr,
 			const char *buf, size_t count)
 {
 	compact_node(dev->id);
 
 	return count;
 }
-static SYSDEV_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);
+static DEVICE_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);
 
 int compaction_register_node(struct node *node)
 {
-	return sysdev_create_file(&node->sysdev, &attr_compact);
+	return device_create_file(&node->dev, &dev_attr_compact);
 }
 
 void compaction_unregister_node(struct node *node)
 {
-	return sysdev_remove_file(&node->sysdev, &attr_compact);
+	return device_remove_file(&node->dev, &dev_attr_compact);
 }
 #endif /* CONFIG_SYSFS && CONFIG_NUMA */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index dae27ba3be2c..ad713e2d61bc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1591,9 +1591,9 @@ static void __init hugetlb_sysfs_init(void)
 
 /*
  * node_hstate/s - associate per node hstate attributes, via their kobjects,
- * with node sysdevs in node_devices[] using a parallel array.  The array
- * index of a node sysdev or _hstate == node id.
- * This is here to avoid any static dependency of the node sysdev driver, in
+ * with node devices in node_devices[] using a parallel array.  The array
+ * index of a node device or _hstate == node id.
+ * This is here to avoid any static dependency of the node device driver, in
  * the base kernel, on the hugetlb module.
  */
 struct node_hstate {
@@ -1603,7 +1603,7 @@ struct node_hstate {
 struct node_hstate node_hstates[MAX_NUMNODES];
 
 /*
- * A subset of global hstate attributes for node sysdevs
+ * A subset of global hstate attributes for node devices
  */
 static struct attribute *per_node_hstate_attrs[] = {
 	&nr_hugepages_attr.attr,
@@ -1617,7 +1617,7 @@ static struct attribute_group per_node_hstate_attr_group = {
 };
 
 /*
- * kobj_to_node_hstate - lookup global hstate for node sysdev hstate attr kobj.
+ * kobj_to_node_hstate - lookup global hstate for node device hstate attr kobj.
  * Returns node id via non-NULL nidp.
  */
 static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
@@ -1640,13 +1640,13 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
 }
 
 /*
- * Unregister hstate attributes from a single node sysdev.
+ * Unregister hstate attributes from a single node device.
  * No-op if no hstate attributes attached.
  */
 void hugetlb_unregister_node(struct node *node)
 {
 	struct hstate *h;
-	struct node_hstate *nhs = &node_hstates[node->sysdev.id];
+	struct node_hstate *nhs = &node_hstates[node->dev.id];
 
 	if (!nhs->hugepages_kobj)
 		return;		/* no hstate attributes */
@@ -1662,7 +1662,7 @@ void hugetlb_unregister_node(struct node *node)
 }
 
 /*
- * hugetlb module exit:  unregister hstate attributes from node sysdevs
+ * hugetlb module exit:  unregister hstate attributes from node devices
  * that have them.
  */
 static void hugetlb_unregister_all_nodes(void)
@@ -1670,7 +1670,7 @@ static void hugetlb_unregister_all_nodes(void)
 	int nid;
 
 	/*
-	 * disable node sysdev registrations.
+	 * disable node device registrations.
 	 */
 	register_hugetlbfs_with_node(NULL, NULL);
 
@@ -1682,20 +1682,20 @@ static void hugetlb_unregister_all_nodes(void)
 }
 
 /*
- * Register hstate attributes for a single node sysdev.
+ * Register hstate attributes for a single node device.
  * No-op if attributes already registered.
  */
 void hugetlb_register_node(struct node *node)
 {
 	struct hstate *h;
-	struct node_hstate *nhs = &node_hstates[node->sysdev.id];
+	struct node_hstate *nhs = &node_hstates[node->dev.id];
 	int err;
 
 	if (nhs->hugepages_kobj)
 		return;		/* already allocated */
 
 	nhs->hugepages_kobj = kobject_create_and_add("hugepages",
-							&node->sysdev.kobj);
+							&node->dev.kobj);
 	if (!nhs->hugepages_kobj)
 		return;
 
@@ -1706,7 +1706,7 @@ void hugetlb_register_node(struct node *node)
 		if (err) {
 			printk(KERN_ERR "Hugetlb: Unable to add hstate %s"
 					" for node %d\n",
-						h->name, node->sysdev.id);
+						h->name, node->dev.id);
 			hugetlb_unregister_node(node);
 			break;
 		}
@@ -1715,8 +1715,8 @@ void hugetlb_register_node(struct node *node)
 
 /*
  * hugetlb init time:  register hstate attributes for all registered node
- * sysdevs of nodes that have memory.  All on-line nodes should have
- * registered their associated sysdev by this time.
+ * devices of nodes that have memory.  All on-line nodes should have
+ * registered their associated device by this time.
  */
 static void hugetlb_register_all_nodes(void)
 {
@@ -1724,12 +1724,12 @@ static void hugetlb_register_all_nodes(void)
 
 	for_each_node_state(nid, N_HIGH_MEMORY) {
 		struct node *node = &node_devices[nid];
-		if (node->sysdev.id == nid)
+		if (node->dev.id == nid)
 			hugetlb_register_node(node);
 	}
 
 	/*
-	 * Let the node sysdev driver know we're here so it can
+	 * Let the node device driver know we're here so it can
 	 * [un]register hstate attributes on node hotplug.
 	 */
 	register_hugetlbfs_with_node(hugetlb_register_node,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a1893c050795..2b4189d759e4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3475,16 +3475,16 @@ int scan_unevictable_handler(struct ctl_table *table, int write,
  * a specified node's per zone unevictable lists for evictable pages.
  */
 
-static ssize_t read_scan_unevictable_node(struct sys_device *dev,
-					  struct sysdev_attribute *attr,
+static ssize_t read_scan_unevictable_node(struct device *dev,
+					  struct device_attribute *attr,
 					  char *buf)
 {
 	warn_scan_unevictable_pages();
 	return sprintf(buf, "0\n");	/* always zero; should fit... */
 }
 
-static ssize_t write_scan_unevictable_node(struct sys_device *dev,
-					   struct sysdev_attribute *attr,
+static ssize_t write_scan_unevictable_node(struct device *dev,
+					   struct device_attribute *attr,
 					const char *buf, size_t count)
 {
 	warn_scan_unevictable_pages();
@@ -3492,17 +3492,17 @@ static ssize_t write_scan_unevictable_node(struct sys_device *dev,
 }
 
 
-static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
 			read_scan_unevictable_node,
 			write_scan_unevictable_node);
 
 int scan_unevictable_register_node(struct node *node)
 {
-	return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages);
+	return device_create_file(&node->dev, &dev_attr_scan_unevictable_pages);
 }
 
 void scan_unevictable_unregister_node(struct node *node)
 {
-	sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
+	device_remove_file(&node->dev, &dev_attr_scan_unevictable_pages);
 }
 #endif
-- 
cgit v1.2.3


From b3e8d7b2478401b2f25f4566a90faad54f7d6d07 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Wed, 21 Dec 2011 15:13:54 -0800
Subject: kobject: remove kset_find_obj_hinted()

Now that there are no in-kernel users of this function, remove it as it
is no longer needed.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kobject.h |  2 --
 lib/kobject.c           | 37 -------------------------------------
 2 files changed, 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index ad81e1c51487..fc615a97e2d3 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -191,8 +191,6 @@ static inline struct kobj_type *get_ktype(struct kobject *kobj)
 }
 
 extern struct kobject *kset_find_obj(struct kset *, const char *);
-extern struct kobject *kset_find_obj_hinted(struct kset *, const char *,
-						struct kobject *);
 
 /* The global /sys/kernel/ kobject for people to chain off of */
 extern struct kobject *kernel_kobj;
diff --git a/lib/kobject.c b/lib/kobject.c
index 640bd98a4c8a..c33d7a18d635 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -745,44 +745,12 @@ void kset_unregister(struct kset *k)
  * take a reference and return the object.
  */
 struct kobject *kset_find_obj(struct kset *kset, const char *name)
-{
-	return kset_find_obj_hinted(kset, name, NULL);
-}
-
-/**
- * kset_find_obj_hinted - search for object in kset given a predecessor hint.
- * @kset: kset we're looking in.
- * @name: object's name.
- * @hint: hint to possible object's predecessor.
- *
- * Check the hint's next object and if it is a match return it directly,
- * otherwise, fall back to the behavior of kset_find_obj().  Either way
- * a reference for the returned object is held and the reference on the
- * hinted object is released.
- */
-struct kobject *kset_find_obj_hinted(struct kset *kset, const char *name,
-				     struct kobject *hint)
 {
 	struct kobject *k;
 	struct kobject *ret = NULL;
 
 	spin_lock(&kset->list_lock);
 
-	if (!hint)
-		goto slow_search;
-
-	/* end of list detection */
-	if (hint->entry.next == kset->list.next)
-		goto slow_search;
-
-	k = container_of(hint->entry.next, struct kobject, entry);
-	if (!kobject_name(k) || strcmp(kobject_name(k), name))
-		goto slow_search;
-
-	ret = kobject_get(k);
-	goto unlock_exit;
-
-slow_search:
 	list_for_each_entry(k, &kset->list, entry) {
 		if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
 			ret = kobject_get(k);
@@ -790,12 +758,7 @@ slow_search:
 		}
 	}
 
-unlock_exit:
 	spin_unlock(&kset->list_lock);
-
-	if (hint)
-		kobject_put(hint);
-
 	return ret;
 }
 
-- 
cgit v1.2.3


From e30e2fdfe56288576ee9e04dbb06b4bd5f282203 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 22 Dec 2011 02:45:29 +0530
Subject: VFS: Fix race between CPU hotplug and lglocks

Currently, the *_global_[un]lock_online() routines are not at all synchronized
with CPU hotplug. Soft-lockups detected as a consequence of this race was
reported earlier at https://lkml.org/lkml/2011/8/24/185. (Thanks to Cong Meng
for finding out that the root-cause of this issue is the race condition
between br_write_[un]lock() and CPU hotplug, which results in the lock states
getting messed up).

Fixing this race by just adding {get,put}_online_cpus() at appropriate places
in *_global_[un]lock_online() is not a good option, because, then suddenly
br_write_[un]lock() would become blocking, whereas they have been kept as
non-blocking all this time, and we would want to keep them that way.

So, overall, we want to ensure 3 things:
1. br_write_lock() and br_write_unlock() must remain as non-blocking.
2. The corresponding lock and unlock of the per-cpu spinlocks must not happen
   for different sets of CPUs.
3. Either prevent any new CPU online operation in between this lock-unlock, or
   ensure that the newly onlined CPU does not proceed with its corresponding
   per-cpu spinlock unlocked.

To achieve all this:
(a) We introduce a new spinlock that is taken by the *_global_lock_online()
    routine and released by the *_global_unlock_online() routine.
(b) We register a callback for CPU hotplug notifications, and this callback
    takes the same spinlock as above.
(c) We maintain a bitmap which is close to the cpu_online_mask, and once it is
    initialized in the lock_init() code, all future updates to it are done in
    the callback, under the above spinlock.
(d) The above bitmap is used (instead of cpu_online_mask) while locking and
    unlocking the per-cpu locks.

The callback takes the spinlock upon the CPU_UP_PREPARE event. So, if the
br_write_lock-unlock sequence is in progress, the callback keeps spinning,
thus preventing the CPU online operation till the lock-unlock sequence is
complete. This takes care of requirement (3).

The bitmap that we maintain remains unmodified throughout the lock-unlock
sequence, since all updates to it are managed by the callback, which takes
the same spinlock as the one taken by the lock code and released only by the
unlock routine. Combining this with (d) above, satisfies requirement (2).

Overall, since we use a spinlock (mentioned in (a)) to prevent CPU hotplug
operations from racing with br_write_lock-unlock, requirement (1) is also
taken care of.

By the way, it is to be noted that a CPU offline operation can actually run
in parallel with our lock-unlock sequence, because our callback doesn't react
to notifications earlier than CPU_DEAD (in order to maintain our bitmap
properly). And this means, since we use our own bitmap (which is stale, on
purpose) during the lock-unlock sequence, we could end up unlocking the
per-cpu lock of an offline CPU (because we had locked it earlier, when the
CPU was online), in order to satisfy requirement (2). But this is harmless,
though it looks a bit awkward.

Debugged-by: Cong Meng <mc@linux.vnet.ibm.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable@vger.kernel.org
---
 include/linux/lglock.h | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index f549056fb20b..87f402ccec55 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -22,6 +22,7 @@
 #include <linux/spinlock.h>
 #include <linux/lockdep.h>
 #include <linux/percpu.h>
+#include <linux/cpu.h>
 
 /* can make br locks by using local lock for read side, global lock for write */
 #define br_lock_init(name)	name##_lock_init()
@@ -72,9 +73,31 @@
 
 #define DEFINE_LGLOCK(name)						\
 									\
+ DEFINE_SPINLOCK(name##_cpu_lock);					\
+ cpumask_t name##_cpus __read_mostly;					\
  DEFINE_PER_CPU(arch_spinlock_t, name##_lock);				\
  DEFINE_LGLOCK_LOCKDEP(name);						\
 									\
+ static int								\
+ name##_lg_cpu_callback(struct notifier_block *nb,			\
+				unsigned long action, void *hcpu)	\
+ {									\
+	switch (action & ~CPU_TASKS_FROZEN) {				\
+	case CPU_UP_PREPARE:						\
+		spin_lock(&name##_cpu_lock);				\
+		cpu_set((unsigned long)hcpu, name##_cpus);		\
+		spin_unlock(&name##_cpu_lock);				\
+		break;							\
+	case CPU_UP_CANCELED: case CPU_DEAD:				\
+		spin_lock(&name##_cpu_lock);				\
+		cpu_clear((unsigned long)hcpu, name##_cpus);		\
+		spin_unlock(&name##_cpu_lock);				\
+	}								\
+	return NOTIFY_OK;						\
+ }									\
+ static struct notifier_block name##_lg_cpu_notifier = {		\
+	.notifier_call = name##_lg_cpu_callback,			\
+ };									\
  void name##_lock_init(void) {						\
 	int i;								\
 	LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \
@@ -83,6 +106,11 @@
 		lock = &per_cpu(name##_lock, i);			\
 		*lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;	\
 	}								\
+	register_hotcpu_notifier(&name##_lg_cpu_notifier);		\
+	get_online_cpus();						\
+	for_each_online_cpu(i)						\
+		cpu_set(i, name##_cpus);				\
+	put_online_cpus();						\
  }									\
  EXPORT_SYMBOL(name##_lock_init);					\
 									\
@@ -124,9 +152,9 @@
 									\
  void name##_global_lock_online(void) {					\
 	int i;								\
-	preempt_disable();						\
+	spin_lock(&name##_cpu_lock);					\
 	rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_);		\
-	for_each_online_cpu(i) {					\
+	for_each_cpu(i, &name##_cpus) {					\
 		arch_spinlock_t *lock;					\
 		lock = &per_cpu(name##_lock, i);			\
 		arch_spin_lock(lock);					\
@@ -137,12 +165,12 @@
  void name##_global_unlock_online(void) {				\
 	int i;								\
 	rwlock_release(&name##_lock_dep_map, 1, _RET_IP_);		\
-	for_each_online_cpu(i) {					\
+	for_each_cpu(i, &name##_cpus) {					\
 		arch_spinlock_t *lock;					\
 		lock = &per_cpu(name##_lock, i);			\
 		arch_spin_unlock(lock);					\
 	}								\
-	preempt_enable();						\
+	spin_unlock(&name##_cpu_lock);					\
  }									\
  EXPORT_SYMBOL(name##_global_unlock_online);				\
 									\
-- 
cgit v1.2.3


From 93bcb23b38f634e8fb4ddda0d3f4862fda5cedae Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Wed, 21 Dec 2011 23:00:46 +0800
Subject: regulator: mc13892: add device tree probe support

It adds device tree probe support for mc13892-regulator driver.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/mc13892-regulator.c      | 43 ++++++++++++++++------
 drivers/regulator/mc13xxx-regulator-core.c | 57 ++++++++++++++++++++++++++++++
 drivers/regulator/mc13xxx.h                | 20 +++++++++++
 include/linux/mfd/mc13xxx.h                |  1 +
 4 files changed, 110 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/mc13892-regulator.c b/drivers/regulator/mc13892-regulator.c
index 2824804a2892..46bfa4ae2afd 100644
--- a/drivers/regulator/mc13892-regulator.c
+++ b/drivers/regulator/mc13892-regulator.c
@@ -527,18 +527,27 @@ static int __devinit mc13892_regulator_probe(struct platform_device *pdev)
 	struct mc13xxx *mc13892 = dev_get_drvdata(pdev->dev.parent);
 	struct mc13xxx_regulator_platform_data *pdata =
 		dev_get_platdata(&pdev->dev);
-	struct mc13xxx_regulator_init_data *init_data;
+	struct mc13xxx_regulator_init_data *mc13xxx_data;
 	int i, ret;
+	int num_regulators = 0;
 	u32 val;
 
+	num_regulators = mc13xxx_get_num_regulators_dt(pdev);
+	if (num_regulators <= 0 && pdata)
+		num_regulators = pdata->num_regulators;
+	if (num_regulators <= 0)
+		return -EINVAL;
+
 	priv = kzalloc(sizeof(*priv) +
-		pdata->num_regulators * sizeof(priv->regulators[0]),
+		num_regulators * sizeof(priv->regulators[0]),
 		GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
+	priv->num_regulators = num_regulators;
 	priv->mc13xxx_regulators = mc13892_regulators;
 	priv->mc13xxx = mc13892;
+	platform_set_drvdata(pdev, priv);
 
 	mc13xxx_lock(mc13892);
 	ret = mc13xxx_reg_read(mc13892, MC13892_REVISION, &val);
@@ -569,11 +578,27 @@ static int __devinit mc13892_regulator_probe(struct platform_device *pdev)
 		= mc13892_vcam_set_mode;
 	mc13892_regulators[MC13892_VCAM].desc.ops->get_mode
 		= mc13892_vcam_get_mode;
-	for (i = 0; i < pdata->num_regulators; i++) {
-		init_data = &pdata->regulators[i];
+
+	mc13xxx_data = mc13xxx_parse_regulators_dt(pdev, mc13892_regulators,
+					ARRAY_SIZE(mc13892_regulators));
+	for (i = 0; i < num_regulators; i++) {
+		struct regulator_init_data *init_data;
+		struct regulator_desc *desc;
+		struct device_node *node = NULL;
+		int id;
+
+		if (mc13xxx_data) {
+			id = mc13xxx_data[i].id;
+			init_data = mc13xxx_data[i].init_data;
+			node = mc13xxx_data[i].node;
+		} else {
+			id = pdata->regulators[i].id;
+			init_data = pdata->regulators[i].init_data;
+		}
+		desc = &mc13892_regulators[id].desc;
+
 		priv->regulators[i] = regulator_register(
-			&mc13892_regulators[init_data->id].desc,
-			&pdev->dev, init_data->init_data, priv, NULL);
+			desc, &pdev->dev, init_data, priv, node);
 
 		if (IS_ERR(priv->regulators[i])) {
 			dev_err(&pdev->dev, "failed to register regulator %s\n",
@@ -583,8 +608,6 @@ static int __devinit mc13892_regulator_probe(struct platform_device *pdev)
 		}
 	}
 
-	platform_set_drvdata(pdev, priv);
-
 	return 0;
 err:
 	while (--i >= 0)
@@ -600,13 +623,11 @@ err_free:
 static int __devexit mc13892_regulator_remove(struct platform_device *pdev)
 {
 	struct mc13xxx_regulator_priv *priv = platform_get_drvdata(pdev);
-	struct mc13xxx_regulator_platform_data *pdata =
-		dev_get_platdata(&pdev->dev);
 	int i;
 
 	platform_set_drvdata(pdev, NULL);
 
-	for (i = 0; i < pdata->num_regulators; i++)
+	for (i = 0; i < priv->num_regulators; i++)
 		regulator_unregister(priv->regulators[i]);
 
 	kfree(priv);
diff --git a/drivers/regulator/mc13xxx-regulator-core.c b/drivers/regulator/mc13xxx-regulator-core.c
index 6532853a6ef5..80ecafef1bc3 100644
--- a/drivers/regulator/mc13xxx-regulator-core.c
+++ b/drivers/regulator/mc13xxx-regulator-core.c
@@ -18,12 +18,14 @@
 #include <linux/mfd/mc13xxx.h>
 #include <linux/regulator/machine.h>
 #include <linux/regulator/driver.h>
+#include <linux/regulator/of_regulator.h>
 #include <linux/platform_device.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include "mc13xxx.h"
 
 static int mc13xxx_regulator_enable(struct regulator_dev *rdev)
@@ -236,6 +238,61 @@ int mc13xxx_sw_regulator_is_enabled(struct regulator_dev *rdev)
 }
 EXPORT_SYMBOL_GPL(mc13xxx_sw_regulator_is_enabled);
 
+#ifdef CONFIG_OF
+int __devinit mc13xxx_get_num_regulators_dt(struct platform_device *pdev)
+{
+	struct device_node *parent, *child;
+	int num = 0;
+
+	of_node_get(pdev->dev.parent->of_node);
+	parent = of_find_node_by_name(pdev->dev.parent->of_node, "regulators");
+	if (!parent)
+		return -ENODEV;
+
+	for_each_child_of_node(parent, child)
+		num++;
+
+	return num;
+}
+
+struct mc13xxx_regulator_init_data * __devinit mc13xxx_parse_regulators_dt(
+	struct platform_device *pdev, struct mc13xxx_regulator *regulators,
+	int num_regulators)
+{
+	struct mc13xxx_regulator_priv *priv = platform_get_drvdata(pdev);
+	struct mc13xxx_regulator_init_data *data, *p;
+	struct device_node *parent, *child;
+	int i;
+
+	of_node_get(pdev->dev.parent->of_node);
+	parent = of_find_node_by_name(pdev->dev.parent->of_node, "regulators");
+	if (!parent)
+		return NULL;
+
+	data = devm_kzalloc(&pdev->dev, sizeof(*data) * priv->num_regulators,
+			    GFP_KERNEL);
+	if (!data)
+		return NULL;
+	p = data;
+
+	for_each_child_of_node(parent, child) {
+		for (i = 0; i < num_regulators; i++) {
+			if (!of_node_cmp(child->name,
+					 regulators[i].desc.name)) {
+				p->id = i;
+				p->init_data = of_get_regulator_init_data(
+							&pdev->dev, child);
+				p->node = child;
+				p++;
+				break;
+			}
+		}
+	}
+
+	return data;
+}
+#endif
+
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Yong Shen <yong.shen@linaro.org>");
 MODULE_DESCRIPTION("Regulator Driver for Freescale MC13xxx PMIC");
diff --git a/drivers/regulator/mc13xxx.h b/drivers/regulator/mc13xxx.h
index 75e383226a87..b3961c658b05 100644
--- a/drivers/regulator/mc13xxx.h
+++ b/drivers/regulator/mc13xxx.h
@@ -29,6 +29,7 @@ struct mc13xxx_regulator_priv {
 	struct mc13xxx *mc13xxx;
 	u32 powermisc_pwgt_state;
 	struct mc13xxx_regulator *mc13xxx_regulators;
+	int num_regulators;
 	struct regulator_dev *regulators[];
 };
 
@@ -42,6 +43,25 @@ extern int mc13xxx_fixed_regulator_set_voltage(struct regulator_dev *rdev,
 		int min_uV, int max_uV, unsigned *selector);
 extern int mc13xxx_fixed_regulator_get_voltage(struct regulator_dev *rdev);
 
+#ifdef CONFIG_OF
+extern int mc13xxx_get_num_regulators_dt(struct platform_device *pdev);
+extern struct mc13xxx_regulator_init_data *mc13xxx_parse_regulators_dt(
+	struct platform_device *pdev, struct mc13xxx_regulator *regulators,
+	int num_regulators);
+#else
+static inline int mc13xxx_get_num_regulators_dt(struct platform_device *pdev)
+{
+	return -ENODEV;
+}
+
+static inline struct mc13xxx_regulator_init_data *mc13xxx_parse_regulators_dt(
+	struct platform_device *pdev, struct mc13xxx_regulator *regulators,
+	int num_regulators)
+{
+	return NULL;
+}
+#endif
+
 extern struct regulator_ops mc13xxx_regulator_ops;
 extern struct regulator_ops mc13xxx_fixed_regulator_ops;
 
diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h
index 3816c2fac0ad..a98e2a316d1f 100644
--- a/include/linux/mfd/mc13xxx.h
+++ b/include/linux/mfd/mc13xxx.h
@@ -69,6 +69,7 @@ struct regulator_init_data;
 struct mc13xxx_regulator_init_data {
 	int id;
 	struct regulator_init_data *init_data;
+	struct device_node *node;
 };
 
 struct mc13xxx_regulator_platform_data {
-- 
cgit v1.2.3


From 933393f58fef9963eac61db8093689544e29a600 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Thu, 22 Dec 2011 11:58:51 -0600
Subject: percpu: Remove irqsafe_cpu_xxx variants

We simply say that regular this_cpu use must be safe regardless of
preemption and interrupt state.  That has no material change for x86
and s390 implementations of this_cpu operations.  However, arches that
do not provide their own implementation for this_cpu operations will
now get code generated that disables interrupts instead of preemption.

-tj: This is part of on-going percpu API cleanup.  For detailed
     discussion of the subject, please refer to the following thread.

     http://thread.gmane.org/gmane.linux.kernel/1222078

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
LKML-Reference: <alpine.DEB.2.00.1112221154380.11787@router.home>
---
 arch/s390/include/asm/percpu.h     |  44 ++++-----
 arch/x86/include/asm/percpu.h      |  28 ------
 include/linux/netdevice.h          |   4 +-
 include/linux/netfilter/x_tables.h |   4 +-
 include/linux/percpu.h             | 190 +++++--------------------------------
 include/net/snmp.h                 |  14 +--
 mm/slub.c                          |   6 +-
 net/caif/caif_dev.c                |   4 +-
 net/caif/cffrml.c                  |   4 +-
 9 files changed, 62 insertions(+), 236 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 5325c89a5843..0fbd1899c7b0 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -19,7 +19,7 @@
 #define ARCH_NEEDS_WEAK_PER_CPU
 #endif
 
-#define arch_irqsafe_cpu_to_op(pcp, val, op)				\
+#define arch_this_cpu_to_op(pcp, val, op)				\
 do {									\
 	typedef typeof(pcp) pcp_op_T__;					\
 	pcp_op_T__ old__, new__, prev__;				\
@@ -41,27 +41,27 @@ do {									\
 	preempt_enable();						\
 } while (0)
 
-#define irqsafe_cpu_add_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +)
-#define irqsafe_cpu_add_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +)
-#define irqsafe_cpu_add_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +)
-#define irqsafe_cpu_add_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +)
+#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op(pcp, val, +)
+#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op(pcp, val, +)
+#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op(pcp, val, +)
+#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op(pcp, val, +)
 
-#define irqsafe_cpu_and_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &)
-#define irqsafe_cpu_and_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &)
-#define irqsafe_cpu_and_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &)
-#define irqsafe_cpu_and_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &)
+#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op(pcp, val, &)
+#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op(pcp, val, &)
+#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, &)
+#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, &)
 
-#define irqsafe_cpu_or_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |)
-#define irqsafe_cpu_or_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |)
-#define irqsafe_cpu_or_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |)
-#define irqsafe_cpu_or_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |)
+#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op(pcp, val, |)
+#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op(pcp, val, |)
+#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, |)
+#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, |)
 
-#define irqsafe_cpu_xor_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^)
-#define irqsafe_cpu_xor_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^)
-#define irqsafe_cpu_xor_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^)
-#define irqsafe_cpu_xor_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^)
+#define this_cpu_xor_1(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
+#define this_cpu_xor_2(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
+#define this_cpu_xor_4(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
+#define this_cpu_xor_8(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
 
-#define arch_irqsafe_cpu_cmpxchg(pcp, oval, nval)			\
+#define arch_this_cpu_cmpxchg(pcp, oval, nval)			\
 ({									\
 	typedef typeof(pcp) pcp_op_T__;					\
 	pcp_op_T__ ret__;						\
@@ -79,10 +79,10 @@ do {									\
 	ret__;								\
 })
 
-#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval)
-#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval)
-#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval)
-#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_1(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
 
 #include <asm-generic/percpu.h>
 
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3470c9d0ebba..562ccb5323de 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -414,22 +414,6 @@ do {									\
 #define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
 #define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
 
-#define irqsafe_cpu_add_1(pcp, val)	percpu_add_op((pcp), val)
-#define irqsafe_cpu_add_2(pcp, val)	percpu_add_op((pcp), val)
-#define irqsafe_cpu_add_4(pcp, val)	percpu_add_op((pcp), val)
-#define irqsafe_cpu_and_1(pcp, val)	percpu_to_op("and", (pcp), val)
-#define irqsafe_cpu_and_2(pcp, val)	percpu_to_op("and", (pcp), val)
-#define irqsafe_cpu_and_4(pcp, val)	percpu_to_op("and", (pcp), val)
-#define irqsafe_cpu_or_1(pcp, val)	percpu_to_op("or", (pcp), val)
-#define irqsafe_cpu_or_2(pcp, val)	percpu_to_op("or", (pcp), val)
-#define irqsafe_cpu_or_4(pcp, val)	percpu_to_op("or", (pcp), val)
-#define irqsafe_cpu_xor_1(pcp, val)	percpu_to_op("xor", (pcp), val)
-#define irqsafe_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val)
-#define irqsafe_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)
-#define irqsafe_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
-
 #ifndef CONFIG_M386
 #define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
 #define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
@@ -445,9 +429,6 @@ do {									\
 #define this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 #define this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 
-#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
-#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
-#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 #endif /* !CONFIG_M386 */
 
 #ifdef CONFIG_X86_CMPXCHG64
@@ -467,7 +448,6 @@ do {									\
 
 #define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
 #define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
-#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)	percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
 #endif /* CONFIG_X86_CMPXCHG64 */
 
 /*
@@ -495,13 +475,6 @@ do {									\
 #define this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
 #define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 
-#define irqsafe_cpu_add_8(pcp, val)	percpu_add_op((pcp), val)
-#define irqsafe_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val)
-#define irqsafe_cpu_or_8(pcp, val)	percpu_to_op("or", (pcp), val)
-#define irqsafe_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)
-#define irqsafe_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
-
 /*
  * Pretty complex macro to generate cmpxchg16 instruction.  The instruction
  * is not supported on early AMD64 processors so we must be able to emulate
@@ -532,7 +505,6 @@ do {									\
 
 #define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
 #define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
-#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)	percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
 
 #endif
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a82ad4dd306a..ca8d9bc4e502 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2115,7 +2115,7 @@ extern void netdev_run_todo(void);
  */
 static inline void dev_put(struct net_device *dev)
 {
-	irqsafe_cpu_dec(*dev->pcpu_refcnt);
+	this_cpu_dec(*dev->pcpu_refcnt);
 }
 
 /**
@@ -2126,7 +2126,7 @@ static inline void dev_put(struct net_device *dev)
  */
 static inline void dev_hold(struct net_device *dev)
 {
-	irqsafe_cpu_inc(*dev->pcpu_refcnt);
+	this_cpu_inc(*dev->pcpu_refcnt);
 }
 
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 32cddf78b13e..8d674a786744 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -471,7 +471,7 @@ DECLARE_PER_CPU(seqcount_t, xt_recseq);
  *
  * Begin packet processing : all readers must wait the end
  * 1) Must be called with preemption disabled
- * 2) softirqs must be disabled too (or we should use irqsafe_cpu_add())
+ * 2) softirqs must be disabled too (or we should use this_cpu_add())
  * Returns :
  *  1 if no recursion on this cpu
  *  0 if recursion detected
@@ -503,7 +503,7 @@ static inline unsigned int xt_write_recseq_begin(void)
  *
  * End packet processing : all readers can proceed
  * 1) Must be called with preemption disabled
- * 2) softirqs must be disabled too (or we should use irqsafe_cpu_add())
+ * 2) softirqs must be disabled too (or we should use this_cpu_add())
  */
 static inline void xt_write_recseq_end(unsigned int addend)
 {
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 9ca008f0c542..32cd1f67462e 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -172,10 +172,10 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
  * equal char, int or long.  percpu_read() evaluates to a lvalue and
  * all others to void.
  *
- * These operations are guaranteed to be atomic w.r.t. preemption.
- * The generic versions use plain get/put_cpu_var().  Archs are
+ * These operations are guaranteed to be atomic.
+ * The generic versions disable interrupts.  Archs are
  * encouraged to implement single-instruction alternatives which don't
- * require preemption protection.
+ * require protection.
  */
 #ifndef percpu_read
 # define percpu_read(var)						\
@@ -347,9 +347,10 @@ do {									\
 
 #define _this_cpu_generic_to_op(pcp, val, op)				\
 do {									\
-	preempt_disable();						\
+	unsigned long flags;						\
+	local_irq_save(flags);						\
 	*__this_cpu_ptr(&(pcp)) op val;					\
-	preempt_enable();						\
+	local_irq_restore(flags);					\
 } while (0)
 
 #ifndef this_cpu_write
@@ -447,10 +448,11 @@ do {									\
 #define _this_cpu_generic_add_return(pcp, val)				\
 ({									\
 	typeof(pcp) ret__;						\
-	preempt_disable();						\
+	unsigned long flags;						\
+	local_irq_save(flags);						\
 	__this_cpu_add(pcp, val);					\
 	ret__ = __this_cpu_read(pcp);					\
-	preempt_enable();						\
+	local_irq_restore(flags);					\
 	ret__;								\
 })
 
@@ -476,10 +478,11 @@ do {									\
 
 #define _this_cpu_generic_xchg(pcp, nval)				\
 ({	typeof(pcp) ret__;						\
-	preempt_disable();						\
+	unsigned long flags;						\
+	local_irq_save(flags);						\
 	ret__ = __this_cpu_read(pcp);					\
 	__this_cpu_write(pcp, nval);					\
-	preempt_enable();						\
+	local_irq_restore(flags);					\
 	ret__;								\
 })
 
@@ -501,12 +504,14 @@ do {									\
 #endif
 
 #define _this_cpu_generic_cmpxchg(pcp, oval, nval)			\
-({	typeof(pcp) ret__;						\
-	preempt_disable();						\
+({									\
+	typeof(pcp) ret__;						\
+	unsigned long flags;						\
+	local_irq_save(flags);						\
 	ret__ = __this_cpu_read(pcp);					\
 	if (ret__ == (oval))						\
 		__this_cpu_write(pcp, nval);				\
-	preempt_enable();						\
+	local_irq_restore(flags);					\
 	ret__;								\
 })
 
@@ -538,10 +543,11 @@ do {									\
 #define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)	\
 ({									\
 	int ret__;							\
-	preempt_disable();						\
+	unsigned long flags;						\
+	local_irq_save(flags);						\
 	ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2,		\
 			oval1, oval2, nval1, nval2);			\
-	preempt_enable();						\
+	local_irq_restore(flags);					\
 	ret__;								\
 })
 
@@ -567,9 +573,9 @@ do {									\
 #endif
 
 /*
- * Generic percpu operations that do not require preemption handling.
+ * Generic percpu operations for context that are safe from preemption/interrupts.
  * Either we do not care about races or the caller has the
- * responsibility of handling preemptions issues. Arch code can still
+ * responsibility of handling preemption/interrupt issues. Arch code can still
  * override these instructions since the arch per cpu code may be more
  * efficient and may actually get race freeness for free (that is the
  * case for x86 for example).
@@ -802,156 +808,4 @@ do {									\
 	__pcpu_double_call_return_bool(__this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
 #endif
 
-/*
- * IRQ safe versions of the per cpu RMW operations. Note that these operations
- * are *not* safe against modification of the same variable from another
- * processors (which one gets when using regular atomic operations)
- * They are guaranteed to be atomic vs. local interrupts and
- * preemption only.
- */
-#define irqsafe_cpu_generic_to_op(pcp, val, op)				\
-do {									\
-	unsigned long flags;						\
-	local_irq_save(flags);						\
-	*__this_cpu_ptr(&(pcp)) op val;					\
-	local_irq_restore(flags);					\
-} while (0)
-
-#ifndef irqsafe_cpu_add
-# ifndef irqsafe_cpu_add_1
-#  define irqsafe_cpu_add_1(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=)
-# endif
-# ifndef irqsafe_cpu_add_2
-#  define irqsafe_cpu_add_2(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=)
-# endif
-# ifndef irqsafe_cpu_add_4
-#  define irqsafe_cpu_add_4(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=)
-# endif
-# ifndef irqsafe_cpu_add_8
-#  define irqsafe_cpu_add_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=)
-# endif
-# define irqsafe_cpu_add(pcp, val) __pcpu_size_call(irqsafe_cpu_add_, (pcp), (val))
-#endif
-
-#ifndef irqsafe_cpu_sub
-# define irqsafe_cpu_sub(pcp, val)	irqsafe_cpu_add((pcp), -(val))
-#endif
-
-#ifndef irqsafe_cpu_inc
-# define irqsafe_cpu_inc(pcp)	irqsafe_cpu_add((pcp), 1)
-#endif
-
-#ifndef irqsafe_cpu_dec
-# define irqsafe_cpu_dec(pcp)	irqsafe_cpu_sub((pcp), 1)
-#endif
-
-#ifndef irqsafe_cpu_and
-# ifndef irqsafe_cpu_and_1
-#  define irqsafe_cpu_and_1(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=)
-# endif
-# ifndef irqsafe_cpu_and_2
-#  define irqsafe_cpu_and_2(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=)
-# endif
-# ifndef irqsafe_cpu_and_4
-#  define irqsafe_cpu_and_4(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=)
-# endif
-# ifndef irqsafe_cpu_and_8
-#  define irqsafe_cpu_and_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=)
-# endif
-# define irqsafe_cpu_and(pcp, val) __pcpu_size_call(irqsafe_cpu_and_, (val))
-#endif
-
-#ifndef irqsafe_cpu_or
-# ifndef irqsafe_cpu_or_1
-#  define irqsafe_cpu_or_1(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=)
-# endif
-# ifndef irqsafe_cpu_or_2
-#  define irqsafe_cpu_or_2(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=)
-# endif
-# ifndef irqsafe_cpu_or_4
-#  define irqsafe_cpu_or_4(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=)
-# endif
-# ifndef irqsafe_cpu_or_8
-#  define irqsafe_cpu_or_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=)
-# endif
-# define irqsafe_cpu_or(pcp, val) __pcpu_size_call(irqsafe_cpu_or_, (val))
-#endif
-
-#ifndef irqsafe_cpu_xor
-# ifndef irqsafe_cpu_xor_1
-#  define irqsafe_cpu_xor_1(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef irqsafe_cpu_xor_2
-#  define irqsafe_cpu_xor_2(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef irqsafe_cpu_xor_4
-#  define irqsafe_cpu_xor_4(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef irqsafe_cpu_xor_8
-#  define irqsafe_cpu_xor_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# define irqsafe_cpu_xor(pcp, val) __pcpu_size_call(irqsafe_cpu_xor_, (val))
-#endif
-
-#define irqsafe_cpu_generic_cmpxchg(pcp, oval, nval)			\
-({									\
-	typeof(pcp) ret__;						\
-	unsigned long flags;						\
-	local_irq_save(flags);						\
-	ret__ = __this_cpu_read(pcp);					\
-	if (ret__ == (oval))						\
-		__this_cpu_write(pcp, nval);				\
-	local_irq_restore(flags);					\
-	ret__;								\
-})
-
-#ifndef irqsafe_cpu_cmpxchg
-# ifndef irqsafe_cpu_cmpxchg_1
-#  define irqsafe_cpu_cmpxchg_1(pcp, oval, nval)	irqsafe_cpu_generic_cmpxchg(pcp, oval, nval)
-# endif
-# ifndef irqsafe_cpu_cmpxchg_2
-#  define irqsafe_cpu_cmpxchg_2(pcp, oval, nval)	irqsafe_cpu_generic_cmpxchg(pcp, oval, nval)
-# endif
-# ifndef irqsafe_cpu_cmpxchg_4
-#  define irqsafe_cpu_cmpxchg_4(pcp, oval, nval)	irqsafe_cpu_generic_cmpxchg(pcp, oval, nval)
-# endif
-# ifndef irqsafe_cpu_cmpxchg_8
-#  define irqsafe_cpu_cmpxchg_8(pcp, oval, nval)	irqsafe_cpu_generic_cmpxchg(pcp, oval, nval)
-# endif
-# define irqsafe_cpu_cmpxchg(pcp, oval, nval)		\
-	__pcpu_size_call_return2(irqsafe_cpu_cmpxchg_, (pcp), oval, nval)
-#endif
-
-#define irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)	\
-({									\
-	int ret__;							\
-	unsigned long flags;						\
-	local_irq_save(flags);						\
-	ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2,		\
-			oval1, oval2, nval1, nval2);			\
-	local_irq_restore(flags);					\
-	ret__;								\
-})
-
-#ifndef irqsafe_cpu_cmpxchg_double
-# ifndef irqsafe_cpu_cmpxchg_double_1
-#  define irqsafe_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2)	\
-	irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
-# endif
-# ifndef irqsafe_cpu_cmpxchg_double_2
-#  define irqsafe_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2)	\
-	irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
-# endif
-# ifndef irqsafe_cpu_cmpxchg_double_4
-#  define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2)	\
-	irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
-# endif
-# ifndef irqsafe_cpu_cmpxchg_double_8
-#  define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2)	\
-	irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
-# endif
-# define irqsafe_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)	\
-	__pcpu_double_call_return_bool(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
-#endif
-
 #endif /* __LINUX_PERCPU_H */
diff --git a/include/net/snmp.h b/include/net/snmp.h
index 8f0f9ac0307f..e067aed7e378 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -129,33 +129,33 @@ struct linux_xfrm_mib {
 			__this_cpu_inc(mib[0]->mibs[field])
 
 #define SNMP_INC_STATS_USER(mib, field)	\
-			irqsafe_cpu_inc(mib[0]->mibs[field])
+			this_cpu_inc(mib[0]->mibs[field])
 
 #define SNMP_INC_STATS_ATOMIC_LONG(mib, field)	\
 			atomic_long_inc(&mib->mibs[field])
 
 #define SNMP_INC_STATS(mib, field)	\
-			irqsafe_cpu_inc(mib[0]->mibs[field])
+			this_cpu_inc(mib[0]->mibs[field])
 
 #define SNMP_DEC_STATS(mib, field)	\
-			irqsafe_cpu_dec(mib[0]->mibs[field])
+			this_cpu_dec(mib[0]->mibs[field])
 
 #define SNMP_ADD_STATS_BH(mib, field, addend)	\
 			__this_cpu_add(mib[0]->mibs[field], addend)
 
 #define SNMP_ADD_STATS_USER(mib, field, addend)	\
-			irqsafe_cpu_add(mib[0]->mibs[field], addend)
+			this_cpu_add(mib[0]->mibs[field], addend)
 
 #define SNMP_ADD_STATS(mib, field, addend)	\
-			irqsafe_cpu_add(mib[0]->mibs[field], addend)
+			this_cpu_add(mib[0]->mibs[field], addend)
 /*
  * Use "__typeof__(*mib[0]) *ptr" instead of "__typeof__(mib[0]) ptr"
  * to make @ptr a non-percpu pointer.
  */
 #define SNMP_UPD_PO_STATS(mib, basefield, addend)	\
 	do { \
-		irqsafe_cpu_inc(mib[0]->mibs[basefield##PKTS]);		\
-		irqsafe_cpu_add(mib[0]->mibs[basefield##OCTETS], addend);	\
+		this_cpu_inc(mib[0]->mibs[basefield##PKTS]);		\
+		this_cpu_add(mib[0]->mibs[basefield##OCTETS], addend);	\
 	} while (0)
 #define SNMP_UPD_PO_STATS_BH(mib, basefield, addend)	\
 	do { \
diff --git a/mm/slub.c b/mm/slub.c
index ed3334d9b6da..0011489c28ac 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1978,7 +1978,7 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 		page->pobjects = pobjects;
 		page->next = oldpage;
 
-	} while (irqsafe_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
+	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
 	stat(s, CPU_PARTIAL_FREE);
 	return pobjects;
 }
@@ -2304,7 +2304,7 @@ redo:
 		 * Since this is without lock semantics the protection is only against
 		 * code executing on this cpu *not* from access by other cpus.
 		 */
-		if (unlikely(!irqsafe_cpu_cmpxchg_double(
+		if (unlikely(!this_cpu_cmpxchg_double(
 				s->cpu_slab->freelist, s->cpu_slab->tid,
 				object, tid,
 				get_freepointer_safe(s, object), next_tid(tid)))) {
@@ -2534,7 +2534,7 @@ redo:
 	if (likely(page == c->page)) {
 		set_freepointer(s, object, c->freelist);
 
-		if (unlikely(!irqsafe_cpu_cmpxchg_double(
+		if (unlikely(!this_cpu_cmpxchg_double(
 				s->cpu_slab->freelist, s->cpu_slab->tid,
 				c->freelist, tid,
 				object, next_tid(tid)))) {
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index f1fa1f6e658d..64930cc2746a 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -69,12 +69,12 @@ static struct caif_device_entry_list *caif_device_list(struct net *net)
 
 static void caifd_put(struct caif_device_entry *e)
 {
-	irqsafe_cpu_dec(*e->pcpu_refcnt);
+	this_cpu_dec(*e->pcpu_refcnt);
 }
 
 static void caifd_hold(struct caif_device_entry *e)
 {
-	irqsafe_cpu_inc(*e->pcpu_refcnt);
+	this_cpu_inc(*e->pcpu_refcnt);
 }
 
 static int caifd_refcnt_read(struct caif_device_entry *e)
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index d3ca87bf23b7..0a7df7ef062d 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -177,14 +177,14 @@ void cffrml_put(struct cflayer *layr)
 {
 	struct cffrml *this = container_obj(layr);
 	if (layr != NULL && this->pcpu_refcnt != NULL)
-		irqsafe_cpu_dec(*this->pcpu_refcnt);
+		this_cpu_dec(*this->pcpu_refcnt);
 }
 
 void cffrml_hold(struct cflayer *layr)
 {
 	struct cffrml *this = container_obj(layr);
 	if (layr != NULL && this->pcpu_refcnt != NULL)
-		irqsafe_cpu_inc(*this->pcpu_refcnt);
+		this_cpu_inc(*this->pcpu_refcnt);
 }
 
 int cffrml_refcnt_read(struct cflayer *layr)
-- 
cgit v1.2.3


From 38059ec2bd2ce9e4709f49f34795aa0944287908 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 23 Dec 2011 10:17:51 +1100
Subject: md: Fix userspace free_pages() macro

While using etags to find free_pages(), I stumbled across this debug
definition of free_pages() that is to be used while debugging some raid
code in userspace. The __get_free_pages() allocates the correct size,
but the free_pages() does not match. free_pages(), like
__get_free_pages(), takes an order and not a size.

Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/pq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 2b59cc824395..53272e9860a7 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -132,7 +132,7 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
 						     PROT_READ|PROT_WRITE,   \
 						     MAP_PRIVATE|MAP_ANONYMOUS,\
 						     0, 0))
-# define free_pages(x, y)	munmap((void *)(x), (y)*PAGE_SIZE)
+# define free_pages(x, y)	munmap((void *)(x), PAGE_SIZE << (y))
 
 static inline void cpu_relax(void)
 {
-- 
cgit v1.2.3


From 2d78f8c451785f030ac1676a18691896b59c69d8 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 23 Dec 2011 10:17:51 +1100
Subject: md: create externally visible flags for supporting hot-replace.

hot-replace is a feature being added to md which will allow a
device to be replaced without removing it from the array first.

With hot-replace a spare can be activated and recovery can start while
the original device is still in place, thus allowing a transition from
an unreliable device to a reliable device without leaving the array
degraded during the transition.  It can also be use when the original
device is still reliable but it not wanted for some reason.

This will eventually be supported in RAID4/5/6 and RAID10.

This patch adds a super-block flag to distinguish the replacement
device.  If an old kernel sees this flag it will reject the device.

It also adds two per-device flags which are viewable and settable via
sysfs.
   "want_replacement" can be set to request that a device be replaced.
   "replacement" is set to show that this device is replacing another
   device.

The "rd%d" links in /sys/block/mdXx/md only apply to the original
device, not the replacement.  We currently don't make links for the
replacement - there doesn't seem to be a need.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 Documentation/md.txt      | 22 ++++++++++---
 drivers/md/md.c           | 55 +++++++++++++++++++++++++++++++-
 drivers/md/md.h           | 80 ++++++++++++++++++++++++++++-------------------
 include/linux/raid/md_p.h |  7 +++--
 4 files changed, 125 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/md.txt b/Documentation/md.txt
index fc94770f44ab..993fba37b7d1 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -357,14 +357,14 @@ Each directory contains:
         written to, that device.
 
       state
-        A file recording the current state of the device in the array
+	A file recording the current state of the device in the array
 	which can be a comma separated list of
 	      faulty   - device has been kicked from active use due to
-                         a detected fault or it has unacknowledged bad
-                         blocks
+			 a detected fault, or it has unacknowledged bad
+			 blocks
 	      in_sync  - device is a fully in-sync member of the array
 	      writemostly - device will only be subject to read
-		         requests if there are no other options.
+			 requests if there are no other options.
 			 This applies only to raid1 arrays.
 	      blocked  - device has failed, and the failure hasn't been
 			 acknowledged yet by the metadata handler.
@@ -374,6 +374,13 @@ Each directory contains:
 			 This includes spares that are in the process
 			 of being recovered to
 	      write_error - device has ever seen a write error.
+	      want_replacement - device is (mostly) working but probably
+			 should be replaced, either due to errors or
+			 due to user request.
+	      replacement - device is a replacement for another active
+			 device with same raid_disk.
+
+
 	This list may grow in future.
 	This can be written to.
 	Writing "faulty"  simulates a failure on the device.
@@ -386,6 +393,13 @@ Each directory contains:
 	Writing "in_sync" sets the in_sync flag.
 	Writing "write_error" sets writeerrorseen flag.
 	Writing "-write_error" clears writeerrorseen flag.
+	Writing "want_replacement" is allowed at any time except to a
+		replacement device or a spare.  It sets the flag.
+	Writing "-want_replacement" is allowed at any time.  It clears
+		the flag.
+	Writing "replacement" or "-replacement" is only allowed before
+		starting the array.  It sets or clears the flag.
+
 
 	This file responds to select/poll. Any change to 'faulty'
 	or 'blocked' causes an event.
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0e2288824938..be569eb41a93 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1714,6 +1714,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
 		}
 		if (sb->devflags & WriteMostly1)
 			set_bit(WriteMostly, &rdev->flags);
+		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
+			set_bit(Replacement, &rdev->flags);
 	} else /* MULTIPATH are always insync */
 		set_bit(In_sync, &rdev->flags);
 
@@ -1767,6 +1769,9 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
 		sb->recovery_offset =
 			cpu_to_le64(rdev->recovery_offset);
 	}
+	if (test_bit(Replacement, &rdev->flags))
+		sb->feature_map |=
+			cpu_to_le32(MD_FEATURE_REPLACEMENT);
 
 	if (mddev->reshape_position != MaxSector) {
 		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
@@ -2560,6 +2565,15 @@ state_show(struct md_rdev *rdev, char *page)
 		len += sprintf(page+len, "%swrite_error", sep);
 		sep = ",";
 	}
+	if (test_bit(WantReplacement, &rdev->flags)) {
+		len += sprintf(page+len, "%swant_replacement", sep);
+		sep = ",";
+	}
+	if (test_bit(Replacement, &rdev->flags)) {
+		len += sprintf(page+len, "%sreplacement", sep);
+		sep = ",";
+	}
+
 	return len+sprintf(page+len, "\n");
 }
 
@@ -2628,6 +2642,42 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
 	} else if (cmd_match(buf, "-write_error")) {
 		clear_bit(WriteErrorSeen, &rdev->flags);
 		err = 0;
+	} else if (cmd_match(buf, "want_replacement")) {
+		/* Any non-spare device that is not a replacement can
+		 * become want_replacement at any time, but we then need to
+		 * check if recovery is needed.
+		 */
+		if (rdev->raid_disk >= 0 &&
+		    !test_bit(Replacement, &rdev->flags))
+			set_bit(WantReplacement, &rdev->flags);
+		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+		md_wakeup_thread(rdev->mddev->thread);
+		err = 0;
+	} else if (cmd_match(buf, "-want_replacement")) {
+		/* Clearing 'want_replacement' is always allowed.
+		 * Once replacements starts it is too late though.
+		 */
+		err = 0;
+		clear_bit(WantReplacement, &rdev->flags);
+	} else if (cmd_match(buf, "replacement")) {
+		/* Can only set a device as a replacement when array has not
+		 * yet been started.  Once running, replacement is automatic
+		 * from spares, or by assigning 'slot'.
+		 */
+		if (rdev->mddev->pers)
+			err = -EBUSY;
+		else {
+			set_bit(Replacement, &rdev->flags);
+			err = 0;
+		}
+	} else if (cmd_match(buf, "-replacement")) {
+		/* Similarly, can only clear Replacement before start */
+		if (rdev->mddev->pers)
+			err = -EBUSY;
+		else {
+			clear_bit(Replacement, &rdev->flags);
+			err = 0;
+		}
 	}
 	if (!err)
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -6717,8 +6767,11 @@ static int md_seq_show(struct seq_file *seq, void *v)
 			if (test_bit(Faulty, &rdev->flags)) {
 				seq_printf(seq, "(F)");
 				continue;
-			} else if (rdev->raid_disk < 0)
+			}
+			if (rdev->raid_disk < 0)
 				seq_printf(seq, "(S)"); /* spare */
+			if (test_bit(Replacement, &rdev->flags))
+				seq_printf(seq, "(R)");
 			sectors += rdev->sectors;
 		}
 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 670c10e6b484..44c63dfeeb2b 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -72,34 +72,7 @@ struct md_rdev {
 	 * This reduces the burden of testing multiple flags in many cases
 	 */
 
-	unsigned long	flags;
-#define	Faulty		1		/* device is known to have a fault */
-#define	In_sync		2		/* device is in_sync with rest of array */
-#define	WriteMostly	4		/* Avoid reading if at all possible */
-#define	AutoDetected	7		/* added by auto-detect */
-#define Blocked		8		/* An error occurred but has not yet
-					 * been acknowledged by the metadata
-					 * handler, so don't allow writes
-					 * until it is cleared */
-#define WriteErrorSeen	9		/* A write error has been seen on this
-					 * device
-					 */
-#define FaultRecorded	10		/* Intermediate state for clearing
-					 * Blocked.  The Fault is/will-be
-					 * recorded in the metadata, but that
-					 * metadata hasn't been stored safely
-					 * on disk yet.
-					 */
-#define BlockedBadBlocks 11		/* A writer is blocked because they
-					 * found an unacknowledged bad-block.
-					 * This can safely be cleared at any
-					 * time, and the writer will re-check.
-					 * It may be set at any time, and at
-					 * worst the writer will timeout and
-					 * re-check.  So setting it as
-					 * accurately as possible is good, but
-					 * not absolutely critical.
-					 */
+	unsigned long	flags;	/* bit set of 'enum flag_bits' bits. */
 	wait_queue_head_t blocked_wait;
 
 	int desc_nr;			/* descriptor index in the superblock */
@@ -152,6 +125,44 @@ struct md_rdev {
 		sector_t size;		/* in sectors */
 	} badblocks;
 };
+enum flag_bits {
+	Faulty,			/* device is known to have a fault */
+	In_sync,		/* device is in_sync with rest of array */
+	WriteMostly,		/* Avoid reading if at all possible */
+	AutoDetected,		/* added by auto-detect */
+	Blocked,		/* An error occurred but has not yet
+				 * been acknowledged by the metadata
+				 * handler, so don't allow writes
+				 * until it is cleared */
+	WriteErrorSeen,		/* A write error has been seen on this
+				 * device
+				 */
+	FaultRecorded,		/* Intermediate state for clearing
+				 * Blocked.  The Fault is/will-be
+				 * recorded in the metadata, but that
+				 * metadata hasn't been stored safely
+				 * on disk yet.
+				 */
+	BlockedBadBlocks,	/* A writer is blocked because they
+				 * found an unacknowledged bad-block.
+				 * This can safely be cleared at any
+				 * time, and the writer will re-check.
+				 * It may be set at any time, and at
+				 * worst the writer will timeout and
+				 * re-check.  So setting it as
+				 * accurately as possible is good, but
+				 * not absolutely critical.
+				 */
+	WantReplacement,	/* This device is a candidate to be
+				 * hot-replaced, either because it has
+				 * reported some faults, or because
+				 * of explicit request.
+				 */
+	Replacement,		/* This device is a replacement for
+				 * a want_replacement device with same
+				 * raid_disk number.
+				 */
+};
 
 #define BB_LEN_MASK	(0x00000000000001FFULL)
 #define BB_OFFSET_MASK	(0x7FFFFFFFFFFFFE00ULL)
@@ -482,15 +493,20 @@ static inline char * mdname (struct mddev * mddev)
 static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char nm[20];
-	sprintf(nm, "rd%d", rdev->raid_disk);
-	return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
+	if (!test_bit(Replacement, &rdev->flags)) {
+		sprintf(nm, "rd%d", rdev->raid_disk);
+		return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
+	} else
+		return 0;
 }
 
 static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char nm[20];
-	sprintf(nm, "rd%d", rdev->raid_disk);
-	sysfs_remove_link(&mddev->kobj, nm);
+	if (!test_bit(Replacement, &rdev->flags)) {
+		sprintf(nm, "rd%d", rdev->raid_disk);
+		sysfs_remove_link(&mddev->kobj, nm);
+	}
 }
 
 /*
diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h
index 9e65d9e20662..6f6df86f1ae5 100644
--- a/include/linux/raid/md_p.h
+++ b/include/linux/raid/md_p.h
@@ -277,7 +277,10 @@ struct mdp_superblock_1 {
 					   */
 #define	MD_FEATURE_RESHAPE_ACTIVE	4
 #define	MD_FEATURE_BAD_BLOCKS		8 /* badblock list is not empty */
-
-#define	MD_FEATURE_ALL			(1|2|4|8)
+#define	MD_FEATURE_REPLACEMENT		16 /* This device is replacing an
+					    * active device with same 'role'.
+					    * 'recovery_offset' is also set.
+					    */
+#define	MD_FEATURE_ALL			(1|2|4|8|16)
 
 #endif 
-- 
cgit v1.2.3


From 3e2ec13a8185183cd7ff237dadc948a0f9f7398f Mon Sep 17 00:00:00 2001
From: Thomas Abraham <thomas.abraham@linaro.org>
Date: Mon, 24 Oct 2011 11:43:02 +0200
Subject: DMA: PL330: move filter function into driver

The dma channel selection filter function is moved from plat-samsung
into the pl330 driver. In additon to that, a check is added in the
filter function to ensure that the channel on which the filter has
been invoked is pl330 channel instance (and avoid any incorrect
access of chan->private in a system with multiple types of DMA
drivers).

Suggested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Thomas Abraham <thomas.abraham@linaro.org>
Acked-by: Jassi Brar <jassisinghbrar@gmail.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/plat-samsung/dma-ops.c |  6 ------
 drivers/dma/pl330.c             | 15 +++++++++++++++
 include/linux/amba/pl330.h      |  2 ++
 3 files changed, 17 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-samsung/dma-ops.c b/arch/arm/plat-samsung/dma-ops.c
index 93a994a5dd8f..889c2c22325e 100644
--- a/arch/arm/plat-samsung/dma-ops.c
+++ b/arch/arm/plat-samsung/dma-ops.c
@@ -18,12 +18,6 @@
 
 #include <mach/dma.h>
 
-static inline bool pl330_filter(struct dma_chan *chan, void *param)
-{
-	struct dma_pl330_peri *peri = chan->private;
-	return peri->peri_id == (unsigned)param;
-}
-
 static unsigned samsung_dmadev_request(enum dma_ch dma_ch,
 				struct samsung_dma_info *info)
 {
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 571041477ab2..0c434dca4bf2 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -116,6 +116,9 @@ struct dma_pl330_desc {
 	struct dma_pl330_chan *pchan;
 };
 
+/* forward declaration */
+static struct amba_driver pl330_driver;
+
 static inline struct dma_pl330_chan *
 to_pchan(struct dma_chan *ch)
 {
@@ -267,6 +270,18 @@ static void dma_pl330_rqcb(void *token, enum pl330_op_err err)
 	tasklet_schedule(&pch->task);
 }
 
+bool pl330_filter(struct dma_chan *chan, void *param)
+{
+	struct dma_pl330_peri *peri;
+
+	if (chan->device->dev->driver != &pl330_driver.drv)
+		return false;
+
+	peri = chan->private;
+	return peri->peri_id == (unsigned)param;
+}
+EXPORT_SYMBOL(pl330_filter);
+
 static int pl330_alloc_chan_resources(struct dma_chan *chan)
 {
 	struct dma_pl330_chan *pch = to_pchan(chan);
diff --git a/include/linux/amba/pl330.h b/include/linux/amba/pl330.h
index d12f077a6daf..6db72dae2f16 100644
--- a/include/linux/amba/pl330.h
+++ b/include/linux/amba/pl330.h
@@ -12,6 +12,7 @@
 #ifndef	__AMBA_PL330_H_
 #define	__AMBA_PL330_H_
 
+#include <linux/dmaengine.h>
 #include <asm/hardware/pl330.h>
 
 struct dma_pl330_peri {
@@ -38,4 +39,5 @@ struct dma_pl330_platdata {
 	unsigned mcbuf_sz;
 };
 
+extern bool pl330_filter(struct dma_chan *chan, void *param);
 #endif	/* __AMBA_PL330_H_ */
-- 
cgit v1.2.3


From cd072515215ccc37051cadc516ce28545257be41 Mon Sep 17 00:00:00 2001
From: Thomas Abraham <thomas.abraham@linaro.org>
Date: Mon, 24 Oct 2011 11:43:11 +0200
Subject: DMA: PL330: Infer transfer direction from transfer request instead of
 platform data

The transfer direction for a channel can be inferred from the transfer
request and the need for specifying transfer direction in platfrom data
can be eliminated. So the structure definition 'struct dma_pl330_peri'
is no longer required.

The channel's private data is set to point to a channel id specified in
the platform data (instead of an instance of type 'struct dma_pl330_peri').
The filter function is correspondingly modified to match the channel id.

With the 'struct dma_pl330_peri' removed from platform data, the dma
controller transfer capabilities cannot be inferred any more. Hence,
the dma controller capabilities is specified using platform data.

Acked-by: Jassi Brar <jassisinghbrar@gmail.com>
Acked-by: Boojin Kim <boojin.kim@samsung.com>
Signed-off-by: Thomas Abraham <thomas.abraham@linaro.org>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 drivers/dma/pl330.c        | 65 ++++++++++++----------------------------------
 include/linux/amba/pl330.h | 13 +++-------
 2 files changed, 19 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 0c434dca4bf2..317aaeaa6f66 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -272,13 +272,13 @@ static void dma_pl330_rqcb(void *token, enum pl330_op_err err)
 
 bool pl330_filter(struct dma_chan *chan, void *param)
 {
-	struct dma_pl330_peri *peri;
+	u8 *peri_id;
 
 	if (chan->device->dev->driver != &pl330_driver.drv)
 		return false;
 
-	peri = chan->private;
-	return peri->peri_id == (unsigned)param;
+	peri_id = chan->private;
+	return *peri_id == (unsigned)param;
 }
 EXPORT_SYMBOL(pl330_filter);
 
@@ -512,7 +512,7 @@ pluck_desc(struct dma_pl330_dmac *pdmac)
 static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch)
 {
 	struct dma_pl330_dmac *pdmac = pch->dmac;
-	struct dma_pl330_peri *peri = pch->chan.private;
+	u8 *peri_id = pch->chan.private;
 	struct dma_pl330_desc *desc;
 
 	/* Pluck one desc from the pool of DMAC */
@@ -537,13 +537,7 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch)
 	desc->txd.cookie = 0;
 	async_tx_ack(&desc->txd);
 
-	if (peri) {
-		desc->req.rqtype = peri->rqtype;
-		desc->req.peri = pch->chan.chan_id;
-	} else {
-		desc->req.rqtype = MEMTOMEM;
-		desc->req.peri = 0;
-	}
+	desc->req.peri = peri_id ? pch->chan.chan_id : 0;
 
 	dma_async_tx_descriptor_init(&desc->txd, &pch->chan);
 
@@ -630,12 +624,14 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 	case DMA_TO_DEVICE:
 		desc->rqcfg.src_inc = 1;
 		desc->rqcfg.dst_inc = 0;
+		desc->req.rqtype = MEMTODEV;
 		src = dma_addr;
 		dst = pch->fifo_addr;
 		break;
 	case DMA_FROM_DEVICE:
 		desc->rqcfg.src_inc = 0;
 		desc->rqcfg.dst_inc = 1;
+		desc->req.rqtype = DEVTOMEM;
 		src = pch->fifo_addr;
 		dst = dma_addr;
 		break;
@@ -661,16 +657,12 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
 {
 	struct dma_pl330_desc *desc;
 	struct dma_pl330_chan *pch = to_pchan(chan);
-	struct dma_pl330_peri *peri = chan->private;
 	struct pl330_info *pi;
 	int burst;
 
 	if (unlikely(!pch || !len))
 		return NULL;
 
-	if (peri && peri->rqtype != MEMTOMEM)
-		return NULL;
-
 	pi = &pch->dmac->pif;
 
 	desc = __pl330_prep_dma_memcpy(pch, dst, src, len);
@@ -679,6 +671,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
 
 	desc->rqcfg.src_inc = 1;
 	desc->rqcfg.dst_inc = 1;
+	desc->req.rqtype = MEMTOMEM;
 
 	/* Select max possible burst size */
 	burst = pi->pcfg.data_bus_width / 8;
@@ -707,24 +700,13 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 {
 	struct dma_pl330_desc *first, *desc = NULL;
 	struct dma_pl330_chan *pch = to_pchan(chan);
-	struct dma_pl330_peri *peri = chan->private;
 	struct scatterlist *sg;
 	unsigned long flags;
 	int i;
 	dma_addr_t addr;
 
-	if (unlikely(!pch || !sgl || !sg_len || !peri))
-		return NULL;
-
-	/* Make sure the direction is consistent */
-	if ((direction == DMA_TO_DEVICE &&
-				peri->rqtype != MEMTODEV) ||
-			(direction == DMA_FROM_DEVICE &&
-				peri->rqtype != DEVTOMEM)) {
-		dev_err(pch->dmac->pif.dev, "%s:%d Invalid Direction\n",
-				__func__, __LINE__);
+	if (unlikely(!pch || !sgl || !sg_len))
 		return NULL;
-	}
 
 	addr = pch->fifo_addr;
 
@@ -765,11 +747,13 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		if (direction == DMA_TO_DEVICE) {
 			desc->rqcfg.src_inc = 1;
 			desc->rqcfg.dst_inc = 0;
+			desc->req.rqtype = MEMTODEV;
 			fill_px(&desc->px,
 				addr, sg_dma_address(sg), sg_dma_len(sg));
 		} else {
 			desc->rqcfg.src_inc = 0;
 			desc->rqcfg.dst_inc = 1;
+			desc->req.rqtype = DEVTOMEM;
 			fill_px(&desc->px,
 				sg_dma_address(sg), addr, sg_dma_len(sg));
 		}
@@ -876,28 +860,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 
 	for (i = 0; i < num_chan; i++) {
 		pch = &pdmac->peripherals[i];
-		if (pdat) {
-			struct dma_pl330_peri *peri = &pdat->peri[i];
-
-			switch (peri->rqtype) {
-			case MEMTOMEM:
-				dma_cap_set(DMA_MEMCPY, pd->cap_mask);
-				break;
-			case MEMTODEV:
-			case DEVTOMEM:
-				dma_cap_set(DMA_SLAVE, pd->cap_mask);
-				dma_cap_set(DMA_CYCLIC, pd->cap_mask);
-				break;
-			default:
-				dev_err(&adev->dev, "DEVTODEV Not Supported\n");
-				continue;
-			}
-			pch->chan.private = peri;
-		} else {
-			dma_cap_set(DMA_MEMCPY, pd->cap_mask);
-			pch->chan.private = NULL;
-		}
-
+		pch->chan.private = pdat ? &pdat->peri_id[i] : NULL;
 		INIT_LIST_HEAD(&pch->work_list);
 		spin_lock_init(&pch->lock);
 		pch->pl330_chid = NULL;
@@ -909,6 +872,10 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	}
 
 	pd->dev = &adev->dev;
+	if (pdat)
+		pd->cap_mask = pdat->cap_mask;
+	else
+		dma_cap_set(DMA_MEMCPY, pd->cap_mask);
 
 	pd->device_alloc_chan_resources = pl330_alloc_chan_resources;
 	pd->device_free_chan_resources = pl330_free_chan_resources;
diff --git a/include/linux/amba/pl330.h b/include/linux/amba/pl330.h
index 6db72dae2f16..12e023c19ac1 100644
--- a/include/linux/amba/pl330.h
+++ b/include/linux/amba/pl330.h
@@ -15,15 +15,6 @@
 #include <linux/dmaengine.h>
 #include <asm/hardware/pl330.h>
 
-struct dma_pl330_peri {
-	/*
-	 * Peri_Req i/f of the DMAC that is
-	 * peripheral could be reached from.
-	 */
-	u8 peri_id; /* specific dma id */
-	enum pl330_reqtype rqtype;
-};
-
 struct dma_pl330_platdata {
 	/*
 	 * Number of valid peripherals connected to DMAC.
@@ -34,7 +25,9 @@ struct dma_pl330_platdata {
 	 */
 	u8 nr_valid_peri;
 	/* Array of valid peripherals */
-	struct dma_pl330_peri *peri;
+	u8 *peri_id;
+	/* Operational capabilities */
+	dma_cap_mask_t cap_mask;
 	/* Bytes to allocate for MC buffer */
 	unsigned mcbuf_sz;
 };
-- 
cgit v1.2.3


From 3d058d7bc2c5671ae630e0b463be8a69b5783fb9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 18 Dec 2011 01:55:54 +0100
Subject: netfilter: rework user-space expectation helper support

This partially reworks bc01befdcf3e40979eb518085a075cbf0aacede0
which added userspace expectation support.

This patch removes the nf_ct_userspace_expect_list since now we
force to use the new iptables CT target feature to add the helper
extension for conntracks that have attached expectations from
userspace.

A new version of the proof-of-concept code to implement userspace
helpers from userspace is available at:

http://people.netfilter.org/pablo/userspace-conntrack-helpers/nf-ftp-helper-POC.tar.bz2

This patch also modifies the CT target to allow to set the
conntrack's userspace helper status flags. This flag is used
to tell the conntrack system to explicitly allocate the helper
extension.

This helper extension is useful to link the userspace expectations
with the master conntrack that is being tracked from one userspace
helper.

This feature fixes a problem in the current approach of the
userspace helper support. Basically, if the master conntrack that
has got a userspace expectation vanishes, the expectations point to
one invalid memory address. Thus, triggering an oops in the
expectation deletion event path.

I decided not to add a new revision of the CT target because
I only needed to add a new flag for it. I'll document in this
issue in the iptables manpage. I have also changed the return
value from EINVAL to EOPNOTSUPP if one flag not supported is
specified. Thus, in the future adding new features that only
require a new flag can be added without a new revision.

There is no official code using this in userspace (apart from
the proof-of-concept) that uses this infrastructure but there
will be some by beginning 2012.

Reported-by: Sam Roberts <vieuxtech@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h |  4 ++
 include/linux/netfilter/xt_CT.h               |  3 +-
 include/net/netfilter/nf_conntrack_expect.h   |  1 -
 net/netfilter/nf_conntrack_expect.c           | 63 +++++++++------------------
 net/netfilter/nf_conntrack_helper.c           | 12 +++++
 net/netfilter/nf_conntrack_netlink.c          |  5 ++-
 net/netfilter/xt_CT.c                         |  8 ++--
 7 files changed, 48 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 0d3dd66322ec..9e3a2838291b 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -83,6 +83,10 @@ enum ip_conntrack_status {
 	/* Conntrack is a fake untracked entry */
 	IPS_UNTRACKED_BIT = 12,
 	IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
+
+	/* Conntrack has a userspace helper. */
+	IPS_USERSPACE_HELPER_BIT = 13,
+	IPS_USERSPACE_HELPER = (1 << IPS_USERSPACE_HELPER_BIT),
 };
 
 /* Connection tracking event types */
diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h
index b56e76811c04..6390f0992f36 100644
--- a/include/linux/netfilter/xt_CT.h
+++ b/include/linux/netfilter/xt_CT.h
@@ -3,7 +3,8 @@
 
 #include <linux/types.h>
 
-#define XT_CT_NOTRACK	0x1
+#define XT_CT_NOTRACK		0x1
+#define XT_CT_USERSPACE_HELPER	0x2
 
 struct xt_ct_target_info {
 	__u16 flags;
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 0f8a8c587532..4619caadd9d1 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -91,7 +91,6 @@ static inline void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
 
 void nf_ct_remove_expectations(struct nf_conn *ct);
 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp);
-void nf_ct_remove_userspace_expectations(void);
 
 /* Allocate space for an expectation: this is mandatory before calling
    nf_ct_expect_related.  You will have to call put afterwards. */
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 340c80d968d4..bebb1675e6ff 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -38,8 +38,6 @@ unsigned int nf_ct_expect_max __read_mostly;
 
 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
 
-static HLIST_HEAD(nf_ct_userspace_expect_list);
-
 /* nf_conntrack_expect helper functions */
 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
 				u32 pid, int report)
@@ -47,14 +45,14 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
 	struct nf_conn_help *master_help = nfct_help(exp->master);
 	struct net *net = nf_ct_exp_net(exp);
 
+	NF_CT_ASSERT(master_help);
 	NF_CT_ASSERT(!timer_pending(&exp->timeout));
 
 	hlist_del_rcu(&exp->hnode);
 	net->ct.expect_count--;
 
 	hlist_del(&exp->lnode);
-	if (!(exp->flags & NF_CT_EXPECT_USERSPACE))
-		master_help->expecting[exp->class]--;
+	master_help->expecting[exp->class]--;
 
 	nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
 	nf_ct_expect_put(exp);
@@ -314,37 +312,34 @@ void nf_ct_expect_put(struct nf_conntrack_expect *exp)
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_put);
 
-static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
+static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 {
 	struct nf_conn_help *master_help = nfct_help(exp->master);
+	struct nf_conntrack_helper *helper;
 	struct net *net = nf_ct_exp_net(exp);
-	const struct nf_conntrack_expect_policy *p;
 	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
 
 	/* two references : one for hash insert, one for the timer */
 	atomic_add(2, &exp->use);
 
-	if (master_help) {
-		hlist_add_head(&exp->lnode, &master_help->expectations);
-		master_help->expecting[exp->class]++;
-	} else if (exp->flags & NF_CT_EXPECT_USERSPACE)
-		hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list);
+	hlist_add_head(&exp->lnode, &master_help->expectations);
+	master_help->expecting[exp->class]++;
 
 	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
 	net->ct.expect_count++;
 
 	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
 		    (unsigned long)exp);
-	if (master_help) {
-		p = &rcu_dereference_protected(
-				master_help->helper,
-				lockdep_is_held(&nf_conntrack_lock)
-				)->expect_policy[exp->class];
-		exp->timeout.expires = jiffies + p->timeout * HZ;
+	helper = rcu_dereference_protected(master_help->helper,
+					   lockdep_is_held(&nf_conntrack_lock));
+	if (helper) {
+		exp->timeout.expires = jiffies +
+			helper->expect_policy[exp->class].timeout * HZ;
 	}
 	add_timer(&exp->timeout);
 
 	NF_CT_STAT_INC(net, expect_create);
+	return 0;
 }
 
 /* Race with expectations being used means we could have none to find; OK. */
@@ -389,14 +384,13 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 	struct nf_conntrack_expect *i;
 	struct nf_conn *master = expect->master;
 	struct nf_conn_help *master_help = nfct_help(master);
+	struct nf_conntrack_helper *helper;
 	struct net *net = nf_ct_exp_net(expect);
 	struct hlist_node *n;
 	unsigned int h;
 	int ret = 1;
 
-	/* Don't allow expectations created from kernel-space with no helper */
-	if (!(expect->flags & NF_CT_EXPECT_USERSPACE) &&
-	    (!master_help || (master_help && !master_help->helper))) {
+	if (!master_help) {
 		ret = -ESHUTDOWN;
 		goto out;
 	}
@@ -414,11 +408,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 		}
 	}
 	/* Will be over limit? */
-	if (master_help) {
-		p = &rcu_dereference_protected(
-			master_help->helper,
-			lockdep_is_held(&nf_conntrack_lock)
-			)->expect_policy[expect->class];
+	helper = rcu_dereference_protected(master_help->helper,
+					   lockdep_is_held(&nf_conntrack_lock));
+	if (helper) {
+		p = &helper->expect_policy[expect->class];
 		if (p->max_expected &&
 		    master_help->expecting[expect->class] >= p->max_expected) {
 			evict_oldest_expect(master, expect);
@@ -450,8 +443,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
 	if (ret <= 0)
 		goto out;
 
-	ret = 0;
-	nf_ct_expect_insert(expect);
+	ret = nf_ct_expect_insert(expect);
+	if (ret < 0)
+		goto out;
 	spin_unlock_bh(&nf_conntrack_lock);
 	nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
 	return ret;
@@ -461,21 +455,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
 
-void nf_ct_remove_userspace_expectations(void)
-{
-	struct nf_conntrack_expect *exp;
-	struct hlist_node *n, *next;
-
-	hlist_for_each_entry_safe(exp, n, next,
-				  &nf_ct_userspace_expect_list, lnode) {
-		if (del_timer(&exp->timeout)) {
-			nf_ct_unlink_expect(exp);
-			nf_ct_expect_put(exp);
-		}
-	}
-}
-EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations);
-
 #ifdef CONFIG_PROC_FS
 struct ct_expect_iter_state {
 	struct seq_net_private p;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 93c4bdbfc1ae..c9e0de08aa87 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -121,6 +121,18 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
 	int ret = 0;
 
 	if (tmpl != NULL) {
+		/* we've got a userspace helper. */
+		if (tmpl->status & IPS_USERSPACE_HELPER) {
+			help = nf_ct_helper_ext_add(ct, flags);
+			if (help == NULL) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			rcu_assign_pointer(help->helper, NULL);
+			__set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
+			ret = 0;
+			goto out;
+		}
 		help = nfct_help(tmpl);
 		if (help != NULL)
 			helper = help->helper;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 636617ccfe25..739548029dc2 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2040,6 +2040,10 @@ ctnetlink_create_expect(struct net *net, u16 zone,
 	}
 	help = nfct_help(ct);
 	if (!help) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+	if (test_bit(IPS_USERSPACE_HELPER_BIT, &ct->status)) {
 		if (!cda[CTA_EXPECT_TIMEOUT]) {
 			err = -EINVAL;
 			goto out;
@@ -2264,7 +2268,6 @@ static void __exit ctnetlink_exit(void)
 {
 	pr_info("ctnetlink: unregistering from nfnetlink.\n");
 
-	nf_ct_remove_userspace_expectations();
 	unregister_pernet_subsys(&ctnetlink_net_ops);
 	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
 	nfnetlink_subsys_unregister(&ctnl_subsys);
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 0221d10de75a..8e87123f1373 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -62,8 +62,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 	int ret = 0;
 	u8 proto;
 
-	if (info->flags & ~XT_CT_NOTRACK)
-		return -EINVAL;
+	if (info->flags & ~(XT_CT_NOTRACK | XT_CT_USERSPACE_HELPER))
+		return -EOPNOTSUPP;
 
 	if (info->flags & XT_CT_NOTRACK) {
 		ct = nf_ct_untracked_get();
@@ -92,7 +92,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 				  GFP_KERNEL))
 		goto err3;
 
-	if (info->helper[0]) {
+	if (info->flags & XT_CT_USERSPACE_HELPER) {
+		__set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
+	} else if (info->helper[0]) {
 		ret = -ENOENT;
 		proto = xt_ct_find_proto(par);
 		if (!proto) {
-- 
cgit v1.2.3


From cbc9f2f4fcd70d5a627558ca9a881fa9391abf69 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Dec 2011 13:59:49 +0100
Subject: netfilter: nf_nat: export NAT definitions to userspace

Export the NAT definitions to userspace. So far userspace (specifically,
iptables) has been copying the headers files from include/net. Also
rename some structures and definitions in preparation for IPv6 NAT.
Since these have never been officially exported, this doesn't affect
existing userspace code.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/Kbuild                     |  1 +
 .../linux/netfilter/nf_conntrack_tuple_common.h    | 27 ++++++++++
 include/linux/netfilter/nf_nat.h                   | 25 ++++++++++
 include/linux/netfilter_ipv4/Kbuild                |  1 -
 include/linux/netfilter_ipv4/nf_nat.h              | 58 ----------------------
 include/net/netfilter/nf_conntrack_tuple.h         |  1 -
 include/net/netfilter/nf_nat.h                     | 10 ++--
 include/net/netfilter/nf_nat_core.h                |  2 +-
 include/net/netfilter/nf_nat_protocol.h            | 14 +++---
 net/ipv4/netfilter/ipt_MASQUERADE.c                | 16 +++---
 net/ipv4/netfilter/ipt_NETMAP.c                    | 14 +++---
 net/ipv4/netfilter/ipt_REDIRECT.c                  | 16 +++---
 net/ipv4/netfilter/nf_nat_core.c                   | 54 ++++++++++----------
 net/ipv4/netfilter/nf_nat_h323.c                   | 20 ++++----
 net/ipv4/netfilter/nf_nat_helper.c                 | 10 ++--
 net/ipv4/netfilter/nf_nat_pptp.c                   | 14 +++---
 net/ipv4/netfilter/nf_nat_proto_common.c           | 24 ++++-----
 net/ipv4/netfilter/nf_nat_proto_dccp.c             |  4 +-
 net/ipv4/netfilter/nf_nat_proto_gre.c              |  8 +--
 net/ipv4/netfilter/nf_nat_proto_icmp.c             |  4 +-
 net/ipv4/netfilter/nf_nat_proto_sctp.c             |  4 +-
 net/ipv4/netfilter/nf_nat_proto_tcp.c              |  4 +-
 net/ipv4/netfilter/nf_nat_proto_udp.c              |  4 +-
 net/ipv4/netfilter/nf_nat_proto_udplite.c          |  4 +-
 net/ipv4/netfilter/nf_nat_proto_unknown.c          |  2 +-
 net/ipv4/netfilter/nf_nat_rule.c                   | 22 ++++----
 net/ipv4/netfilter/nf_nat_sip.c                    | 10 ++--
 net/ipv4/netfilter/nf_nat_standalone.c             |  2 +-
 net/netfilter/nf_conntrack_netlink.c               |  4 +-
 29 files changed, 185 insertions(+), 194 deletions(-)
 create mode 100644 include/linux/netfilter/nf_nat.h
 delete mode 100644 include/linux/netfilter_ipv4/nf_nat.h

(limited to 'include/linux')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index a1b410c76fc3..d81f7719b01c 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -5,6 +5,7 @@ header-y += nf_conntrack_ftp.h
 header-y += nf_conntrack_sctp.h
 header-y += nf_conntrack_tcp.h
 header-y += nf_conntrack_tuple_common.h
+header-y += nf_nat.h
 header-y += nfnetlink.h
 header-y += nfnetlink_compat.h
 header-y += nfnetlink_conntrack.h
diff --git a/include/linux/netfilter/nf_conntrack_tuple_common.h b/include/linux/netfilter/nf_conntrack_tuple_common.h
index 2ea22b018a87..2f6bbc5b8125 100644
--- a/include/linux/netfilter/nf_conntrack_tuple_common.h
+++ b/include/linux/netfilter/nf_conntrack_tuple_common.h
@@ -7,6 +7,33 @@ enum ip_conntrack_dir {
 	IP_CT_DIR_MAX
 };
 
+/* The protocol-specific manipulable parts of the tuple: always in
+ * network order
+ */
+union nf_conntrack_man_proto {
+	/* Add other protocols here. */
+	__be16 all;
+
+	struct {
+		__be16 port;
+	} tcp;
+	struct {
+		__be16 port;
+	} udp;
+	struct {
+		__be16 id;
+	} icmp;
+	struct {
+		__be16 port;
+	} dccp;
+	struct {
+		__be16 port;
+	} sctp;
+	struct {
+		__be16 key;	/* GRE key is 32bit, PPtP only uses 16bit */
+	} gre;
+};
+
 #define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL)
 
 #endif /* _NF_CONNTRACK_TUPLE_COMMON_H */
diff --git a/include/linux/netfilter/nf_nat.h b/include/linux/netfilter/nf_nat.h
new file mode 100644
index 000000000000..8df2d13730b2
--- /dev/null
+++ b/include/linux/netfilter/nf_nat.h
@@ -0,0 +1,25 @@
+#ifndef _NETFILTER_NF_NAT_H
+#define _NETFILTER_NF_NAT_H
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+
+#define NF_NAT_RANGE_MAP_IPS		1
+#define NF_NAT_RANGE_PROTO_SPECIFIED	2
+#define NF_NAT_RANGE_PROTO_RANDOM	4
+#define NF_NAT_RANGE_PERSISTENT		8
+
+struct nf_nat_ipv4_range {
+	unsigned int			flags;
+	__be32				min_ip;
+	__be32				max_ip;
+	union nf_conntrack_man_proto	min;
+	union nf_conntrack_man_proto	max;
+};
+
+struct nf_nat_ipv4_multi_range_compat {
+	unsigned int			rangesize;
+	struct nf_nat_ipv4_range	range[1];
+};
+
+#endif /* _NETFILTER_NF_NAT_H */
diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild
index c3b45480ecf7..f9930c87fff3 100644
--- a/include/linux/netfilter_ipv4/Kbuild
+++ b/include/linux/netfilter_ipv4/Kbuild
@@ -12,4 +12,3 @@ header-y += ipt_ah.h
 header-y += ipt_ecn.h
 header-y += ipt_realm.h
 header-y += ipt_ttl.h
-header-y += nf_nat.h
diff --git a/include/linux/netfilter_ipv4/nf_nat.h b/include/linux/netfilter_ipv4/nf_nat.h
deleted file mode 100644
index 7a861d09fc86..000000000000
--- a/include/linux/netfilter_ipv4/nf_nat.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef _LINUX_NF_NAT_H
-#define _LINUX_NF_NAT_H
-
-#include <linux/types.h>
-
-#define IP_NAT_RANGE_MAP_IPS 1
-#define IP_NAT_RANGE_PROTO_SPECIFIED 2
-#define IP_NAT_RANGE_PROTO_RANDOM 4
-#define IP_NAT_RANGE_PERSISTENT 8
-
-/* The protocol-specific manipulable parts of the tuple. */
-union nf_conntrack_man_proto {
-	/* Add other protocols here. */
-	__be16 all;
-
-	struct {
-		__be16 port;
-	} tcp;
-	struct {
-		__be16 port;
-	} udp;
-	struct {
-		__be16 id;
-	} icmp;
-	struct {
-		__be16 port;
-	} dccp;
-	struct {
-		__be16 port;
-	} sctp;
-	struct {
-		__be16 key;	/* GRE key is 32bit, PPtP only uses 16bit */
-	} gre;
-};
-
-/* Single range specification. */
-struct nf_nat_range {
-	/* Set to OR of flags above. */
-	unsigned int flags;
-
-	/* Inclusive: network order. */
-	__be32 min_ip, max_ip;
-
-	/* Inclusive: network order */
-	union nf_conntrack_man_proto min, max;
-};
-
-/* For backwards compat: don't use in modern code. */
-struct nf_nat_multi_range_compat {
-	unsigned int rangesize; /* Must be 1. */
-
-	/* hangs off end. */
-	struct nf_nat_range range[1];
-};
-
-#define nf_nat_multi_range nf_nat_multi_range_compat
-
-#endif
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index 2f8fb77bfdd1..aea3f8221be0 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -12,7 +12,6 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
-#include <linux/netfilter_ipv4/nf_nat.h>
 #include <linux/list_nulls.h>
 
 /* A `tuple' is a structure containing the information to uniquely
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index b8872df7285f..b4de990b55f1 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -1,14 +1,12 @@
 #ifndef _NF_NAT_H
 #define _NF_NAT_H
 #include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/nf_nat.h>
+#include <linux/netfilter/nf_nat.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 
-#define NF_NAT_MAPPING_TYPE_MAX_NAMELEN 16
-
 enum nf_nat_manip_type {
-	IP_NAT_MANIP_SRC,
-	IP_NAT_MANIP_DST
+	NF_NAT_MANIP_SRC,
+	NF_NAT_MANIP_DST
 };
 
 /* SRC manip occurs POST_ROUTING or LOCAL_IN */
@@ -52,7 +50,7 @@ struct nf_conn_nat {
 
 /* Set up the info structure to map into this range. */
 extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
-				      const struct nf_nat_range *range,
+				      const struct nf_nat_ipv4_range *range,
 				      enum nf_nat_manip_type maniptype);
 
 /* Is this tuple already taken? (not by us)*/
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h
index 3dc7b98effeb..b13d8d18d595 100644
--- a/include/net/netfilter/nf_nat_core.h
+++ b/include/net/netfilter/nf_nat_core.h
@@ -20,7 +20,7 @@ extern int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 static inline int nf_nat_initialized(struct nf_conn *ct,
 				     enum nf_nat_manip_type manip)
 {
-	if (manip == IP_NAT_MANIP_SRC)
+	if (manip == NF_NAT_MANIP_SRC)
 		return ct->status & IPS_SRC_NAT_DONE;
 	else
 		return ct->status & IPS_DST_NAT_DONE;
diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h
index 93cc90d28e66..7156c002b59c 100644
--- a/include/net/netfilter/nf_nat_protocol.h
+++ b/include/net/netfilter/nf_nat_protocol.h
@@ -4,7 +4,7 @@
 #include <net/netfilter/nf_nat.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
-struct nf_nat_range;
+struct nf_nat_ipv4_range;
 
 struct nf_nat_protocol {
 	/* Protocol number. */
@@ -30,15 +30,15 @@ struct nf_nat_protocol {
 	   possible.  Per-protocol part of tuple is initialized to the
 	   incoming packet. */
 	void (*unique_tuple)(struct nf_conntrack_tuple *tuple,
-			     const struct nf_nat_range *range,
+			     const struct nf_nat_ipv4_range *range,
 			     enum nf_nat_manip_type maniptype,
 			     const struct nf_conn *ct);
 
 	int (*range_to_nlattr)(struct sk_buff *skb,
-			       const struct nf_nat_range *range);
+			       const struct nf_nat_ipv4_range *range);
 
 	int (*nlattr_to_range)(struct nlattr *tb[],
-			       struct nf_nat_range *range);
+			       struct nf_nat_ipv4_range *range);
 };
 
 /* Protocol registration. */
@@ -61,14 +61,14 @@ extern bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
 				  const union nf_conntrack_man_proto *max);
 
 extern void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
-				      const struct nf_nat_range *range,
+				      const struct nf_nat_ipv4_range *range,
 				      enum nf_nat_manip_type maniptype,
 				      const struct nf_conn *ct,
 				      u_int16_t *rover);
 
 extern int nf_nat_proto_range_to_nlattr(struct sk_buff *skb,
-					const struct nf_nat_range *range);
+					const struct nf_nat_ipv4_range *range);
 extern int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
-					struct nf_nat_range *range);
+					struct nf_nat_ipv4_range *range);
 
 #endif /*_NF_NAT_PROTO_H*/
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 9931152a78b5..2f210c79dc87 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -30,9 +30,9 @@ MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 /* FIXME: Multiple targets. --RR */
 static int masquerade_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
-	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
+	if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
 		pr_debug("bad MAP_IPS.\n");
 		return -EINVAL;
 	}
@@ -49,8 +49,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 	struct nf_conn_nat *nat;
 	enum ip_conntrack_info ctinfo;
-	struct nf_nat_range newrange;
-	const struct nf_nat_multi_range_compat *mr;
+	struct nf_nat_ipv4_range newrange;
+	const struct nf_nat_ipv4_multi_range_compat *mr;
 	const struct rtable *rt;
 	__be32 newsrc;
 
@@ -79,13 +79,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	nat->masq_index = par->out->ifindex;
 
 	/* Transfer from original range. */
-	newrange = ((struct nf_nat_range)
-		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+	newrange = ((struct nf_nat_ipv4_range)
+		{ mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
 		  newsrc, newsrc,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC);
+	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
 }
 
 static int
@@ -139,7 +139,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = {
 	.name		= "MASQUERADE",
 	.family		= NFPROTO_IPV4,
 	.target		= masquerade_tg,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= 1 << NF_INET_POST_ROUTING,
 	.checkentry	= masquerade_tg_check,
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 6cdb298f1035..b5bfbbabf70d 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
 
 static int netmap_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
-	if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
+	if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) {
 		pr_debug("bad MAP_IPS.\n");
 		return -EINVAL;
 	}
@@ -43,8 +43,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	__be32 new_ip, netmask;
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
-	struct nf_nat_range newrange;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+	struct nf_nat_ipv4_range newrange;
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_POST_ROUTING ||
@@ -61,8 +61,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		new_ip = ip_hdr(skb)->saddr & ~netmask;
 	new_ip |= mr->range[0].min_ip & netmask;
 
-	newrange = ((struct nf_nat_range)
-		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+	newrange = ((struct nf_nat_ipv4_range)
+		{ mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
 		  new_ip, new_ip,
 		  mr->range[0].min, mr->range[0].max });
 
@@ -74,7 +74,7 @@ static struct xt_target netmap_tg_reg __read_mostly = {
 	.name 		= "NETMAP",
 	.family		= NFPROTO_IPV4,
 	.target 	= netmap_tg,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) |
 			  (1 << NF_INET_POST_ROUTING) |
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 18a0656505a0..7c0103a5203e 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -28,9 +28,9 @@ MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
 /* FIXME: Take multiple ranges --RR */
 static int redirect_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
-	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
+	if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
 		pr_debug("bad MAP_IPS.\n");
 		return -EINVAL;
 	}
@@ -47,8 +47,8 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	__be32 newdst;
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
-	struct nf_nat_range newrange;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+	struct nf_nat_ipv4_range newrange;
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_LOCAL_OUT);
@@ -76,20 +76,20 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	}
 
 	/* Transfer from original range. */
-	newrange = ((struct nf_nat_range)
-		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+	newrange = ((struct nf_nat_ipv4_range)
+		{ mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
 		  newdst, newdst,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST);
+	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST);
 }
 
 static struct xt_target redirect_tg_reg __read_mostly = {
 	.name		= "REDIRECT",
 	.family		= NFPROTO_IPV4,
 	.target		= redirect_tg,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
 	.checkentry	= redirect_tg_check,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 447bc5cfdc6c..58ab7a4611dd 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -82,14 +82,14 @@ EXPORT_SYMBOL(nf_nat_used_tuple);
  * that meet the constraints of range. */
 static int
 in_range(const struct nf_conntrack_tuple *tuple,
-	 const struct nf_nat_range *range)
+	 const struct nf_nat_ipv4_range *range)
 {
 	const struct nf_nat_protocol *proto;
 	int ret = 0;
 
 	/* If we are supposed to map IPs, then we must be in the
 	   range specified, otherwise let this drag us onto a new src IP. */
-	if (range->flags & IP_NAT_RANGE_MAP_IPS) {
+	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
 		if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
 		    ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
 			return 0;
@@ -97,8 +97,8 @@ in_range(const struct nf_conntrack_tuple *tuple,
 
 	rcu_read_lock();
 	proto = __nf_nat_proto_find(tuple->dst.protonum);
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
-	    proto->in_range(tuple, IP_NAT_MANIP_SRC,
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) ||
+	    proto->in_range(tuple, NF_NAT_MANIP_SRC,
 			    &range->min, &range->max))
 		ret = 1;
 	rcu_read_unlock();
@@ -123,7 +123,7 @@ static int
 find_appropriate_src(struct net *net, u16 zone,
 		     const struct nf_conntrack_tuple *tuple,
 		     struct nf_conntrack_tuple *result,
-		     const struct nf_nat_range *range)
+		     const struct nf_nat_ipv4_range *range)
 {
 	unsigned int h = hash_by_src(net, zone, tuple);
 	const struct nf_conn_nat *nat;
@@ -157,7 +157,7 @@ find_appropriate_src(struct net *net, u16 zone,
 */
 static void
 find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
-		    const struct nf_nat_range *range,
+		    const struct nf_nat_ipv4_range *range,
 		    const struct nf_conn *ct,
 		    enum nf_nat_manip_type maniptype)
 {
@@ -166,10 +166,10 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
 	u_int32_t minip, maxip, j;
 
 	/* No IP mapping?  Do nothing. */
-	if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
+	if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
 		return;
 
-	if (maniptype == IP_NAT_MANIP_SRC)
+	if (maniptype == NF_NAT_MANIP_SRC)
 		var_ipp = &tuple->src.u3.ip;
 	else
 		var_ipp = &tuple->dst.u3.ip;
@@ -189,7 +189,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
 	minip = ntohl(range->min_ip);
 	maxip = ntohl(range->max_ip);
 	j = jhash_2words((__force u32)tuple->src.u3.ip,
-			 range->flags & IP_NAT_RANGE_PERSISTENT ?
+			 range->flags & NF_NAT_RANGE_PERSISTENT ?
 				0 : (__force u32)tuple->dst.u3.ip ^ zone, 0);
 	j = ((u64)j * (maxip - minip + 1)) >> 32;
 	*var_ipp = htonl(minip + j);
@@ -204,7 +204,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
 static void
 get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 const struct nf_conntrack_tuple *orig_tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_ipv4_range *range,
 		 struct nf_conn *ct,
 		 enum nf_nat_manip_type maniptype)
 {
@@ -219,8 +219,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	   This is only required for source (ie. NAT/masq) mappings.
 	   So far, we don't do local source mappings, so multiple
 	   manips not an issue.  */
-	if (maniptype == IP_NAT_MANIP_SRC &&
-	    !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
+	if (maniptype == NF_NAT_MANIP_SRC &&
+	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
 		/* try the original tuple first */
 		if (in_range(orig_tuple, range)) {
 			if (!nf_nat_used_tuple(orig_tuple, ct)) {
@@ -247,8 +247,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
 
 	/* Only bother mapping if it's not already in range and unique */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
-		if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) {
+	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
+		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (proto->in_range(tuple, maniptype, &range->min,
 					    &range->max) &&
 			    (range->min.all == range->max.all ||
@@ -267,7 +267,7 @@ out:
 
 unsigned int
 nf_nat_setup_info(struct nf_conn *ct,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_ipv4_range *range,
 		  enum nf_nat_manip_type maniptype)
 {
 	struct net *net = nf_ct_net(ct);
@@ -284,8 +284,8 @@ nf_nat_setup_info(struct nf_conn *ct,
 		}
 	}
 
-	NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC ||
-		     maniptype == IP_NAT_MANIP_DST);
+	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
+		     maniptype == NF_NAT_MANIP_DST);
 	BUG_ON(nf_nat_initialized(ct, maniptype));
 
 	/* What we've got will look like inverse of reply. Normally
@@ -306,13 +306,13 @@ nf_nat_setup_info(struct nf_conn *ct,
 		nf_conntrack_alter_reply(ct, &reply);
 
 		/* Non-atomic: we own this at the moment. */
-		if (maniptype == IP_NAT_MANIP_SRC)
+		if (maniptype == NF_NAT_MANIP_SRC)
 			ct->status |= IPS_SRC_NAT;
 		else
 			ct->status |= IPS_DST_NAT;
 	}
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		unsigned int srchash;
 
 		srchash = hash_by_src(net, nf_ct_zone(ct),
@@ -327,7 +327,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 	}
 
 	/* It's done. */
-	if (maniptype == IP_NAT_MANIP_DST)
+	if (maniptype == NF_NAT_MANIP_DST)
 		ct->status |= IPS_DST_NAT_DONE;
 	else
 		ct->status |= IPS_SRC_NAT_DONE;
@@ -361,7 +361,7 @@ manip_pkt(u_int16_t proto,
 
 	iph = (void *)skb->data + iphdroff;
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
 		iph->saddr = target->src.u3.ip;
 	} else {
@@ -381,7 +381,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct,
 	unsigned long statusbit;
 	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
 
-	if (mtype == IP_NAT_MANIP_SRC)
+	if (mtype == NF_NAT_MANIP_SRC)
 		statusbit = IPS_SRC_NAT;
 	else
 		statusbit = IPS_DST_NAT;
@@ -447,7 +447,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 			return 0;
 	}
 
-	if (manip == IP_NAT_MANIP_SRC)
+	if (manip == NF_NAT_MANIP_SRC)
 		statusbit = IPS_SRC_NAT;
 	else
 		statusbit = IPS_DST_NAT;
@@ -602,7 +602,7 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
 
 static int nfnetlink_parse_nat_proto(struct nlattr *attr,
 				     const struct nf_conn *ct,
-				     struct nf_nat_range *range)
+				     struct nf_nat_ipv4_range *range)
 {
 	struct nlattr *tb[CTA_PROTONAT_MAX+1];
 	const struct nf_nat_protocol *npt;
@@ -626,7 +626,7 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
 
 static int
 nfnetlink_parse_nat(const struct nlattr *nat,
-		    const struct nf_conn *ct, struct nf_nat_range *range)
+		    const struct nf_conn *ct, struct nf_nat_ipv4_range *range)
 {
 	struct nlattr *tb[CTA_NAT_MAX+1];
 	int err;
@@ -646,7 +646,7 @@ nfnetlink_parse_nat(const struct nlattr *nat,
 		range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
 
 	if (range->min_ip)
-		range->flags |= IP_NAT_RANGE_MAP_IPS;
+		range->flags |= NF_NAT_RANGE_MAP_IPS;
 
 	if (!tb[CTA_NAT_PROTO])
 		return 0;
@@ -663,7 +663,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
 			  enum nf_nat_manip_type manip,
 			  const struct nlattr *attr)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	if (nfnetlink_parse_nat(attr, ct, &range) < 0)
 		return -EINVAL;
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index b9a1136addbd..dc1dd912baf4 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -398,7 +398,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 static void ip_nat_q931_expect(struct nf_conn *new,
 			       struct nf_conntrack_expect *this)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	if (this->tuple.src.u3.ip != 0) {	/* Only accept calls from GK */
 		nf_nat_follow_master(new, this);
@@ -409,16 +409,16 @@ static void ip_nat_q931_expect(struct nf_conn *new,
 	BUG_ON(new->status & IPS_NAT_DONE_MASK);
 
 	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
-	nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
+	nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = this->saved_proto;
 	range.min_ip = range.max_ip =
 	    new->master->tuplehash[!this->dir].tuple.src.u3.ip;
-	nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
 }
 
 /****************************************************************************/
@@ -496,21 +496,21 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 static void ip_nat_callforwarding_expect(struct nf_conn *new,
 					 struct nf_conntrack_expect *this)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	/* This must be a fresh one. */
 	BUG_ON(new->status & IPS_NAT_DONE_MASK);
 
 	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
-	nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
+	nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = this->saved_proto;
 	range.min_ip = range.max_ip = this->saved_ip;
-	nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
 }
 
 /****************************************************************************/
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index ebc5f8894f99..049e8b7c3188 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -430,22 +430,22 @@ nf_nat_seq_adjust(struct sk_buff *skb,
 void nf_nat_follow_master(struct nf_conn *ct,
 			  struct nf_conntrack_expect *exp)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	/* This must be a fresh one. */
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 
 	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = exp->saved_proto;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
 }
 EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 3e8284ba46b8..c273d58980ae 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -47,7 +47,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 	struct nf_conntrack_tuple t;
 	const struct nf_ct_pptp_master *ct_pptp_info;
 	const struct nf_nat_pptp *nat_pptp_info;
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
 	nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
@@ -88,24 +88,24 @@ static void pptp_nat_expected(struct nf_conn *ct,
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 
 	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
 	if (exp->dir == IP_CT_DIR_ORIGINAL) {
-		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 		range.min = range.max = exp->saved_proto;
 	}
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
 	if (exp->dir == IP_CT_DIR_REPLY) {
-		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 		range.min = range.max = exp->saved_proto;
 	}
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
 }
 
 /* outbound packets == from PNS to PAC */
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index a3d997618602..47fff91c9ae6 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -26,7 +26,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
 {
 	__be16 port;
 
-	if (maniptype == IP_NAT_MANIP_SRC)
+	if (maniptype == NF_NAT_MANIP_SRC)
 		port = tuple->src.u.all;
 	else
 		port = tuple->dst.u.all;
@@ -37,7 +37,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
 EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
 
 void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
-			       const struct nf_nat_range *range,
+			       const struct nf_nat_ipv4_range *range,
 			       enum nf_nat_manip_type maniptype,
 			       const struct nf_conn *ct,
 			       u_int16_t *rover)
@@ -46,15 +46,15 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 	__be16 *portptr;
 	u_int16_t off;
 
-	if (maniptype == IP_NAT_MANIP_SRC)
+	if (maniptype == NF_NAT_MANIP_SRC)
 		portptr = &tuple->src.u.all;
 	else
 		portptr = &tuple->dst.u.all;
 
 	/* If no range specified... */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
 		/* If it's dst rewrite, can't change port */
-		if (maniptype == IP_NAT_MANIP_DST)
+		if (maniptype == NF_NAT_MANIP_DST)
 			return;
 
 		if (ntohs(*portptr) < 1024) {
@@ -75,9 +75,9 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 		range_size = ntohs(range->max.all) - min + 1;
 	}
 
-	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
+	if (range->flags & NF_NAT_RANGE_PROTO_RANDOM)
 		off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip,
-						 maniptype == IP_NAT_MANIP_SRC
+						 maniptype == NF_NAT_MANIP_SRC
 						 ? tuple->dst.u.all
 						 : tuple->src.u.all);
 	else
@@ -87,7 +87,7 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 		*portptr = htons(min + off % range_size);
 		if (++i != range_size && nf_nat_used_tuple(tuple, ct))
 			continue;
-		if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
+		if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM))
 			*rover = off;
 		return;
 	}
@@ -97,7 +97,7 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
 
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 int nf_nat_proto_range_to_nlattr(struct sk_buff *skb,
-				 const struct nf_nat_range *range)
+				 const struct nf_nat_ipv4_range *range)
 {
 	NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all);
 	NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all);
@@ -109,16 +109,16 @@ nla_put_failure:
 EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
 
 int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
-				 struct nf_nat_range *range)
+				 struct nf_nat_ipv4_range *range)
 {
 	if (tb[CTA_PROTONAT_PORT_MIN]) {
 		range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
 		range->max.all = range->min.tcp.port;
-		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 	if (tb[CTA_PROTONAT_PORT_MAX]) {
 		range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
-		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 	return 0;
 }
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
index 570faf2667b2..c43d5b366d0d 100644
--- a/net/ipv4/netfilter/nf_nat_proto_dccp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -24,7 +24,7 @@ static u_int16_t dccp_port_rover;
 
 static void
 dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_ipv4_range *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
@@ -54,7 +54,7 @@ dccp_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct dccp_hdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
 		newport = tuple->src.u.dccp.port;
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index bc8d83a31c73..9b1c629d7a00 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -39,7 +39,7 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
 /* generate unique tuple ... */
 static void
 gre_unique_tuple(struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_ipv4_range *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
@@ -52,12 +52,12 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
 	if (!ct->master)
 		return;
 
-	if (maniptype == IP_NAT_MANIP_SRC)
+	if (maniptype == NF_NAT_MANIP_SRC)
 		keyptr = &tuple->src.u.gre.key;
 	else
 		keyptr = &tuple->dst.u.gre.key;
 
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
 		pr_debug("%p: NATing GRE PPTP\n", ct);
 		min = 1;
 		range_size = 0xffff;
@@ -99,7 +99,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
 
 	/* we only have destination manip of a packet, since 'source key'
 	 * is not present in the packet itself */
-	if (maniptype != IP_NAT_MANIP_DST)
+	if (maniptype != NF_NAT_MANIP_DST)
 		return true;
 	switch (greh->version) {
 	case GRE_VERSION_1701:
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 9f4dc1235dc7..8f87b4bebf2b 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -30,7 +30,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
 
 static void
 icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_ipv4_range *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
@@ -40,7 +40,7 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
 
 	range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
 	/* If no range specified... */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
 		range_size = 0xFFFF;
 
 	for (i = 0; ; ++id) {
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index bd5a80a62a5b..4e70dc6fad21 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -19,7 +19,7 @@ static u_int16_t nf_sctp_port_rover;
 
 static void
 sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_ipv4_range *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
@@ -46,7 +46,7 @@ sctp_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct sctphdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 0d67bb80130f..6fcc865dc2ee 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -23,7 +23,7 @@ static u_int16_t tcp_port_rover;
 
 static void
 tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_ipv4_range *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
@@ -55,7 +55,7 @@ tcp_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct tcphdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index 0b1b8601cba7..18ea44ebfff7 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -22,7 +22,7 @@ static u_int16_t udp_port_rover;
 
 static void
 udp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_ipv4_range *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
@@ -47,7 +47,7 @@ udp_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct udphdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
index f83ef23e2ab7..a17b75b9e2a7 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udplite.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -21,7 +21,7 @@ static u_int16_t udplite_port_rover;
 
 static void
 udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
-		     const struct nf_nat_range *range,
+		     const struct nf_nat_ipv4_range *range,
 		     enum nf_nat_manip_type maniptype,
 		     const struct nf_conn *ct)
 {
@@ -47,7 +47,7 @@ udplite_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct udphdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index a50f2bc1c732..ab8e8c132168 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -27,7 +27,7 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
 }
 
 static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
-				 const struct nf_nat_range *range,
+				 const struct nf_nat_ipv4_range *range,
 				 enum nf_nat_manip_type maniptype,
 				 const struct nf_conn *ct)
 {
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 733c9abc1cbd..d2a9dc314e0e 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -44,7 +44,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
 		     par->hooknum == NF_INET_LOCAL_IN);
@@ -56,7 +56,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
 			    ctinfo == IP_CT_RELATED_REPLY));
 	NF_CT_ASSERT(par->out != NULL);
 
-	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
+	return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_SRC);
 }
 
 static unsigned int
@@ -64,7 +64,7 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_LOCAL_OUT);
@@ -74,12 +74,12 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
 	/* Connection must be valid and new. */
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
-	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
+	return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_DST);
 }
 
 static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
@@ -91,7 +91,7 @@ static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
 
 static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
@@ -105,13 +105,13 @@ static unsigned int
 alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 {
 	/* Force range to this IP; let proto decide mapping for
-	   per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+	   per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED).
 	*/
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	range.flags = 0;
 	pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
-		 HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ?
+		 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
 		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
 		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
 
@@ -140,7 +140,7 @@ int nf_nat_rule_find(struct sk_buff *skb,
 static struct xt_target ipt_snat_reg __read_mostly = {
 	.name		= "SNAT",
 	.target		= ipt_snat_target,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
 	.checkentry	= ipt_snat_checkentry,
@@ -150,7 +150,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
 static struct xt_target ipt_dnat_reg __read_mostly = {
 	.name		= "DNAT",
 	.target		= ipt_dnat_target,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
 	.checkentry	= ipt_dnat_checkentry,
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 78844d9208f1..d0319f96269f 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -249,25 +249,25 @@ static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off)
 static void ip_nat_sip_expected(struct nf_conn *ct,
 				struct nf_conntrack_expect *exp)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	/* This must be a fresh one. */
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = exp->saved_proto;
 	range.min_ip = range.max_ip = exp->saved_ip;
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
 
 	/* Change src to where master sends to, but only if the connection
 	 * actually came from the same source. */
 	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip ==
 	    ct->master->tuplehash[exp->dir].tuple.src.u3.ip) {
-		range.flags = IP_NAT_RANGE_MAP_IPS;
+		range.flags = NF_NAT_RANGE_MAP_IPS;
 		range.min_ip = range.max_ip
 			= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
-		nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+		nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 	}
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 92900482edea..3828a4229822 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -137,7 +137,7 @@ nf_nat_fn(unsigned int hooknum,
 				return ret;
 		} else
 			pr_debug("Already setup manip %s for ct %p\n",
-				 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
+				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
 				 ct);
 		break;
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 739548029dc2..4f9c941335c9 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1102,14 +1102,14 @@ ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[])
 
 	if (cda[CTA_NAT_DST]) {
 		ret = ctnetlink_parse_nat_setup(ct,
-						IP_NAT_MANIP_DST,
+						NF_NAT_MANIP_DST,
 						cda[CTA_NAT_DST]);
 		if (ret < 0)
 			return ret;
 	}
 	if (cda[CTA_NAT_SRC]) {
 		ret = ctnetlink_parse_nat_setup(ct,
-						IP_NAT_MANIP_SRC,
+						NF_NAT_MANIP_SRC,
 						cda[CTA_NAT_SRC]);
 		if (ret < 0)
 			return ret;
-- 
cgit v1.2.3


From 62268ce9170c5466332c046ff6ddafcb67751502 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 13 Dec 2011 23:48:03 +0800
Subject: dmaengine: add DMA_TRANS_NONE to dma_transfer_direction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before dma_transfer_direction was introduced to replace
dma_data_direction, some dmaengine device uses DMA_NONE of
dma_data_direction for some talk with its client drivers.
The mxs-dma and its clients mxs-mmc and gpmi-nand are such case.

This patch adds DMA_TRANS_NONE to dma_transfer_direction and
migrate the DMA_NONE use in mxs-dma to it.

It also fixes the compile warning below.

CC      drivers/dma/mxs-dma.o
drivers/dma/mxs-dma.c: In function ‘mxs_dma_prep_slave_sg’:
drivers/dma/mxs-dma.c:420:16: warning: comparison between ‘enum dma_transfer_direction’ and ‘enum dma_data_direction’

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/mxs-dma.c     | 2 +-
 include/linux/dmaengine.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index 6548595c26dc..493af2f6e33a 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -391,7 +391,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
 		idx = 0;
 	}
 
-	if (direction == DMA_NONE) {
+	if (direction == DMA_TRANS_NONE) {
 		ccw = &mxs_chan->ccw[idx++];
 		pio = (u32 *) sgl;
 
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 5532bb8b500c..679b349d9b66 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -88,6 +88,7 @@ enum dma_transfer_direction {
 	DMA_MEM_TO_DEV,
 	DMA_DEV_TO_MEM,
 	DMA_DEV_TO_DEV,
+	DMA_TRANS_NONE,
 };
 
 /**
-- 
cgit v1.2.3


From 4e82786f7039cb707c031dcf0dc84c025e149487 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Tue, 13 Dec 2011 23:48:05 +0800
Subject: mtd: fix compile error for gpmi-nand

The driver gpmi-nand should compile at least.  This patch adds the
missing gpmi-nand.h to fix the compile error below.

  CC      drivers/mtd/nand/gpmi-nand/gpmi-nand.o
  CC      drivers/mtd/nand/gpmi-nand/gpmi-lib.o
drivers/mtd/nand/gpmi-nand/gpmi-nand.c:25:33: fatal error: linux/mtd/gpmi-nand.h: No such file or directory
drivers/mtd/nand/gpmi-nand/gpmi-lib.c:21:33: fatal error: linux/mtd/gpmi-nand.h: No such file or directory

This header is grabbed from patch below, which has not been postponed
for merging.

  [PATCH v8 1/4] ARM: mxs: add GPMI-NAND support for imx23/imx28
  http://permalink.gmane.org/gmane.linux.drivers.mtd/37338

Signed-off-by: Huang Shijie <b32955@freescale.com>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 include/linux/mtd/gpmi-nand.h | 68 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 include/linux/mtd/gpmi-nand.h

(limited to 'include/linux')

diff --git a/include/linux/mtd/gpmi-nand.h b/include/linux/mtd/gpmi-nand.h
new file mode 100644
index 000000000000..69b6dbf46b5e
--- /dev/null
+++ b/include/linux/mtd/gpmi-nand.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef __MACH_MXS_GPMI_NAND_H__
+#define __MACH_MXS_GPMI_NAND_H__
+
+/* The size of the resources is fixed. */
+#define GPMI_NAND_RES_SIZE	6
+
+/* Resource names for the GPMI NAND driver. */
+#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME  "GPMI NAND GPMI Registers"
+#define GPMI_NAND_GPMI_INTERRUPT_RES_NAME  "GPMI NAND GPMI Interrupt"
+#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME   "GPMI NAND BCH Registers"
+#define GPMI_NAND_BCH_INTERRUPT_RES_NAME   "GPMI NAND BCH Interrupt"
+#define GPMI_NAND_DMA_CHANNELS_RES_NAME    "GPMI NAND DMA Channels"
+#define GPMI_NAND_DMA_INTERRUPT_RES_NAME   "GPMI NAND DMA Interrupt"
+
+/**
+ * struct gpmi_nand_platform_data - GPMI NAND driver platform data.
+ *
+ * This structure communicates platform-specific information to the GPMI NAND
+ * driver that can't be expressed as resources.
+ *
+ * @platform_init:           A pointer to a function the driver will call to
+ *                           initialize the platform (e.g., set up the pin mux).
+ * @min_prop_delay_in_ns:    Minimum propagation delay of GPMI signals to and
+ *                           from the NAND Flash device, in nanoseconds.
+ * @max_prop_delay_in_ns:    Maximum propagation delay of GPMI signals to and
+ *                           from the NAND Flash device, in nanoseconds.
+ * @max_chip_count:          The maximum number of chips for which the driver
+ *                           should configure the hardware. This value most
+ *                           likely reflects the number of pins that are
+ *                           connected to a NAND Flash device. If this is
+ *                           greater than the SoC hardware can support, the
+ *                           driver will print a message and fail to initialize.
+ * @partitions:              An optional pointer to an array of partition
+ *                           descriptions.
+ * @partition_count:         The number of elements in the partitions array.
+ */
+struct gpmi_nand_platform_data {
+	/* SoC hardware information. */
+	int		(*platform_init)(void);
+
+	/* NAND Flash information. */
+	unsigned int	min_prop_delay_in_ns;
+	unsigned int	max_prop_delay_in_ns;
+	unsigned int	max_chip_count;
+
+	/* Medium information. */
+	struct		mtd_partition *partitions;
+	unsigned	partition_count;
+};
+#endif
-- 
cgit v1.2.3


From 9d4dde5215779f4099730194ad30624fdba3d8b2 Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Thu, 22 Dec 2011 23:39:14 +0000
Subject: net: only use a single page of slop in MAX_SKB_FRAGS

In order to accommodate a 64K buffer we need 64K/PAGE_SIZE plus one more page
in order to allow for a buffer which does not start on a page boundary.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 12e6fed73f8e..f47f0c3939f2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -128,13 +128,17 @@ struct sk_buff_head {
 
 struct sk_buff;
 
-/* To allow 64K frame to be packed as single skb without frag_list. Since
- * GRO uses frags we allocate at least 16 regardless of page size.
+/* To allow 64K frame to be packed as single skb without frag_list we
+ * require 64K/PAGE_SIZE pages plus 1 additional page to allow for
+ * buffers which do not start on a page boundary.
+ *
+ * Since GRO uses frags we allocate at least 16 regardless of page
+ * size.
  */
-#if (65536/PAGE_SIZE + 2) < 16
+#if (65536/PAGE_SIZE + 1) < 16
 #define MAX_SKB_FRAGS 16UL
 #else
-#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
+#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1)
 #endif
 
 typedef struct skb_frag_struct skb_frag_t;
-- 
cgit v1.2.3


From c87fb57346fc7653ace98769f148e0dcd88ac1ee Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie@jamieiles.com>
Date: Wed, 14 Dec 2011 23:43:16 +0100
Subject: ARM: 7235/1: irqdomain: export irq_domain_simple_ops for !CONFIG_OF

irqdomain support is used in interrupt controller drivers that may not
have device tree support but only need the basic HW->Linux irq
translation.  Rather than having each of these implement their own IRQ
domain, allow them to use the simple ops.

Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rob Herring <robherring2@gmail.com>
Cc: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Jamie Iles <jamie@jamieiles.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/irqdomain.h |  3 ++-
 kernel/irq/irqdomain.c    | 12 +++++++-----
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 99834e581b9e..bd4272b61a14 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -91,10 +91,11 @@ static inline unsigned int irq_domain_to_irq(struct irq_domain *d,
 
 extern void irq_domain_add(struct irq_domain *domain);
 extern void irq_domain_del(struct irq_domain *domain);
+
+extern struct irq_domain_ops irq_domain_simple_ops;
 #endif /* CONFIG_IRQ_DOMAIN */
 
 #if defined(CONFIG_IRQ_DOMAIN) && defined(CONFIG_OF_IRQ)
-extern struct irq_domain_ops irq_domain_simple_ops;
 extern void irq_domain_add_simple(struct device_node *controller, int irq_base);
 extern void irq_domain_generate_simple(const struct of_device_id *match,
 					u64 phys_base, unsigned int irq_start);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 200ce832c585..7ca523b249ef 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -143,11 +143,6 @@ int irq_domain_simple_dt_translate(struct irq_domain *d,
 	return 0;
 }
 
-struct irq_domain_ops irq_domain_simple_ops = {
-	.dt_translate = irq_domain_simple_dt_translate,
-};
-EXPORT_SYMBOL_GPL(irq_domain_simple_ops);
-
 /**
  * irq_domain_create_simple() - Set up a 'simple' translation range
  */
@@ -182,3 +177,10 @@ void irq_domain_generate_simple(const struct of_device_id *match,
 }
 EXPORT_SYMBOL_GPL(irq_domain_generate_simple);
 #endif /* CONFIG_OF_IRQ */
+
+struct irq_domain_ops irq_domain_simple_ops = {
+#ifdef CONFIG_OF_IRQ
+	.dt_translate = irq_domain_simple_dt_translate,
+#endif /* CONFIG_OF_IRQ */
+};
+EXPORT_SYMBOL_GPL(irq_domain_simple_ops);
-- 
cgit v1.2.3


From 60b778ce519625102d3f72a2071ea72a05e990ce Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 24 Dec 2011 06:56:49 +0000
Subject: rfs: better sizing of dev_flow_table

Aim of this patch is to provide full range of rps_flow_cnt on 64bit arches.

Theorical limit on number of flows is 2^32

Fix some buggy RPS/RFS macros as well.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Tom Herbert <therbert@google.com>
CC: Xi Wang <xi.wang@gmail.com>
CC: Laurent Chavey <chavey@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  8 ++++----
 net/core/net-sysfs.c      | 44 +++++++++++++++++++++++++++-----------------
 2 files changed, 31 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 603730804da5..a776a675c0e5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -597,7 +597,7 @@ struct rps_map {
 	struct rcu_head rcu;
 	u16 cpus[0];
 };
-#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
+#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
 
 /*
  * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
@@ -621,7 +621,7 @@ struct rps_dev_flow_table {
 	struct rps_dev_flow flows[0];
 };
 #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
-    (_num * sizeof(struct rps_dev_flow)))
+    ((_num) * sizeof(struct rps_dev_flow)))
 
 /*
  * The rps_sock_flow_table contains mappings of flows to the last CPU
@@ -632,7 +632,7 @@ struct rps_sock_flow_table {
 	u16 ents[0];
 };
 #define	RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \
-    (_num * sizeof(u16)))
+    ((_num) * sizeof(u16)))
 
 #define RPS_NO_CPU 0xffff
 
@@ -684,7 +684,7 @@ struct xps_map {
 	struct rcu_head rcu;
 	u16 queues[0];
 };
-#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16)))
+#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16)))
 #define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map))	\
     / sizeof(u16))
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4b4d0b0a3543..abf4393a77b3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -622,15 +622,15 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 					   char *buf)
 {
 	struct rps_dev_flow_table *flow_table;
-	unsigned int val = 0;
+	unsigned long val = 0;
 
 	rcu_read_lock();
 	flow_table = rcu_dereference(queue->rps_flow_table);
 	if (flow_table)
-		val = flow_table->mask + 1;
+		val = (unsigned long)flow_table->mask + 1;
 	rcu_read_unlock();
 
-	return sprintf(buf, "%u\n", val);
+	return sprintf(buf, "%lu\n", val);
 }
 
 static void rps_dev_flow_table_release_work(struct work_struct *work)
@@ -654,36 +654,46 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 				     struct rx_queue_attribute *attr,
 				     const char *buf, size_t len)
 {
-	unsigned int count;
-	char *endp;
+	unsigned long mask, count;
 	struct rps_dev_flow_table *table, *old_table;
 	static DEFINE_SPINLOCK(rps_dev_flow_lock);
+	int rc;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	count = simple_strtoul(buf, &endp, 0);
-	if (endp == buf)
-		return -EINVAL;
+	rc = kstrtoul(buf, 0, &count);
+	if (rc < 0)
+		return rc;
 
 	if (count) {
-		int i;
-
-		if (count > INT_MAX)
+		mask = count - 1;
+		/* mask = roundup_pow_of_two(count) - 1;
+		 * without overflows...
+		 */
+		while ((mask | (mask >> 1)) != mask)
+			mask |= (mask >> 1);
+		/* On 64 bit arches, must check mask fits in table->mask (u32),
+		 * and on 32bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1)
+		 * doesnt overflow.
+		 */
+#if BITS_PER_LONG > 32
+		if (mask > (unsigned long)(u32)mask)
 			return -EINVAL;
-		count = roundup_pow_of_two(count);
-		if (count > (ULONG_MAX - sizeof(struct rps_dev_flow_table))
+#else
+		if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1))
 				/ sizeof(struct rps_dev_flow)) {
 			/* Enforce a limit to prevent overflow */
 			return -EINVAL;
 		}
-		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));
+#endif
+		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1));
 		if (!table)
 			return -ENOMEM;
 
-		table->mask = count - 1;
-		for (i = 0; i < count; i++)
-			table->flows[i].cpu = RPS_NO_CPU;
+		table->mask = mask;
+		for (count = 0; count <= mask; count++)
+			table->flows[count].cpu = RPS_NO_CPU;
 	} else
 		table = NULL;
 
-- 
cgit v1.2.3


From 9413902796f56f6209e19dd54e840ed46950612c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 23 Dec 2011 14:19:50 +0100
Subject: netfilter: add extended accounting infrastructure over nfnetlink

We currently have two ways to account traffic in netfilter:

- iptables chain and rule counters:

 # iptables -L -n -v
Chain INPUT (policy DROP 3 packets, 867 bytes)
 pkts bytes target     prot opt in     out     source               destination
    8  1104 ACCEPT     all  --  lo     *       0.0.0.0/0            0.0.0.0/0

- use flow-based accounting provided by ctnetlink:

 # conntrack -L
tcp      6 431999 ESTABLISHED src=192.168.1.130 dst=212.106.219.168 sport=58152 dport=80 packets=47 bytes=7654 src=212.106.219.168 dst=192.168.1.130 sport=80 dport=58152 packets=49 bytes=66340 [ASSURED] mark=0 use=1

While trying to display real-time accounting statistics, we require
to pool the kernel periodically to obtain this information. This is
OK if the number of flows is relatively low. However, in case that
the number of flows is huge, we can spend a considerable amount of
cycles to iterate over the list of flows that have been obtained.

Moreover, if we want to obtain the sum of the flow accounting results
that match some criteria, we have to iterate over the whole list of
existing flows, look for matchings and update the counters.

This patch adds the extended accounting infrastructure for
nfnetlink which aims to allow displaying real-time traffic accounting
without the need of complicated and resource-consuming implementation
in user-space. Basically, this new infrastructure allows you to create
accounting objects. One accounting object is composed of packet and
byte counters.

In order to manipulate create accounting objects, you require the
new libnetfilter_acct library. It contains several examples of use:

libnetfilter_acct/examples# ./nfacct-add http-traffic
libnetfilter_acct/examples# ./nfacct-get
http-traffic = { pkts = 000000000000,   bytes = 000000000000 };

Then, you can use one of this accounting objects in several iptables
rules using the new nfacct match (which comes in a follow-up patch):

 # iptables -I INPUT -p tcp --sport 80 -m nfacct --nfacct-name http-traffic
 # iptables -I OUTPUT -p tcp --dport 80 -m nfacct --nfacct-name http-traffic

The idea is simple: if one packet matches the rule, the nfacct match
updates the counters.

Thanks to Patrick McHardy, Eric Dumazet, Changli Gao for reviewing and
providing feedback for this contribution.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/Kbuild           |   1 +
 include/linux/netfilter/nfnetlink.h      |   3 +-
 include/linux/netfilter/nfnetlink_acct.h |  36 ++++
 net/netfilter/Kconfig                    |   8 +
 net/netfilter/Makefile                   |   1 +
 net/netfilter/nfnetlink_acct.c           | 352 +++++++++++++++++++++++++++++++
 6 files changed, 400 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/netfilter/nfnetlink_acct.h
 create mode 100644 net/netfilter/nfnetlink_acct.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index d81f7719b01c..6785246e6e62 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -7,6 +7,7 @@ header-y += nf_conntrack_tcp.h
 header-y += nf_conntrack_tuple_common.h
 header-y += nf_nat.h
 header-y += nfnetlink.h
+header-y += nfnetlink_acct.h
 header-y += nfnetlink_compat.h
 header-y += nfnetlink_conntrack.h
 header-y += nfnetlink_log.h
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 74d33861473c..b64454c2f79f 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -48,7 +48,8 @@ struct nfgenmsg {
 #define NFNL_SUBSYS_ULOG		4
 #define NFNL_SUBSYS_OSF			5
 #define NFNL_SUBSYS_IPSET		6
-#define NFNL_SUBSYS_COUNT		7
+#define NFNL_SUBSYS_ACCT		7
+#define NFNL_SUBSYS_COUNT		8
 
 #ifdef __KERNEL__
 
diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
new file mode 100644
index 000000000000..7c4279b4ae7a
--- /dev/null
+++ b/include/linux/netfilter/nfnetlink_acct.h
@@ -0,0 +1,36 @@
+#ifndef _NFNL_ACCT_H_
+#define _NFNL_ACCT_H_
+
+#ifndef NFACCT_NAME_MAX
+#define NFACCT_NAME_MAX		32
+#endif
+
+enum nfnl_acct_msg_types {
+	NFNL_MSG_ACCT_NEW,
+	NFNL_MSG_ACCT_GET,
+	NFNL_MSG_ACCT_GET_CTRZERO,
+	NFNL_MSG_ACCT_DEL,
+	NFNL_MSG_ACCT_MAX
+};
+
+enum nfnl_acct_type {
+	NFACCT_UNSPEC,
+	NFACCT_NAME,
+	NFACCT_PKTS,
+	NFACCT_BYTES,
+	NFACCT_USE,
+	__NFACCT_MAX
+};
+#define NFACCT_MAX (__NFACCT_MAX - 1)
+
+#ifdef __KERNEL__
+
+struct nf_acct;
+
+extern struct nf_acct *nfnl_acct_find_get(const char *filter_name);
+extern void nfnl_acct_put(struct nf_acct *acct);
+extern void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
+
+#endif /* __KERNEL__ */
+
+#endif /* _NFNL_ACCT_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index d5597b759ba3..77326acd1f57 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -4,6 +4,14 @@ menu "Core Netfilter Configuration"
 config NETFILTER_NETLINK
 	tristate
 
+config NETFILTER_NETLINK_ACCT
+tristate "Netfilter NFACCT over NFNETLINK interface"
+	depends on NETFILTER_ADVANCED
+	select NETFILTER_NETLINK
+	help
+	  If this option is enabled, the kernel will include support
+	  for extended accounting via NFNETLINK.
+
 config NETFILTER_NETLINK_QUEUE
 	tristate "Netfilter NFQUEUE over NFNETLINK interface"
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 1a02853df863..4da1c879644f 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -7,6 +7,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
 obj-$(CONFIG_NETFILTER) = netfilter.o
 
 obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
+obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
 obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
 obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
 
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
new file mode 100644
index 000000000000..362ab6ca3dc1
--- /dev/null
+++ b/net/netfilter/nfnetlink_acct.c
@@ -0,0 +1,352 @@
+/*
+ * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 Intra2net AG <http://www.intra2net.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation (or any later at your option).
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <asm/atomic.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_acct.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
+
+static LIST_HEAD(nfnl_acct_list);
+
+struct nf_acct {
+	atomic64_t		pkts;
+	atomic64_t		bytes;
+	struct list_head	head;
+	atomic_t		refcnt;
+	char			name[NFACCT_NAME_MAX];
+	struct rcu_head		rcu_head;
+};
+
+static int
+nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
+	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+	struct nf_acct *nfacct, *matching = NULL;
+	char *acct_name;
+
+	if (!tb[NFACCT_NAME])
+		return -EINVAL;
+
+	acct_name = nla_data(tb[NFACCT_NAME]);
+
+	list_for_each_entry(nfacct, &nfnl_acct_list, head) {
+		if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
+			continue;
+
+                if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+
+		matching = nfacct;
+		break;
+        }
+
+	if (matching) {
+		if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+			/* reset counters if you request a replacement. */
+			atomic64_set(&matching->pkts, 0);
+			atomic64_set(&matching->bytes, 0);
+			return 0;
+		}
+		return -EBUSY;
+	}
+
+	nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL);
+	if (nfacct == NULL)
+		return -ENOMEM;
+
+	strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
+
+	if (tb[NFACCT_BYTES]) {
+		atomic64_set(&nfacct->bytes,
+			     be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES])));
+	}
+	if (tb[NFACCT_PKTS]) {
+		atomic64_set(&nfacct->pkts,
+			     be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS])));
+	}
+	atomic_set(&nfacct->refcnt, 1);
+	list_add_tail_rcu(&nfacct->head, &nfnl_acct_list);
+	return 0;
+}
+
+static int
+nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+		   int event, struct nf_acct *acct)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	unsigned int flags = pid ? NLM_F_MULTI : 0;
+	u64 pkts, bytes;
+
+	event |= NFNL_SUBSYS_ACCT << 8;
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+	if (nlh == NULL)
+		goto nlmsg_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family = AF_UNSPEC;
+	nfmsg->version = NFNETLINK_V0;
+	nfmsg->res_id = 0;
+
+	NLA_PUT_STRING(skb, NFACCT_NAME, acct->name);
+
+	if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
+		pkts = atomic64_xchg(&acct->pkts, 0);
+		bytes = atomic64_xchg(&acct->bytes, 0);
+	} else {
+		pkts = atomic64_read(&acct->pkts);
+		bytes = atomic64_read(&acct->bytes);
+	}
+	NLA_PUT_BE64(skb, NFACCT_PKTS, cpu_to_be64(pkts));
+	NLA_PUT_BE64(skb, NFACCT_BYTES, cpu_to_be64(bytes));
+	NLA_PUT_BE32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)));
+
+	nlmsg_end(skb, nlh);
+	return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -1;
+}
+
+static int
+nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nf_acct *cur, *last;
+
+	if (cb->args[2])
+		return 0;
+
+	last = (struct nf_acct *)cb->args[1];
+	if (cb->args[1])
+		cb->args[1] = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
+		if (last && cur != last)
+			continue;
+
+		if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid,
+				       cb->nlh->nlmsg_seq,
+				       NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+				       NFNL_MSG_ACCT_NEW, cur) < 0) {
+			cb->args[1] = (unsigned long)cur;
+			break;
+		}
+	}
+	if (!cb->args[1])
+		cb->args[2] = 1;
+	rcu_read_unlock();
+	return skb->len;
+}
+
+static int
+nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
+	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+	int ret = 0;
+	struct nf_acct *cur;
+	char *acct_name;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump,
+					  NULL, 0);
+	}
+
+	if (!tb[NFACCT_NAME])
+		return -EINVAL;
+	acct_name = nla_data(tb[NFACCT_NAME]);
+
+	list_for_each_entry(cur, &nfnl_acct_list, head) {
+		struct sk_buff *skb2;
+
+		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
+			continue;
+
+		skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+		if (skb2 == NULL)
+			break;
+
+		ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid,
+					 nlh->nlmsg_seq,
+					 NFNL_MSG_TYPE(nlh->nlmsg_type),
+					 NFNL_MSG_ACCT_NEW, cur);
+		if (ret <= 0)
+			kfree_skb(skb2);
+
+		break;
+	}
+	return ret;
+}
+
+/* try to delete object, fail if it is still in use. */
+static int nfnl_acct_try_del(struct nf_acct *cur)
+{
+	int ret = 0;
+
+	/* we want to avoid races with nfnl_acct_find_get. */
+	if (atomic_dec_and_test(&cur->refcnt)) {
+		/* We are protected by nfnl mutex. */
+		list_del_rcu(&cur->head);
+		kfree_rcu(cur, rcu_head);
+	} else {
+		/* still in use, restore reference counter. */
+		atomic_inc(&cur->refcnt);
+		ret = -EBUSY;
+	}
+	return ret;
+}
+
+static int
+nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
+	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+	char *acct_name;
+	struct nf_acct *cur;
+	int ret = -ENOENT;
+
+	if (!tb[NFACCT_NAME]) {
+		list_for_each_entry(cur, &nfnl_acct_list, head)
+			nfnl_acct_try_del(cur);
+
+		return 0;
+	}
+	acct_name = nla_data(tb[NFACCT_NAME]);
+
+	list_for_each_entry(cur, &nfnl_acct_list, head) {
+		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
+			continue;
+
+		ret = nfnl_acct_try_del(cur);
+		if (ret < 0)
+			return ret;
+
+		break;
+	}
+	return ret;
+}
+
+static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
+	[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
+	[NFACCT_BYTES] = { .type = NLA_U64 },
+	[NFACCT_PKTS] = { .type = NLA_U64 },
+};
+
+static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
+	[NFNL_MSG_ACCT_NEW]		= { .call = nfnl_acct_new,
+					    .attr_count = NFACCT_MAX,
+					    .policy = nfnl_acct_policy },
+	[NFNL_MSG_ACCT_GET] 		= { .call = nfnl_acct_get,
+					    .attr_count = NFACCT_MAX,
+					    .policy = nfnl_acct_policy },
+	[NFNL_MSG_ACCT_GET_CTRZERO] 	= { .call = nfnl_acct_get,
+					    .attr_count = NFACCT_MAX,
+					    .policy = nfnl_acct_policy },
+	[NFNL_MSG_ACCT_DEL]		= { .call = nfnl_acct_del,
+					    .attr_count = NFACCT_MAX,
+					    .policy = nfnl_acct_policy },
+};
+
+static const struct nfnetlink_subsystem nfnl_acct_subsys = {
+	.name				= "acct",
+	.subsys_id			= NFNL_SUBSYS_ACCT,
+	.cb_count			= NFNL_MSG_ACCT_MAX,
+	.cb				= nfnl_acct_cb,
+};
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
+
+struct nf_acct *nfnl_acct_find_get(const char *acct_name)
+{
+	struct nf_acct *cur, *acct = NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
+		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
+			continue;
+
+		if (!try_module_get(THIS_MODULE))
+			goto err;
+
+		if (!atomic_inc_not_zero(&cur->refcnt)) {
+			module_put(THIS_MODULE);
+			goto err;
+		}
+
+		acct = cur;
+		break;
+	}
+err:
+	rcu_read_unlock();
+	return acct;
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
+
+void nfnl_acct_put(struct nf_acct *acct)
+{
+	atomic_dec(&acct->refcnt);
+	module_put(THIS_MODULE);
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_put);
+
+void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
+{
+	atomic64_inc(&nfacct->pkts);
+	atomic64_add(skb->len, &nfacct->bytes);
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_update);
+
+static int __init nfnl_acct_init(void)
+{
+	int ret;
+
+	pr_info("nfnl_acct: registering with nfnetlink.\n");
+	ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
+	if (ret < 0) {
+		pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
+		goto err_out;
+	}
+	return 0;
+err_out:
+	return ret;
+}
+
+static void __exit nfnl_acct_exit(void)
+{
+	struct nf_acct *cur, *tmp;
+
+	pr_info("nfnl_acct: unregistering from nfnetlink.\n");
+	nfnetlink_subsys_unregister(&nfnl_acct_subsys);
+
+	list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) {
+		list_del_rcu(&cur->head);
+		/* We are sure that our objects have no clients at this point,
+		 * it's safe to release them all without checking refcnt. */
+		kfree_rcu(cur, rcu_head);
+	}
+}
+
+module_init(nfnl_acct_init);
+module_exit(nfnl_acct_exit);
-- 
cgit v1.2.3


From ceb98d03eac5704820f2ac1f370c9ff385e3a9f5 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 23 Dec 2011 14:28:59 +0100
Subject: netfilter: xtables: add nfacct match to support extended accounting

This patch adds the match that allows to perform extended
accounting. It requires the new nfnetlink_acct infrastructure.

 # iptables -I INPUT -p tcp --sport 80 -m nfacct --nfacct-name http-traffic
 # iptables -I OUTPUT -p tcp --dport 80 -m nfacct --nfacct-name http-traffic

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/Kbuild      |  1 +
 include/linux/netfilter/xt_nfacct.h | 13 +++++++
 net/netfilter/Kconfig               | 10 +++++
 net/netfilter/Makefile              |  1 +
 net/netfilter/xt_nfacct.c           | 76 +++++++++++++++++++++++++++++++++++++
 5 files changed, 101 insertions(+)
 create mode 100644 include/linux/netfilter/xt_nfacct.h
 create mode 100644 net/netfilter/xt_nfacct.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 6785246e6e62..e630a2ed4f18 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -23,6 +23,7 @@ header-y += xt_DSCP.h
 header-y += xt_IDLETIMER.h
 header-y += xt_LED.h
 header-y += xt_MARK.h
+header-y += xt_nfacct.h
 header-y += xt_NFLOG.h
 header-y += xt_NFQUEUE.h
 header-y += xt_RATEEST.h
diff --git a/include/linux/netfilter/xt_nfacct.h b/include/linux/netfilter/xt_nfacct.h
new file mode 100644
index 000000000000..3e19c8a86576
--- /dev/null
+++ b/include/linux/netfilter/xt_nfacct.h
@@ -0,0 +1,13 @@
+#ifndef _XT_NFACCT_MATCH_H
+#define _XT_NFACCT_MATCH_H
+
+#include <linux/netfilter/nfnetlink_acct.h>
+
+struct nf_acct;
+
+struct xt_nfacct_match_info {
+	char		name[NFACCT_NAME_MAX];
+	struct nf_acct	*nfacct;
+};
+
+#endif /* _XT_NFACCT_MATCH_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 77326acd1f57..bac93ba60778 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -887,6 +887,16 @@ config NETFILTER_XT_MATCH_MULTIPORT
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_NFACCT
+	tristate '"nfacct" match support'
+	default m if NETFILTER_ADVANCED=n
+	select NETFILTER_NETLINK_ACCT
+	help
+	  This option allows you to use the extended accounting through
+	  nfnetlink_acct.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_OSF
 	tristate '"osf" Passive OS fingerprint match'
 	depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 4da1c879644f..b2eee4df8168 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -91,6 +91,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
new file mode 100644
index 000000000000..b3be0ef21f19
--- /dev/null
+++ b/net/netfilter/xt_nfacct.c
@@ -0,0 +1,76 @@
+/*
+ * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 Intra2net AG <http://www.intra2net.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 (or any
+ * later at your option) as published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/nfnetlink_acct.h>
+#include <linux/netfilter/xt_nfacct.h>
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: match for the extended accounting infrastructure");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_nfacct");
+MODULE_ALIAS("ip6t_nfacct");
+
+static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_nfacct_match_info *info = par->targinfo;
+
+	nfnl_acct_update(skb, info->nfacct);
+
+	return true;
+}
+
+static int
+nfacct_mt_checkentry(const struct xt_mtchk_param *par)
+{
+	struct xt_nfacct_match_info *info = par->matchinfo;
+	struct nf_acct *nfacct;
+
+	nfacct = nfnl_acct_find_get(info->name);
+	if (nfacct == NULL) {
+		pr_info("xt_nfacct: accounting object with name `%s' "
+			"does not exists\n", info->name);
+		return -ENOENT;
+	}
+	info->nfacct = nfacct;
+	return 0;
+}
+
+static void
+nfacct_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	const struct xt_nfacct_match_info *info = par->matchinfo;
+
+	nfnl_acct_put(info->nfacct);
+}
+
+static struct xt_match nfacct_mt_reg __read_mostly = {
+	.name       = "nfacct",
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = nfacct_mt_checkentry,
+	.match      = nfacct_mt,
+	.destroy    = nfacct_mt_destroy,
+	.matchsize  = sizeof(struct xt_nfacct_match_info),
+	.me         = THIS_MODULE,
+};
+
+static int __init nfacct_mt_init(void)
+{
+	return xt_register_match(&nfacct_mt_reg);
+}
+
+static void __exit nfacct_mt_exit(void)
+{
+	xt_unregister_match(&nfacct_mt_reg);
+}
+
+module_init(nfacct_mt_init);
+module_exit(nfacct_mt_exit);
-- 
cgit v1.2.3


From 0f966d74cf77a9140a025464a287e1d2fee8a1fc Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 23 Dec 2011 01:23:30 +0100
Subject: PM / shmobile: Don't include SH7372's INTCS in syscore suspend/resume

Since the SH7372's INTCS in included into syscore suspend/resume,
which causes the chip to be accessed when PM domains have been
turned off during system suspend, the A4R domain containing the
INTCS has to stay on during system sleep, which is suboptimal
from the power consumption point of view.

For this reason, add a new INTC flag, skip_syscore_suspend, to mark
the INTCS for intc_suspend() and intc_resume(), so that they don't
touch it.  This allows the A4R domain to be turned off during
system suspend and the INTCS state is resrored during system
resume by the A4R's "power on" code.

Suggested-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Magnus Damm <damm@opensource.se>
---
 arch/arm/mach-shmobile/intc-sh7372.c | 1 +
 drivers/sh/intc/core.c               | 8 ++++++++
 drivers/sh/intc/internals.h          | 1 +
 include/linux/sh_intc.h              | 1 +
 4 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm/mach-shmobile/intc-sh7372.c b/arch/arm/mach-shmobile/intc-sh7372.c
index d087b31b5d12..89afcaba99a1 100644
--- a/arch/arm/mach-shmobile/intc-sh7372.c
+++ b/arch/arm/mach-shmobile/intc-sh7372.c
@@ -535,6 +535,7 @@ static struct resource intcs_resources[] __initdata = {
 static struct intc_desc intcs_desc __initdata = {
 	.name = "sh7372-intcs",
 	.force_enable = ENABLED_INTCS,
+	.skip_syscore_suspend = true,
 	.resource = intcs_resources,
 	.num_resources = ARRAY_SIZE(intcs_resources),
 	.hw = INTC_HW_DESC(intcs_vectors, intcs_groups, intcs_mask_registers,
diff --git a/drivers/sh/intc/core.c b/drivers/sh/intc/core.c
index 8b7a141ff35e..be5a025eeca3 100644
--- a/drivers/sh/intc/core.c
+++ b/drivers/sh/intc/core.c
@@ -354,6 +354,8 @@ int __init register_intc_controller(struct intc_desc *desc)
 	if (desc->force_enable)
 		intc_enable_disable_enum(desc, d, desc->force_enable, 1);
 
+	d->skip_suspend = desc->skip_syscore_suspend;
+
 	nr_intc_controllers++;
 
 	return 0;
@@ -386,6 +388,9 @@ static int intc_suspend(void)
 	list_for_each_entry(d, &intc_list, list) {
 		int irq;
 
+		if (d->skip_suspend)
+			continue;
+
 		/* enable wakeup irqs belonging to this intc controller */
 		for_each_active_irq(irq) {
 			struct irq_data *data;
@@ -409,6 +414,9 @@ static void intc_resume(void)
 	list_for_each_entry(d, &intc_list, list) {
 		int irq;
 
+		if (d->skip_suspend)
+			continue;
+
 		for_each_active_irq(irq) {
 			struct irq_data *data;
 			struct irq_chip *chip;
diff --git a/drivers/sh/intc/internals.h b/drivers/sh/intc/internals.h
index 5b934851efa8..b3fe1cf25a28 100644
--- a/drivers/sh/intc/internals.h
+++ b/drivers/sh/intc/internals.h
@@ -67,6 +67,7 @@ struct intc_desc_int {
 	struct intc_window *window;
 	unsigned int nr_windows;
 	struct irq_chip chip;
+	bool skip_suspend;
 };
 
 
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
index 5812fefbcedf..b160645f5599 100644
--- a/include/linux/sh_intc.h
+++ b/include/linux/sh_intc.h
@@ -95,6 +95,7 @@ struct intc_desc {
 	unsigned int num_resources;
 	intc_enum force_enable;
 	intc_enum force_disable;
+	bool skip_syscore_suspend;
 	struct intc_hw_desc hw;
 };
 
-- 
cgit v1.2.3


From 40a5f8be2f482783de0f1f0fe856660e489734a8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 23 Dec 2011 01:23:52 +0100
Subject: PM / QoS: Introduce dev_pm_qos_add_ancestor_request()

Some devices, like the I2C controller on SH7372, are not
necessary for providing power to their children or forwarding
wakeup signals (and generally interrupts) from them.  They are
only needed by their children when there's some data to transfer,
so they may be suspended for the majority of time and resumed
on demand, when the children have data to send or receive.  For this
purpose, however, their power.ignore_children flags have to be set,
or the PM core wouldn't allow them to be suspended while their
children were active.

Unfortunately, in some situations it may take too much time to
resume such devices so that they can assist their children in
transferring data.  For example, if such a device belongs to a PM
domain which goes to the "power off" state when that device is
suspended, it may take too much time to restore power to the
domain in response to the request from one of the device's
children.  In that case, if the parent's resume time is critical,
the domain should stay in the "power on" state, although it still may
be desirable to power manage the parent itself (e.g. by manipulating
its clock).

In general, device PM QoS may be used to address this problem.
Namely, if the device's children added PM QoS latency constraints
for it, they would be able to prevent it from being put into an
overly deep low-power state.  However, in some cases the devices
needing to be serviced are not the immediate children of a
"children-ignoring" device, but its grandchildren or even less
direct descendants.  In those cases, the entity wanting to add a
PM QoS request for a given device's ancestor that ignores its
children will have to find it in the first place, so introduce a new
helper function that may be used to achieve that.  This function,
dev_pm_qos_add_ancestor_request(), will search for the first
ancestor of the given device whose power.ignore_children flag is
set and will add a device PM QoS latency request for that ancestor
on behalf of the caller.  The request added this way may be removed
with the help of dev_pm_qos_remove_request() in the future, like
any other device PM QoS latency request.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/qos.c | 25 +++++++++++++++++++++++++
 include/linux/pm_qos.h   |  5 +++++
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c
index 86de6c50fc41..edf7687615e8 100644
--- a/drivers/base/power/qos.c
+++ b/drivers/base/power/qos.c
@@ -412,3 +412,28 @@ int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier)
 	return blocking_notifier_chain_unregister(&dev_pm_notifiers, notifier);
 }
 EXPORT_SYMBOL_GPL(dev_pm_qos_remove_global_notifier);
+
+/**
+ * dev_pm_qos_add_ancestor_request - Add PM QoS request for device's ancestor.
+ * @dev: Device whose ancestor to add the request for.
+ * @req: Pointer to the preallocated handle.
+ * @value: Constraint latency value.
+ */
+int dev_pm_qos_add_ancestor_request(struct device *dev,
+				    struct dev_pm_qos_request *req, s32 value)
+{
+	struct device *ancestor = dev->parent;
+	int error = -ENODEV;
+
+	while (ancestor && !ancestor->power.ignore_children)
+		ancestor = ancestor->parent;
+
+	if (ancestor)
+		error = dev_pm_qos_add_request(ancestor, req, value);
+
+	if (error)
+		req->dev = NULL;
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_add_ancestor_request);
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 83b0ea302a80..fe247b33652d 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -91,6 +91,8 @@ int dev_pm_qos_add_global_notifier(struct notifier_block *notifier);
 int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier);
 void dev_pm_qos_constraints_init(struct device *dev);
 void dev_pm_qos_constraints_destroy(struct device *dev);
+int dev_pm_qos_add_ancestor_request(struct device *dev,
+				    struct dev_pm_qos_request *req, s32 value);
 #else
 static inline int pm_qos_update_target(struct pm_qos_constraints *c,
 				       struct plist_node *node,
@@ -150,6 +152,9 @@ static inline void dev_pm_qos_constraints_destroy(struct device *dev)
 {
 	dev->power.power_state = PMSG_INVALID;
 }
+static inline int dev_pm_qos_add_ancestor_request(struct device *dev,
+				    struct dev_pm_qos_request *req, s32 value)
+			{ return 0; }
 #endif
 
 #endif
-- 
cgit v1.2.3


From 4d25a066b69fb749a39d0d4c610689dd765a0b0e Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Wed, 21 Dec 2011 12:28:29 +0100
Subject: KVM: Don't automatically expose the TSC deadline timer in cpuid

Unlike all of the other cpuid bits, the TSC deadline timer bit is set
unconditionally, regardless of what userspace wants.

This is broken in several ways:
 - if userspace doesn't use KVM_CREATE_IRQCHIP, and doesn't emulate the TSC
   deadline timer feature, a guest that uses the feature will break
 - live migration to older host kernels that don't support the TSC deadline
   timer will cause the feature to be pulled from under the guest's feet;
   breaking it
 - guests that are broken wrt the feature will fail.

Fix by not enabling the feature automatically; instead report it to userspace.
Because the feature depends on KVM_CREATE_IRQCHIP, which we cannot guarantee
will be called, we expose it via a KVM_CAP_TSC_DEADLINE_TIMER and not
KVM_GET_SUPPORTED_CPUID.

Fixes the Illumos guest kernel, which uses the TSC deadline timer feature.

[avi: add the KVM_CAP + documentation]

Reported-by: Alexey Zaytsev <alexey.zaytsev@gmail.com>
Tested-by: Alexey Zaytsev <alexey.zaytsev@gmail.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 Documentation/virtual/kvm/api.txt |  9 +++++++++
 arch/x86/kvm/x86.c                | 19 +++++++++----------
 include/linux/kvm.h               |  1 +
 3 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4df9af4f6132..e2a4b5287361 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1100,6 +1100,15 @@ emulate them efficiently. The fields in each entry are defined as follows:
    eax, ebx, ecx, edx: the values returned by the cpuid instruction for
          this function/index combination
 
+The TSC deadline timer feature (CPUID leaf 1, ecx[24]) is always returned
+as false, since the feature depends on KVM_CREATE_IRQCHIP for local APIC
+support.  Instead it is reported via
+
+  ioctl(KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER)
+
+if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the
+feature in userspace, then you can enable the feature for KVM_SET_CPUID2.
+
 4.47 KVM_PPC_GET_PVINFO
 
 Capability: KVM_CAP_PPC_GET_PVINFO
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c38efd7b792e..4c938da2ba00 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -602,7 +602,6 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
 	struct kvm_lapic *apic = vcpu->arch.apic;
-	u32 timer_mode_mask;
 
 	best = kvm_find_cpuid_entry(vcpu, 1, 0);
 	if (!best)
@@ -615,15 +614,12 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
 			best->ecx |= bit(X86_FEATURE_OSXSAVE);
 	}
 
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-		best->function == 0x1) {
-		best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER);
-		timer_mode_mask = 3 << 17;
-	} else
-		timer_mode_mask = 1 << 17;
-
-	if (apic)
-		apic->lapic_timer.timer_mode_mask = timer_mode_mask;
+	if (apic) {
+		if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
+			apic->lapic_timer.timer_mode_mask = 3 << 17;
+		else
+			apic->lapic_timer.timer_mode_mask = 1 << 17;
+	}
 }
 
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -2135,6 +2131,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_TSC_CONTROL:
 		r = kvm_has_tsc_control;
 		break;
+	case KVM_CAP_TSC_DEADLINE_TIMER:
+		r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
+		break;
 	default:
 		r = 0;
 		break;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c3892fc1d538..68e67e50d028 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -557,6 +557,7 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_MAX_VCPUS 66       /* returns max vcpus per vm */
 #define KVM_CAP_PPC_PAPR 68
 #define KVM_CAP_S390_GMAP 71
+#define KVM_CAP_TSC_DEADLINE_TIMER 72
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From b3b73ec0d7fe5bf8f950232aa58dfa0416a62372 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 26 Dec 2011 00:29:55 +0100
Subject: PM / Freezer: fix return value of
 freezable_schedule_timeout_killable()

...it should return the return code from schedule_timeout_killable(),
not the one from freezer_count().

All of the current callers ignore the return code so the bug is
harmless but it's worth fixing.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/freezer.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 7bcfe73d999b..0ab54e16a91f 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -116,9 +116,11 @@ static inline int freezer_should_skip(struct task_struct *p)
 /* Like schedule_timeout_killable(), but should not block the freezer. */
 #define freezable_schedule_timeout_killable(timeout)			\
 ({									\
+	long __retval;							\
 	freezer_do_not_count();						\
-	schedule_timeout_killable(timeout);				\
+	__retval = schedule_timeout_killable(timeout);			\
 	freezer_count();						\
+	__retval;							\
 })
 
 /*
-- 
cgit v1.2.3


From d6185f20a0efbf175e12831d0de330e4f21725aa Mon Sep 17 00:00:00 2001
From: Nadav Har'El <nyh@il.ibm.com>
Date: Thu, 22 Sep 2011 13:52:56 +0300
Subject: KVM: nVMX: Add KVM_REQ_IMMEDIATE_EXIT

This patch adds a new vcpu->requests bit, KVM_REQ_IMMEDIATE_EXIT.
This bit requests that when next entering the guest, we should run it only
for as little as possible, and exit again.

We use this new option in nested VMX: When L1 launches L2, but L0 wishes L1
to continue running so it can inject an event to it, we unfortunately cannot
just pretend to have run L2 for a little while - We must really launch L2,
otherwise certain one-off vmcs12 parameters (namely, L1 injection into L2)
will be lost. So the existing code runs L2 in this case.
But L2 could potentially run for a long time until it exits, and the
injection into L1 will be delayed. The new KVM_REQ_IMMEDIATE_EXIT allows us
to request that L2 will be entered, as necessary, but will exit as soon as
possible after entry.

Our implementation of this request uses smp_send_reschedule() to send a
self-IPI, with interrupts disabled. The interrupts remain disabled until the
guest is entered, and then, after the entry is complete (often including
processing an injection and jumping to the relevant handler), the physical
interrupt is noticed and causes an exit.

On recent Intel processors, we could have achieved the same goal by using
MTF instead of a self-IPI. Another technique worth considering in the future
is to use VM_EXIT_ACK_INTR_ON_EXIT and a highest-priority vector IPI - to
slightly improve performance by avoiding the useless interrupt handler
which ends up being called when smp_send_reschedule() is used.

Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c       | 11 +++++++----
 arch/x86/kvm/x86.c       |  7 ++++++-
 include/linux/kvm_host.h |  1 +
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 579a0b51696a..d75d91465246 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3945,12 +3945,15 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
-	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
-		/* We can get here when nested_run_pending caused
-		 * vmx_interrupt_allowed() to return false. In this case, do
-		 * nothing - the interrupt will be injected later.
+	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
+		/*
+		 * We get here if vmx_interrupt_allowed() said we can't
+		 * inject to L1 now because L2 must run. Ask L2 to exit
+		 * right after entry, so we can inject to L1 more promptly.
 		 */
+		kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
 		return;
+	}
 
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4c938da2ba00..e24edbc7f2ec 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5648,6 +5648,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	int r;
 	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
 		vcpu->run->request_interrupt_window;
+	bool req_immediate_exit = 0;
 
 	if (vcpu->requests) {
 		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
@@ -5687,7 +5688,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			record_steal_time(vcpu);
 		if (kvm_check_request(KVM_REQ_NMI, vcpu))
 			process_nmi(vcpu);
-
+		req_immediate_exit =
+			kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
 	}
 
 	r = kvm_mmu_reload(vcpu);
@@ -5738,6 +5740,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
+	if (req_immediate_exit)
+		smp_send_reschedule(vcpu->cpu);
+
 	kvm_guest_enter();
 
 	if (unlikely(vcpu->arch.switch_db_regs)) {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d52623199978..9fedeb356b95 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -50,6 +50,7 @@
 #define KVM_REQ_APF_HALT          12
 #define KVM_REQ_STEAL_UPDATE      13
 #define KVM_REQ_NMI               14
+#define KVM_REQ_IMMEDIATE_EXIT    15
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
-- 
cgit v1.2.3


From b297e672e24687546ac74af5ae5e8c4a022b9806 Mon Sep 17 00:00:00 2001
From: Eric B Munson <emunson@mgebm.net>
Date: Mon, 10 Oct 2011 11:46:15 -0400
Subject: KVM: Fix include dependency for mmu_notifier

The kvm_host struct can include an mmu_notifier struct but mmu_notifier.h is
not included directly.

Signed-off-by: Eric B Munson <emunson@mgebm.net>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9fedeb356b95..c6a2ec925846 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -14,6 +14,7 @@
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/mmu_notifier.h>
 #include <linux/preempt.h>
 #include <linux/msi.h>
 #include <linux/slab.h>
-- 
cgit v1.2.3


From 7850ac5420803996e2960d15b924021f28e0dffc Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 14 Nov 2011 18:23:34 +0900
Subject: KVM: Count the number of dirty pages for dirty logging

Needed for the next patch which uses this number to decide how to write
protect a slot.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c       | 9 +++------
 include/linux/kvm_host.h | 1 +
 virt/kvm/kvm_main.c      | 4 +++-
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a3b25a524c9b..220c83b0fbda 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3466,10 +3466,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				      struct kvm_dirty_log *log)
 {
-	int r, i;
+	int r;
 	struct kvm_memory_slot *memslot;
 	unsigned long n;
-	unsigned long is_dirty = 0;
 
 	mutex_lock(&kvm->slots_lock);
 
@@ -3484,11 +3483,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 
 	n = kvm_dirty_bitmap_bytes(memslot);
 
-	for (i = 0; !is_dirty && i < n/sizeof(long); i++)
-		is_dirty = memslot->dirty_bitmap[i];
-
 	/* If nothing is dirty, don't bother messing with page tables. */
-	if (is_dirty) {
+	if (memslot->nr_dirty_pages) {
 		struct kvm_memslots *slots, *old_slots;
 		unsigned long *dirty_bitmap;
 
@@ -3503,6 +3499,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 			goto out;
 		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
 		slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
+		slots->memslots[log->slot].nr_dirty_pages = 0;
 		slots->generation++;
 
 		old_slots = kvm->memslots;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c6a2ec925846..7c654aa46b6c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -181,6 +181,7 @@ struct kvm_memory_slot {
 	unsigned long *rmap;
 	unsigned long *dirty_bitmap;
 	unsigned long *dirty_bitmap_head;
+	unsigned long nr_dirty_pages;
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 	unsigned long userspace_addr;
 	int user_alloc;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4c5b9a239674..af5c988cafcc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -625,6 +625,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 		return -ENOMEM;
 
 	memslot->dirty_bitmap_head = memslot->dirty_bitmap;
+	memslot->nr_dirty_pages = 0;
 	return 0;
 }
 #endif /* !CONFIG_S390 */
@@ -1491,7 +1492,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
-		__set_bit_le(rel_gfn, memslot->dirty_bitmap);
+		if (!__test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap))
+			memslot->nr_dirty_pages++;
 	}
 }
 
-- 
cgit v1.2.3


From 93a5cef07d686a0341d056b0f930a762c7174a13 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Thu, 24 Nov 2011 17:37:48 +0800
Subject: KVM: introduce KVM_MEM_SLOTS_NUM macro

Introduce KVM_MEM_SLOTS_NUM macro to instead of
KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 4 +++-
 arch/x86/kvm/mmu.c              | 2 +-
 include/linux/kvm_host.h        | 7 +++++--
 virt/kvm/kvm_main.c             | 2 +-
 4 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 69b652547489..1769f3dde611 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -31,6 +31,8 @@
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
+
 #define KVM_MMIO_SIZE 16
 
 #define KVM_PIO_PAGE_OFFSET 1
@@ -228,7 +230,7 @@ struct kvm_mmu_page {
 	 * One bit set per slot which has memory
 	 * in this shadow page.
 	 */
-	DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
+	DECLARE_BITMAP(slot_bitmap, KVM_MEM_SLOTS_NUM);
 	bool unsync;
 	int root_count;          /* Currently serving as active root */
 	unsigned int unsync_children;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index aecdea265f7e..715dcb4fb798 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1349,7 +1349,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 						  PAGE_SIZE);
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
-	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
+	bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM);
 	sp->parent_ptes = 0;
 	mmu_page_add_parent_pte(vcpu, sp, parent_pte);
 	kvm_mod_used_mmu_pages(vcpu->kvm, +1);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7c654aa46b6c..924df0d7ac5f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -227,11 +227,14 @@ struct kvm_irq_routing_table {};
 
 #endif
 
+#ifndef KVM_MEM_SLOTS_NUM
+#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
+#endif
+
 struct kvm_memslots {
 	int nmemslots;
 	u64 generation;
-	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
-					KVM_PRIVATE_MEM_SLOTS];
+	struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
 };
 
 struct kvm {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index af5c988cafcc..9ad94c9996e7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -663,7 +663,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			(void __user *)(unsigned long)mem->userspace_addr,
 			mem->memory_size)))
 		goto out;
-	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
+	if (mem->slot >= KVM_MEM_SLOTS_NUM)
 		goto out;
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
 		goto out;
-- 
cgit v1.2.3


From be593d6286075801bba6d60fa466a39c24cc7616 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Thu, 24 Nov 2011 17:38:24 +0800
Subject: KVM: introduce update_memslots function

Introduce update_memslots to update slot which will be update to
kvm->memslots

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c       |  2 +-
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      | 22 +++++++++++++++-------
 3 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index af546b768ffd..917a287d21c8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3546,7 +3546,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 		memslot = &slots->memslots[log->slot];
 		memslot->dirty_bitmap = dirty_bitmap;
 		memslot->nr_dirty_pages = 0;
-		slots->generation++;
+		update_memslots(slots, NULL);
 
 		old_slots = kvm->memslots;
 		rcu_assign_pointer(kvm->memslots, slots);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 924df0d7ac5f..23f795c66220 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -320,6 +320,7 @@ void kvm_exit(void);
 
 void kvm_get_kvm(struct kvm *kvm);
 void kvm_put_kvm(struct kvm *kvm);
+void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new);
 
 static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
 {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9ad94c9996e7..b5ed7770ced3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -630,6 +630,19 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 }
 #endif /* !CONFIG_S390 */
 
+void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)
+{
+	if (new) {
+		int id = new->id;
+
+		slots->memslots[id] = *new;
+		if (id >= slots->nmemslots)
+			slots->nmemslots = id + 1;
+	}
+
+	slots->generation++;
+}
+
 /*
  * Allocate some memory and give it an address in the guest physical address
  * space.
@@ -780,10 +793,8 @@ skip_lpage:
 				GFP_KERNEL);
 		if (!slots)
 			goto out_free;
-		if (mem->slot >= slots->nmemslots)
-			slots->nmemslots = mem->slot + 1;
-		slots->generation++;
 		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+		update_memslots(slots, NULL);
 
 		old_memslots = kvm->memslots;
 		rcu_assign_pointer(kvm->memslots, slots);
@@ -815,9 +826,6 @@ skip_lpage:
 			GFP_KERNEL);
 	if (!slots)
 		goto out_free;
-	if (mem->slot >= slots->nmemslots)
-		slots->nmemslots = mem->slot + 1;
-	slots->generation++;
 
 	/* actual memory is freed via old in kvm_free_physmem_slot below */
 	if (!npages) {
@@ -827,7 +835,7 @@ skip_lpage:
 			new.lpage_info[i] = NULL;
 	}
 
-	slots->memslots[mem->slot] = new;
+	update_memslots(slots, &new);
 	old_memslots = kvm->memslots;
 	rcu_assign_pointer(kvm->memslots, slots);
 	synchronize_srcu_expedited(&kvm->srcu);
-- 
cgit v1.2.3


From be6ba0f0962a39091c52eb9167ddea201fe80716 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Thu, 24 Nov 2011 17:39:18 +0800
Subject: KVM: introduce kvm_for_each_memslot macro

Introduce kvm_for_each_memslot to walk all valid memslot

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c |  6 ++----
 arch/x86/kvm/mmu.c       | 12 ++++++------
 include/linux/kvm_host.h |  4 ++++
 virt/kvm/iommu.c         | 17 +++++++++--------
 virt/kvm/kvm_main.c      | 14 ++++++--------
 5 files changed, 27 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 43f4c92816ef..42ad1f9c9f01 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1366,14 +1366,12 @@ static void kvm_release_vm_pages(struct kvm *kvm)
 {
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
-	int i, j;
+	int j;
 	unsigned long base_gfn;
 
 	slots = kvm_memslots(kvm);
-	for (i = 0; i < slots->nmemslots; i++) {
-		memslot = &slots->memslots[i];
+	kvm_for_each_memslot(memslot, slots) {
 		base_gfn = memslot->base_gfn;
-
 		for (j = 0; j < memslot->npages; j++) {
 			if (memslot->rmap[j])
 				put_page((struct page *)memslot->rmap[j]);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 715dcb4fb798..d737443cdfdb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1128,15 +1128,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
 					 unsigned long data))
 {
-	int i, j;
+	int j;
 	int ret;
 	int retval = 0;
 	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
 
 	slots = kvm_memslots(kvm);
 
-	for (i = 0; i < slots->nmemslots; i++) {
-		struct kvm_memory_slot *memslot = &slots->memslots[i];
+	kvm_for_each_memslot(memslot, slots) {
 		unsigned long start = memslot->userspace_addr;
 		unsigned long end;
 
@@ -3985,15 +3985,15 @@ nomem:
  */
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 {
-	int i;
 	unsigned int nr_mmu_pages;
 	unsigned int  nr_pages = 0;
 	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
 
 	slots = kvm_memslots(kvm);
 
-	for (i = 0; i < slots->nmemslots; i++)
-		nr_pages += slots->memslots[i].npages;
+	kvm_for_each_memslot(memslot, slots)
+		nr_pages += memslot->npages;
 
 	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
 	nr_mmu_pages = max(nr_mmu_pages,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 23f795c66220..392af47a4353 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -308,6 +308,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 	     (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
 	     idx++)
 
+#define kvm_for_each_memslot(memslot, slots)	\
+	for (memslot = &slots->memslots[0];	\
+	      memslot < slots->memslots + (slots)->nmemslots; memslot++)
+
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index a195c07fa829..4e5f7b7f1d2b 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -134,14 +134,15 @@ unmap_pages:
 
 static int kvm_iommu_map_memslots(struct kvm *kvm)
 {
-	int i, idx, r = 0;
+	int idx, r = 0;
 	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
 
 	idx = srcu_read_lock(&kvm->srcu);
 	slots = kvm_memslots(kvm);
 
-	for (i = 0; i < slots->nmemslots; i++) {
-		r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
+	kvm_for_each_memslot(memslot, slots) {
+		r = kvm_iommu_map_pages(kvm, memslot);
 		if (r)
 			break;
 	}
@@ -311,16 +312,16 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 
 static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 {
-	int i, idx;
+	int idx;
 	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
 
 	idx = srcu_read_lock(&kvm->srcu);
 	slots = kvm_memslots(kvm);
 
-	for (i = 0; i < slots->nmemslots; i++) {
-		kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
-				    slots->memslots[i].npages);
-	}
+	kvm_for_each_memslot(memslot, slots)
+		kvm_iommu_put_pages(kvm, memslot->base_gfn, memslot->npages);
+
 	srcu_read_unlock(&kvm->srcu, idx);
 
 	return 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b5ed7770ced3..4c2900c5d81d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -547,11 +547,11 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 
 void kvm_free_physmem(struct kvm *kvm)
 {
-	int i;
 	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memory_slot *memslot;
 
-	for (i = 0; i < slots->nmemslots; ++i)
-		kvm_free_physmem_slot(&slots->memslots[i], NULL);
+	kvm_for_each_memslot(memslot, slots)
+		kvm_free_physmem_slot(memslot, NULL);
 
 	kfree(kvm->memslots);
 }
@@ -975,15 +975,13 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots,
 						gfn_t gfn)
 {
-	int i;
-
-	for (i = 0; i < slots->nmemslots; ++i) {
-		struct kvm_memory_slot *memslot = &slots->memslots[i];
+	struct kvm_memory_slot *memslot;
 
+	kvm_for_each_memslot(memslot, slots)
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
 			return memslot;
-	}
+
 	return NULL;
 }
 
-- 
cgit v1.2.3


From 28a37544fb0223eb9805d2567b88f7360edec52a Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong.eric@gmail.com>
Date: Thu, 24 Nov 2011 19:04:35 +0800
Subject: KVM: introduce id_to_memslot function

Introduce id_to_memslot to get memslot by slot id

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c  |  2 +-
 arch/powerpc/kvm/book3s.c |  2 +-
 arch/x86/kvm/vmx.c        |  6 ++++--
 arch/x86/kvm/x86.c        | 18 +++++++++---------
 include/linux/kvm_host.h  |  6 ++++++
 virt/kvm/kvm_main.c       | 13 +++++++++----
 6 files changed, 30 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 42ad1f9c9f01..92d9f1e4740f 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1818,7 +1818,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
-	memslot = &kvm->memslots->memslots[log->slot];
+	memslot = id_to_memslot(kvm->memslots, log->slot);
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index a459479995c6..e41ac6f7dcf1 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -498,7 +498,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (is_dirty) {
-		memslot = &kvm->memslots->memslots[log->slot];
+		memslot = id_to_memslot(kvm->memslots, log->slot);
 
 		ga = memslot->base_gfn << PAGE_SHIFT;
 		ga_end = ga + (memslot->npages << PAGE_SHIFT);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ba24022f4575..8f19d91ec3e7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2711,11 +2711,13 @@ static gva_t rmode_tss_base(struct kvm *kvm)
 {
 	if (!kvm->arch.tss_addr) {
 		struct kvm_memslots *slots;
+		struct kvm_memory_slot *slot;
 		gfn_t base_gfn;
 
 		slots = kvm_memslots(kvm);
-		base_gfn = slots->memslots[0].base_gfn +
-				 kvm->memslots->memslots[0].npages - 3;
+		slot = id_to_memslot(slots, 0);
+		base_gfn = slot->base_gfn + slot->npages - 3;
+
 		return base_gfn << PAGE_SHIFT;
 	}
 	return kvm->arch.tss_addr;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 917a287d21c8..b6776c613e6d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3520,7 +3520,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
-	memslot = &kvm->memslots->memslots[log->slot];
+	memslot = id_to_memslot(kvm->memslots, log->slot);
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
@@ -3531,27 +3531,27 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (nr_dirty_pages) {
 		struct kvm_memslots *slots, *old_slots;
-		unsigned long *dirty_bitmap;
+		unsigned long *dirty_bitmap, *dirty_bitmap_head;
 
-		dirty_bitmap = memslot->dirty_bitmap_head;
-		if (memslot->dirty_bitmap == dirty_bitmap)
-			dirty_bitmap += n / sizeof(long);
-		memset(dirty_bitmap, 0, n);
+		dirty_bitmap = memslot->dirty_bitmap;
+		dirty_bitmap_head = memslot->dirty_bitmap_head;
+		if (dirty_bitmap == dirty_bitmap_head)
+			dirty_bitmap_head += n / sizeof(long);
+		memset(dirty_bitmap_head, 0, n);
 
 		r = -ENOMEM;
 		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 		if (!slots)
 			goto out;
 		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
-		memslot = &slots->memslots[log->slot];
-		memslot->dirty_bitmap = dirty_bitmap;
+		memslot = id_to_memslot(slots, log->slot);
 		memslot->nr_dirty_pages = 0;
+		memslot->dirty_bitmap = dirty_bitmap_head;
 		update_memslots(slots, NULL);
 
 		old_slots = kvm->memslots;
 		rcu_assign_pointer(kvm->memslots, slots);
 		synchronize_srcu_expedited(&kvm->srcu);
-		dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
 		kfree(old_slots);
 
 		write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 392af47a4353..123925cd3ae8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -333,6 +333,12 @@ static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
 			|| lockdep_is_held(&kvm->slots_lock));
 }
 
+static inline struct kvm_memory_slot *
+id_to_memslot(struct kvm_memslots *slots, int id)
+{
+	return &slots->memslots[id];
+}
+
 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
 #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
 static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4c2900c5d81d..7b6084999424 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -634,8 +634,9 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)
 {
 	if (new) {
 		int id = new->id;
+		struct kvm_memory_slot *old = id_to_memslot(slots, id);
 
-		slots->memslots[id] = *new;
+		*old = *new;
 		if (id >= slots->nmemslots)
 			slots->nmemslots = id + 1;
 	}
@@ -681,7 +682,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
 		goto out;
 
-	memslot = &kvm->memslots->memslots[mem->slot];
+	memslot = id_to_memslot(kvm->memslots, mem->slot);
 	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
 	npages = mem->memory_size >> PAGE_SHIFT;
 
@@ -788,12 +789,16 @@ skip_lpage:
 #endif /* not defined CONFIG_S390 */
 
 	if (!npages) {
+		struct kvm_memory_slot *slot;
+
 		r = -ENOMEM;
 		slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
 				GFP_KERNEL);
 		if (!slots)
 			goto out_free;
-		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+		slot = id_to_memslot(slots, mem->slot);
+		slot->flags |= KVM_MEMSLOT_INVALID;
+
 		update_memslots(slots, NULL);
 
 		old_memslots = kvm->memslots;
@@ -897,7 +902,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
-	memslot = &kvm->memslots->memslots[log->slot];
+	memslot = id_to_memslot(kvm->memslots, log->slot);
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
-- 
cgit v1.2.3


From bf3e05bc1e2781d5d8d3ddb2d8bf2d6ec207e5cb Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Thu, 24 Nov 2011 17:40:57 +0800
Subject: KVM: sort memslots by its size and use line search

Sort memslots base on its size and use line search to find it, so that the
larger memslots have better fit

The idea is from Avi

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 18 +++++++++--
 virt/kvm/kvm_main.c      | 79 ++++++++++++++++++++++++++++++++++--------------
 2 files changed, 72 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 123925cd3ae8..9efdf5c703a5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -231,8 +231,12 @@ struct kvm_irq_routing_table {};
 #define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 #endif
 
+/*
+ * Note:
+ * memslots are not sorted by id anymore, please use id_to_memslot()
+ * to get the memslot by its id.
+ */
 struct kvm_memslots {
-	int nmemslots;
 	u64 generation;
 	struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
 };
@@ -310,7 +314,8 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 
 #define kvm_for_each_memslot(memslot, slots)	\
 	for (memslot = &slots->memslots[0];	\
-	      memslot < slots->memslots + (slots)->nmemslots; memslot++)
+	      memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\
+		memslot++)
 
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
@@ -336,7 +341,14 @@ static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
 static inline struct kvm_memory_slot *
 id_to_memslot(struct kvm_memslots *slots, int id)
 {
-	return &slots->memslots[id];
+	int i;
+
+	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+		if (slots->memslots[i].id == id)
+			return &slots->memslots[i];
+
+	WARN_ON(1);
+	return NULL;
 }
 
 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7b6084999424..6e8eb15dd30b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -440,6 +440,15 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
 
 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
 
+static void kvm_init_memslots_id(struct kvm *kvm)
+{
+	int i;
+	struct kvm_memslots *slots = kvm->memslots;
+
+	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+		slots->memslots[i].id = i;
+}
+
 static struct kvm *kvm_create_vm(void)
 {
 	int r, i;
@@ -465,6 +474,7 @@ static struct kvm *kvm_create_vm(void)
 	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 	if (!kvm->memslots)
 		goto out_err_nosrcu;
+	kvm_init_memslots_id(kvm);
 	if (init_srcu_struct(&kvm->srcu))
 		goto out_err_nosrcu;
 	for (i = 0; i < KVM_NR_BUSES; i++) {
@@ -630,15 +640,54 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 }
 #endif /* !CONFIG_S390 */
 
+static struct kvm_memory_slot *
+search_memslots(struct kvm_memslots *slots, gfn_t gfn)
+{
+	struct kvm_memory_slot *memslot;
+
+	kvm_for_each_memslot(memslot, slots)
+		if (gfn >= memslot->base_gfn &&
+		      gfn < memslot->base_gfn + memslot->npages)
+			return memslot;
+
+	return NULL;
+}
+
+static int cmp_memslot(const void *slot1, const void *slot2)
+{
+	struct kvm_memory_slot *s1, *s2;
+
+	s1 = (struct kvm_memory_slot *)slot1;
+	s2 = (struct kvm_memory_slot *)slot2;
+
+	if (s1->npages < s2->npages)
+		return 1;
+	if (s1->npages > s2->npages)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Sort the memslots base on its size, so the larger slots
+ * will get better fit.
+ */
+static void sort_memslots(struct kvm_memslots *slots)
+{
+	sort(slots->memslots, KVM_MEM_SLOTS_NUM,
+	      sizeof(struct kvm_memory_slot), cmp_memslot, NULL);
+}
+
 void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)
 {
 	if (new) {
 		int id = new->id;
 		struct kvm_memory_slot *old = id_to_memslot(slots, id);
+		unsigned long npages = old->npages;
 
 		*old = *new;
-		if (id >= slots->nmemslots)
-			slots->nmemslots = id + 1;
+		if (new->npages != npages)
+			sort_memslots(slots);
 	}
 
 	slots->generation++;
@@ -980,14 +1029,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots,
 						gfn_t gfn)
 {
-	struct kvm_memory_slot *memslot;
-
-	kvm_for_each_memslot(memslot, slots)
-		if (gfn >= memslot->base_gfn
-		    && gfn < memslot->base_gfn + memslot->npages)
-			return memslot;
-
-	return NULL;
+	return search_memslots(slots, gfn);
 }
 
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
@@ -998,20 +1040,13 @@ EXPORT_SYMBOL_GPL(gfn_to_memslot);
 
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
-	int i;
-	struct kvm_memslots *slots = kvm_memslots(kvm);
-
-	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *memslot = &slots->memslots[i];
+	struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
 
-		if (memslot->flags & KVM_MEMSLOT_INVALID)
-			continue;
+	if (!memslot || memslot->id >= KVM_MEMORY_SLOTS ||
+	      memslot->flags & KVM_MEMSLOT_INVALID)
+		return 0;
 
-		if (gfn >= memslot->base_gfn
-		    && gfn < memslot->base_gfn + memslot->npages)
-			return 1;
-	}
-	return 0;
+	return 1;
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
-- 
cgit v1.2.3


From f85e2cb5dbaf905e9470d3fe099b31619da431fc Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Thu, 24 Nov 2011 17:41:54 +0800
Subject: KVM: introduce a table to map slot id to index in memslots array

The operation of getting dirty log is frequent when framebuffer-based
displays are used(for example, Xwindow), so, we introduce a mapping table
to speed up id_to_memslot()

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 13 +++++++------
 virt/kvm/kvm_main.c      |  7 ++++++-
 2 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9efdf5c703a5..8c5c30361b0d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -239,6 +239,8 @@ struct kvm_irq_routing_table {};
 struct kvm_memslots {
 	u64 generation;
 	struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
+	/* The mapping table from slot id to the index in memslots[]. */
+	int id_to_index[KVM_MEM_SLOTS_NUM];
 };
 
 struct kvm {
@@ -341,14 +343,13 @@ static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
 static inline struct kvm_memory_slot *
 id_to_memslot(struct kvm_memslots *slots, int id)
 {
-	int i;
+	int index = slots->id_to_index[id];
+	struct kvm_memory_slot *slot;
 
-	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-		if (slots->memslots[i].id == id)
-			return &slots->memslots[i];
+	slot = &slots->memslots[index];
 
-	WARN_ON(1);
-	return NULL;
+	WARN_ON(slot->id != id);
+	return slot;
 }
 
 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6e8eb15dd30b..e289486edc6d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -446,7 +446,7 @@ static void kvm_init_memslots_id(struct kvm *kvm)
 	struct kvm_memslots *slots = kvm->memslots;
 
 	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-		slots->memslots[i].id = i;
+		slots->id_to_index[i] = slots->memslots[i].id = i;
 }
 
 static struct kvm *kvm_create_vm(void)
@@ -674,8 +674,13 @@ static int cmp_memslot(const void *slot1, const void *slot2)
  */
 static void sort_memslots(struct kvm_memslots *slots)
 {
+	int i;
+
 	sort(slots->memslots, KVM_MEM_SLOTS_NUM,
 	      sizeof(struct kvm_memory_slot), cmp_memslot, NULL);
+
+	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+		slots->id_to_index[slots->memslots[i].id] = i;
 }
 
 void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)
-- 
cgit v1.2.3


From d546cb406ea0d83e2d39ec14221957a24f88a622 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 15 Dec 2011 12:38:40 +0200
Subject: KVM: drop bsp_vcpu pointer from kvm struct

Drop bsp_vcpu pointer from kvm struct since its only use is incorrect
anyway.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/i8259.c     | 24 ++++++++++++++++--------
 include/linux/kvm_host.h |  1 -
 virt/kvm/ioapic.c        |  2 +-
 virt/kvm/kvm_main.c      |  4 ----
 4 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index cac4746d7ffb..b6a73537e1ef 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -262,9 +262,10 @@ int kvm_pic_read_irq(struct kvm *kvm)
 
 void kvm_pic_reset(struct kvm_kpic_state *s)
 {
-	int irq;
-	struct kvm_vcpu *vcpu0 = s->pics_state->kvm->bsp_vcpu;
+	int irq, i;
+	struct kvm_vcpu *vcpu;
 	u8 irr = s->irr, isr = s->imr;
+	bool found = false;
 
 	s->last_irr = 0;
 	s->irr = 0;
@@ -281,12 +282,19 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 	s->special_fully_nested_mode = 0;
 	s->init4 = 0;
 
-	for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
-		if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
-			if (irr & (1 << irq) || isr & (1 << irq)) {
-				pic_clear_isr(s, irq);
-			}
-	}
+	kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm)
+		if (kvm_apic_accept_pic_intr(vcpu)) {
+			found = true;
+			break;
+		}
+
+
+	if (!found)
+		return;
+
+	for (irq = 0; irq < PIC_NUM_PINS/2; irq++)
+		if (irr & (1 << irq) || isr & (1 << irq))
+			pic_clear_isr(s, irq);
 }
 
 static void pic_ioport_write(void *opaque, u32 addr, u32 val)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8c5c30361b0d..7a080383935f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -251,7 +251,6 @@ struct kvm {
 	struct srcu_struct srcu;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	u32 bsp_vcpu_id;
-	struct kvm_vcpu *bsp_vcpu;
 #endif
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 	atomic_t online_vcpus;
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 71e2253edee7..dcaf272c26c0 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -185,7 +185,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 		irqe.dest_mode = 0; /* Physical mode. */
 		/* need to read apic_id from apic regiest since
 		 * it can be rewritten */
-		irqe.dest_id = ioapic->kvm->bsp_vcpu->vcpu_id;
+		irqe.dest_id = ioapic->kvm->bsp_vcpu_id;
 	}
 #endif
 	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d8bac0751666..0835c4b90d2f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1743,10 +1743,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	smp_wmb();
 	atomic_inc(&kvm->online_vcpus);
 
-#ifdef CONFIG_KVM_APIC_ARCHITECTURE
-	if (kvm->bsp_vcpu_id == id)
-		kvm->bsp_vcpu = vcpu;
-#endif
 	mutex_unlock(&kvm->lock);
 	return r;
 
-- 
cgit v1.2.3


From f5132b01386b5a67f1ff673bb2b96a507a3f7e41 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 10 Nov 2011 14:57:22 +0200
Subject: KVM: Expose a version 2 architectural PMU to a guests

Use perf_events to emulate an architectural PMU, version 2.

Based on PMU version 1 emulation by Avi Kivity.

[avi: adjust for cpuid.c]
[jan: fix anonymous field initialization for older gcc]

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  48 ++++
 arch/x86/kvm/Kconfig            |   1 +
 arch/x86/kvm/Makefile           |   2 +-
 arch/x86/kvm/cpuid.c            |   2 +
 arch/x86/kvm/pmu.c              | 533 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              |  22 +-
 include/linux/kvm_host.h        |   2 +
 7 files changed, 600 insertions(+), 10 deletions(-)
 create mode 100644 arch/x86/kvm/pmu.c

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 020413afb285..fb60ffdb4e43 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -16,10 +16,12 @@
 #include <linux/mmu_notifier.h>
 #include <linux/tracepoint.h>
 #include <linux/cpumask.h>
+#include <linux/irq_work.h>
 
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 #include <linux/kvm_types.h>
+#include <linux/perf_event.h>
 
 #include <asm/pvclock-abi.h>
 #include <asm/desc.h>
@@ -291,6 +293,37 @@ struct kvm_mmu {
 	u64 pdptrs[4]; /* pae */
 };
 
+enum pmc_type {
+	KVM_PMC_GP = 0,
+	KVM_PMC_FIXED,
+};
+
+struct kvm_pmc {
+	enum pmc_type type;
+	u8 idx;
+	u64 counter;
+	u64 eventsel;
+	struct perf_event *perf_event;
+	struct kvm_vcpu *vcpu;
+};
+
+struct kvm_pmu {
+	unsigned nr_arch_gp_counters;
+	unsigned nr_arch_fixed_counters;
+	unsigned available_event_types;
+	u64 fixed_ctr_ctrl;
+	u64 global_ctrl;
+	u64 global_status;
+	u64 global_ovf_ctrl;
+	u64 counter_bitmask[2];
+	u64 global_ctrl_mask;
+	u8 version;
+	struct kvm_pmc gp_counters[X86_PMC_MAX_GENERIC];
+	struct kvm_pmc fixed_counters[X86_PMC_MAX_FIXED];
+	struct irq_work irq_work;
+	u64 reprogram_pmi;
+};
+
 struct kvm_vcpu_arch {
 	/*
 	 * rip and regs accesses must go through
@@ -424,6 +457,8 @@ struct kvm_vcpu_arch {
 	unsigned access;
 	gfn_t mmio_gfn;
 
+	struct kvm_pmu pmu;
+
 	/* used for guest single stepping over the given code position */
 	unsigned long singlestep_rip;
 
@@ -891,4 +926,17 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
 
+int kvm_is_in_guest(void);
+
+void kvm_pmu_init(struct kvm_vcpu *vcpu);
+void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
+void kvm_pmu_reset(struct kvm_vcpu *vcpu);
+void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
+bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
+int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
+void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
+void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ca4d49ed9a6c..1a7fe868f375 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -35,6 +35,7 @@ config KVM
 	select KVM_MMIO
 	select TASKSTATS
 	select TASK_DELAY_ACCT
+	select PERF_EVENTS
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 161b76ae87c4..4f579e8dcacf 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o timer.o cpuid.o
+			   i8254.o timer.o cpuid.o pmu.o
 kvm-intel-y		+= vmx.o
 kvm-amd-y		+= svm.o
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ca63032bf03d..e70be46f50fc 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -45,6 +45,8 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
 		else
 			apic->lapic_timer.timer_mode_mask = 1 << 17;
 	}
+
+	kvm_pmu_cpuid_update(vcpu);
 }
 
 static int is_efer_nx(void)
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
new file mode 100644
index 000000000000..7aad5446f393
--- /dev/null
+++ b/arch/x86/kvm/pmu.c
@@ -0,0 +1,533 @@
+/*
+ * Kernel-based Virtual Machine -- Performane Monitoring Unit support
+ *
+ * Copyright 2011 Red Hat, Inc. and/or its affiliates.
+ *
+ * Authors:
+ *   Avi Kivity   <avi@redhat.com>
+ *   Gleb Natapov <gleb@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+#include <linux/perf_event.h>
+#include "x86.h"
+#include "cpuid.h"
+#include "lapic.h"
+
+static struct kvm_arch_event_perf_mapping {
+	u8 eventsel;
+	u8 unit_mask;
+	unsigned event_type;
+	bool inexact;
+} arch_events[] = {
+	/* Index must match CPUID 0x0A.EBX bit vector */
+	[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
+	[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
+	[2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES  },
+	[3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
+	[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
+	[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+	[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
+};
+
+/* mapping between fixed pmc index and arch_events array */
+int fixed_pmc_events[] = {1, 0, 2};
+
+static bool pmc_is_gp(struct kvm_pmc *pmc)
+{
+	return pmc->type == KVM_PMC_GP;
+}
+
+static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
+{
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+
+	return pmu->counter_bitmask[pmc->type];
+}
+
+static inline bool pmc_enabled(struct kvm_pmc *pmc)
+{
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+	return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
+}
+
+static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
+					 u32 base)
+{
+	if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
+		return &pmu->gp_counters[msr - base];
+	return NULL;
+}
+
+static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
+{
+	int base = MSR_CORE_PERF_FIXED_CTR0;
+	if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
+		return &pmu->fixed_counters[msr - base];
+	return NULL;
+}
+
+static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx)
+{
+	return get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + idx);
+}
+
+static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
+{
+	if (idx < X86_PMC_IDX_FIXED)
+		return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0);
+	else
+		return get_fixed_pmc_idx(pmu, idx - X86_PMC_IDX_FIXED);
+}
+
+void kvm_deliver_pmi(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.apic)
+		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
+}
+
+static void trigger_pmi(struct irq_work *irq_work)
+{
+	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu,
+			irq_work);
+	struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu,
+			arch.pmu);
+
+	kvm_deliver_pmi(vcpu);
+}
+
+static void kvm_perf_overflow(struct perf_event *perf_event,
+			      struct perf_sample_data *data,
+			      struct pt_regs *regs)
+{
+	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+	__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
+}
+
+static void kvm_perf_overflow_intr(struct perf_event *perf_event,
+		struct perf_sample_data *data, struct pt_regs *regs)
+{
+	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+	if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
+		kvm_perf_overflow(perf_event, data, regs);
+		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+		/*
+		 * Inject PMI. If vcpu was in a guest mode during NMI PMI
+		 * can be ejected on a guest mode re-entry. Otherwise we can't
+		 * be sure that vcpu wasn't executing hlt instruction at the
+		 * time of vmexit and is not going to re-enter guest mode until,
+		 * woken up. So we should wake it, but this is impossible from
+		 * NMI context. Do it from irq work instead.
+		 */
+		if (!kvm_is_in_guest())
+			irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
+		else
+			kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
+	}
+}
+
+static u64 read_pmc(struct kvm_pmc *pmc)
+{
+	u64 counter, enabled, running;
+
+	counter = pmc->counter;
+
+	if (pmc->perf_event)
+		counter += perf_event_read_value(pmc->perf_event,
+						 &enabled, &running);
+
+	/* FIXME: Scaling needed? */
+
+	return counter & pmc_bitmask(pmc);
+}
+
+static void stop_counter(struct kvm_pmc *pmc)
+{
+	if (pmc->perf_event) {
+		pmc->counter = read_pmc(pmc);
+		perf_event_release_kernel(pmc->perf_event);
+		pmc->perf_event = NULL;
+	}
+}
+
+static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
+		unsigned config, bool exclude_user, bool exclude_kernel,
+		bool intr)
+{
+	struct perf_event *event;
+	struct perf_event_attr attr = {
+		.type = type,
+		.size = sizeof(attr),
+		.pinned = true,
+		.exclude_idle = true,
+		.exclude_host = 1,
+		.exclude_user = exclude_user,
+		.exclude_kernel = exclude_kernel,
+		.config = config,
+	};
+
+	attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
+
+	event = perf_event_create_kernel_counter(&attr, -1, current,
+						 intr ? kvm_perf_overflow_intr :
+						 kvm_perf_overflow, pmc);
+	if (IS_ERR(event)) {
+		printk_once("kvm: pmu event creation failed %ld\n",
+				PTR_ERR(event));
+		return;
+	}
+
+	pmc->perf_event = event;
+	clear_bit(pmc->idx, (unsigned long*)&pmc->vcpu->arch.pmu.reprogram_pmi);
+}
+
+static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select,
+		u8 unit_mask)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(arch_events); i++)
+		if (arch_events[i].eventsel == event_select
+				&& arch_events[i].unit_mask == unit_mask
+				&& (pmu->available_event_types & (1 << i)))
+			break;
+
+	if (i == ARRAY_SIZE(arch_events))
+		return PERF_COUNT_HW_MAX;
+
+	return arch_events[i].event_type;
+}
+
+static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
+{
+	unsigned config, type = PERF_TYPE_RAW;
+	u8 event_select, unit_mask;
+
+	pmc->eventsel = eventsel;
+
+	stop_counter(pmc);
+
+	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc))
+		return;
+
+	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
+	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
+
+	if (!(event_select & (ARCH_PERFMON_EVENTSEL_EDGE |
+				ARCH_PERFMON_EVENTSEL_INV |
+				ARCH_PERFMON_EVENTSEL_CMASK))) {
+		config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
+				unit_mask);
+		if (config != PERF_COUNT_HW_MAX)
+			type = PERF_TYPE_HARDWARE;
+	}
+
+	if (type == PERF_TYPE_RAW)
+		config = eventsel & X86_RAW_EVENT_MASK;
+
+	reprogram_counter(pmc, type, config,
+			!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+			!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+			eventsel & ARCH_PERFMON_EVENTSEL_INT);
+}
+
+static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx)
+{
+	unsigned en = en_pmi & 0x3;
+	bool pmi = en_pmi & 0x8;
+
+	stop_counter(pmc);
+
+	if (!en || !pmc_enabled(pmc))
+		return;
+
+	reprogram_counter(pmc, PERF_TYPE_HARDWARE,
+			arch_events[fixed_pmc_events[idx]].event_type,
+			!(en & 0x2), /* exclude user */
+			!(en & 0x1), /* exclude kernel */
+			pmi);
+}
+
+static inline u8 fixed_en_pmi(u64 ctrl, int idx)
+{
+	return (ctrl >> (idx * 4)) & 0xf;
+}
+
+static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
+{
+	int i;
+
+	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
+		u8 en_pmi = fixed_en_pmi(data, i);
+		struct kvm_pmc *pmc = get_fixed_pmc_idx(pmu, i);
+
+		if (fixed_en_pmi(pmu->fixed_ctr_ctrl, i) == en_pmi)
+			continue;
+
+		reprogram_fixed_counter(pmc, en_pmi, i);
+	}
+
+	pmu->fixed_ctr_ctrl = data;
+}
+
+static void reprogram_idx(struct kvm_pmu *pmu, int idx)
+{
+	struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx);
+
+	if (!pmc)
+		return;
+
+	if (pmc_is_gp(pmc))
+		reprogram_gp_counter(pmc, pmc->eventsel);
+	else {
+		int fidx = idx - X86_PMC_IDX_FIXED;
+		reprogram_fixed_counter(pmc,
+				fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx);
+	}
+}
+
+static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
+{
+	int bit;
+	u64 diff = pmu->global_ctrl ^ data;
+
+	pmu->global_ctrl = data;
+
+	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
+		reprogram_idx(pmu, bit);
+}
+
+bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	int ret;
+
+	switch (msr) {
+	case MSR_CORE_PERF_FIXED_CTR_CTRL:
+	case MSR_CORE_PERF_GLOBAL_STATUS:
+	case MSR_CORE_PERF_GLOBAL_CTRL:
+	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+		ret = pmu->version > 1;
+		break;
+	default:
+		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)
+			|| get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0)
+			|| get_fixed_pmc(pmu, msr);
+		break;
+	}
+	return ret;
+}
+
+int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_pmc *pmc;
+
+	switch (index) {
+	case MSR_CORE_PERF_FIXED_CTR_CTRL:
+		*data = pmu->fixed_ctr_ctrl;
+		return 0;
+	case MSR_CORE_PERF_GLOBAL_STATUS:
+		*data = pmu->global_status;
+		return 0;
+	case MSR_CORE_PERF_GLOBAL_CTRL:
+		*data = pmu->global_ctrl;
+		return 0;
+	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+		*data = pmu->global_ovf_ctrl;
+		return 0;
+	default:
+		if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
+				(pmc = get_fixed_pmc(pmu, index))) {
+			*data = read_pmc(pmc);
+			return 0;
+		} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
+			*data = pmc->eventsel;
+			return 0;
+		}
+	}
+	return 1;
+}
+
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_pmc *pmc;
+
+	switch (index) {
+	case MSR_CORE_PERF_FIXED_CTR_CTRL:
+		if (pmu->fixed_ctr_ctrl == data)
+			return 0;
+		if (!(data & 0xfffffffffffff444)) {
+			reprogram_fixed_counters(pmu, data);
+			return 0;
+		}
+		break;
+	case MSR_CORE_PERF_GLOBAL_STATUS:
+		break; /* RO MSR */
+	case MSR_CORE_PERF_GLOBAL_CTRL:
+		if (pmu->global_ctrl == data)
+			return 0;
+		if (!(data & pmu->global_ctrl_mask)) {
+			global_ctrl_changed(pmu, data);
+			return 0;
+		}
+		break;
+	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+		if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
+			pmu->global_status &= ~data;
+			pmu->global_ovf_ctrl = data;
+			return 0;
+		}
+		break;
+	default:
+		if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
+				(pmc = get_fixed_pmc(pmu, index))) {
+			data = (s64)(s32)data;
+			pmc->counter += data - read_pmc(pmc);
+			return 0;
+		} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
+			if (data == pmc->eventsel)
+				return 0;
+			if (!(data & 0xffffffff00200000ull)) {
+				reprogram_gp_counter(pmc, data);
+				return 0;
+			}
+		}
+	}
+	return 1;
+}
+
+int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	bool fast_mode = pmc & (1u << 31);
+	bool fixed = pmc & (1u << 30);
+	struct kvm_pmc *counters;
+	u64 ctr;
+
+	pmc &= (3u << 30) - 1;
+	if (!fixed && pmc >= pmu->nr_arch_gp_counters)
+		return 1;
+	if (fixed && pmc >= pmu->nr_arch_fixed_counters)
+		return 1;
+	counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
+	ctr = read_pmc(&counters[pmc]);
+	if (fast_mode)
+		ctr = (u32)ctr;
+	*data = ctr;
+
+	return 0;
+}
+
+void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_cpuid_entry2 *entry;
+	unsigned bitmap_len;
+
+	pmu->nr_arch_gp_counters = 0;
+	pmu->nr_arch_fixed_counters = 0;
+	pmu->counter_bitmask[KVM_PMC_GP] = 0;
+	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
+	pmu->version = 0;
+
+	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
+	if (!entry)
+		return;
+
+	pmu->version = entry->eax & 0xff;
+	if (!pmu->version)
+		return;
+
+	pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff,
+			X86_PMC_MAX_GENERIC);
+	pmu->counter_bitmask[KVM_PMC_GP] =
+		((u64)1 << ((entry->eax >> 16) & 0xff)) - 1;
+	bitmap_len = (entry->eax >> 24) & 0xff;
+	pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);
+
+	if (pmu->version == 1) {
+		pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1;
+		return;
+	}
+
+	pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
+			X86_PMC_MAX_FIXED);
+	pmu->counter_bitmask[KVM_PMC_FIXED] =
+		((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
+	pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1)
+			| (((1ull << pmu->nr_arch_fixed_counters) - 1)
+				<< X86_PMC_IDX_FIXED));
+}
+
+void kvm_pmu_init(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+
+	memset(pmu, 0, sizeof(*pmu));
+	for (i = 0; i < X86_PMC_MAX_GENERIC; i++) {
+		pmu->gp_counters[i].type = KVM_PMC_GP;
+		pmu->gp_counters[i].vcpu = vcpu;
+		pmu->gp_counters[i].idx = i;
+	}
+	for (i = 0; i < X86_PMC_MAX_FIXED; i++) {
+		pmu->fixed_counters[i].type = KVM_PMC_FIXED;
+		pmu->fixed_counters[i].vcpu = vcpu;
+		pmu->fixed_counters[i].idx = i + X86_PMC_IDX_FIXED;
+	}
+	init_irq_work(&pmu->irq_work, trigger_pmi);
+	kvm_pmu_cpuid_update(vcpu);
+}
+
+void kvm_pmu_reset(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	int i;
+
+	irq_work_sync(&pmu->irq_work);
+	for (i = 0; i < X86_PMC_MAX_GENERIC; i++) {
+		struct kvm_pmc *pmc = &pmu->gp_counters[i];
+		stop_counter(pmc);
+		pmc->counter = pmc->eventsel = 0;
+	}
+
+	for (i = 0; i < X86_PMC_MAX_FIXED; i++)
+		stop_counter(&pmu->fixed_counters[i]);
+
+	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
+		pmu->global_ovf_ctrl = 0;
+}
+
+void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
+{
+	kvm_pmu_reset(vcpu);
+}
+
+void kvm_handle_pmu_event(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	u64 bitmask;
+	int bit;
+
+	bitmask = pmu->reprogram_pmi;
+
+	for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
+		struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit);
+
+		if (unlikely(!pmc || !pmc->perf_event)) {
+			clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
+			continue;
+		}
+
+		reprogram_idx(pmu, bit);
+	}
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a646e2b57c5..08ae951ecc5c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1602,8 +1602,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	 * which we perfectly emulate ;-). Any other value should be at least
 	 * reported, some guests depend on them.
 	 */
-	case MSR_P6_EVNTSEL0:
-	case MSR_P6_EVNTSEL1:
 	case MSR_K7_EVNTSEL0:
 	case MSR_K7_EVNTSEL1:
 	case MSR_K7_EVNTSEL2:
@@ -1615,8 +1613,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	/* at least RHEL 4 unconditionally writes to the perfctr registers,
 	 * so we ignore writes to make it happy.
 	 */
-	case MSR_P6_PERFCTR0:
-	case MSR_P6_PERFCTR1:
 	case MSR_K7_PERFCTR0:
 	case MSR_K7_PERFCTR1:
 	case MSR_K7_PERFCTR2:
@@ -1653,6 +1649,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	default:
 		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
 			return xen_hvm_config(vcpu, data);
+		if (kvm_pmu_msr(vcpu, msr))
+			return kvm_pmu_set_msr(vcpu, msr, data);
 		if (!ignore_msrs) {
 			pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
 				msr, data);
@@ -1815,10 +1813,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_K8_SYSCFG:
 	case MSR_K7_HWCR:
 	case MSR_VM_HSAVE_PA:
-	case MSR_P6_PERFCTR0:
-	case MSR_P6_PERFCTR1:
-	case MSR_P6_EVNTSEL0:
-	case MSR_P6_EVNTSEL1:
 	case MSR_K7_EVNTSEL0:
 	case MSR_K7_PERFCTR0:
 	case MSR_K8_INT_PENDING_MSG:
@@ -1929,6 +1923,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		data = 0xbe702111;
 		break;
 	default:
+		if (kvm_pmu_msr(vcpu, msr))
+			return kvm_pmu_get_msr(vcpu, msr, pdata);
 		if (!ignore_msrs) {
 			pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
 			return 1;
@@ -4650,7 +4646,7 @@ static void kvm_timer_init(void)
 
 static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
 
-static int kvm_is_in_guest(void)
+int kvm_is_in_guest(void)
 {
 	return __this_cpu_read(current_vcpu) != NULL;
 }
@@ -5114,6 +5110,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			process_nmi(vcpu);
 		req_immediate_exit =
 			kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
+		if (kvm_check_request(KVM_REQ_PMU, vcpu))
+			kvm_handle_pmu_event(vcpu);
+		if (kvm_check_request(KVM_REQ_PMI, vcpu))
+			kvm_deliver_pmi(vcpu);
 	}
 
 	r = kvm_mmu_reload(vcpu);
@@ -5850,6 +5850,8 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 	kvm_async_pf_hash_reset(vcpu);
 	vcpu->arch.apf.halted = false;
 
+	kvm_pmu_reset(vcpu);
+
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
@@ -5934,6 +5936,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 		goto fail_free_mce_banks;
 
 	kvm_async_pf_hash_reset(vcpu);
+	kvm_pmu_init(vcpu);
 
 	return 0;
 fail_free_mce_banks:
@@ -5952,6 +5955,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 	int idx;
 
+	kvm_pmu_destroy(vcpu);
 	kfree(vcpu->arch.mce_banks);
 	kvm_free_lapic(vcpu);
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7a080383935f..900c76337e8f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -52,6 +52,8 @@
 #define KVM_REQ_STEAL_UPDATE      13
 #define KVM_REQ_NMI               14
 #define KVM_REQ_IMMEDIATE_EXIT    15
+#define KVM_REQ_PMU               16
+#define KVM_REQ_PMI               17
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
-- 
cgit v1.2.3


From 98362dec835218206fdb55212667040d23d3a440 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 19 Sep 2011 13:25:22 +0200
Subject: KVM: Fix whitespace in kvm_para.h

When syncing KVM headers with QEMU I (or whoever applies the
diff) end up automatically fixing whitespaces. One of them
is in kvm_para.h.

It's a lot more consistent for people who don't do the whitespace
fixups automatically to already have fixed headers in Linux. So
remove the sparse empty line at the end of kvm_para.h and everyone's
happy.

Reported-by: Blue Swirl <blauwirbel@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 include/linux/kvm_para.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
index 47a070b0520e..ff476ddaf310 100644
--- a/include/linux/kvm_para.h
+++ b/include/linux/kvm_para.h
@@ -35,4 +35,3 @@ static inline int kvm_para_has_feature(unsigned int feature)
 }
 #endif /* __KERNEL__ */
 #endif /* __LINUX_KVM_PARA_H */
-
-- 
cgit v1.2.3


From 7b482c8360d368fd495685a2c69ca4f1e7b29764 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Tue, 20 Dec 2011 22:56:45 +0100
Subject: ARM/of: allow *machine_desc.dt_compat to be const
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows dt_compat to point to a constant list of compatible strings.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 arch/arm/include/asm/mach/arch.h | 2 +-
 drivers/of/fdt.c                 | 4 ++--
 include/linux/of_fdt.h           | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 2b0efc3104ac..02a718adb598 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -19,7 +19,7 @@ struct machine_desc {
 	unsigned int		nr;		/* architecture number	*/
 	const char		*name;		/* architecture name	*/
 	unsigned long		atag_offset;	/* tagged list (relative) */
-	const char		**dt_compat;	/* array of device tree
+	const char *const 	*dt_compat;	/* array of device tree
 						 * 'compatible' strings	*/
 
 	unsigned int		nr_irqs;	/* number of IRQs */
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index fd85fa298e0f..7dc8e6da858d 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -107,7 +107,7 @@ int of_fdt_is_compatible(struct boot_param_header *blob,
  * of_fdt_match - Return true if node matches a list of compatible values
  */
 int of_fdt_match(struct boot_param_header *blob, unsigned long node,
-                 const char **compat)
+                 const char *const *compat)
 {
 	unsigned int tmp, score = 0;
 
@@ -541,7 +541,7 @@ int __init of_flat_dt_is_compatible(unsigned long node, const char *compat)
 /**
  * of_flat_dt_match - Return true if node matches a list of compatible values
  */
-int __init of_flat_dt_match(unsigned long node, const char **compat)
+int __init of_flat_dt_match(unsigned long node, const char *const *compat)
 {
 	return of_fdt_match(initial_boot_params, node, compat);
 }
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index c84d900fbbb3..ed136ad698ce 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -71,7 +71,7 @@ extern int of_fdt_is_compatible(struct boot_param_header *blob,
 				unsigned long node,
 				const char *compat);
 extern int of_fdt_match(struct boot_param_header *blob, unsigned long node,
-			const char **compat);
+			const char *const *compat);
 extern void of_fdt_unflatten_tree(unsigned long *blob,
 			       struct device_node **mynodes);
 
@@ -88,7 +88,7 @@ extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname,
 extern void *of_get_flat_dt_prop(unsigned long node, const char *name,
 				 unsigned long *size);
 extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
-extern int of_flat_dt_match(unsigned long node, const char **matches);
+extern int of_flat_dt_match(unsigned long node, const char *const *matches);
 extern unsigned long of_get_flat_dt_root(void);
 
 extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
-- 
cgit v1.2.3


From 8af0da93da7c40526959ab5291964581c678d3e7 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <dong.aisheng@linaro.org>
Date: Thu, 22 Dec 2011 20:19:24 +0800
Subject: dt: reform for_each_property to for_each_property_of_node

Make this macro easier to use(do not need to pass properties, a node is
enough), also change to a more sensible name as for_each_child_of_node.

Signed-off-by: Dong Aisheng <dong.aisheng@linaro.org>
Cc: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 drivers/of/base.c  | 2 +-
 include/linux/of.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index b7072437eb8c..0181eeb88c92 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1157,7 +1157,7 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align))
 	if (!of_aliases)
 		return;
 
-	for_each_property(pp, of_aliases->properties) {
+	for_each_property_of_node(of_aliases, pp) {
 		const char *start = pp->name;
 		const char *end = start + strlen(start);
 		struct device_node *np;
diff --git a/include/linux/of.h b/include/linux/of.h
index 4948552d60f5..f1a490c37e06 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -219,8 +219,8 @@ extern int of_device_is_available(const struct device_node *device);
 extern const void *of_get_property(const struct device_node *node,
 				const char *name,
 				int *lenp);
-#define for_each_property(pp, properties) \
-	for (pp = properties; pp != NULL; pp = pp->next)
+#define for_each_property_of_node(dn, pp) \
+	for (pp = dn->properties; pp != NULL; pp = pp->next)
 
 extern int of_n_addr_cells(struct device_node *np);
 extern int of_n_size_cells(struct device_node *np);
-- 
cgit v1.2.3


From d446a8202c81d95f91b1682fc67e7fadd9a31389 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 9 Jun 2011 21:03:07 +0200
Subject: netfilter: xtables: move ipt_ecn to xt_ecn

Prepare the ECN match for augmentation by an IPv6 counterpart. Since
no symbol dependencies to ipv6.ko are added, having a single ecn match
module is the more so welcome.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/Kbuild         |   1 +
 include/linux/netfilter/xt_ecn.h       |  35 +++++++++
 include/linux/netfilter_ipv4/ipt_ecn.h |  31 +-------
 net/ipv4/netfilter/Kconfig             |  10 +--
 net/ipv4/netfilter/Makefile            |   1 -
 net/ipv4/netfilter/ipt_ecn.c           | 127 --------------------------------
 net/netfilter/Kconfig                  |   9 +++
 net/netfilter/Makefile                 |   1 +
 net/netfilter/xt_ecn.c                 | 128 +++++++++++++++++++++++++++++++++
 9 files changed, 180 insertions(+), 163 deletions(-)
 create mode 100644 include/linux/netfilter/xt_ecn.h
 delete mode 100644 net/ipv4/netfilter/ipt_ecn.c
 create mode 100644 net/netfilter/xt_ecn.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index e630a2ed4f18..e144f54185c0 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -43,6 +43,7 @@ header-y += xt_cpu.h
 header-y += xt_dccp.h
 header-y += xt_devgroup.h
 header-y += xt_dscp.h
+header-y += xt_ecn.h
 header-y += xt_esp.h
 header-y += xt_hashlimit.h
 header-y += xt_helper.h
diff --git a/include/linux/netfilter/xt_ecn.h b/include/linux/netfilter/xt_ecn.h
new file mode 100644
index 000000000000..065c1a537e5d
--- /dev/null
+++ b/include/linux/netfilter/xt_ecn.h
@@ -0,0 +1,35 @@
+/* iptables module for matching the ECN header in IPv4 and TCP header
+ *
+ * (C) 2002 Harald Welte <laforge@gnumonks.org>
+ *
+ * This software is distributed under GNU GPL v2, 1991
+ * 
+ * ipt_ecn.h,v 1.4 2002/08/05 19:39:00 laforge Exp
+*/
+#ifndef _XT_ECN_H
+#define _XT_ECN_H
+
+#include <linux/types.h>
+#include <linux/netfilter/xt_dscp.h>
+
+#define IPT_ECN_IP_MASK	(~XT_DSCP_MASK)
+
+#define IPT_ECN_OP_MATCH_IP	0x01
+#define IPT_ECN_OP_MATCH_ECE	0x10
+#define IPT_ECN_OP_MATCH_CWR	0x20
+
+#define IPT_ECN_OP_MATCH_MASK	0xce
+
+/* match info */
+struct ipt_ecn_info {
+	__u8 operation;
+	__u8 invert;
+	__u8 ip_ect;
+	union {
+		struct {
+			__u8 ect;
+		} tcp;
+	} proto;
+};
+
+#endif /* _XT_ECN_H */
diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h
index eabf95fb7d3e..b1124ec76190 100644
--- a/include/linux/netfilter_ipv4/ipt_ecn.h
+++ b/include/linux/netfilter_ipv4/ipt_ecn.h
@@ -1,35 +1,6 @@
-/* iptables module for matching the ECN header in IPv4 and TCP header
- *
- * (C) 2002 Harald Welte <laforge@gnumonks.org>
- *
- * This software is distributed under GNU GPL v2, 1991
- * 
- * ipt_ecn.h,v 1.4 2002/08/05 19:39:00 laforge Exp
-*/
 #ifndef _IPT_ECN_H
 #define _IPT_ECN_H
 
-#include <linux/types.h>
-#include <linux/netfilter/xt_dscp.h>
-
-#define IPT_ECN_IP_MASK	(~XT_DSCP_MASK)
-
-#define IPT_ECN_OP_MATCH_IP	0x01
-#define IPT_ECN_OP_MATCH_ECE	0x10
-#define IPT_ECN_OP_MATCH_CWR	0x20
-
-#define IPT_ECN_OP_MATCH_MASK	0xce
-
-/* match info */
-struct ipt_ecn_info {
-	__u8 operation;
-	__u8 invert;
-	__u8 ip_ect;
-	union {
-		struct {
-			__u8 ect;
-		} tcp;
-	} proto;
-};
+#include <linux/netfilter/xt_ecn.h>
 
 #endif /* _IPT_ECN_H */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 7e1f5cdaf11e..53b9c79c8025 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -76,11 +76,11 @@ config IP_NF_MATCH_AH
 config IP_NF_MATCH_ECN
 	tristate '"ecn" match support'
 	depends on NETFILTER_ADVANCED
-	help
-	  This option adds a `ECN' match, which allows you to match against
-	  the IPv4 and TCP header ECN fields.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
+	select NETFILTER_XT_MATCH_ECN
+	---help---
+	This is a backwards-compat option for the user's convenience
+	(e.g. when running oldconfig). It selects
+	CONFIG_NETFILTER_XT_MATCH_ECN.
 
 config IP_NF_MATCH_RPFILTER
 	tristate '"rpfilter" reverse path filter match support'
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 123dd88cea53..213a462b739b 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -49,7 +49,6 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
 
 # matches
 obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
-obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
 obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o
 
 # targets
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
deleted file mode 100644
index 2b57e52c746c..000000000000
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/* IP tables module for matching the value of the IPv4 and TCP ECN bits
- *
- * (C) 2002 by Harald Welte <laforge@gnumonks.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <net/ip.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/tcp.h>
-
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_ecn.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
-MODULE_LICENSE("GPL");
-
-static inline bool match_ip(const struct sk_buff *skb,
-			    const struct ipt_ecn_info *einfo)
-{
-	return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^
-	       !!(einfo->invert & IPT_ECN_OP_MATCH_IP);
-}
-
-static inline bool match_tcp(const struct sk_buff *skb,
-			     const struct ipt_ecn_info *einfo,
-			     bool *hotdrop)
-{
-	struct tcphdr _tcph;
-	const struct tcphdr *th;
-
-	/* In practice, TCP match does this, so can't fail.  But let's
-	 * be good citizens.
-	 */
-	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
-	if (th == NULL) {
-		*hotdrop = false;
-		return false;
-	}
-
-	if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
-		if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
-			if (th->ece == 1)
-				return false;
-		} else {
-			if (th->ece == 0)
-				return false;
-		}
-	}
-
-	if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
-		if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
-			if (th->cwr == 1)
-				return false;
-		} else {
-			if (th->cwr == 0)
-				return false;
-		}
-	}
-
-	return true;
-}
-
-static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
-{
-	const struct ipt_ecn_info *info = par->matchinfo;
-
-	if (info->operation & IPT_ECN_OP_MATCH_IP)
-		if (!match_ip(skb, info))
-			return false;
-
-	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
-		if (!match_tcp(skb, info, &par->hotdrop))
-			return false;
-	}
-
-	return true;
-}
-
-static int ecn_mt_check(const struct xt_mtchk_param *par)
-{
-	const struct ipt_ecn_info *info = par->matchinfo;
-	const struct ipt_ip *ip = par->entryinfo;
-
-	if (info->operation & IPT_ECN_OP_MATCH_MASK)
-		return -EINVAL;
-
-	if (info->invert & IPT_ECN_OP_MATCH_MASK)
-		return -EINVAL;
-
-	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
-	    (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
-		pr_info("cannot match TCP bits in rule for non-tcp packets\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static struct xt_match ecn_mt_reg __read_mostly = {
-	.name		= "ecn",
-	.family		= NFPROTO_IPV4,
-	.match		= ecn_mt,
-	.matchsize	= sizeof(struct ipt_ecn_info),
-	.checkentry	= ecn_mt_check,
-	.me		= THIS_MODULE,
-};
-
-static int __init ecn_mt_init(void)
-{
-	return xt_register_match(&ecn_mt_reg);
-}
-
-static void __exit ecn_mt_exit(void)
-{
-	xt_unregister_match(&ecn_mt_reg);
-}
-
-module_init(ecn_mt_init);
-module_exit(ecn_mt_exit);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index bac93ba60778..20388a97df66 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -778,6 +778,15 @@ config NETFILTER_XT_MATCH_DSCP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_ECN
+	tristate '"ecn" match support'
+	depends on NETFILTER_ADVANCED
+	---help---
+	This option adds an "ECN" match, which allows you to match against
+	the IPv4 and TCP header ECN fields.
+
+	To compile it as a module, choose M here. If unsure, say N.
+
 config NETFILTER_XT_MATCH_ESP
 	tristate '"esp" match support'
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index b2eee4df8168..40f4c3d636c5 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -81,6 +81,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DEVGROUP) += xt_devgroup.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_ECN) += xt_ecn.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c
new file mode 100644
index 000000000000..2c198f5e3efb
--- /dev/null
+++ b/net/netfilter/xt_ecn.c
@@ -0,0 +1,128 @@
+/* IP tables module for matching the value of the IPv4 and TCP ECN bits
+ *
+ * (C) 2002 by Harald Welte <laforge@gnumonks.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_ecn.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_ecn");
+
+static inline bool match_ip(const struct sk_buff *skb,
+			    const struct ipt_ecn_info *einfo)
+{
+	return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^
+	       !!(einfo->invert & IPT_ECN_OP_MATCH_IP);
+}
+
+static inline bool match_tcp(const struct sk_buff *skb,
+			     const struct ipt_ecn_info *einfo,
+			     bool *hotdrop)
+{
+	struct tcphdr _tcph;
+	const struct tcphdr *th;
+
+	/* In practice, TCP match does this, so can't fail.  But let's
+	 * be good citizens.
+	 */
+	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
+	if (th == NULL) {
+		*hotdrop = false;
+		return false;
+	}
+
+	if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
+		if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
+			if (th->ece == 1)
+				return false;
+		} else {
+			if (th->ece == 0)
+				return false;
+		}
+	}
+
+	if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
+		if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
+			if (th->cwr == 1)
+				return false;
+		} else {
+			if (th->cwr == 0)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct ipt_ecn_info *info = par->matchinfo;
+
+	if (info->operation & IPT_ECN_OP_MATCH_IP)
+		if (!match_ip(skb, info))
+			return false;
+
+	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
+		if (!match_tcp(skb, info, &par->hotdrop))
+			return false;
+	}
+
+	return true;
+}
+
+static int ecn_mt_check(const struct xt_mtchk_param *par)
+{
+	const struct ipt_ecn_info *info = par->matchinfo;
+	const struct ipt_ip *ip = par->entryinfo;
+
+	if (info->operation & IPT_ECN_OP_MATCH_MASK)
+		return -EINVAL;
+
+	if (info->invert & IPT_ECN_OP_MATCH_MASK)
+		return -EINVAL;
+
+	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
+	    (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
+		pr_info("cannot match TCP bits in rule for non-tcp packets\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct xt_match ecn_mt_reg __read_mostly = {
+	.name		= "ecn",
+	.family		= NFPROTO_IPV4,
+	.match		= ecn_mt,
+	.matchsize	= sizeof(struct ipt_ecn_info),
+	.checkentry	= ecn_mt_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init ecn_mt_init(void)
+{
+	return xt_register_match(&ecn_mt_reg);
+}
+
+static void __exit ecn_mt_exit(void)
+{
+	xt_unregister_match(&ecn_mt_reg);
+}
+
+module_init(ecn_mt_init);
+module_exit(ecn_mt_exit);
-- 
cgit v1.2.3


From a4c6f9d3636db538025f9622c008192a0835cc23 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 9 Jun 2011 21:15:37 +0200
Subject: netfilter: xtables: give xt_ecn its own name

Use the new macro and struct names in xt_ecn.h, and put the old
definitions into a definition-forwarding ipt_ecn.h.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/xt_ecn.h       | 12 ++++++------
 include/linux/netfilter_ipv4/ipt_ecn.h | 11 ++++++++++-
 net/netfilter/xt_ecn.c                 | 34 +++++++++++++++++-----------------
 3 files changed, 33 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_ecn.h b/include/linux/netfilter/xt_ecn.h
index 065c1a537e5d..7158fca364f2 100644
--- a/include/linux/netfilter/xt_ecn.h
+++ b/include/linux/netfilter/xt_ecn.h
@@ -12,16 +12,16 @@
 #include <linux/types.h>
 #include <linux/netfilter/xt_dscp.h>
 
-#define IPT_ECN_IP_MASK	(~XT_DSCP_MASK)
+#define XT_ECN_IP_MASK	(~XT_DSCP_MASK)
 
-#define IPT_ECN_OP_MATCH_IP	0x01
-#define IPT_ECN_OP_MATCH_ECE	0x10
-#define IPT_ECN_OP_MATCH_CWR	0x20
+#define XT_ECN_OP_MATCH_IP	0x01
+#define XT_ECN_OP_MATCH_ECE	0x10
+#define XT_ECN_OP_MATCH_CWR	0x20
 
-#define IPT_ECN_OP_MATCH_MASK	0xce
+#define XT_ECN_OP_MATCH_MASK	0xce
 
 /* match info */
-struct ipt_ecn_info {
+struct xt_ecn_info {
 	__u8 operation;
 	__u8 invert;
 	__u8 ip_ect;
diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h
index b1124ec76190..0e0c063dbf60 100644
--- a/include/linux/netfilter_ipv4/ipt_ecn.h
+++ b/include/linux/netfilter_ipv4/ipt_ecn.h
@@ -2,5 +2,14 @@
 #define _IPT_ECN_H
 
 #include <linux/netfilter/xt_ecn.h>
+#define ipt_ecn_info xt_ecn_info
 
-#endif /* _IPT_ECN_H */
+enum {
+	IPT_ECN_IP_MASK       = XT_ECN_IP_MASK,
+	IPT_ECN_OP_MATCH_IP   = XT_ECN_OP_MATCH_IP,
+	IPT_ECN_OP_MATCH_ECE  = XT_ECN_OP_MATCH_ECE,
+	IPT_ECN_OP_MATCH_CWR  = XT_ECN_OP_MATCH_CWR,
+	IPT_ECN_OP_MATCH_MASK = XT_ECN_OP_MATCH_MASK,
+};
+
+#endif /* IPT_ECN_H */
diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c
index 2c198f5e3efb..3ebb3dcace65 100644
--- a/net/netfilter/xt_ecn.c
+++ b/net/netfilter/xt_ecn.c
@@ -15,8 +15,8 @@
 #include <linux/tcp.h>
 
 #include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_ecn.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_ecn.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
@@ -24,14 +24,14 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_ecn");
 
 static inline bool match_ip(const struct sk_buff *skb,
-			    const struct ipt_ecn_info *einfo)
+			    const struct xt_ecn_info *einfo)
 {
-	return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^
-	       !!(einfo->invert & IPT_ECN_OP_MATCH_IP);
+	return ((ip_hdr(skb)->tos & XT_ECN_IP_MASK) == einfo->ip_ect) ^
+	       !!(einfo->invert & XT_ECN_OP_MATCH_IP);
 }
 
 static inline bool match_tcp(const struct sk_buff *skb,
-			     const struct ipt_ecn_info *einfo,
+			     const struct xt_ecn_info *einfo,
 			     bool *hotdrop)
 {
 	struct tcphdr _tcph;
@@ -46,8 +46,8 @@ static inline bool match_tcp(const struct sk_buff *skb,
 		return false;
 	}
 
-	if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
-		if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
+	if (einfo->operation & XT_ECN_OP_MATCH_ECE) {
+		if (einfo->invert & XT_ECN_OP_MATCH_ECE) {
 			if (th->ece == 1)
 				return false;
 		} else {
@@ -56,8 +56,8 @@ static inline bool match_tcp(const struct sk_buff *skb,
 		}
 	}
 
-	if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
-		if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
+	if (einfo->operation & XT_ECN_OP_MATCH_CWR) {
+		if (einfo->invert & XT_ECN_OP_MATCH_CWR) {
 			if (th->cwr == 1)
 				return false;
 		} else {
@@ -71,13 +71,13 @@ static inline bool match_tcp(const struct sk_buff *skb,
 
 static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	const struct ipt_ecn_info *info = par->matchinfo;
+	const struct xt_ecn_info *info = par->matchinfo;
 
-	if (info->operation & IPT_ECN_OP_MATCH_IP)
+	if (info->operation & XT_ECN_OP_MATCH_IP)
 		if (!match_ip(skb, info))
 			return false;
 
-	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
+	if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR)) {
 		if (!match_tcp(skb, info, &par->hotdrop))
 			return false;
 	}
@@ -87,16 +87,16 @@ static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
 static int ecn_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct ipt_ecn_info *info = par->matchinfo;
+	const struct xt_ecn_info *info = par->matchinfo;
 	const struct ipt_ip *ip = par->entryinfo;
 
-	if (info->operation & IPT_ECN_OP_MATCH_MASK)
+	if (info->operation & XT_ECN_OP_MATCH_MASK)
 		return -EINVAL;
 
-	if (info->invert & IPT_ECN_OP_MATCH_MASK)
+	if (info->invert & XT_ECN_OP_MATCH_MASK)
 		return -EINVAL;
 
-	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
+	if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
 	    (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
 		pr_info("cannot match TCP bits in rule for non-tcp packets\n");
 		return -EINVAL;
@@ -109,7 +109,7 @@ static struct xt_match ecn_mt_reg __read_mostly = {
 	.name		= "ecn",
 	.family		= NFPROTO_IPV4,
 	.match		= ecn_mt,
-	.matchsize	= sizeof(struct ipt_ecn_info),
+	.matchsize	= sizeof(struct xt_ecn_info),
 	.checkentry	= ecn_mt_check,
 	.me		= THIS_MODULE,
 };
-- 
cgit v1.2.3


From 42c5d52f2bbf68add2d7a6982753993e0c75a119 Mon Sep 17 00:00:00 2001
From: Richard Zhao <richard.zhao@linaro.org>
Date: Tue, 15 Nov 2011 14:47:56 +0800
Subject: clk: add helper functions clk_prepare_enable and
 clk_disable_unprepare

It's for migrating to generic clk framework API.

The helper functions  help cases clk_enable/clk_disable is used
in non-atomic context.
For example, Call clk_enable in probe and clk_disable in remove.

Signed-off-by: Richard Zhao <richard.zhao@linaro.org>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Cc: Russell King <linux@arm.linux.org.uk>
Acked-by: Marek Vasut <marek.vasut@gmail.com>
---
 include/linux/clk.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clk.h b/include/linux/clk.h
index 7213b52b2c0e..b9d46fa154b4 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -107,6 +107,28 @@ static inline void clk_unprepare(struct clk *clk)
 }
 #endif
 
+/* clk_prepare_enable helps cases using clk_enable in non-atomic context. */
+static inline int clk_prepare_enable(struct clk *clk)
+{
+	int ret;
+
+	ret = clk_prepare(clk);
+	if (ret)
+		return ret;
+	ret = clk_enable(clk);
+	if (ret)
+		clk_unprepare(clk);
+
+	return ret;
+}
+
+/* clk_disable_unprepare helps cases using clk_disable in non-atomic context. */
+static inline void clk_disable_unprepare(struct clk *clk)
+{
+	clk_disable(clk);
+	clk_unprepare(clk);
+}
+
 /**
  * clk_get_rate - obtain the current clock rate (in Hz) for a clock source.
  *		  This is only valid once the clock source has been enabled.
-- 
cgit v1.2.3


From 3ecdd0515287afbcde352077d59e4028dcfbb685 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Tue, 13 Dec 2011 09:13:54 -0600
Subject: dt: add empty of_get_node/of_put_node functions

Add empty of_get_node/of_put_node functions for !CONFIG_OF builds.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/of.h | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index f1a490c37e06..9abd3ec3c2ac 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -65,6 +65,20 @@ struct device_node {
 #endif
 };
 
+#if defined(CONFIG_SPARC) || !defined(CONFIG_OF)
+/* Dummy ref counting routines - to be implemented later */
+static inline struct device_node *of_node_get(struct device_node *node)
+{
+	return node;
+}
+static inline void of_node_put(struct device_node *node)
+{
+}
+#else
+extern struct device_node *of_node_get(struct device_node *node);
+extern void of_node_put(struct device_node *node);
+#endif
+
 #ifdef CONFIG_OF
 
 /* Pointer for first entry in chain of all nodes. */
@@ -95,21 +109,6 @@ static inline void of_node_set_flag(struct device_node *n, unsigned long flag)
 
 extern struct device_node *of_find_all_nodes(struct device_node *prev);
 
-#if defined(CONFIG_SPARC)
-/* Dummy ref counting routines - to be implemented later */
-static inline struct device_node *of_node_get(struct device_node *node)
-{
-	return node;
-}
-static inline void of_node_put(struct device_node *node)
-{
-}
-
-#else
-extern struct device_node *of_node_get(struct device_node *node);
-extern void of_node_put(struct device_node *node);
-#endif
-
 /*
  * OF address retrieval & translation
  */
-- 
cgit v1.2.3


From 1ba37268cd19e5a2a80924bfe8618bf1ba3e8249 Mon Sep 17 00:00:00 2001
From: Yongqiang Yang <xiaoqiangnk@gmail.com>
Date: Wed, 28 Dec 2011 17:46:46 -0500
Subject: jbd2: clear revoked flag on buffers before a new transaction started

Currently, we clear revoked flag only when a block is reused.  However,
this can tigger a false journal error.  Consider a situation when a block
is used as a meta block and is deleted(revoked) in ordered mode, then the
block is allocated as a data block to a file.  At this moment, user changes
the file's journal mode from ordered to journaled and truncates the file.
The block will be considered re-revoked by journal because it has revoked
flag still pending from the last transaction and an assertion triggers.

We fix the problem by keeping the revoked status more uptodate - we clear
revoked flag when switching revoke tables to reflect there is no revoked
buffers in current transaction any more.

Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/commit.c     |  6 ++++++
 fs/jbd2/revoke.c     | 34 ++++++++++++++++++++++++++++++++++
 include/linux/jbd2.h |  1 +
 3 files changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 68d704db787f..5069b8475150 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -429,6 +429,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
 	jbd_debug(3, "JBD2: commit phase 1\n");
 
+	/*
+	 * Clear revoked flag to reflect there is no revoked buffers
+	 * in the next transaction which is going to be started.
+	 */
+	jbd2_clear_buffer_revoked_flags(journal);
+
 	/*
 	 * Switch to a new revoke table.
 	 */
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 69fd93588118..30b2867d6cc9 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -47,6 +47,10 @@
  *   overwriting the new data.  We don't even need to clear the revoke
  *   bit here.
  *
+ * We cache revoke status of a buffer in the current transaction in b_states
+ * bits.  As the name says, revokevalid flag indicates that the cached revoke
+ * status of a buffer is valid and we can rely on the cached status.
+ *
  * Revoke information on buffers is a tri-state value:
  *
  * RevokeValid clear:	no cached revoke status, need to look it up
@@ -478,6 +482,36 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
 	return did_revoke;
 }
 
+/*
+ * journal_clear_revoked_flag clears revoked flag of buffers in
+ * revoke table to reflect there is no revoked buffers in the next
+ * transaction which is going to be started.
+ */
+void jbd2_clear_buffer_revoked_flags(journal_t *journal)
+{
+	struct jbd2_revoke_table_s *revoke = journal->j_revoke;
+	int i = 0;
+
+	for (i = 0; i < revoke->hash_size; i++) {
+		struct list_head *hash_list;
+		struct list_head *list_entry;
+		hash_list = &revoke->hash_table[i];
+
+		list_for_each(list_entry, hash_list) {
+			struct jbd2_revoke_record_s *record;
+			struct buffer_head *bh;
+			record = (struct jbd2_revoke_record_s *)list_entry;
+			bh = __find_get_block(journal->j_fs_dev,
+					      record->blocknr,
+					      journal->j_blocksize);
+			if (bh) {
+				clear_buffer_revoked(bh);
+				__brelse(bh);
+			}
+		}
+	}
+}
+
 /* journal_switch_revoke table select j_revoke for next transaction
  * we do not want to suspend any processing until all revokes are
  * written -bzzz
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 2092ea21e469..5557baefed60 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1151,6 +1151,7 @@ extern int	jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
 extern int	jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t);
 extern void	jbd2_journal_clear_revoke(journal_t *);
 extern void	jbd2_journal_switch_revoke_table(journal_t *journal);
+extern void	jbd2_clear_buffer_revoked_flags(journal_t *journal);
 
 /*
  * The log thread user interface:
-- 
cgit v1.2.3


From cc1d327232759647ea56725eab1c6b16c92d52fa Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Mon, 14 Nov 2011 08:48:18 -0300
Subject: [media] v4l: Add new alpha component control

The V4L2_CID_ALPHA_COMPONENT control is intended for the video capture
or memory-to-memory devices that are capable of setting up the per-pixel
alpha component to some arbitrary value. It allows to set the alpha
component for all pixels to an arbitrary value.

Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/v4l/compat.xml         | 11 ++++++++++
 Documentation/DocBook/media/v4l/controls.xml       | 25 ++++++++++++++++------
 .../DocBook/media/v4l/pixfmt-packed-rgb.xml        |  7 ++++--
 drivers/media/video/v4l2-ctrls.c                   |  1 +
 include/linux/videodev2.h                          |  6 +++---
 5 files changed, 39 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml
index 8b44a43f4542..12ba26262d32 100644
--- a/Documentation/DocBook/media/v4l/compat.xml
+++ b/Documentation/DocBook/media/v4l/compat.xml
@@ -2379,6 +2379,17 @@ that used it. It was originally scheduled for removal in 2.6.35.
       </orderedlist>
     </section>
 
+    <section>
+      <title>V4L2 in Linux 3.3</title>
+      <orderedlist>
+        <listitem>
+	  <para>Added <constant>V4L2_CID_ALPHA_COMPONENT</constant> control
+	    to the <link linkend="control">User controls class</link>.
+	  </para>
+        </listitem>
+      </orderedlist>
+    </section>
+
     <section id="other">
       <title>Relation of V4L2 to other Linux multimedia APIs</title>
 
diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml
index c0422c622337..a1be37897ad7 100644
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -323,12 +323,6 @@ minimum value disables backlight compensation.</entry>
 	    <entry>Switch on or off the illuminator 1 or 2 of the device
 		(usually a microscope).</entry>
 	  </row>
-	  <row>
-	    <entry><constant>V4L2_CID_LASTP1</constant></entry>
-	    <entry></entry>
-	    <entry>End of the predefined control IDs (currently
-<constant>V4L2_CID_ILLUMINATORS_2</constant> + 1).</entry>
-	  </row>
 	  <row>
 	    <entry><constant>V4L2_CID_MIN_BUFFERS_FOR_CAPTURE</constant></entry>
 	    <entry>integer</entry>
@@ -345,6 +339,25 @@ and used as a hint to determine the number of OUTPUT buffers to pass to REQBUFS.
 The value is the minimum number of OUTPUT buffers that is necessary for hardware
 to work.</entry>
 	  </row>
+	  <row id="v4l2-alpha-component">
+	    <entry><constant>V4L2_CID_ALPHA_COMPONENT</constant></entry>
+	    <entry>integer</entry>
+	    <entry> Sets the alpha color component on the capture device or on
+	    the capture buffer queue of a mem-to-mem device. When a mem-to-mem
+	    device produces frame format that includes an alpha component
+	    (e.g. <link linkend="rgb-formats">packed RGB image formats</link>)
+	    and the alpha value is not defined by the mem-to-mem input data
+	    this control lets you select the alpha component value of all
+	    pixels. It is applicable to any pixel format that contains an alpha
+	    component.
+	    </entry>
+	  </row>
+	  <row>
+	    <entry><constant>V4L2_CID_LASTP1</constant></entry>
+	    <entry></entry>
+	    <entry>End of the predefined control IDs (currently
+	      <constant>V4L2_CID_ALPHA_COMPONENT</constant> + 1).</entry>
+	  </row>
 	  <row>
 	    <entry><constant>V4L2_CID_PRIVATE_BASE</constant></entry>
 	    <entry></entry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml b/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml
index ba56536622f2..166c8d65e4f7 100644
--- a/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml
@@ -428,8 +428,11 @@ colorspace <constant>V4L2_COLORSPACE_SRGB</constant>.</para>
     <para>Bit 7 is the most significant bit. The value of a = alpha
 bits is undefined when reading from the driver, ignored when writing
 to the driver, except when alpha blending has been negotiated for a
-<link linkend="overlay">Video Overlay</link> or <link
-linkend="osd">Video Output Overlay</link>.</para>
+<link linkend="overlay">Video Overlay</link> or <link linkend="osd">
+Video Output Overlay</link> or when alpha component has been configured
+for a <link linkend="capture">Video Capture</link> by means of <link
+linkend="v4l2-alpha-component"> <constant>V4L2_CID_ALPHA_COMPONENT
+</constant> </link> control.</para>
 
     <example>
       <title><constant>V4L2_PIX_FMT_BGR24</constant> 4 &times; 4 pixel
diff --git a/drivers/media/video/v4l2-ctrls.c b/drivers/media/video/v4l2-ctrls.c
index 0f415dade05a..39266153499f 100644
--- a/drivers/media/video/v4l2-ctrls.c
+++ b/drivers/media/video/v4l2-ctrls.c
@@ -467,6 +467,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_ILLUMINATORS_2:		return "Illuminator 2";
 	case V4L2_CID_MIN_BUFFERS_FOR_CAPTURE:	return "Minimum Number of Capture Buffers";
 	case V4L2_CID_MIN_BUFFERS_FOR_OUTPUT:	return "Minimum Number of Output Buffers";
+	case V4L2_CID_ALPHA_COMPONENT:		return "Alpha Component";
 
 	/* MPEG controls */
 	/* Keep the order of the 'case's the same as in videodev2.h! */
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 3d62631839bc..2965906a02c9 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1204,10 +1204,10 @@ enum v4l2_colorfx {
 #define V4L2_CID_MIN_BUFFERS_FOR_CAPTURE	(V4L2_CID_BASE+39)
 #define V4L2_CID_MIN_BUFFERS_FOR_OUTPUT		(V4L2_CID_BASE+40)
 
-/* last CID + 1 */
-#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+41)
+#define V4L2_CID_ALPHA_COMPONENT		(V4L2_CID_BASE+41)
 
-/* Minimum number of buffer neede by the device */
+/* last CID + 1 */
+#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+42)
 
 /*  MPEG-class control IDs defined by V4L2 */
 #define V4L2_CID_MPEG_BASE 			(V4L2_CTRL_CLASS_MPEG | 0x900)
-- 
cgit v1.2.3


From e6fe2371bdd3713d0b227e9cd7f905e127ff81a0 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 30 Dec 2011 00:52:21 +0000
Subject: sock_diag: Arrange sock_diag.h such that it is exportable to
 userspace

Properly toss existing components around the ifdef __KERNEL__
and include the header into the header-y target.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild      |  1 +
 include/linux/sock_diag.h | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 0b091b32267d..8e484d660bc3 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -195,6 +195,7 @@ header-y += igmp.h
 header-y += in.h
 header-y += in6.h
 header-y += in_route.h
+header-y += sock_diag.h
 header-y += inet_diag.h
 header-y += inotify.h
 header-y += input.h
diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 379d5dccf8e1..66bc18ef4fa4 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -1,16 +1,19 @@
 #ifndef __SOCK_DIAG_H__
 #define __SOCK_DIAG_H__
 
-#define SOCK_DIAG_BY_FAMILY 20
+#include <linux/types.h>
 
-struct sk_buff;
-struct nlmsghdr;
+#define SOCK_DIAG_BY_FAMILY 20
 
 struct sock_diag_req {
 	__u8	sdiag_family;
 	__u8	sdiag_protocol;
 };
 
+#ifdef __KERNEL__
+struct sk_buff;
+struct nlmsghdr;
+
 struct sock_diag_handler {
 	__u8 family;
 	int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
@@ -26,4 +29,5 @@ int sock_diag_check_cookie(void *sk, __u32 *cookie);
 void sock_diag_save_cookie(void *sk, __u32 *cookie);
 
 extern struct sock *sock_diag_nlsk;
+#endif /* KERNEL */
 #endif
-- 
cgit v1.2.3


From 288461e1546fa4162fa237eeed8ea09a16521dcd Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 30 Dec 2011 00:52:51 +0000
Subject: unix_diag: Include unix_diag.h into header-y target

The headers check complains it should include the linux/types.h
withing, thus add this one.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild      | 1 +
 include/linux/unix_diag.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 8e484d660bc3..c94e71781b79 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -197,6 +197,7 @@ header-y += in6.h
 header-y += in_route.h
 header-y += sock_diag.h
 header-y += inet_diag.h
+header-y += unix_diag.h
 header-y += inotify.h
 header-y += input.h
 header-y += ioctl.h
diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index 3f7afb007d70..a5ce0f325745 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -1,6 +1,8 @@
 #ifndef __UNIX_DIAG_H__
 #define __UNIX_DIAG_H__
 
+#include <linux/types.h>
+
 struct unix_diag_req {
 	__u8	sdiag_family;
 	__u8	sdiag_protocol;
-- 
cgit v1.2.3


From 5d2e5f274f9e9a06fb934dd45260e2616a9992e6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 30 Dec 2011 00:53:13 +0000
Subject: sock_diag: Introduce the meminfo nla core (v2)

Add a routine that dumps memory-related values of a socket.
It's made as an array to make it possible to add more stuff
here later without breaking compatibility.

Since v1: The SK_MEMINFO_ constants are in userspace
visible part of sock_diag.h, the rest is under __KERNEL__.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sock_diag.h | 15 +++++++++++++++
 net/core/sock_diag.c      | 23 +++++++++++++++++++++++
 2 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 66bc18ef4fa4..251729a47880 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -10,9 +10,22 @@ struct sock_diag_req {
 	__u8	sdiag_protocol;
 };
 
+enum {
+	SK_MEMINFO_RMEM_ALLOC,
+	SK_MEMINFO_RCVBUF,
+	SK_MEMINFO_WMEM_ALLOC,
+	SK_MEMINFO_SNDBUF,
+	SK_MEMINFO_FWD_ALLOC,
+	SK_MEMINFO_WMEM_QUEUED,
+	SK_MEMINFO_OPTMEM,
+
+	SK_MEMINFO_VARS,
+};
+
 #ifdef __KERNEL__
 struct sk_buff;
 struct nlmsghdr;
+struct sock;
 
 struct sock_diag_handler {
 	__u8 family;
@@ -28,6 +41,8 @@ void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlms
 int sock_diag_check_cookie(void *sk, __u32 *cookie);
 void sock_diag_save_cookie(void *sk, __u32 *cookie);
 
+int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
+
 extern struct sock *sock_diag_nlsk;
 #endif /* KERNEL */
 #endif
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 711bdefe7753..b9868e1fd62c 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -4,6 +4,8 @@
 #include <net/netlink.h>
 #include <net/net_namespace.h>
 #include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
 
 #include <linux/inet_diag.h>
 #include <linux/sock_diag.h>
@@ -31,6 +33,27 @@ void sock_diag_save_cookie(void *sk, __u32 *cookie)
 }
 EXPORT_SYMBOL_GPL(sock_diag_save_cookie);
 
+int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
+{
+	__u32 *mem;
+
+	mem = RTA_DATA(__RTA_PUT(skb, attrtype, SK_MEMINFO_VARS * sizeof(__u32)));
+
+	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
+	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
+	mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
+
+	return 0;
+
+rtattr_failure:
+	return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
+
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
 {
 	mutex_lock(&sock_diag_table_mutex);
-- 
cgit v1.2.3


From c0636faa539ec4205ec50e80844a5b0454b4bbaa Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 30 Dec 2011 00:53:32 +0000
Subject: inet_diag: Add the SKMEMINFO extension

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 3 ++-
 net/ipv4/inet_diag.c      | 4 ++++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index afa5d5c74169..34e8d52c1925 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -108,9 +108,10 @@ enum {
 	INET_DIAG_CONG,
 	INET_DIAG_TOS,
 	INET_DIAG_TCLASS,
+	INET_DIAG_SKMEMINFO,
 };
 
-#define INET_DIAG_MAX INET_DIAG_TCLASS
+#define INET_DIAG_MAX INET_DIAG_SKMEMINFO
 
 
 /* INET_DIAG_MEM */
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index fb2e47ff59f7..2240a8e8c44d 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -136,6 +136,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 		minfo->idiag_tmem = sk_wmem_alloc_get(sk);
 	}
 
+	if (ext & (1 << (INET_DIAG_SKMEMINFO - 1)))
+		if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
+			goto rtattr_failure;
+
 	if (icsk == NULL) {
 		r->idiag_rqueue = r->idiag_wqueue = 0;
 		goto out;
-- 
cgit v1.2.3


From 257b529876cb45ec791eaa89e3d2ee0d16b49383 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 30 Dec 2011 09:27:43 +0000
Subject: unix_diag: Add the MEMINFO extension

[ Fix indentation of sock_diag*() calls. -DaveM ]

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h |  2 ++
 net/unix/diag.c           | 20 ++++++++++++--------
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index a5ce0f325745..93fdb782468a 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -18,6 +18,7 @@ struct unix_diag_req {
 #define UDIAG_SHOW_PEER		0x00000004	/* show peer socket info */
 #define UDIAG_SHOW_ICONS	0x00000008	/* show pending connections */
 #define UDIAG_SHOW_RQLEN	0x00000010	/* show skb receive queue len */
+#define UDIAG_SHOW_MEMINFO	0x00000020	/* show memory info of a socket */
 
 struct unix_diag_msg {
 	__u8	udiag_family;
@@ -35,6 +36,7 @@ enum {
 	UNIX_DIAG_PEER,
 	UNIX_DIAG_ICONS,
 	UNIX_DIAG_RQLEN,
+	UNIX_DIAG_MEMINFO,
 
 	UNIX_DIAG_MAX,
 };
diff --git a/net/unix/diag.c b/net/unix/diag.c
index c5bdbcb1c30b..98945f29da4f 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -127,23 +127,27 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 	sock_diag_save_cookie(sk, rep->udiag_cookie);
 
 	if ((req->udiag_show & UDIAG_SHOW_NAME) &&
-			sk_diag_dump_name(sk, skb))
+	    sk_diag_dump_name(sk, skb))
 		goto nlmsg_failure;
 
 	if ((req->udiag_show & UDIAG_SHOW_VFS) &&
-			sk_diag_dump_vfs(sk, skb))
+	    sk_diag_dump_vfs(sk, skb))
 		goto nlmsg_failure;
 
 	if ((req->udiag_show & UDIAG_SHOW_PEER) &&
-			sk_diag_dump_peer(sk, skb))
+	    sk_diag_dump_peer(sk, skb))
 		goto nlmsg_failure;
 
 	if ((req->udiag_show & UDIAG_SHOW_ICONS) &&
-			sk_diag_dump_icons(sk, skb))
+	    sk_diag_dump_icons(sk, skb))
 		goto nlmsg_failure;
 
 	if ((req->udiag_show & UDIAG_SHOW_RQLEN) &&
-			sk_diag_show_rqlen(sk, skb))
+	    sk_diag_show_rqlen(sk, skb))
+		goto nlmsg_failure;
+
+	if ((req->udiag_show & UDIAG_SHOW_MEMINFO) &&
+	    sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO))
 		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -191,9 +195,9 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			if (!(req->udiag_states & (1 << sk->sk_state)))
 				goto next;
 			if (sk_diag_dump(sk, skb, req,
-						NETLINK_CB(cb->skb).pid,
-						cb->nlh->nlmsg_seq,
-						NLM_F_MULTI) < 0)
+					 NETLINK_CB(cb->skb).pid,
+					 cb->nlh->nlmsg_seq,
+					 NLM_F_MULTI) < 0)
 				goto done;
 next:
 			num++;
-- 
cgit v1.2.3


From c9da99e6475f92653139e43f3c30c0cd011a0fd8 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 30 Dec 2011 00:54:39 +0000
Subject: unix_diag: Fixup RQLEN extension report

While it's not too late fix the recently added RQLEN diag extension
to report rqlen and wqlen in the same way as TCP does.

I.e. for listening sockets the ack backlog length (which is the input
queue length for socket) in rqlen and the max ack backlog length in
wqlen, and what the CINQ/OUTQ ioctls do for established.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h |  5 +++++
 net/unix/diag.c           | 13 ++++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index 93fdb782468a..b1d2bf16b33c 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -46,4 +46,9 @@ struct unix_diag_vfs {
 	__u32	udiag_vfs_dev;
 };
 
+struct unix_diag_rqlen {
+	__u32	udiag_rqueue;
+	__u32	udiag_wqueue;
+};
+
 #endif
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 98945f29da4f..6b7697fd911b 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -101,7 +101,18 @@ rtattr_failure:
 
 static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
 {
-	RTA_PUT_U32(nlskb, UNIX_DIAG_RQLEN, sk->sk_receive_queue.qlen);
+	struct unix_diag_rqlen *rql;
+
+	rql = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_RQLEN, sizeof(*rql));
+
+	if (sk->sk_state == TCP_LISTEN) {
+		rql->udiag_rqueue = sk->sk_receive_queue.qlen;
+		rql->udiag_wqueue = sk->sk_max_ack_backlog;
+	} else {
+		rql->udiag_rqueue = (__u32)unix_inq_len(sk);
+		rql->udiag_wqueue = (__u32)unix_outq_len(sk);
+	}
+
 	return 0;
 
 rtattr_failure:
-- 
cgit v1.2.3


From 8de8594a79ae43b08d115c94f09373f6c673f202 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 26 Dec 2011 20:22:50 -0300
Subject: [media] dvb-core: be sure that drivers won't use DVBv3 internally

Now that all frontends are implementing DVBv5, don't export the
DVBv3 specific stuff to the drivers. Only the core should be
aware of that, as it will keep providing DVBv3 backward compatibility.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb/dvb-core/dvb_frontend.c | 3 +++
 drivers/media/dvb/dvb-core/dvb_frontend.h | 2 ++
 include/linux/dvb/frontend.h              | 6 ++++--
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c
index b1ab866743fd..55ca5521bca6 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.c
@@ -25,6 +25,9 @@
  * Or, point your browser to http://www.gnu.org/copyleft/gpl.html
  */
 
+/* Enables DVBv3 compatibility bits at the headers */
+#define __DVB_CORE__
+
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.h b/drivers/media/dvb/dvb-core/dvb_frontend.h
index 93715d6755f4..676481c8ad78 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.h
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.h
@@ -315,6 +315,7 @@ struct dvb_frontend_ops {
 	int (*get_property)(struct dvb_frontend* fe, struct dtv_property* tvp);
 };
 
+#ifdef __DVB_CORE__
 #define MAX_EVENT 8
 
 struct dvb_fe_events {
@@ -325,6 +326,7 @@ struct dvb_fe_events {
 	wait_queue_head_t	  wait_queue;
 	struct mutex		  mtx;
 };
+#endif
 
 struct dtv_frontend_properties {
 
diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index a3c762383f88..7e7cb64f56d8 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -181,6 +181,7 @@ typedef enum fe_transmit_mode {
 	TRANSMISSION_MODE_32K,
 } fe_transmit_mode_t;
 
+#if defined(__DVB_CORE__) || !defined (__KERNEL__)
 typedef enum fe_bandwidth {
 	BANDWIDTH_8_MHZ,
 	BANDWIDTH_7_MHZ,
@@ -190,7 +191,7 @@ typedef enum fe_bandwidth {
 	BANDWIDTH_10_MHZ,
 	BANDWIDTH_1_712_MHZ,
 } fe_bandwidth_t;
-
+#endif
 
 typedef enum fe_guard_interval {
 	GUARD_INTERVAL_1_32,
@@ -213,6 +214,7 @@ typedef enum fe_hierarchy {
 } fe_hierarchy_t;
 
 
+#if defined(__DVB_CORE__) || !defined (__KERNEL__)
 struct dvb_qpsk_parameters {
 	__u32		symbol_rate;  /* symbol rate in Symbols per second */
 	fe_code_rate_t	fec_inner;    /* forward error correction (see above) */
@@ -251,11 +253,11 @@ struct dvb_frontend_parameters {
 	} u;
 };
 
-
 struct dvb_frontend_event {
 	fe_status_t status;
 	struct dvb_frontend_parameters parameters;
 };
+#endif
 
 /* S2API Commands */
 #define DTV_UNDEFINED		0
-- 
cgit v1.2.3


From 935a521066113e16c15b842852ed681bbbdfbc63 Mon Sep 17 00:00:00 2001
From: MyungJoo Ham <myungjoo.ham@samsung.com>
Date: Mon, 2 Jan 2012 18:49:32 +0900
Subject: regulator: add regulator_force_disable() definition for
 !CONFIG_REGULATOR

regulator_force_disable() was omitted in consumer.h for
!CONFIG_REGULATOR case.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/regulator/consumer.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index f7756d146c61..889ab5aaa85b 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -212,6 +212,11 @@ static inline int regulator_disable(struct regulator *regulator)
 	return 0;
 }
 
+static inline int regulator_force_disable(struct regulator *regulator)
+{
+	return 0;
+}
+
 static inline int regulator_disable_deferred(struct regulator *regulator,
 					     int ms)
 {
-- 
cgit v1.2.3


From 3712a3c488987849613a4ad74129e67e40b12b38 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Fri, 21 Oct 2011 12:25:53 -0600
Subject: pinctrl: add explicit gpio_disable_free pinmux_op

Some pinctrl drivers (Tegra at least) program a pin to be a GPIO in a
completely different manner than they select which function to mux out of
that pin. In order to support a single "free" pinmux_op, the driver would
need to maintain a per-pin state of requested-for-gpio vs. requested-for-
function. However, that's a lot of work when the core already has explicit
separate paths for gpio request/free and function request/free.

So, add a gpio_disable_free op to struct pinmux_ops, and make pin_free()
call it when appropriate.

When doing this, I noticed that when calling pin_request():

    !!gpio == (gpio_range != NULL)

... and so I collapsed those two parameters in both pin_request(), and
when adding writing the new code in pin_free().

Also, for pin_free():

    !!free_func == (gpio_range != NULL)

However, I didn't want pin_free() to know about the GPIO function naming
special case, so instead, I reworked pin_free() to always return the pin's
previously requested function, and now pinmux_free_gpio() calls
kfree(function). This is much more balanced with the allocation having
been performed in pinmux_request_gpio().

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinmux.c       | 39 +++++++++++++++++++++++++--------------
 include/linux/pinctrl/pinmux.h |  3 +++
 2 files changed, 28 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c
index 17c3931d5ef0..c77aee5508fb 100644
--- a/drivers/pinctrl/pinmux.c
+++ b/drivers/pinctrl/pinmux.c
@@ -94,12 +94,11 @@ struct pinmux_hog {
  * @function: a functional name to give to this pin, passed to the driver
  *	so it knows what function to mux in, e.g. the string "gpioNN"
  *	means that you want to mux in the pin for use as GPIO number NN
- * @gpio: if this request concerns a single GPIO pin
  * @gpio_range: the range matching the GPIO pin if this is a request for a
  *	single GPIO pin
  */
 static int pin_request(struct pinctrl_dev *pctldev,
-		       int pin, const char *function, bool gpio,
+		       int pin, const char *function,
 		       struct pinctrl_gpio_range *gpio_range)
 {
 	struct pin_desc *desc;
@@ -143,7 +142,7 @@ static int pin_request(struct pinctrl_dev *pctldev,
 	 * If there is no kind of request function for the pin we just assume
 	 * we got it by default and proceed.
 	 */
-	if (gpio && ops->gpio_request_enable)
+	if (gpio_range && ops->gpio_request_enable)
 		/* This requests and enables a single GPIO pin */
 		status = ops->gpio_request_enable(pctldev, gpio_range, pin);
 	else if (ops->request)
@@ -173,29 +172,39 @@ out:
  * pin_free() - release a single muxed in pin so something else can be muxed
  * @pctldev: pin controller device handling this pin
  * @pin: the pin to free
- * @free_func: whether to free the pin's assigned function name string
+ * @gpio_range: the range matching the GPIO pin if this is a request for a
+ *	single GPIO pin
  */
-static void pin_free(struct pinctrl_dev *pctldev, int pin, int free_func)
+static const char *pin_free(struct pinctrl_dev *pctldev, int pin,
+			    struct pinctrl_gpio_range *gpio_range)
 {
 	const struct pinmux_ops *ops = pctldev->desc->pmxops;
 	struct pin_desc *desc;
+	const char *func;
 
 	desc = pin_desc_get(pctldev, pin);
 	if (desc == NULL) {
 		dev_err(&pctldev->dev,
 			"pin is not registered so it cannot be freed\n");
-		return;
+		return NULL;
 	}
 
-	if (ops->free)
+	/*
+	 * If there is no kind of request function for the pin we just assume
+	 * we got it by default and proceed.
+	 */
+	if (gpio_range && ops->gpio_disable_free)
+		ops->gpio_disable_free(pctldev, gpio_range, pin);
+	else if (ops->free)
 		ops->free(pctldev, pin);
 
 	spin_lock(&desc->lock);
-	if (free_func)
-		kfree(desc->mux_function);
+	func = desc->mux_function;
 	desc->mux_function = NULL;
 	spin_unlock(&desc->lock);
 	module_put(pctldev->owner);
+
+	return func;
 }
 
 /**
@@ -225,7 +234,7 @@ int pinmux_request_gpio(unsigned gpio)
 	if (!function)
 		return -EINVAL;
 
-	ret = pin_request(pctldev, pin, function, true, range);
+	ret = pin_request(pctldev, pin, function, range);
 	if (ret < 0)
 		kfree(function);
 
@@ -243,6 +252,7 @@ void pinmux_free_gpio(unsigned gpio)
 	struct pinctrl_gpio_range *range;
 	int ret;
 	int pin;
+	const char *func;
 
 	ret = pinctrl_get_device_gpio_range(gpio, &pctldev, &range);
 	if (ret)
@@ -251,7 +261,8 @@ void pinmux_free_gpio(unsigned gpio)
 	/* Convert to the pin controllers number space */
 	pin = gpio - range->base;
 
-	pin_free(pctldev, pin, true);
+	func = pin_free(pctldev, pin, range);
+	kfree(func);
 }
 EXPORT_SYMBOL_GPL(pinmux_free_gpio);
 
@@ -341,7 +352,7 @@ static int acquire_pins(struct pinctrl_dev *pctldev,
 
 	/* Try to allocate all pins in this group, one by one */
 	for (i = 0; i < num_pins; i++) {
-		ret = pin_request(pctldev, pins[i], func, false, NULL);
+		ret = pin_request(pctldev, pins[i], func, NULL);
 		if (ret) {
 			dev_err(&pctldev->dev,
 				"could not get pin %d for function %s "
@@ -351,7 +362,7 @@ static int acquire_pins(struct pinctrl_dev *pctldev,
 			/* On error release all taken pins */
 			i--; /* this pin just failed */
 			for (; i >= 0; i--)
-				pin_free(pctldev, pins[i], false);
+				pin_free(pctldev, pins[i], NULL);
 			return -ENODEV;
 		}
 	}
@@ -381,7 +392,7 @@ static void release_pins(struct pinctrl_dev *pctldev,
 		return;
 	}
 	for (i = 0; i < num_pins; i++)
-		pin_free(pctldev, pins[i], false);
+		pin_free(pctldev, pins[i], NULL);
 }
 
 /**
diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h
index 3c430e797efc..350e32a98c6a 100644
--- a/include/linux/pinctrl/pinmux.h
+++ b/include/linux/pinctrl/pinmux.h
@@ -73,6 +73,9 @@ struct pinmux_ops {
 	int (*gpio_request_enable) (struct pinctrl_dev *pctldev,
 				    struct pinctrl_gpio_range *range,
 				    unsigned offset);
+	void (*gpio_disable_free) (struct pinctrl_dev *pctldev,
+				   struct pinctrl_gpio_range *range,
+				   unsigned offset);
 };
 
 /* External interface to pinmux */
-- 
cgit v1.2.3


From 3c739ad0df5eb41cd7adad879eda6aa09879eb76 Mon Sep 17 00:00:00 2001
From: Chanho Park <chanho61.park@samsung.com>
Date: Fri, 11 Nov 2011 18:47:58 +0900
Subject: pinctrl: add a pin_base for sparse gpio-ranges

This patch enables mapping a base offset of gpio ranges with
a pin offset even if does'nt matched. A base of pinctrl_gpio_range
means a base offset of gpio. However, we cannot convert gpio to pin
number for sparse gpio ranges just only using a gpio base offset.
We can convert a gpio to real pin number(even if not matched) using
a new pin_base which means a base pin offset of requested gpio range.
Now, the pin control subsystem passes the pin base offset to the
pinmux driver.

For example, let's assume below two gpio ranges in the system.

static struct pinctrl_gpio_range gpio_range_a = {
    .name = "chip a",
    .id = 0,
    .base = 32,
    .pin_base = 32,
    .npins = 16,
    .gc = &chip_a;
};

static struct pinctrl_gpio_range gpio_range_b = {
    .name = "chip b",
    .id = 0,
    .base = 48,
    .pin_base = 64,
    .npins = 8,
    .gc = &chip_b;
};

We can calucalate a exact pin ranges even if doesn't matched with gpio ranges.

chip a:
    gpio-range : [32 .. 47]
    pin-range  : [32 .. 47]
chip b:
    gpio-range : [48 .. 55]
    pin-range  : [64 .. 71]

Signed-off-by: Chanho Park <chanho61.park@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/pinctrl.txt       | 48 ++++++++++++++++++++---------------------
 drivers/pinctrl/pinmux-sirf.c   |  6 +++++-
 drivers/pinctrl/pinmux-u300.c   |  1 +
 drivers/pinctrl/pinmux.c        |  4 ++--
 include/linux/pinctrl/pinctrl.h |  2 ++
 include/linux/pinctrl/pinmux.h  |  2 +-
 6 files changed, 34 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index 0a8b2250062a..43ba411d1571 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -214,19 +214,20 @@ static struct pinctrl_gpio_range gpio_range_a = {
 	.name = "chip a",
 	.id = 0,
 	.base = 32,
+	.pin_base = 32,
 	.npins = 16,
 	.gc = &chip_a;
 };
 
-static struct pinctrl_gpio_range gpio_range_a = {
+static struct pinctrl_gpio_range gpio_range_b = {
 	.name = "chip b",
 	.id = 0,
 	.base = 48,
+	.pin_base = 64,
 	.npins = 8,
 	.gc = &chip_b;
 };
 
-
 {
 	struct pinctrl_dev *pctl;
 	...
@@ -235,11 +236,24 @@ static struct pinctrl_gpio_range gpio_range_a = {
 }
 
 So this complex system has one pin controller handling two different
-GPIO chips. Chip a has 16 pins and chip b has 8 pins. They are mapped in
-the global GPIO pin space at:
+GPIO chips. "chip a" has 16 pins and "chip b" has 8 pins. The "chip a" and
+"chip b" have different .pin_base, which means a start pin number of the
+GPIO range.
+
+The GPIO range of "chip a" starts from the GPIO base of 32 and actual
+pin range also starts from 32. However "chip b" has different starting
+offset for the GPIO range and pin range. The GPIO range of "chip b" starts
+from GPIO number 48, while the pin range of "chip b" starts from 64.
 
-chip a: [32 .. 47]
-chip b: [48 .. 55]
+We can convert a gpio number to actual pin number using this "pin_base".
+They are mapped in the global GPIO pin space at:
+
+chip a:
+ - GPIO range : [32 .. 47]
+ - pin range  : [32 .. 47]
+chip b:
+ - GPIO range : [48 .. 55]
+ - pin range  : [64 .. 71]
 
 When GPIO-specific functions in the pin control subsystem are called, these
 ranges will be used to look up the appropriate pin controller by inspecting
@@ -249,28 +263,12 @@ will be called on that specific pin controller.
 
 For all functionalities dealing with pin biasing, pin muxing etc, the pin
 controller subsystem will subtract the range's .base offset from the passed
-in gpio pin number, and pass that on to the pin control driver, so the driver
-will get an offset into its handled number range. Further it is also passed
+in gpio number, and add the ranges's .pin_base offset to retrive a pin number.
+After that, the subsystem passes it on to the pin control driver, so the driver
+will get an pin number into its handled number range. Further it is also passed
 the range ID value, so that the pin controller knows which range it should
 deal with.
 
-For example: if a user issues pinctrl_gpio_set_foo(50), the pin control
-subsystem will find that the second range on this pin controller matches,
-subtract the base 48 and call the
-pinctrl_driver_gpio_set_foo(pinctrl, range, 2) where the latter function has
-this signature:
-
-int pinctrl_driver_gpio_set_foo(struct pinctrl_dev *pctldev,
-    struct pinctrl_gpio_range *rangeid,
-    unsigned offset);
-
-Now the driver knows that we want to do some GPIO-specific operation on the
-second GPIO range handled by "chip b", at offset 2 in that specific range.
-
-(If the GPIO subsystem is ever refactored to use a local per-GPIO controller
-pin space, this mapping will need to be augmented accordingly.)
-
-
 PINMUX interfaces
 =================
 
diff --git a/drivers/pinctrl/pinmux-sirf.c b/drivers/pinctrl/pinmux-sirf.c
index d848d9764378..99e688e07ea0 100644
--- a/drivers/pinctrl/pinmux-sirf.c
+++ b/drivers/pinctrl/pinmux-sirf.c
@@ -1067,7 +1067,7 @@ static int sirfsoc_pinmux_request_gpio(struct pinctrl_dev *pmxdev,
 	spmx = pinctrl_dev_get_drvdata(pmxdev);
 
 	muxval = readl(spmx->gpio_virtbase + SIRFSOC_GPIO_PAD_EN(group));
-	muxval = muxval | (1 << offset);
+	muxval = muxval | (1 << (offset - range->pin_base));
 	writel(muxval, spmx->gpio_virtbase + SIRFSOC_GPIO_PAD_EN(group));
 
 	return 0;
@@ -1100,21 +1100,25 @@ static struct pinctrl_gpio_range sirfsoc_gpio_ranges[] = {
 		.name = "sirfsoc-gpio*",
 		.id = 0,
 		.base = 0,
+		.pin_base = 0,
 		.npins = 32,
 	}, {
 		.name = "sirfsoc-gpio*",
 		.id = 1,
 		.base = 32,
+		.pin_base = 32,
 		.npins = 32,
 	}, {
 		.name = "sirfsoc-gpio*",
 		.id = 2,
 		.base = 64,
+		.pin_base = 64,
 		.npins = 32,
 	}, {
 		.name = "sirfsoc-gpio*",
 		.id = 3,
 		.base = 96,
+		.pin_base = 96,
 		.npins = 19,
 	},
 };
diff --git a/drivers/pinctrl/pinmux-u300.c b/drivers/pinctrl/pinmux-u300.c
index 145a84dc16e6..bcf61bee7763 100644
--- a/drivers/pinctrl/pinmux-u300.c
+++ b/drivers/pinctrl/pinmux-u300.c
@@ -1026,6 +1026,7 @@ static struct pinctrl_gpio_range u300_gpio_range = {
 	.name = "COH901*",
 	.id = 0,
 	.base = 0,
+	.pin_base = 0,
 	.npins = 64,
 };
 
diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c
index ee3aba78c3dd..92aa13ee2208 100644
--- a/drivers/pinctrl/pinmux.c
+++ b/drivers/pinctrl/pinmux.c
@@ -229,7 +229,7 @@ int pinmux_request_gpio(unsigned gpio)
 		return -EINVAL;
 
 	/* Convert to the pin controllers number space */
-	pin = gpio - range->base;
+	pin = gpio - range->base + range->pin_base;
 
 	/* Conjure some name stating what chip and pin this is taken by */
 	snprintf(gpiostr, 15, "%s:%d", range->name, gpio);
@@ -263,7 +263,7 @@ void pinmux_free_gpio(unsigned gpio)
 		return;
 
 	/* Convert to the pin controllers number space */
-	pin = gpio - range->base;
+	pin = gpio - range->base + range->pin_base;
 
 	func = pin_free(pctldev, pin, range);
 	kfree(func);
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 04c011038f32..f17fac4b51f1 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -45,6 +45,7 @@ struct pinctrl_pin_desc {
  * @name: a name for the chip in this range
  * @id: an ID number for the chip in this range
  * @base: base offset of the GPIO range
+ * @pin_base: base pin number of the GPIO range
  * @npins: number of pins in the GPIO range, including the base number
  * @gc: an optional pointer to a gpio_chip
  */
@@ -53,6 +54,7 @@ struct pinctrl_gpio_range {
 	const char *name;
 	unsigned int id;
 	unsigned int base;
+	unsigned int pin_base;
 	unsigned int npins;
 	struct gpio_chip *gc;
 };
diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h
index 350e32a98c6a..bb7a9792f1ea 100644
--- a/include/linux/pinctrl/pinmux.h
+++ b/include/linux/pinctrl/pinmux.h
@@ -52,7 +52,7 @@ struct pinctrl_dev;
  * @disable: disable a certain muxing selector with a certain pin group
  * @gpio_request_enable: requests and enables GPIO on a certain pin.
  *	Implement this only if you can mux every pin individually as GPIO. The
- *	affected GPIO range is passed along with an offset into that
+ *	affected GPIO range is passed along with an offset(pin number) into that
  *	specific GPIO range - function selectors and pin groups are orthogonal
  *	to this, the core will however make sure the pins do not collide
  */
-- 
cgit v1.2.3


From 542e704f3ffee1dc4539c9e8191e4dc215220f5e Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 14 Nov 2011 10:06:22 +0100
Subject: pinctrl: GPIO direction support for muxing

When requesting a single GPIO pin to be muxed in, some controllers
will need to poke a different value into the control register
depending on whether the pin will be used for GPIO output or GPIO
input. So create pinmux counterparts to gpio_direction_[input|output]
in the pinctrl framework.

ChangeLog v1->v2:
- This also amends the documentation to make it clear the this
  function and associated machinery is *ONLY* intended as a backend
  to gpiolib machinery, not for everyone and his dog to start playing
  around with pins.
ChangeLog v2->v3:
- Don't pass an argument to the common request function, instead
  provide pinmux_* counterparts to the gpio_direction_[input|output]
  calls, simpler and anyone can understand it.
ChangeLog v3->v4:
- Fix numerous spelling mistakes and dangling text in documentation.
  Add Ack and Rewewed-by.

Cc: Igor Grinberg <grinberg@compulab.co.il>
Acked-by: Stephen Warren <swarren@nvidia.com>
Reviewed-by: Thomas Abraham <thomas.abraham@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/pinctrl.txt      | 32 ++++++++++++++++------
 drivers/pinctrl/pinmux.c       | 61 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/pinctrl/pinmux.h | 24 ++++++++++++++++-
 3 files changed, 108 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index 43ba411d1571..3846264c5973 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -645,6 +645,17 @@ All the above functions are mandatory to implement for a pinmux driver.
 Pinmux interaction with the GPIO subsystem
 ==========================================
 
+The public pinmux API contains two functions named pinmux_request_gpio()
+and pinmux_free_gpio(). These two functions shall *ONLY* be called from
+gpiolib-based drivers as part of their gpio_request() and
+gpio_free() semantics. Likewise the pinmux_gpio_direction_[input|output]
+shall only be called from within respective gpio_direction_[input|output]
+gpiolib implementation.
+
+NOTE that platforms and individual drivers shall *NOT* request GPIO pins to be
+muxed in. Instead, implement a proper gpiolib driver and have that driver
+request proper muxing for its pins.
+
 The function list could become long, especially if you can convert every
 individual pin into a GPIO pin independent of any other pins, and then try
 the approach to define every pin as a function.
@@ -652,19 +663,24 @@ the approach to define every pin as a function.
 In this case, the function array would become 64 entries for each GPIO
 setting and then the device functions.
 
-For this reason there is an additional function a pinmux driver can implement
-to enable only GPIO on an individual pin: .gpio_request_enable(). The same
-.free() function as for other functions is assumed to be usable also for
-GPIO pins.
+For this reason there are two functions a pinmux driver can implement
+to enable only GPIO on an individual pin: .gpio_request_enable() and
+.gpio_disable_free().
 
 This function will pass in the affected GPIO range identified by the pin
 controller core, so you know which GPIO pins are being affected by the request
 operation.
 
-Alternatively it is fully allowed to use named functions for each GPIO
-pin, the pinmux_request_gpio() will attempt to obtain the function "gpioN"
-where "N" is the global GPIO pin number if no special GPIO-handler is
-registered.
+If your driver needs to have an indication from the framework of whether the
+GPIO pin shall be used for input or output you can implement the
+.gpio_set_direction() function. As described this shall be called from the
+gpiolib driver and the affected GPIO range, pin offset and desired direction
+will be passed along to this function.
+
+Alternatively to using these special functions, it is fully allowed to use
+named functions for each GPIO pin, the pinmux_request_gpio() will attempt to
+obtain the function "gpioN" where "N" is the global GPIO pin number if no
+special GPIO-handler is registered.
 
 
 Pinmux board/machine configuration
diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c
index 92aa13ee2208..f3e4f031fe1c 100644
--- a/drivers/pinctrl/pinmux.c
+++ b/drivers/pinctrl/pinmux.c
@@ -214,6 +214,10 @@ static const char *pin_free(struct pinctrl_dev *pctldev, int pin,
 /**
  * pinmux_request_gpio() - request a single pin to be muxed in as GPIO
  * @gpio: the GPIO pin number from the GPIO subsystem number space
+ *
+ * This function should *ONLY* be used from gpiolib-based GPIO drivers,
+ * as part of their gpio_request() semantics, platforms and individual drivers
+ * shall *NOT* request GPIO pins to be muxed in.
  */
 int pinmux_request_gpio(unsigned gpio)
 {
@@ -249,6 +253,10 @@ EXPORT_SYMBOL_GPL(pinmux_request_gpio);
 /**
  * pinmux_free_gpio() - free a single pin, currently used as GPIO
  * @gpio: the GPIO pin number from the GPIO subsystem number space
+ *
+ * This function should *ONLY* be used from gpiolib-based GPIO drivers,
+ * as part of their gpio_free() semantics, platforms and individual drivers
+ * shall *NOT* request GPIO pins to be muxed out.
  */
 void pinmux_free_gpio(unsigned gpio)
 {
@@ -270,6 +278,59 @@ void pinmux_free_gpio(unsigned gpio)
 }
 EXPORT_SYMBOL_GPL(pinmux_free_gpio);
 
+static int pinmux_gpio_direction(unsigned gpio, bool input)
+{
+	struct pinctrl_dev *pctldev;
+	struct pinctrl_gpio_range *range;
+	const struct pinmux_ops *ops;
+	int ret;
+	int pin;
+
+	ret = pinctrl_get_device_gpio_range(gpio, &pctldev, &range);
+	if (ret)
+		return ret;
+
+	ops = pctldev->desc->pmxops;
+
+	/* Convert to the pin controllers number space */
+	pin = gpio - range->base + range->pin_base;
+
+	if (ops->gpio_set_direction)
+		ret = ops->gpio_set_direction(pctldev, range, pin, input);
+	else
+		ret = 0;
+
+	return ret;
+}
+
+/**
+ * pinmux_gpio_direction_input() - request a GPIO pin to go into input mode
+ * @gpio: the GPIO pin number from the GPIO subsystem number space
+ *
+ * This function should *ONLY* be used from gpiolib-based GPIO drivers,
+ * as part of their gpio_direction_input() semantics, platforms and individual
+ * drivers shall *NOT* touch pinmux GPIO calls.
+ */
+int pinmux_gpio_direction_input(unsigned gpio)
+{
+	return pinmux_gpio_direction(gpio, true);
+}
+EXPORT_SYMBOL_GPL(pinmux_gpio_direction_input);
+
+/**
+ * pinmux_gpio_direction_output() - request a GPIO pin to go into output mode
+ * @gpio: the GPIO pin number from the GPIO subsystem number space
+ *
+ * This function should *ONLY* be used from gpiolib-based GPIO drivers,
+ * as part of their gpio_direction_output() semantics, platforms and individual
+ * drivers shall *NOT* touch pinmux GPIO calls.
+ */
+int pinmux_gpio_direction_output(unsigned gpio)
+{
+	return pinmux_gpio_direction(gpio, false);
+}
+EXPORT_SYMBOL_GPL(pinmux_gpio_direction_output);
+
 /**
  * pinmux_register_mappings() - register a set of pinmux mappings
  * @maps: the pinmux mappings table to register
diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h
index bb7a9792f1ea..937b3e2fa36f 100644
--- a/include/linux/pinctrl/pinmux.h
+++ b/include/linux/pinctrl/pinmux.h
@@ -54,7 +54,13 @@ struct pinctrl_dev;
  *	Implement this only if you can mux every pin individually as GPIO. The
  *	affected GPIO range is passed along with an offset(pin number) into that
  *	specific GPIO range - function selectors and pin groups are orthogonal
- *	to this, the core will however make sure the pins do not collide
+ *	to this, the core will however make sure the pins do not collide.
+ * @gpio_disable_free: free up GPIO muxing on a certain pin, the reverse of
+ *	@gpio_request_enable
+ * @gpio_set_direction: Since controllers may need different configurations
+ *	depending on whether the GPIO is configured as input or output,
+ *	a direction selector function may be implemented as a backing
+ *	to the GPIO controllers that need pin muxing.
  */
 struct pinmux_ops {
 	int (*request) (struct pinctrl_dev *pctldev, unsigned offset);
@@ -76,11 +82,17 @@ struct pinmux_ops {
 	void (*gpio_disable_free) (struct pinctrl_dev *pctldev,
 				   struct pinctrl_gpio_range *range,
 				   unsigned offset);
+	int (*gpio_set_direction) (struct pinctrl_dev *pctldev,
+				   struct pinctrl_gpio_range *range,
+				   unsigned offset,
+				   bool input);
 };
 
 /* External interface to pinmux */
 extern int pinmux_request_gpio(unsigned gpio);
 extern void pinmux_free_gpio(unsigned gpio);
+extern int pinmux_gpio_direction_input(unsigned gpio);
+extern int pinmux_gpio_direction_output(unsigned gpio);
 extern struct pinmux * __must_check pinmux_get(struct device *dev, const char *name);
 extern void pinmux_put(struct pinmux *pmx);
 extern int pinmux_enable(struct pinmux *pmx);
@@ -97,6 +109,16 @@ static inline void pinmux_free_gpio(unsigned gpio)
 {
 }
 
+static inline int pinmux_gpio_direction_input(unsigned gpio)
+{
+	return 0;
+}
+
+static inline int pinmux_gpio_direction_output(unsigned gpio)
+{
+	return 0;
+}
+
 static inline struct pinmux * __must_check pinmux_get(struct device *dev, const char *name)
 {
 	return NULL;
-- 
cgit v1.2.3


From 97607d157c133ab18dfcd77fa836e37fa950a44a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 29 Nov 2011 12:52:39 +0100
Subject: pinctrl: make a copy of pinmux map

This makes a deep copy of the pinmux function map instead of
keeping the copy supplied from the platform around. This makes
it possible to tag the platforms map with __initdata as is also
done as part of this patch.

Rationale: a certain target platform (PXA) has numerous
pinmux maps, many of which will be lying around unused after
boot in a multi-platform binary. Instead, deep-copy the one
we're going to use and tag them all __initdata so they go away
after boot.

ChangeLog v1->v2:
- Fixup the deep copy, missed a few items on the struct,
  plus mark bool member non-const since we're making runtime
  copies if this stuff now.
ChangeLog v2->v3:
- Make a shallow copy (just copy the array of map structs)
  as Arnd noticed, string constants never get discarded by the
  kernel anyway, so these pointers may be safely copied over.

Reviewed-by: Arnd Bergmann <arnd.bergmann@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/pinctrl.txt       |  4 ++--
 arch/arm/mach-u300/core.c       |  2 +-
 drivers/pinctrl/pinmux.c        | 44 +++++++++++++++++++++++++++++++----------
 include/linux/pinctrl/machine.h |  2 +-
 4 files changed, 38 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index 3846264c5973..c8fd136eac83 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -697,7 +697,7 @@ spi on the second function mapping:
 
 #include <linux/pinctrl/machine.h>
 
-static const struct pinmux_map pmx_mapping[] = {
+static const struct pinmux_map __initdata pmx_mapping[] = {
 	{
 		.ctrl_dev_name = "pinctrl.0",
 		.function = "spi0",
@@ -734,7 +734,7 @@ Since the above construct is pretty common there is a helper macro to make
 it even more compact which assumes you want to use pinctrl.0 and position
 0 for mapping, for example:
 
-static struct pinmux_map pmx_mapping[] = {
+static struct pinmux_map __initdata pmx_mapping[] = {
        PINMUX_MAP_PRIMARY("I2CMAP", "i2c0", "foo-i2c.0"),
 };
 
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index ac0791e924bc..839fa15c4eb6 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -1605,7 +1605,7 @@ static struct platform_device pinmux_device = {
 };
 
 /* Pinmux settings */
-static struct pinmux_map u300_pinmux_map[] = {
+static struct pinmux_map __initdata u300_pinmux_map[] = {
 	/* anonymous maps for chip power and EMIFs */
 	PINMUX_MAP_PRIMARY_SYS_HOG("POWER", "power"),
 	PINMUX_MAP_PRIMARY_SYS_HOG("EMIF0", "emif0"),
diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c
index f3e4f031fe1c..f6e7d583998c 100644
--- a/drivers/pinctrl/pinmux.c
+++ b/drivers/pinctrl/pinmux.c
@@ -21,6 +21,7 @@
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
+#include <linux/string.h>
 #include <linux/sysfs.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
@@ -33,7 +34,7 @@ static DEFINE_MUTEX(pinmux_list_mutex);
 static LIST_HEAD(pinmux_list);
 
 /* Global pinmux maps, we allow one set only */
-static struct pinmux_map const *pinmux_maps;
+static struct pinmux_map *pinmux_maps;
 static unsigned pinmux_maps_num;
 
 /**
@@ -333,7 +334,9 @@ EXPORT_SYMBOL_GPL(pinmux_gpio_direction_output);
 
 /**
  * pinmux_register_mappings() - register a set of pinmux mappings
- * @maps: the pinmux mappings table to register
+ * @maps: the pinmux mappings table to register, this should be marked with
+ *	__initdata so it can be discarded after boot, this function will
+ *	perform a shallow copy for the mapping entries.
  * @num_maps: the number of maps in the mapping table
  *
  * Only call this once during initialization of your machine, the function is
@@ -344,32 +347,48 @@ EXPORT_SYMBOL_GPL(pinmux_gpio_direction_output);
 int __init pinmux_register_mappings(struct pinmux_map const *maps,
 				    unsigned num_maps)
 {
+	int ret = 0;
 	int i;
 
-	if (pinmux_maps != NULL) {
+	if (pinmux_maps_num != 0) {
 		pr_err("pinmux mappings already registered, you can only "
 		       "register one set of maps\n");
 		return -EINVAL;
 	}
 
 	pr_debug("add %d pinmux maps\n", num_maps);
+
+	/*
+	 * Make a copy of the map array - string pointers will end up in the
+	 * kernel const section anyway so these do not need to be deep copied.
+	 */
+	pinmux_maps = kmemdup(maps, sizeof(struct pinmux_map) * num_maps,
+			      GFP_KERNEL);
+	if (!pinmux_maps)
+		return -ENOMEM;
+
 	for (i = 0; i < num_maps; i++) {
-		/* Sanity check the mapping */
+		/* Sanity check the mapping while copying it */
 		if (!maps[i].name) {
 			pr_err("failed to register map %d: "
 			       "no map name given\n", i);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto err_out_free;
 		}
+
 		if (!maps[i].ctrl_dev && !maps[i].ctrl_dev_name) {
 			pr_err("failed to register map %s (%d): "
 			       "no pin control device given\n",
 			       maps[i].name, i);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto err_out_free;
 		}
+
 		if (!maps[i].function) {
 			pr_err("failed to register map %s (%d): "
 			       "no function ID given\n", maps[i].name, i);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto err_out_free;
 		}
 
 		if (!maps[i].dev && !maps[i].dev_name)
@@ -380,12 +399,17 @@ int __init pinmux_register_mappings(struct pinmux_map const *maps,
 			pr_debug("register map %s, function %s\n",
 				 maps[i].name,
 				 maps[i].function);
-	}
 
-	pinmux_maps = maps;
-	pinmux_maps_num = num_maps;
+		pinmux_maps_num++;
+	}
 
 	return 0;
+
+err_out_free:
+	kfree(pinmux_maps);
+	pinmux_maps = NULL;
+	pinmux_maps_num = 0;
+	return ret;
 }
 
 /**
diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h
index 88863531d862..f5372319d999 100644
--- a/include/linux/pinctrl/machine.h
+++ b/include/linux/pinctrl/machine.h
@@ -48,7 +48,7 @@ struct pinmux_map {
 	const char *group;
 	struct device *dev;
 	const char *dev_name;
-	const bool hog_on_boot;
+	bool hog_on_boot;
 };
 
 /*
-- 
cgit v1.2.3


From ae6b4d8588f4fc95520b0e62c4b1f474c82191a9 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 19 Oct 2011 18:14:33 +0200
Subject: pinctrl: add a pin config interface

This add per-pin and per-group pin config interfaces for biasing,
driving and other such electronic properties. The details of passed
configurations are passed in an opaque unsigned long which may be
dereferences to integer types, structs or lists on either side
of the configuration interface.

ChangeLog v1->v2:
- Clear split of terminology: we now have pin controllers, and
  those may support two interfaces using vtables: pin
  multiplexing and pin configuration.
- Break out pin configuration to its own C file, controllers may
  implement only config without mux, and vice versa, so keep each
  sub-functionality of pin controllers separate. Introduce
  CONFIG_PINCONF in Kconfig.
- Implement some core logic around pin configuration in the
  pinconf.c file.
- Remove UNKNOWN config states, these were just surplus baggage.
- Remove FLOAT config state - HIGH_IMPEDANCE should be enough for
  everyone.
- PIN_CONFIG_POWER_SOURCE added to handle switching the power
  supply for the pin logic between different sources
- Explicit DISABLE config enums to turn schmitt-trigger,
  wakeup etc OFF.
- Update documentation to reflect all the recent reasoning.
ChangeLog v2->v3:
- Twist API around to pass around arrays of config tuples instead
  of (param, value) pairs everywhere.
- Explicit drive strength semantics for push/pull and similar
  drive modes, this shall be the number of drive stages vs
  nominal load impedance, which should match the actual
  electronics used in push/pull CMOS or TTY totempoles.
- Drop load capacitance configuration - I probably don't know
  what I'm doing here so leave it out.
- Drop PIN_CONFIG_INPUT_SCHMITT_OFF, instead the argument zero to
  PIN_CONFIG_INPUT_SCHMITT turns schmitt trigger off.
- Drop PIN_CONFIG_NORMAL_POWER_MODE and have a well defined
  argument to PIN_CONFIG_LOW_POWER_MODE to get out of it instead.
- Drop PIN_CONFIG_WAKEUP_ENABLE/DISABLE and just use
  PIN_CONFIG_WAKEUP with defined value zero to turn wakeup off.
- Add PIN_CONFIG_INPUT_DEBOUNCE for configuring debounce time
  on input lines.
- Fix a bug when we tried to configure pins for pin controllers
  without pinconf support.
- Initialized debugfs properly so it works.
- Initialize the mutex properly and lock around config tampering
  sections.
- Check the return value from get_initial_config() properly.
ChangeLog v3->v4:
- Export the pin_config_get(), pin_config_set() and
  pin_config_group() functions.
- Drop the entire concept of just getting initial config and
  keeping track of pin states internally, instead ask the pins
  what state they are in. Previous idea was plain wrong, if the
  device cannot keep track of its state, the driver should do
  it.
- Drop the generic configuration layout, it seems this impose
  too much restriction on some pin controllers, so let them do
  things the way they want and split off support for generic
  config as an optional add-on.
ChangeLog v4->v5:
- Introduce two symmetric driver calls for group configuration,
  .pin_config_group_[get|set] and corresponding external calls.
- Remove generic semantic meanings of return values from config
  calls, these belong in the generic config patch. Just pass the
  return value through instead.
- Add a debugfs entry "pinconf-groups" to read status from group
  configuration only, also slam in a per-group debug callback in
  the pinconf_ops so custom drivers can display something
  meaningful for their pins.
- Fix some dangling newline.
- Drop dangling #else clause.
- Update documentation to match the above.
ChangeLog v5->v6:
- Change to using a pin name as parameter for the
  [get|set]_config() functions, as suggested by Stephen Warren.
  This is more natural as names will be what a developer has
  access to in written documentation etc.
ChangeLog v6->v7:
- Refactor out by-pin and by-name get/set functions, only expose
  the by-name functions externally, expose the by-pin functions
  internally.
- Show supported pin control functionality in the debugfs
  pinctrl-devices file.

Acked-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/pinctrl.txt       |  96 ++++++++++++-
 drivers/pinctrl/Kconfig         |   5 +-
 drivers/pinctrl/Makefile        |   1 +
 drivers/pinctrl/core.c          |  43 +++++-
 drivers/pinctrl/core.h          |   5 +
 drivers/pinctrl/pinconf.c       | 302 ++++++++++++++++++++++++++++++++++++++++
 drivers/pinctrl/pinconf.h       |  36 +++++
 include/linux/pinctrl/pinconf.h |  96 +++++++++++++
 include/linux/pinctrl/pinctrl.h |   8 +-
 9 files changed, 582 insertions(+), 10 deletions(-)
 create mode 100644 drivers/pinctrl/pinconf.c
 create mode 100644 drivers/pinctrl/pinconf.h
 create mode 100644 include/linux/pinctrl/pinconf.h

(limited to 'include/linux')

diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index c8fd136eac83..6d23fa84ee47 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -7,12 +7,9 @@ This subsystem deals with:
 
 - Multiplexing of pins, pads, fingers (etc) see below for details
 
-The intention is to also deal with:
-
-- Software-controlled biasing and driving mode specific pins, such as
-  pull-up/down, open drain etc, load capacitance configuration when controlled
-  by software, etc.
-
+- Configuration of pins, pads, fingers (etc), such as software-controlled
+  biasing and driving mode specific pins, such as pull-up/down, open drain,
+  load capacitance etc.
 
 Top-level interface
 ===================
@@ -88,6 +85,11 @@ int __init foo_probe(void)
 		pr_err("could not register foo pin driver\n");
 }
 
+To enable the pinctrl subsystem and the subgroups for PINMUX and PINCONF and
+selected drivers, you need to select them from your machine's Kconfig entry,
+since these are so tightly integrated with the machines they are used on.
+See for example arch/arm/mach-u300/Kconfig for an example.
+
 Pins usually have fancier names than this. You can find these in the dataheet
 for your chip. Notice that the core pinctrl.h file provides a fancy macro
 called PINCTRL_PIN() to create the struct entries. As you can see I enumerated
@@ -193,6 +195,88 @@ structure, for example specific register ranges associated with each group
 and so on.
 
 
+Pin configuration
+=================
+
+Pins can sometimes be software-configured in an various ways, mostly related
+to their electronic properties when used as inputs or outputs. For example you
+may be able to make an output pin high impedance, or "tristate" meaning it is
+effectively disconnected. You may be able to connect an input pin to VDD or GND
+using a certain resistor value - pull up and pull down - so that the pin has a
+stable value when nothing is driving the rail it is connected to, or when it's
+unconnected.
+
+For example, a platform may do this:
+
+ret = pin_config_set(dev, "FOO_GPIO_PIN", PLATFORM_X_PULL_UP);
+
+To pull up a pin to VDD. The pin configuration driver implements callbacks for
+changing pin configuration in the pin controller ops like this:
+
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinconf.h>
+#include "platform_x_pindefs.h"
+
+int foo_pin_config_get(struct pinctrl_dev *pctldev,
+		    unsigned offset,
+		    unsigned long *config)
+{
+	struct my_conftype conf;
+
+	... Find setting for pin @ offset ...
+
+	*config = (unsigned long) conf;
+}
+
+int foo_pin_config_set(struct pinctrl_dev *pctldev,
+		    unsigned offset,
+		    unsigned long config)
+{
+	struct my_conftype *conf = (struct my_conftype *) config;
+
+	switch (conf) {
+		case PLATFORM_X_PULL_UP:
+		...
+		}
+	}
+}
+
+int foo_pin_config_group_get (struct pinctrl_dev *pctldev,
+		    unsigned selector,
+		    unsigned long *config)
+{
+	...
+}
+
+int foo_pin_config_group_set (struct pinctrl_dev *pctldev,
+		    unsigned selector,
+		    unsigned long config)
+{
+	...
+}
+
+static struct pinconf_ops foo_pconf_ops = {
+	.pin_config_get = foo_pin_config_get,
+	.pin_config_set = foo_pin_config_set,
+	.pin_config_group_get = foo_pin_config_group_get,
+	.pin_config_group_set = foo_pin_config_group_set,
+};
+
+/* Pin config operations are handled by some pin controller */
+static struct pinctrl_desc foo_desc = {
+	...
+	.confops = &foo_pconf_ops,
+};
+
+Since some controllers have special logic for handling entire groups of pins
+they can exploit the special whole-group pin control function. The
+pin_config_group_set() callback is allowed to return the error code -EAGAIN,
+for groups it does not want to handle, or if it just wants to do some
+group-level handling and then fall through to iterate over all pins, in which
+case each individual pin will be treated by separate pin_config_set() calls as
+well.
+
+
 Interaction with the GPIO subsystem
 ===================================
 
diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index e963da41e948..c63c72102989 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -12,7 +12,10 @@ menu "Pin controllers"
 	depends on PINCTRL
 
 config PINMUX
-	bool "Support pinmux controllers"
+	bool "Support pin multiplexing controllers"
+
+config PINCONF
+	bool "Support pin configuration controllers"
 
 config DEBUG_PINCTRL
 	bool "Debug PINCTRL calls"
diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile
index 5f3e4d65465a..c046f78dd7f7 100644
--- a/drivers/pinctrl/Makefile
+++ b/drivers/pinctrl/Makefile
@@ -4,6 +4,7 @@ ccflags-$(CONFIG_DEBUG_PINCTRL)	+= -DDEBUG
 
 obj-$(CONFIG_PINCTRL)		+= core.o
 obj-$(CONFIG_PINMUX)		+= pinmux.o
+obj-$(CONFIG_PINCONF)		+= pinconf.o
 obj-$(CONFIG_PINMUX_SIRF)	+= pinmux-sirf.o
 obj-$(CONFIG_PINMUX_U300)	+= pinmux-u300.o
 obj-$(CONFIG_PINCTRL_COH901)	+= pinctrl-coh901.o
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 4955a68d618f..034b1ad38b32 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -28,6 +28,7 @@
 #include <linux/pinctrl/machine.h>
 #include "core.h"
 #include "pinmux.h"
+#include "pinconf.h"
 
 /* Global list of pin control devices */
 static DEFINE_MUTEX(pinctrldev_list_mutex);
@@ -100,6 +101,30 @@ struct pin_desc *pin_desc_get(struct pinctrl_dev *pctldev, unsigned int pin)
 	return pindesc;
 }
 
+/**
+ * pin_get_from_name() - look up a pin number from a name
+ * @pctldev: the pin control device to lookup the pin on
+ * @name: the name of the pin to look up
+ */
+int pin_get_from_name(struct pinctrl_dev *pctldev, const char *name)
+{
+	unsigned pin;
+
+	/* The highest pin number need to be included in the loop, thus <= */
+	for (pin = 0; pin <= pctldev->desc->maxpin; pin++) {
+		struct pin_desc *desc;
+
+		desc = pin_desc_get(pctldev, pin);
+		/* Pin space may be sparse */
+		if (desc == NULL)
+			continue;
+		if (desc->name && !strcmp(name, desc->name))
+			return pin;
+	}
+
+	return -EINVAL;
+}
+
 /**
  * pin_is_valid() - check if pin exists on controller
  * @pctldev: the pin control device to check the pin on
@@ -160,6 +185,7 @@ static int pinctrl_register_one_pin(struct pinctrl_dev *pctldev,
 	pindesc = kzalloc(sizeof(*pindesc), GFP_KERNEL);
 	if (pindesc == NULL)
 		return -ENOMEM;
+
 	spin_lock_init(&pindesc->lock);
 
 	/* Set owner */
@@ -409,11 +435,15 @@ static int pinctrl_devices_show(struct seq_file *s, void *what)
 {
 	struct pinctrl_dev *pctldev;
 
-	seq_puts(s, "name [pinmux]\n");
+	seq_puts(s, "name [pinmux] [pinconf]\n");
 	mutex_lock(&pinctrldev_list_mutex);
 	list_for_each_entry(pctldev, &pinctrldev_list, node) {
 		seq_printf(s, "%s ", pctldev->desc->name);
 		if (pctldev->desc->pmxops)
+			seq_puts(s, "yes ");
+		else
+			seq_puts(s, "no ");
+		if (pctldev->desc->confops)
 			seq_puts(s, "yes");
 		else
 			seq_puts(s, "no");
@@ -492,6 +522,7 @@ static void pinctrl_init_device_debugfs(struct pinctrl_dev *pctldev)
 	debugfs_create_file("gpio-ranges", S_IFREG | S_IRUGO,
 			    device_root, pctldev, &pinctrl_gpioranges_ops);
 	pinmux_init_device_debugfs(device_root, pctldev);
+	pinconf_init_device_debugfs(device_root, pctldev);
 }
 
 static void pinctrl_init_debugfs(void)
@@ -548,6 +579,16 @@ struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc,
 		}
 	}
 
+	/* If we're implementing pinconfig, check the ops for sanity */
+	if (pctldesc->confops) {
+		ret = pinconf_check_ops(pctldesc->confops);
+		if (ret) {
+			pr_err("%s pin config ops lacks necessary functions\n",
+			       pctldesc->name);
+			return NULL;
+		}
+	}
+
 	pctldev = kzalloc(sizeof(struct pinctrl_dev), GFP_KERNEL);
 	if (pctldev == NULL)
 		return NULL;
diff --git a/drivers/pinctrl/core.h b/drivers/pinctrl/core.h
index 74dee439dcf0..3f5b911acf18 100644
--- a/drivers/pinctrl/core.h
+++ b/drivers/pinctrl/core.h
@@ -9,6 +9,10 @@
  * License terms: GNU General Public License (GPL) version 2
  */
 
+#include <linux/pinctrl/pinconf.h>
+
+struct pinctrl_gpio_range;
+
 /**
  * struct pinctrl_dev - pin control class device
  * @node: node to include this pin controller in the global pin controller list
@@ -66,6 +70,7 @@ struct pin_desc {
 struct pinctrl_dev *get_pinctrl_dev_from_dev(struct device *dev,
 					     const char *dev_name);
 struct pin_desc *pin_desc_get(struct pinctrl_dev *pctldev, unsigned int pin);
+int pin_get_from_name(struct pinctrl_dev *pctldev, const char *name);
 int pinctrl_get_device_gpio_range(unsigned gpio,
 				  struct pinctrl_dev **outdev,
 				  struct pinctrl_gpio_range **outrange);
diff --git a/drivers/pinctrl/pinconf.c b/drivers/pinctrl/pinconf.c
new file mode 100644
index 000000000000..9195eefe258a
--- /dev/null
+++ b/drivers/pinctrl/pinconf.c
@@ -0,0 +1,302 @@
+/*
+ * Core driver for the pin config portions of the pin control subsystem
+ *
+ * Copyright (C) 2011 ST-Ericsson SA
+ * Written on behalf of Linaro for ST-Ericsson
+ *
+ * Author: Linus Walleij <linus.walleij@linaro.org>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#define pr_fmt(fmt) "pinconfig core: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/pinctrl/machine.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinconf.h>
+#include "core.h"
+#include "pinconf.h"
+
+int pin_config_get_for_pin(struct pinctrl_dev *pctldev, unsigned pin,
+			   unsigned long *config)
+{
+	const struct pinconf_ops *ops = pctldev->desc->confops;
+
+	if (!ops || !ops->pin_config_get) {
+		dev_err(&pctldev->dev, "cannot get pin configuration, missing "
+			"pin_config_get() function in driver\n");
+		return -EINVAL;
+	}
+
+	return ops->pin_config_get(pctldev, pin, config);
+}
+
+/**
+ * pin_config_get() - get the configuration of a single pin parameter
+ * @pctldev: pin controller device for this pin
+ * @name: name of the pin to get the config for
+ * @config: the config pointed to by this argument will be filled in with the
+ *	current pin state, it can be used directly by drivers as a numeral, or
+ *	it can be dereferenced to any struct.
+ */
+int pin_config_get(struct pinctrl_dev *pctldev, const char *name,
+			  unsigned long *config)
+{
+	int pin;
+
+	pin = pin_get_from_name(pctldev, name);
+	if (pin < 0)
+		return pin;
+
+	return pin_config_get_for_pin(pctldev, pin, config);
+}
+EXPORT_SYMBOL(pin_config_get);
+
+int pin_config_set_for_pin(struct pinctrl_dev *pctldev, unsigned pin,
+			   unsigned long config)
+{
+	const struct pinconf_ops *ops = pctldev->desc->confops;
+	int ret;
+
+	if (!ops || !ops->pin_config_set) {
+		dev_err(&pctldev->dev, "cannot configure pin, missing "
+			"config function in driver\n");
+		return -EINVAL;
+	}
+
+	ret = ops->pin_config_set(pctldev, pin, config);
+	if (ret) {
+		dev_err(&pctldev->dev,
+			"unable to set pin configuration on pin %d\n", pin);
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * pin_config_set() - set the configuration of a single pin parameter
+ * @pctldev: pin controller device for this pin
+ * @name: name of the pin to set the config for
+ * @config: the config in this argument will contain the desired pin state, it
+ *	can be used directly by drivers as a numeral, or it can be dereferenced
+ *	to any struct.
+ */
+int pin_config_set(struct pinctrl_dev *pctldev, const char *name,
+		   unsigned long config)
+{
+	int pin;
+
+	pin = pin_get_from_name(pctldev, name);
+	if (pin < 0)
+		return pin;
+
+	return pin_config_set_for_pin(pctldev, pin, config);
+}
+EXPORT_SYMBOL(pin_config_set);
+
+int pin_config_group_get(struct pinctrl_dev *pctldev, const char *pin_group,
+			 unsigned long *config)
+{
+	const struct pinconf_ops *ops = pctldev->desc->confops;
+	int selector;
+
+	if (!ops || !ops->pin_config_group_get) {
+		dev_err(&pctldev->dev, "cannot get configuration for pin "
+			"group, missing group config get function in "
+			"driver\n");
+		return -EINVAL;
+	}
+
+	selector = pinctrl_get_group_selector(pctldev, pin_group);
+	if (selector < 0)
+		return selector;
+
+	return ops->pin_config_group_get(pctldev, selector, config);
+}
+EXPORT_SYMBOL(pin_config_group_get);
+
+
+int pin_config_group_set(struct pinctrl_dev *pctldev, const char *pin_group,
+			 unsigned long config)
+{
+	const struct pinctrl_ops *pctlops = pctldev->desc->pctlops;
+	const struct pinconf_ops *ops = pctldev->desc->confops;
+	int selector;
+	const unsigned *pins;
+	unsigned num_pins;
+	int ret;
+	int i;
+
+	if (!ops || (!ops->pin_config_group_set && !ops->pin_config_set)) {
+		dev_err(&pctldev->dev, "cannot configure pin group, missing "
+			"config function in driver\n");
+		return -EINVAL;
+	}
+
+	selector = pinctrl_get_group_selector(pctldev, pin_group);
+	if (selector < 0)
+		return selector;
+
+	ret = pctlops->get_group_pins(pctldev, selector, &pins, &num_pins);
+	if (ret) {
+		dev_err(&pctldev->dev, "cannot configure pin group, error "
+			"getting pins\n");
+		return ret;
+	}
+
+	/*
+	 * If the pin controller supports handling entire groups we use that
+	 * capability.
+	 */
+	if (ops->pin_config_group_set) {
+		ret = ops->pin_config_group_set(pctldev, selector, config);
+		/*
+		 * If the pin controller prefer that a certain group be handled
+		 * pin-by-pin as well, it returns -EAGAIN.
+		 */
+		if (ret != -EAGAIN)
+			return ret;
+	}
+
+	/*
+	 * If the controller cannot handle entire groups, we configure each pin
+	 * individually.
+	 */
+	if (!ops->pin_config_set)
+		return 0;
+
+	for (i = 0; i < num_pins; i++) {
+		ret = ops->pin_config_set(pctldev, pins[i], config);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(pin_config_group_set);
+
+int pinconf_check_ops(const struct pinconf_ops *ops)
+{
+	/* We must be able to read out pin status */
+	if (!ops->pin_config_get && !ops->pin_config_group_get)
+		return -EINVAL;
+	/* We have to be able to config the pins in SOME way */
+	if (!ops->pin_config_set && !ops->pin_config_group_set)
+		return -EINVAL;
+	return 0;
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+static void pinconf_dump_pin(struct pinctrl_dev *pctldev,
+			     struct seq_file *s, int pin)
+{
+	const struct pinconf_ops *ops = pctldev->desc->confops;
+
+	if (ops && ops->pin_config_dbg_show)
+		ops->pin_config_dbg_show(pctldev, s, pin);
+}
+
+static int pinconf_pins_show(struct seq_file *s, void *what)
+{
+	struct pinctrl_dev *pctldev = s->private;
+	unsigned pin;
+
+	seq_puts(s, "Pin config settings per pin\n");
+	seq_puts(s, "Format: pin (name): pinmux setting array\n");
+
+	/* The highest pin number need to be included in the loop, thus <= */
+	for (pin = 0; pin <= pctldev->desc->maxpin; pin++) {
+		struct pin_desc *desc;
+
+		desc = pin_desc_get(pctldev, pin);
+		/* Pin space may be sparse */
+		if (desc == NULL)
+			continue;
+
+		seq_printf(s, "pin %d (%s):", pin,
+			   desc->name ? desc->name : "unnamed");
+
+		pinconf_dump_pin(pctldev, s, pin);
+
+		seq_printf(s, "\n");
+	}
+
+	return 0;
+}
+
+static void pinconf_dump_group(struct pinctrl_dev *pctldev,
+			       struct seq_file *s, unsigned selector,
+			       const char *gname)
+{
+	const struct pinconf_ops *ops = pctldev->desc->confops;
+
+	if (ops && ops->pin_config_group_dbg_show)
+		ops->pin_config_group_dbg_show(pctldev, s, selector);
+}
+
+static int pinconf_groups_show(struct seq_file *s, void *what)
+{
+	struct pinctrl_dev *pctldev = s->private;
+	const struct pinctrl_ops *pctlops = pctldev->desc->pctlops;
+	const struct pinconf_ops *ops = pctldev->desc->confops;
+	unsigned selector = 0;
+
+	if (!ops || !ops->pin_config_group_get)
+		return 0;
+
+	seq_puts(s, "Pin config settings per pin group\n");
+	seq_puts(s, "Format: group (name): pinmux setting array\n");
+
+	while (pctlops->list_groups(pctldev, selector) >= 0) {
+		const char *gname = pctlops->get_group_name(pctldev, selector);
+
+		seq_printf(s, "%u (%s):", selector, gname);
+		pinconf_dump_group(pctldev, s, selector, gname);
+		selector++;
+	}
+
+	return 0;
+}
+
+static int pinconf_pins_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pinconf_pins_show, inode->i_private);
+}
+
+static int pinconf_groups_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pinconf_groups_show, inode->i_private);
+}
+
+static const struct file_operations pinconf_pins_ops = {
+	.open		= pinconf_pins_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations pinconf_groups_ops = {
+	.open		= pinconf_groups_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+void pinconf_init_device_debugfs(struct dentry *devroot,
+			 struct pinctrl_dev *pctldev)
+{
+	debugfs_create_file("pinconf-pins", S_IFREG | S_IRUGO,
+			    devroot, pctldev, &pinconf_pins_ops);
+	debugfs_create_file("pinconf-groups", S_IFREG | S_IRUGO,
+			    devroot, pctldev, &pinconf_groups_ops);
+}
+
+#endif
diff --git a/drivers/pinctrl/pinconf.h b/drivers/pinctrl/pinconf.h
new file mode 100644
index 000000000000..e7dc6165032a
--- /dev/null
+++ b/drivers/pinctrl/pinconf.h
@@ -0,0 +1,36 @@
+/*
+ * Internal interface between the core pin control system and the
+ * pin config portions
+ *
+ * Copyright (C) 2011 ST-Ericsson SA
+ * Written on behalf of Linaro for ST-Ericsson
+ * Based on bits of regulator core, gpio core and clk core
+ *
+ * Author: Linus Walleij <linus.walleij@linaro.org>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#ifdef CONFIG_PINCONF
+
+int pinconf_check_ops(const struct pinconf_ops *ops);
+void pinconf_init_device_debugfs(struct dentry *devroot,
+				 struct pinctrl_dev *pctldev);
+int pin_config_get_for_pin(struct pinctrl_dev *pctldev, unsigned pin,
+			   unsigned long *config);
+int pin_config_set_for_pin(struct pinctrl_dev *pctldev, unsigned pin,
+			   unsigned long config);
+
+#else
+
+static inline int pinconf_check_ops(const struct pinconf_ops *ops)
+{
+	return 0;
+}
+
+static inline void pinconf_init_device_debugfs(struct dentry *devroot,
+					       struct pinctrl_dev *pctldev)
+{
+}
+
+#endif
diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h
new file mode 100644
index 000000000000..d5b72e6e261d
--- /dev/null
+++ b/include/linux/pinctrl/pinconf.h
@@ -0,0 +1,96 @@
+/*
+ * Interface the pinconfig portions of the pinctrl subsystem
+ *
+ * Copyright (C) 2011 ST-Ericsson SA
+ * Written on behalf of Linaro for ST-Ericsson
+ * This interface is used in the core to keep track of pins.
+ *
+ * Author: Linus Walleij <linus.walleij@linaro.org>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#ifndef __LINUX_PINCTRL_PINCONF_H
+#define __LINUX_PINCTRL_PINCONF_H
+
+#ifdef CONFIG_PINCONF
+
+struct pinctrl_dev;
+
+/**
+ * struct pinconf_ops - pin config operations, to be implemented by
+ * pin configuration capable drivers.
+ * @pin_config_get: get the config of a certain pin, if the requested config
+ *	is not available on this controller this should return -ENOTSUPP
+ *	and if it is available but disabled it should return -EINVAL
+ * @pin_config_get: get the config of a certain pin
+ * @pin_config_set: configure an individual pin
+ * @pin_config_group_get: get configurations for an entire pin group
+ * @pin_config_group_set: configure all pins in a group
+ * @pin_config_dbg_show: optional debugfs display hook that will provide
+ *	per-device info for a certain pin in debugfs
+ * @pin_config_group_dbg_show: optional debugfs display hook that will provide
+ *	per-device info for a certain group in debugfs
+ */
+struct pinconf_ops {
+	int (*pin_config_get) (struct pinctrl_dev *pctldev,
+			       unsigned pin,
+			       unsigned long *config);
+	int (*pin_config_set) (struct pinctrl_dev *pctldev,
+			       unsigned pin,
+			       unsigned long config);
+	int (*pin_config_group_get) (struct pinctrl_dev *pctldev,
+				     unsigned selector,
+				     unsigned long *config);
+	int (*pin_config_group_set) (struct pinctrl_dev *pctldev,
+				     unsigned selector,
+				     unsigned long config);
+	void (*pin_config_dbg_show) (struct pinctrl_dev *pctldev,
+				     struct seq_file *s,
+				     unsigned offset);
+	void (*pin_config_group_dbg_show) (struct pinctrl_dev *pctldev,
+					   struct seq_file *s,
+					   unsigned selector);
+};
+
+extern int pin_config_get(struct pinctrl_dev *pctldev, const char *name,
+			  unsigned long *config);
+extern int pin_config_set(struct pinctrl_dev *pctldev, const char *name,
+			  unsigned long config);
+extern int pin_config_group_get(struct pinctrl_dev *pctldev,
+				const char *pin_group,
+				unsigned long *config);
+extern int pin_config_group_set(struct pinctrl_dev *pctldev,
+				const char *pin_group,
+				unsigned long config);
+
+#else
+
+static inline int pin_config_get(struct pinctrl_dev *pctldev, const char *name,
+				 unsigned long *config)
+{
+	return 0;
+}
+
+static inline int pin_config_set(struct pinctrl_dev *pctldev, const char *name,
+				 unsigned long config)
+{
+	return 0;
+}
+
+static inline int pin_config_group_get(struct pinctrl_dev *pctldev,
+				       const char *pin_group,
+				       unsigned long *config)
+{
+	return 0;
+}
+
+static inline int pin_config_group_set(struct pinctrl_dev *pctldev,
+				       const char *pin_group,
+				       unsigned long config)
+{
+	return 0;
+}
+
+#endif
+
+#endif /* __LINUX_PINCTRL_PINCONF_H */
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index f17fac4b51f1..9809a94f151b 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -21,6 +21,7 @@
 
 struct pinctrl_dev;
 struct pinmux_ops;
+struct pinconf_ops;
 struct gpio_chip;
 
 /**
@@ -97,7 +98,9 @@ struct pinctrl_ops {
  *	but may be equal to npins if you have no holes in the pin range.
  * @pctlops: pin control operation vtable, to support global concepts like
  *	grouping of pins, this is optional.
- * @pmxops: pinmux operation vtable, if you support pinmuxing in your driver
+ * @pmxops: pinmux operations vtable, if you support pinmuxing in your driver
+ * @confops: pin config operations vtable, if you support pin configuration in
+ *	your driver
  * @owner: module providing the pin controller, used for refcounting
  */
 struct pinctrl_desc {
@@ -107,6 +110,7 @@ struct pinctrl_desc {
 	unsigned int maxpin;
 	struct pinctrl_ops *pctlops;
 	struct pinmux_ops *pmxops;
+	struct pinconf_ops *confops;
 	struct module *owner;
 };
 
@@ -125,7 +129,7 @@ extern void *pinctrl_dev_get_drvdata(struct pinctrl_dev *pctldev);
 
 struct pinctrl_dev;
 
-/* Sufficiently stupid default function when pinctrl is not in use */
+/* Sufficiently stupid default functions when pinctrl is not in use */
 static inline bool pin_is_valid(struct pinctrl_dev *pctldev, int pin)
 {
 	return pin >= 0;
-- 
cgit v1.2.3


From 1ddb6ff03c0cdec58c6cfdbada95acddcce4a7b7 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Fri, 9 Dec 2011 16:59:03 -0700
Subject: pinctrl: implement PINMUX_MAP_SYS_HOG

This is the same as PINMUX_MAP_PRIMARY_SYS_HOG, except that it allows
you to specify a particular control device.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/machine.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h
index f5372319d999..0ca32eb63b67 100644
--- a/include/linux/pinctrl/machine.h
+++ b/include/linux/pinctrl/machine.h
@@ -65,6 +65,14 @@ struct pinmux_map {
 #define PINMUX_MAP_SYS(a, b, c) \
 	{ .name = a, .ctrl_dev_name = b, .function = c }
 
+/*
+ * Convenience macro to map a system function onto a certain pinctrl device,
+ * to be hogged by the pinmux core until the system shuts down.
+ */
+#define PINMUX_MAP_SYS_HOG(a, b, c) \
+	{ .name = a, .ctrl_dev_name = b, .function = c, \
+	  .hog_on_boot = true }
+
 /*
  * Convenience macro to map a function onto the primary device pinctrl device
  * this is especially helpful on systems that have only one pin controller
-- 
cgit v1.2.3


From 51cd24ee625c348654114032499914d0311e5832 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Fri, 9 Dec 2011 16:59:05 -0700
Subject: pinctrl: don't create a device for each pin controller

Pin controllers should already be instantiated as a device, so there's
no need for the pinctrl core to create a new struct device for each
controller.

This allows the controller's real name to be used in the mux mapping
table, rather than e.g. "pinctrl.0", "pinctrl.1", etc.

This necessitates removal of the PINMUX_MAP_PRIMARY*() macros, since
their sole purpose was to hard-code the .ctrl_dev_name field to be
"pinctrl.0".

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/pinctrl.txt       | 26 +++++++++++------------
 drivers/pinctrl/core.c          | 42 ++++++++++---------------------------
 drivers/pinctrl/core.h          |  2 +-
 drivers/pinctrl/pinconf.c       | 12 +++++------
 drivers/pinctrl/pinmux.c        | 46 ++++++++++++++++++++---------------------
 include/linux/pinctrl/machine.h | 25 ----------------------
 6 files changed, 54 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index 6d23fa84ee47..f08064368291 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -783,17 +783,17 @@ spi on the second function mapping:
 
 static const struct pinmux_map __initdata pmx_mapping[] = {
 	{
-		.ctrl_dev_name = "pinctrl.0",
+		.ctrl_dev_name = "pinctrl-foo",
 		.function = "spi0",
 		.dev_name = "foo-spi.0",
 	},
 	{
-		.ctrl_dev_name = "pinctrl.0",
+		.ctrl_dev_name = "pinctrl-foo",
 		.function = "i2c0",
 		.dev_name = "foo-i2c.0",
 	},
 	{
-		.ctrl_dev_name = "pinctrl.0",
+		.ctrl_dev_name = "pinctrl-foo",
 		.function = "mmc0",
 		.dev_name = "foo-mmc.0",
 	},
@@ -815,7 +815,7 @@ You register this pinmux mapping to the pinmux subsystem by simply:
        ret = pinmux_register_mappings(pmx_mapping, ARRAY_SIZE(pmx_mapping));
 
 Since the above construct is pretty common there is a helper macro to make
-it even more compact which assumes you want to use pinctrl.0 and position
+it even more compact which assumes you want to use pinctrl-foo and position
 0 for mapping, for example:
 
 static struct pinmux_map __initdata pmx_mapping[] = {
@@ -832,14 +832,14 @@ As it is possible to map a function to different groups of pins an optional
 ...
 {
 	.name = "spi0-pos-A",
-	.ctrl_dev_name = "pinctrl.0",
+	.ctrl_dev_name = "pinctrl-foo",
 	.function = "spi0",
 	.group = "spi0_0_grp",
 	.dev_name = "foo-spi.0",
 },
 {
 	.name = "spi0-pos-B",
-	.ctrl_dev_name = "pinctrl.0",
+	.ctrl_dev_name = "pinctrl-foo",
 	.function = "spi0",
 	.group = "spi0_1_grp",
 	.dev_name = "foo-spi.0",
@@ -858,42 +858,42 @@ case), we define a mapping like this:
 ...
 {
 	.name "2bit"
-	.ctrl_dev_name = "pinctrl.0",
+	.ctrl_dev_name = "pinctrl-foo",
 	.function = "mmc0",
 	.group = "mmc0_1_grp",
 	.dev_name = "foo-mmc.0",
 },
 {
 	.name "4bit"
-	.ctrl_dev_name = "pinctrl.0",
+	.ctrl_dev_name = "pinctrl-foo",
 	.function = "mmc0",
 	.group = "mmc0_1_grp",
 	.dev_name = "foo-mmc.0",
 },
 {
 	.name "4bit"
-	.ctrl_dev_name = "pinctrl.0",
+	.ctrl_dev_name = "pinctrl-foo",
 	.function = "mmc0",
 	.group = "mmc0_2_grp",
 	.dev_name = "foo-mmc.0",
 },
 {
 	.name "8bit"
-	.ctrl_dev_name = "pinctrl.0",
+	.ctrl_dev_name = "pinctrl-foo",
 	.function = "mmc0",
 	.group = "mmc0_1_grp",
 	.dev_name = "foo-mmc.0",
 },
 {
 	.name "8bit"
-	.ctrl_dev_name = "pinctrl.0",
+	.ctrl_dev_name = "pinctrl-foo",
 	.function = "mmc0",
 	.group = "mmc0_2_grp",
 	.dev_name = "foo-mmc.0",
 },
 {
 	.name "8bit"
-	.ctrl_dev_name = "pinctrl.0",
+	.ctrl_dev_name = "pinctrl-foo",
 	.function = "mmc0",
 	.group = "mmc0_3_grp",
 	.dev_name = "foo-mmc.0",
@@ -996,7 +996,7 @@ like this:
 
 {
 	.name "POWERMAP"
-	.ctrl_dev_name = "pinctrl.0",
+	.ctrl_dev_name = "pinctrl-foo",
 	.function = "power_func",
 	.hog_on_boot = true,
 },
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 034b1ad38b32..160fb5aae591 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -34,12 +34,6 @@
 static DEFINE_MUTEX(pinctrldev_list_mutex);
 static LIST_HEAD(pinctrldev_list);
 
-static void pinctrl_dev_release(struct device *dev)
-{
-	struct pinctrl_dev *pctldev = dev_get_drvdata(dev);
-	kfree(pctldev);
-}
-
 const char *pinctrl_dev_get_name(struct pinctrl_dev *pctldev)
 {
 	/* We're not allowed to register devices without name */
@@ -71,14 +65,14 @@ struct pinctrl_dev *get_pinctrl_dev_from_dev(struct device *dev,
 
 	mutex_lock(&pinctrldev_list_mutex);
 	list_for_each_entry(pctldev, &pinctrldev_list, node) {
-		if (dev &&  &pctldev->dev == dev) {
+		if (dev && pctldev->dev == dev) {
 			/* Matched on device pointer */
 			found = true;
 			break;
 		}
 
 		if (devname &&
-		    !strcmp(dev_name(&pctldev->dev), devname)) {
+		    !strcmp(dev_name(pctldev->dev), devname)) {
 			/* Matched on device name */
 			found = true;
 			break;
@@ -325,7 +319,7 @@ int pinctrl_get_group_selector(struct pinctrl_dev *pctldev,
 		const char *gname = pctlops->get_group_name(pctldev,
 							    group_selector);
 		if (!strcmp(gname, pin_group)) {
-			dev_dbg(&pctldev->dev,
+			dev_dbg(pctldev->dev,
 				"found group selector %u for %s\n",
 				group_selector,
 				pin_group);
@@ -335,7 +329,7 @@ int pinctrl_get_group_selector(struct pinctrl_dev *pctldev,
 		group_selector++;
 	}
 
-	dev_err(&pctldev->dev, "does not have pin group %s\n",
+	dev_err(pctldev->dev, "does not have pin group %s\n",
 		pin_group);
 
 	return -EINVAL;
@@ -508,11 +502,11 @@ static void pinctrl_init_device_debugfs(struct pinctrl_dev *pctldev)
 {
 	static struct dentry *device_root;
 
-	device_root = debugfs_create_dir(dev_name(&pctldev->dev),
+	device_root = debugfs_create_dir(dev_name(pctldev->dev),
 					 debugfs_root);
 	if (IS_ERR(device_root) || !device_root) {
 		pr_warn("failed to create debugfs directory for %s\n",
-			dev_name(&pctldev->dev));
+			dev_name(pctldev->dev));
 		return;
 	}
 	debugfs_create_file("pins", S_IFREG | S_IRUGO,
@@ -560,7 +554,6 @@ static void pinctrl_init_debugfs(void)
 struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc,
 				    struct device *dev, void *driver_data)
 {
-	static atomic_t pinmux_no = ATOMIC_INIT(0);
 	struct pinctrl_dev *pctldev;
 	int ret;
 
@@ -601,18 +594,7 @@ struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc,
 	spin_lock_init(&pctldev->pin_desc_tree_lock);
 	INIT_LIST_HEAD(&pctldev->gpio_ranges);
 	mutex_init(&pctldev->gpio_ranges_lock);
-
-	/* Register device */
-	pctldev->dev.parent = dev;
-	dev_set_name(&pctldev->dev, "pinctrl.%d",
-		     atomic_inc_return(&pinmux_no) - 1);
-	pctldev->dev.release = pinctrl_dev_release;
-	ret = device_register(&pctldev->dev);
-	if (ret != 0) {
-		pr_err("error in device registration\n");
-		goto out_reg_dev_err;
-	}
-	dev_set_drvdata(&pctldev->dev, pctldev);
+	pctldev->dev = dev;
 
 	/* Register all the pins */
 	pr_debug("try to register %d pins on %s...\n",
@@ -622,7 +604,7 @@ struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc,
 		pr_err("error during pin registration\n");
 		pinctrl_free_pindescs(pctldev, pctldesc->pins,
 				      pctldesc->npins);
-		goto out_reg_pins_err;
+		goto out_err;
 	}
 
 	pinctrl_init_device_debugfs(pctldev);
@@ -632,10 +614,8 @@ struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc,
 	pinmux_hog_maps(pctldev);
 	return pctldev;
 
-out_reg_pins_err:
-	device_del(&pctldev->dev);
-out_reg_dev_err:
-	put_device(&pctldev->dev);
+out_err:
+	kfree(pctldev);
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(pinctrl_register);
@@ -659,7 +639,7 @@ void pinctrl_unregister(struct pinctrl_dev *pctldev)
 	/* Destroy descriptor tree */
 	pinctrl_free_pindescs(pctldev, pctldev->desc->pins,
 			      pctldev->desc->npins);
-	device_unregister(&pctldev->dev);
+	kfree(pctldev);
 }
 EXPORT_SYMBOL_GPL(pinctrl_unregister);
 
diff --git a/drivers/pinctrl/core.h b/drivers/pinctrl/core.h
index 3f5b911acf18..5375582566a5 100644
--- a/drivers/pinctrl/core.h
+++ b/drivers/pinctrl/core.h
@@ -38,7 +38,7 @@ struct pinctrl_dev {
 	spinlock_t pin_desc_tree_lock;
 	struct list_head gpio_ranges;
 	struct mutex gpio_ranges_lock;
-	struct device dev;
+	struct device *dev;
 	struct module *owner;
 	void *driver_data;
 #ifdef CONFIG_PINMUX
diff --git a/drivers/pinctrl/pinconf.c b/drivers/pinctrl/pinconf.c
index 9195eefe258a..124762b57024 100644
--- a/drivers/pinctrl/pinconf.c
+++ b/drivers/pinctrl/pinconf.c
@@ -29,7 +29,7 @@ int pin_config_get_for_pin(struct pinctrl_dev *pctldev, unsigned pin,
 	const struct pinconf_ops *ops = pctldev->desc->confops;
 
 	if (!ops || !ops->pin_config_get) {
-		dev_err(&pctldev->dev, "cannot get pin configuration, missing "
+		dev_err(pctldev->dev, "cannot get pin configuration, missing "
 			"pin_config_get() function in driver\n");
 		return -EINVAL;
 	}
@@ -65,14 +65,14 @@ int pin_config_set_for_pin(struct pinctrl_dev *pctldev, unsigned pin,
 	int ret;
 
 	if (!ops || !ops->pin_config_set) {
-		dev_err(&pctldev->dev, "cannot configure pin, missing "
+		dev_err(pctldev->dev, "cannot configure pin, missing "
 			"config function in driver\n");
 		return -EINVAL;
 	}
 
 	ret = ops->pin_config_set(pctldev, pin, config);
 	if (ret) {
-		dev_err(&pctldev->dev,
+		dev_err(pctldev->dev,
 			"unable to set pin configuration on pin %d\n", pin);
 		return ret;
 	}
@@ -108,7 +108,7 @@ int pin_config_group_get(struct pinctrl_dev *pctldev, const char *pin_group,
 	int selector;
 
 	if (!ops || !ops->pin_config_group_get) {
-		dev_err(&pctldev->dev, "cannot get configuration for pin "
+		dev_err(pctldev->dev, "cannot get configuration for pin "
 			"group, missing group config get function in "
 			"driver\n");
 		return -EINVAL;
@@ -135,7 +135,7 @@ int pin_config_group_set(struct pinctrl_dev *pctldev, const char *pin_group,
 	int i;
 
 	if (!ops || (!ops->pin_config_group_set && !ops->pin_config_set)) {
-		dev_err(&pctldev->dev, "cannot configure pin group, missing "
+		dev_err(pctldev->dev, "cannot configure pin group, missing "
 			"config function in driver\n");
 		return -EINVAL;
 	}
@@ -146,7 +146,7 @@ int pin_config_group_set(struct pinctrl_dev *pctldev, const char *pin_group,
 
 	ret = pctlops->get_group_pins(pctldev, selector, &pins, &num_pins);
 	if (ret) {
-		dev_err(&pctldev->dev, "cannot configure pin group, error "
+		dev_err(pctldev->dev, "cannot configure pin group, error "
 			"getting pins\n");
 		return ret;
 	}
diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c
index a11681b4bd91..3bcc64137ddc 100644
--- a/drivers/pinctrl/pinmux.c
+++ b/drivers/pinctrl/pinmux.c
@@ -106,24 +106,24 @@ static int pin_request(struct pinctrl_dev *pctldev,
 	const struct pinmux_ops *ops = pctldev->desc->pmxops;
 	int status = -EINVAL;
 
-	dev_dbg(&pctldev->dev, "request pin %d for %s\n", pin, function);
+	dev_dbg(pctldev->dev, "request pin %d for %s\n", pin, function);
 
 	desc = pin_desc_get(pctldev, pin);
 	if (desc == NULL) {
-		dev_err(&pctldev->dev,
+		dev_err(pctldev->dev,
 			"pin is not registered so it cannot be requested\n");
 		goto out;
 	}
 
 	if (!function) {
-		dev_err(&pctldev->dev, "no function name given\n");
+		dev_err(pctldev->dev, "no function name given\n");
 		return -EINVAL;
 	}
 
 	spin_lock(&desc->lock);
 	if (desc->mux_function) {
 		spin_unlock(&desc->lock);
-		dev_err(&pctldev->dev,
+		dev_err(pctldev->dev,
 			"pin already requested\n");
 		goto out;
 	}
@@ -132,7 +132,7 @@ static int pin_request(struct pinctrl_dev *pctldev,
 
 	/* Let each pin increase references to this module */
 	if (!try_module_get(pctldev->owner)) {
-		dev_err(&pctldev->dev,
+		dev_err(pctldev->dev,
 			"could not increase module refcount for pin %d\n",
 			pin);
 		status = -EINVAL;
@@ -152,7 +152,7 @@ static int pin_request(struct pinctrl_dev *pctldev,
 		status = 0;
 
 	if (status)
-		dev_err(&pctldev->dev, "->request on device %s failed "
+		dev_err(pctldev->dev, "->request on device %s failed "
 		       "for pin %d\n",
 		       pctldev->desc->name, pin);
 out_free_pin:
@@ -163,7 +163,7 @@ out_free_pin:
 	}
 out:
 	if (status)
-		dev_err(&pctldev->dev, "pin-%d (%s) status %d\n",
+		dev_err(pctldev->dev, "pin-%d (%s) status %d\n",
 		       pin, function ? : "?", status);
 
 	return status;
@@ -189,7 +189,7 @@ static const char *pin_free(struct pinctrl_dev *pctldev, int pin,
 
 	desc = pin_desc_get(pctldev, pin);
 	if (desc == NULL) {
-		dev_err(&pctldev->dev,
+		dev_err(pctldev->dev,
 			"pin is not registered so it cannot be freed\n");
 		return NULL;
 	}
@@ -434,14 +434,14 @@ static int acquire_pins(struct pinctrl_dev *pctldev,
 	if (ret)
 		return ret;
 
-	dev_dbg(&pctldev->dev, "requesting the %u pins from group %u\n",
+	dev_dbg(pctldev->dev, "requesting the %u pins from group %u\n",
 		num_pins, group_selector);
 
 	/* Try to allocate all pins in this group, one by one */
 	for (i = 0; i < num_pins; i++) {
 		ret = pin_request(pctldev, pins[i], func, NULL);
 		if (ret) {
-			dev_err(&pctldev->dev,
+			dev_err(pctldev->dev,
 				"could not get pin %d for function %s "
 				"on device %s - conflicting mux mappings?\n",
 				pins[i], func ? : "(undefined)",
@@ -473,7 +473,7 @@ static void release_pins(struct pinctrl_dev *pctldev,
 	ret = pctlops->get_group_pins(pctldev, group_selector,
 				      &pins, &num_pins);
 	if (ret) {
-		dev_err(&pctldev->dev, "could not get pins to release for "
+		dev_err(pctldev->dev, "could not get pins to release for "
 			"group selector %d\n",
 			group_selector);
 		return;
@@ -525,7 +525,7 @@ static int pinmux_check_pin_group(struct pinctrl_dev *pctldev,
 			return -EINVAL;
 		ret = pinctrl_get_group_selector(pctldev, groups[0]);
 		if (ret < 0) {
-			dev_err(&pctldev->dev,
+			dev_err(pctldev->dev,
 				"function %s wants group %s but the pin "
 				"controller does not seem to have that group\n",
 				pmxops->get_function_name(pctldev, func_selector),
@@ -534,7 +534,7 @@ static int pinmux_check_pin_group(struct pinctrl_dev *pctldev,
 		}
 
 		if (num_groups > 1)
-			dev_dbg(&pctldev->dev,
+			dev_dbg(pctldev->dev,
 				"function %s support more than one group, "
 				"default-selecting first group %s (%d)\n",
 				pmxops->get_function_name(pctldev, func_selector),
@@ -544,13 +544,13 @@ static int pinmux_check_pin_group(struct pinctrl_dev *pctldev,
 		return ret;
 	}
 
-	dev_dbg(&pctldev->dev,
+	dev_dbg(pctldev->dev,
 		"check if we have pin group %s on controller %s\n",
 		pin_group, pinctrl_dev_get_name(pctldev));
 
 	ret = pinctrl_get_group_selector(pctldev, pin_group);
 	if (ret < 0) {
-		dev_dbg(&pctldev->dev,
+		dev_dbg(pctldev->dev,
 			"%s does not support pin group %s with function %s\n",
 			pinctrl_dev_get_name(pctldev),
 			pin_group,
@@ -627,7 +627,7 @@ static int pinmux_enable_muxmap(struct pinctrl_dev *pctldev,
 	 */
 
 	if (pmx->pctldev && pmx->pctldev != pctldev) {
-		dev_err(&pctldev->dev,
+		dev_err(pctldev->dev,
 			"different pin control devices given for device %s, "
 			"function %s\n",
 			devname,
@@ -650,7 +650,7 @@ static int pinmux_enable_muxmap(struct pinctrl_dev *pctldev,
 	 */
 	if (pmx->func_selector != UINT_MAX &&
 	    pmx->func_selector != func_selector) {
-		dev_err(&pctldev->dev,
+		dev_err(pctldev->dev,
 			"dual function defines in the map for device %s\n",
 		       devname);
 		return -EINVAL;
@@ -756,7 +756,7 @@ struct pinmux *pinmux_get(struct device *dev, const char *name)
 		}
 
 		pr_debug("in map, found pctldev %s to handle function %s",
-			 dev_name(&pctldev->dev), map->function);
+			 dev_name(pctldev->dev), map->function);
 
 
 		/*
@@ -932,7 +932,7 @@ static int pinmux_hog_map(struct pinctrl_dev *pctldev,
 		 * without any problems, so then we can hog pinmuxes for
 		 * all devices that just want a static pin mux at this point.
 		 */
-		dev_err(&pctldev->dev, "map %s wants to hog a non-system "
+		dev_err(pctldev->dev, "map %s wants to hog a non-system "
 			"pinmux, this is not going to work\n", map->name);
 		return -EINVAL;
 	}
@@ -944,7 +944,7 @@ static int pinmux_hog_map(struct pinctrl_dev *pctldev,
 	pmx = pinmux_get(NULL, map->name);
 	if (IS_ERR(pmx)) {
 		kfree(hog);
-		dev_err(&pctldev->dev,
+		dev_err(pctldev->dev,
 			"could not get the %s pinmux mapping for hogging\n",
 			map->name);
 		return PTR_ERR(pmx);
@@ -954,7 +954,7 @@ static int pinmux_hog_map(struct pinctrl_dev *pctldev,
 	if (ret) {
 		pinmux_put(pmx);
 		kfree(hog);
-		dev_err(&pctldev->dev,
+		dev_err(pctldev->dev,
 			"could not enable the %s pinmux mapping for hogging\n",
 			map->name);
 		return ret;
@@ -963,7 +963,7 @@ static int pinmux_hog_map(struct pinctrl_dev *pctldev,
 	hog->map = map;
 	hog->pmx = pmx;
 
-	dev_info(&pctldev->dev, "hogged map %s, function %s\n", map->name,
+	dev_info(pctldev->dev, "hogged map %s, function %s\n", map->name,
 		 map->function);
 	mutex_lock(&pctldev->pinmux_hogs_lock);
 	list_add(&hog->node, &pctldev->pinmux_hogs);
@@ -982,7 +982,7 @@ static int pinmux_hog_map(struct pinctrl_dev *pctldev,
  */
 int pinmux_hog_maps(struct pinctrl_dev *pctldev)
 {
-	struct device *dev = &pctldev->dev;
+	struct device *dev = pctldev->dev;
 	const char *devname = dev_name(dev);
 	int ret;
 	int i;
diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h
index 0ca32eb63b67..ad430e05a7ba 100644
--- a/include/linux/pinctrl/machine.h
+++ b/include/linux/pinctrl/machine.h
@@ -73,31 +73,6 @@ struct pinmux_map {
 	{ .name = a, .ctrl_dev_name = b, .function = c, \
 	  .hog_on_boot = true }
 
-/*
- * Convenience macro to map a function onto the primary device pinctrl device
- * this is especially helpful on systems that have only one pin controller
- * or need to set up a lot of mappings on the primary controller.
- */
-#define PINMUX_MAP_PRIMARY(a, b, c) \
-	{ .name = a, .ctrl_dev_name = "pinctrl.0", .function = b, \
-	  .dev_name = c }
-
-/*
- * Convenience macro to map a system function onto the primary pinctrl device.
- * System functions are not assigned to a particular device.
- */
-#define PINMUX_MAP_PRIMARY_SYS(a, b) \
-	{ .name = a, .ctrl_dev_name = "pinctrl.0", .function = b }
-
-/*
- * Convenience macro to map a system function onto the primary pinctrl device,
- * to be hogged by the pinmux core until the system shuts down.
- */
-#define PINMUX_MAP_PRIMARY_SYS_HOG(a, b) \
-	{ .name = a, .ctrl_dev_name = "pinctrl.0", .function = b, \
-	  .hog_on_boot = true }
-
-
 #ifdef CONFIG_PINMUX
 
 extern int pinmux_register_mappings(struct pinmux_map const *map,
-- 
cgit v1.2.3


From 23750196ef472e9249958d5165b0bb292518c710 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 14 Dec 2011 09:30:08 +0100
Subject: pinctrl: add a group-specific hog macro

To create elegant tables for pinmux hogs on the PXA MMP platform,
we need this hog macro that can specify both function and group in
one go.

Acked-by: Stephen Warren <swarren@nvidia.com>
Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/machine.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h
index ad430e05a7ba..d0aecb7f6fb9 100644
--- a/include/linux/pinctrl/machine.h
+++ b/include/linux/pinctrl/machine.h
@@ -73,6 +73,15 @@ struct pinmux_map {
 	{ .name = a, .ctrl_dev_name = b, .function = c, \
 	  .hog_on_boot = true }
 
+/*
+ * Convenience macro to map a system function onto a certain pinctrl device
+ * using a specified group, to be hogged by the pinmux core until the system
+ * shuts down.
+ */
+#define PINMUX_MAP_SYS_HOG_GROUP(a, b, c, d)		\
+	{ .name = a, .ctrl_dev_name = b, .function = c, .group = d, \
+	  .hog_on_boot = true }
+
 #ifdef CONFIG_PINMUX
 
 extern int pinmux_register_mappings(struct pinmux_map const *map,
-- 
cgit v1.2.3


From 63fd5984a9b2214cba7dd7dd7b5a75cf40dde39f Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Thu, 15 Dec 2011 16:57:16 -0700
Subject: pinctrl: add "struct seq_file;" to pinconf.h

This allows one to include pinconf.h without having to include other
headers first.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/pinconf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h
index d5b72e6e261d..8c2c88ed46b1 100644
--- a/include/linux/pinctrl/pinconf.h
+++ b/include/linux/pinctrl/pinconf.h
@@ -15,6 +15,7 @@
 #ifdef CONFIG_PINCONF
 
 struct pinctrl_dev;
+struct seq_file;
 
 /**
  * struct pinconf_ops - pin config operations, to be implemented by
-- 
cgit v1.2.3


From 43699dea1ea21a0d5786317a794cb2ba27a6f4fe Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Thu, 15 Dec 2011 16:57:17 -0700
Subject: pinctrl: pass name instead of device to pin_config_*

Obtaining a "struct pinctrl_dev *" is difficult for code not directly
related to the pinctrl subsystem. However, the device name of the pinctrl
device is fairly well known. So, modify pin_config_*() to take the device
name instead of the "struct pinctrl_dev *".

Signed-off-by: Stephen Warren <swarren@nvidia.com>
[rebased on top of refactoring code]
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/pinctrl.txt       |  2 +-
 drivers/pinctrl/pinconf.c       | 41 ++++++++++++++++++++++++++++++++---------
 include/linux/pinctrl/pinconf.h | 16 ++++++++--------
 3 files changed, 41 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index f08064368291..44321d3227e8 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -208,7 +208,7 @@ unconnected.
 
 For example, a platform may do this:
 
-ret = pin_config_set(dev, "FOO_GPIO_PIN", PLATFORM_X_PULL_UP);
+ret = pin_config_set("foo-dev", "FOO_GPIO_PIN", PLATFORM_X_PULL_UP);
 
 To pull up a pin to VDD. The pin configuration driver implements callbacks for
 changing pin configuration in the pin controller ops like this:
diff --git a/drivers/pinctrl/pinconf.c b/drivers/pinctrl/pinconf.c
index 124762b57024..57dbb4b478db 100644
--- a/drivers/pinctrl/pinconf.c
+++ b/drivers/pinctrl/pinconf.c
@@ -39,17 +39,22 @@ int pin_config_get_for_pin(struct pinctrl_dev *pctldev, unsigned pin,
 
 /**
  * pin_config_get() - get the configuration of a single pin parameter
- * @pctldev: pin controller device for this pin
+ * @dev_name: name of the pin controller device for this pin
  * @name: name of the pin to get the config for
  * @config: the config pointed to by this argument will be filled in with the
  *	current pin state, it can be used directly by drivers as a numeral, or
  *	it can be dereferenced to any struct.
  */
-int pin_config_get(struct pinctrl_dev *pctldev, const char *name,
+int pin_config_get(const char *dev_name, const char *name,
 			  unsigned long *config)
 {
+	struct pinctrl_dev *pctldev;
 	int pin;
 
+	pctldev = get_pinctrl_dev_from_dev(NULL, dev_name);
+	if (!pctldev)
+		return -EINVAL;
+
 	pin = pin_get_from_name(pctldev, name);
 	if (pin < 0)
 		return pin;
@@ -82,17 +87,22 @@ int pin_config_set_for_pin(struct pinctrl_dev *pctldev, unsigned pin,
 
 /**
  * pin_config_set() - set the configuration of a single pin parameter
- * @pctldev: pin controller device for this pin
+ * @dev_name: name of pin controller device for this pin
  * @name: name of the pin to set the config for
  * @config: the config in this argument will contain the desired pin state, it
  *	can be used directly by drivers as a numeral, or it can be dereferenced
  *	to any struct.
  */
-int pin_config_set(struct pinctrl_dev *pctldev, const char *name,
+int pin_config_set(const char *dev_name, const char *name,
 		   unsigned long config)
 {
+	struct pinctrl_dev *pctldev;
 	int pin;
 
+	pctldev = get_pinctrl_dev_from_dev(NULL, dev_name);
+	if (!pctldev)
+		return -EINVAL;
+
 	pin = pin_get_from_name(pctldev, name);
 	if (pin < 0)
 		return pin;
@@ -101,12 +111,18 @@ int pin_config_set(struct pinctrl_dev *pctldev, const char *name,
 }
 EXPORT_SYMBOL(pin_config_set);
 
-int pin_config_group_get(struct pinctrl_dev *pctldev, const char *pin_group,
+int pin_config_group_get(const char *dev_name, const char *pin_group,
 			 unsigned long *config)
 {
-	const struct pinconf_ops *ops = pctldev->desc->confops;
+	struct pinctrl_dev *pctldev;
+	const struct pinconf_ops *ops;
 	int selector;
 
+	pctldev = get_pinctrl_dev_from_dev(NULL, dev_name);
+	if (!pctldev)
+		return -EINVAL;
+	ops = pctldev->desc->confops;
+
 	if (!ops || !ops->pin_config_group_get) {
 		dev_err(pctldev->dev, "cannot get configuration for pin "
 			"group, missing group config get function in "
@@ -123,17 +139,24 @@ int pin_config_group_get(struct pinctrl_dev *pctldev, const char *pin_group,
 EXPORT_SYMBOL(pin_config_group_get);
 
 
-int pin_config_group_set(struct pinctrl_dev *pctldev, const char *pin_group,
+int pin_config_group_set(const char *dev_name, const char *pin_group,
 			 unsigned long config)
 {
-	const struct pinctrl_ops *pctlops = pctldev->desc->pctlops;
-	const struct pinconf_ops *ops = pctldev->desc->confops;
+	struct pinctrl_dev *pctldev;
+	const struct pinconf_ops *ops;
+	const struct pinctrl_ops *pctlops;
 	int selector;
 	const unsigned *pins;
 	unsigned num_pins;
 	int ret;
 	int i;
 
+	pctldev = get_pinctrl_dev_from_dev(NULL, dev_name);
+	if (!pctldev)
+		return -EINVAL;
+	ops = pctldev->desc->confops;
+	pctlops = pctldev->desc->pctlops;
+
 	if (!ops || (!ops->pin_config_group_set && !ops->pin_config_set)) {
 		dev_err(pctldev->dev, "cannot configure pin group, missing "
 			"config function in driver\n");
diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h
index 8c2c88ed46b1..477922cf043a 100644
--- a/include/linux/pinctrl/pinconf.h
+++ b/include/linux/pinctrl/pinconf.h
@@ -53,39 +53,39 @@ struct pinconf_ops {
 					   unsigned selector);
 };
 
-extern int pin_config_get(struct pinctrl_dev *pctldev, const char *name,
+extern int pin_config_get(const char *dev_name, const char *name,
 			  unsigned long *config);
-extern int pin_config_set(struct pinctrl_dev *pctldev, const char *name,
+extern int pin_config_set(const char *dev_name, const char *name,
 			  unsigned long config);
-extern int pin_config_group_get(struct pinctrl_dev *pctldev,
+extern int pin_config_group_get(const char *dev_name,
 				const char *pin_group,
 				unsigned long *config);
-extern int pin_config_group_set(struct pinctrl_dev *pctldev,
+extern int pin_config_group_set(const char *dev_name,
 				const char *pin_group,
 				unsigned long config);
 
 #else
 
-static inline int pin_config_get(struct pinctrl_dev *pctldev, const char *name,
+static inline int pin_config_get(const char *dev_name, const char *name,
 				 unsigned long *config)
 {
 	return 0;
 }
 
-static inline int pin_config_set(struct pinctrl_dev *pctldev, const char *name,
+static inline int pin_config_set(const char *dev_name, const char *name,
 				 unsigned long config)
 {
 	return 0;
 }
 
-static inline int pin_config_group_get(struct pinctrl_dev *pctldev,
+static inline int pin_config_group_get(const char *dev_name,
 				       const char *pin_group,
 				       unsigned long *config)
 {
 	return 0;
 }
 
-static inline int pin_config_group_set(struct pinctrl_dev *pctldev,
+static inline int pin_config_group_set(const char *dev_name,
 				       const char *pin_group,
 				       unsigned long config)
 {
-- 
cgit v1.2.3


From 0d2006bbf09e817f125ba1e42b2549bc2c5d7351 Mon Sep 17 00:00:00 2001
From: Chanho Park <chanho61.park@samsung.com>
Date: Tue, 3 Jan 2012 16:47:51 +0900
Subject: pinctrl: remove unnecessary max pin number

This patch removes maxpin member in the pin control descriptor
because we don't need this value as we enumerate a pin space
using offset.

Signed-off-by: Chanho Park <chanho61.park@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/core.c          | 1 -
 drivers/pinctrl/pinctrl-sirf.c  | 1 -
 drivers/pinctrl/pinctrl-u300.c  | 1 -
 include/linux/pinctrl/pinctrl.h | 5 -----
 4 files changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 79c56d90fcc5..569bdb3ef104 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -354,7 +354,6 @@ static int pinctrl_pins_show(struct seq_file *s, void *what)
 	unsigned i, pin;
 
 	seq_printf(s, "registered pins: %d\n", pctldev->desc->npins);
-	seq_printf(s, "max pin number: %d\n", pctldev->desc->maxpin);
 
 	/* The pin number can be retrived from the pin controller descriptor */
 	for (i = 0; i < pctldev->desc->npins; i++) {
diff --git a/drivers/pinctrl/pinctrl-sirf.c b/drivers/pinctrl/pinctrl-sirf.c
index 99e688e07ea0..6b3534cc051a 100644
--- a/drivers/pinctrl/pinctrl-sirf.c
+++ b/drivers/pinctrl/pinctrl-sirf.c
@@ -1086,7 +1086,6 @@ static struct pinctrl_desc sirfsoc_pinmux_desc = {
 	.name = DRIVER_NAME,
 	.pins = sirfsoc_pads,
 	.npins = ARRAY_SIZE(sirfsoc_pads),
-	.maxpin = SIRFSOC_NUM_PADS - 1,
 	.pctlops = &sirfsoc_pctrl_ops,
 	.pmxops = &sirfsoc_pinmux_ops,
 	.owner = THIS_MODULE,
diff --git a/drivers/pinctrl/pinctrl-u300.c b/drivers/pinctrl/pinctrl-u300.c
index 7e89b367a7e5..c8d02f1c2b5e 100644
--- a/drivers/pinctrl/pinctrl-u300.c
+++ b/drivers/pinctrl/pinctrl-u300.c
@@ -1048,7 +1048,6 @@ static struct pinctrl_desc u300_pmx_desc = {
 	.name = DRIVER_NAME,
 	.pins = u300_pads,
 	.npins = ARRAY_SIZE(u300_pads),
-	.maxpin = U300_NUM_PADS-1,
 	.pctlops = &u300_pctrl_ops,
 	.pmxops = &u300_pmx_ops,
 	.owner = THIS_MODULE,
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 9809a94f151b..8bd22ee7aa09 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -92,10 +92,6 @@ struct pinctrl_ops {
  *	this pin controller
  * @npins: number of descriptors in the array, usually just ARRAY_SIZE()
  *	of the pins field above
- * @maxpin: since pin spaces may be sparse, there can he "holes" in the
- *	pin range, this attribute gives the maximum pin number in the
- *	total range. This should not be lower than npins for example,
- *	but may be equal to npins if you have no holes in the pin range.
  * @pctlops: pin control operation vtable, to support global concepts like
  *	grouping of pins, this is optional.
  * @pmxops: pinmux operations vtable, if you support pinmuxing in your driver
@@ -107,7 +103,6 @@ struct pinctrl_desc {
 	const char *name;
 	struct pinctrl_pin_desc const *pins;
 	unsigned int npins;
-	unsigned int maxpin;
 	struct pinctrl_ops *pctlops;
 	struct pinmux_ops *pmxops;
 	struct pinconf_ops *confops;
-- 
cgit v1.2.3


From e1de2f423462a5c6ba2c902dff1f5ddd8d3dbde3 Mon Sep 17 00:00:00 2001
From: Donggeun Kim <dg77.kim@samsung.com>
Date: Tue, 3 Jan 2012 16:22:03 +0900
Subject: regulator: add regulator_bulk_force_disable function

This patch allows consumers to forcibly disable multiple regulator
clients in a single API call.

Signed-off-by: Donggeun Kim <dg77.kim@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/core.c           | 37 +++++++++++++++++++++++++++++++++++++
 include/linux/regulator/consumer.h |  8 ++++++++
 2 files changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index eb750a1ed7fe..ca86f39a0fdc 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -2490,6 +2490,43 @@ err:
 }
 EXPORT_SYMBOL_GPL(regulator_bulk_disable);
 
+/**
+ * regulator_bulk_force_disable - force disable multiple regulator consumers
+ *
+ * @num_consumers: Number of consumers
+ * @consumers:     Consumer data; clients are stored here.
+ * @return         0 on success, an errno on failure
+ *
+ * This convenience API allows consumers to forcibly disable multiple regulator
+ * clients in a single API call.
+ * NOTE: This should be used for situations when device damage will
+ * likely occur if the regulators are not disabled (e.g. over temp).
+ * Although regulator_force_disable function call for some consumers can
+ * return error numbers, the function is called for all consumers.
+ */
+int regulator_bulk_force_disable(int num_consumers,
+			   struct regulator_bulk_data *consumers)
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < num_consumers; i++)
+		consumers[i].ret =
+			    regulator_force_disable(consumers[i].consumer);
+
+	for (i = 0; i < num_consumers; i++) {
+		if (consumers[i].ret != 0) {
+			ret = consumers[i].ret;
+			goto out;
+		}
+	}
+
+	return 0;
+out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_bulk_force_disable);
+
 /**
  * regulator_bulk_free - free multiple regulator consumers
  *
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 889ab5aaa85b..f2698a0edfc4 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -149,6 +149,8 @@ int regulator_bulk_enable(int num_consumers,
 			  struct regulator_bulk_data *consumers);
 int regulator_bulk_disable(int num_consumers,
 			   struct regulator_bulk_data *consumers);
+int regulator_bulk_force_disable(int num_consumers,
+			   struct regulator_bulk_data *consumers);
 void regulator_bulk_free(int num_consumers,
 			 struct regulator_bulk_data *consumers);
 
@@ -247,6 +249,12 @@ static inline int regulator_bulk_disable(int num_consumers,
 	return 0;
 }
 
+static inline int regulator_bulk_force_disable(int num_consumers,
+					struct regulator_bulk_data *consumers)
+{
+	return 0;
+}
+
 static inline void regulator_bulk_free(int num_consumers,
 				       struct regulator_bulk_data *consumers)
 {
-- 
cgit v1.2.3


From 30e053248da178cf6154bb7e950dc8713567e3fa Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jan 2012 13:14:29 +0100
Subject: security: Fix security_old_inode_init_security() when CONFIG_SECURITY
 is not set

Commit 1e39f384bb01 ("evm: fix build problems") makes the stub version
of security_old_inode_init_security() return 0 when CONFIG_SECURITY is
not set.

But that makes callers such as reiserfs_security_init() assume that
security_old_inode_init_security() has set name, value, and len
arguments properly - but security_old_inode_init_security() left them
uninitialized which then results in interesting failures.

Revert security_old_inode_init_security() to the old behavior of
returning EOPNOTSUPP since both callers (reiserfs and ocfs2) handle this
just fine.

[ Also fixed the S_PRIVATE(inode) case of the actual non-stub
  security_old_inode_init_security() function to return EOPNOTSUPP
  for the same reason, as pointed out by Mimi Zohar.

  It got incorrectly changed to match the new function in commit
  fb88c2b6cbb1: "evm: fix security/security_old_init_security return
  code".   - Linus ]

Reported-by: Jorge Bastos <mysql.jorge@decimal.pt>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/security.h | 2 +-
 security/security.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 19d8e04e1688..e8c619d39291 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2056,7 +2056,7 @@ static inline int security_old_inode_init_security(struct inode *inode,
 						   char **name, void **value,
 						   size_t *len)
 {
-	return 0;
+	return -EOPNOTSUPP;
 }
 
 static inline int security_inode_create(struct inode *dir,
diff --git a/security/security.c b/security/security.c
index 0c6cc69c8f86..e2f684aeb70c 100644
--- a/security/security.c
+++ b/security/security.c
@@ -381,7 +381,7 @@ int security_old_inode_init_security(struct inode *inode, struct inode *dir,
 				     void **value, size_t *len)
 {
 	if (unlikely(IS_PRIVATE(inode)))
-		return 0;
+		return -EOPNOTSUPP;
 	return security_ops->inode_init_security(inode, dir, qstr, name, value,
 						 len);
 }
-- 
cgit v1.2.3


From 43c6759e73907e4c8e6624f70f5c4a860518b203 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@dowhile0.org>
Date: Tue, 3 Jan 2012 20:23:18 -0500
Subject: net: phy: smsc: Move SMSC PHY constants to <linux/smscphy.h>

SMSC generation 4 LAN chips integrate an IEEE 802.3 ethernet physical layer.
The ethernet driver for this family of devices needs to access the SMSC PHY
registers and bit-fields.

So, this patch moves these constants to a place where it can be used for both
the PHY and LAN drivers.

Signed-off-by: Javier Martinez Canillas <javier@dowhile0.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/smsc/smsc911x.h |  4 ++++
 drivers/net/phy/smsc.c               | 21 +--------------------
 include/linux/smscphy.h              | 25 +++++++++++++++++++++++++
 3 files changed, 30 insertions(+), 20 deletions(-)
 create mode 100644 include/linux/smscphy.h

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/smsc/smsc911x.h b/drivers/net/ethernet/smsc/smsc911x.h
index 8d67aacf8867..938ecf290813 100644
--- a/drivers/net/ethernet/smsc/smsc911x.h
+++ b/drivers/net/ethernet/smsc/smsc911x.h
@@ -401,4 +401,8 @@
 #include <asm/smsc911x.h>
 #endif
 
+#ifdef CONFIG_SMSC_PHY
+#include <linux/smscphy.h>
+#endif
+
 #endif				/* __SMSC911X_H__ */
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 342505c976d6..fc3e7e96c88c 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -22,26 +22,7 @@
 #include <linux/ethtool.h>
 #include <linux/phy.h>
 #include <linux/netdevice.h>
-
-#define MII_LAN83C185_ISF 29 /* Interrupt Source Flags */
-#define MII_LAN83C185_IM  30 /* Interrupt Mask */
-#define MII_LAN83C185_CTRL_STATUS 17 /* Mode/Status Register */
-
-#define MII_LAN83C185_ISF_INT1 (1<<1) /* Auto-Negotiation Page Received */
-#define MII_LAN83C185_ISF_INT2 (1<<2) /* Parallel Detection Fault */
-#define MII_LAN83C185_ISF_INT3 (1<<3) /* Auto-Negotiation LP Ack */
-#define MII_LAN83C185_ISF_INT4 (1<<4) /* Link Down */
-#define MII_LAN83C185_ISF_INT5 (1<<5) /* Remote Fault Detected */
-#define MII_LAN83C185_ISF_INT6 (1<<6) /* Auto-Negotiation complete */
-#define MII_LAN83C185_ISF_INT7 (1<<7) /* ENERGYON */
-
-#define MII_LAN83C185_ISF_INT_ALL (0x0e)
-
-#define MII_LAN83C185_ISF_INT_PHYLIB_EVENTS \
-	(MII_LAN83C185_ISF_INT6 | MII_LAN83C185_ISF_INT4 | \
-	 MII_LAN83C185_ISF_INT7)
-
-#define MII_LAN83C185_EDPWRDOWN	(1 << 13) /* EDPWRDOWN */
+#include <linux/smscphy.h>
 
 static int smsc_phy_config_intr(struct phy_device *phydev)
 {
diff --git a/include/linux/smscphy.h b/include/linux/smscphy.h
new file mode 100644
index 000000000000..ce718cbce435
--- /dev/null
+++ b/include/linux/smscphy.h
@@ -0,0 +1,25 @@
+#ifndef __LINUX_SMSCPHY_H__
+#define __LINUX_SMSCPHY_H__
+
+#define MII_LAN83C185_ISF 29 /* Interrupt Source Flags */
+#define MII_LAN83C185_IM  30 /* Interrupt Mask */
+#define MII_LAN83C185_CTRL_STATUS 17 /* Mode/Status Register */
+
+#define MII_LAN83C185_ISF_INT1 (1<<1) /* Auto-Negotiation Page Received */
+#define MII_LAN83C185_ISF_INT2 (1<<2) /* Parallel Detection Fault */
+#define MII_LAN83C185_ISF_INT3 (1<<3) /* Auto-Negotiation LP Ack */
+#define MII_LAN83C185_ISF_INT4 (1<<4) /* Link Down */
+#define MII_LAN83C185_ISF_INT5 (1<<5) /* Remote Fault Detected */
+#define MII_LAN83C185_ISF_INT6 (1<<6) /* Auto-Negotiation complete */
+#define MII_LAN83C185_ISF_INT7 (1<<7) /* ENERGYON */
+
+#define MII_LAN83C185_ISF_INT_ALL (0x0e)
+
+#define MII_LAN83C185_ISF_INT_PHYLIB_EVENTS \
+	(MII_LAN83C185_ISF_INT6 | MII_LAN83C185_ISF_INT4 | \
+	 MII_LAN83C185_ISF_INT7)
+
+#define MII_LAN83C185_EDPWRDOWN (1 << 13) /* EDPWRDOWN */
+#define MII_LAN83C185_ENERGYON  (1 << 1)  /* ENERGYON */
+
+#endif /* __LINUX_SMSCPHY_H__ */
-- 
cgit v1.2.3


From 5ede7b1cfa8201418fb35e12f770e9e7c2559a4d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 23 Oct 2011 18:49:54 -0400
Subject: pull manipulations of rpc_cred inside alloc_nfs_open_context()

No need to duplicate them in both callers; make it return
ERR_PTR(-ENOMEM) on allocation failure instead of NULL and
it'll be able to report rpc_lookup_cred() failures just
fine.  Callers are much happier that way...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/dir.c           | 13 +------------
 fs/nfs/inode.c         | 40 ++++++++++++++++++++--------------------
 include/linux/nfs_fs.h |  2 +-
 3 files changed, 22 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ac2899098147..23be134b3193 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1368,18 +1368,7 @@ static fmode_t flags_to_mode(int flags)
 
 static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags)
 {
-	struct nfs_open_context *ctx;
-	struct rpc_cred *cred;
-	fmode_t fmode = flags_to_mode(open_flags);
-
-	cred = rpc_lookup_cred();
-	if (IS_ERR(cred))
-		return ERR_CAST(cred);
-	ctx = alloc_nfs_open_context(dentry, cred, fmode);
-	put_rpccred(cred);
-	if (ctx == NULL)
-		return ERR_PTR(-ENOMEM);
-	return ctx;
+	return alloc_nfs_open_context(dentry, flags_to_mode(open_flags));
 }
 
 static int do_open(struct inode *inode, struct file *filp)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 50a15fa8cf98..efb66db04f99 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -629,23 +629,28 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
 	nfs_revalidate_inode(server, inode);
 }
 
-struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred, fmode_t f_mode)
+struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode)
 {
 	struct nfs_open_context *ctx;
+	struct rpc_cred *cred = rpc_lookup_cred();
+	if (IS_ERR(cred))
+		return ERR_CAST(cred);
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
-	if (ctx != NULL) {
-		nfs_sb_active(dentry->d_sb);
-		ctx->dentry = dget(dentry);
-		ctx->cred = get_rpccred(cred);
-		ctx->state = NULL;
-		ctx->mode = f_mode;
-		ctx->flags = 0;
-		ctx->error = 0;
-		nfs_init_lock_context(&ctx->lock_context);
-		ctx->lock_context.open_context = ctx;
-		INIT_LIST_HEAD(&ctx->list);
+	if (!ctx) {
+		put_rpccred(cred);
+		return ERR_PTR(-ENOMEM);
 	}
+	nfs_sb_active(dentry->d_sb);
+	ctx->dentry = dget(dentry);
+	ctx->cred = cred;
+	ctx->state = NULL;
+	ctx->mode = f_mode;
+	ctx->flags = 0;
+	ctx->error = 0;
+	nfs_init_lock_context(&ctx->lock_context);
+	ctx->lock_context.open_context = ctx;
+	INIT_LIST_HEAD(&ctx->list);
 	return ctx;
 }
 
@@ -738,15 +743,10 @@ static void nfs_file_clear_open_context(struct file *filp)
 int nfs_open(struct inode *inode, struct file *filp)
 {
 	struct nfs_open_context *ctx;
-	struct rpc_cred *cred;
 
-	cred = rpc_lookup_cred();
-	if (IS_ERR(cred))
-		return PTR_ERR(cred);
-	ctx = alloc_nfs_open_context(filp->f_path.dentry, cred, filp->f_mode);
-	put_rpccred(cred);
-	if (ctx == NULL)
-		return -ENOMEM;
+	ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
 	nfs_file_set_open_context(filp, ctx);
 	put_nfs_open_context(ctx);
 	nfs_fscache_set_inode_cookie(inode, filp);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 92ecf5585fac..8c29950d2fa5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -373,7 +373,7 @@ extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
 extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
-extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred, fmode_t f_mode);
+extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode);
 extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx);
 extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx);
 extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
-- 
cgit v1.2.3


From 6c449c8dfe30142b3ced5f052e8ed3cce308801a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 16 Nov 2011 22:01:36 -0500
Subject: unexport put_mnt_ns(), make create_mnt_ns() static outright

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c                | 4 +---
 include/linux/mnt_namespace.h | 1 -
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index 7aad258dcaf6..0953a3a6d45e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2444,7 +2444,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
  * create_mnt_ns - creates a private namespace and adds a root filesystem
  * @mnt: pointer to the new root filesystem mountpoint
  */
-struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
+static struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
 {
 	struct mnt_namespace *new_ns;
 
@@ -2459,7 +2459,6 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
 	}
 	return new_ns;
 }
-EXPORT_SYMBOL(create_mnt_ns);
 
 struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
 {
@@ -2734,7 +2733,6 @@ void put_mnt_ns(struct mnt_namespace *ns)
 	release_mounts(&umount_list);
 	kfree(ns);
 }
-EXPORT_SYMBOL(put_mnt_ns);
 
 struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
 {
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 29304855652d..e87ec01aac9d 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -22,7 +22,6 @@ struct proc_mounts {
 
 struct fs_struct;
 
-extern struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt);
 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
 		struct fs_struct *);
 extern void put_mnt_ns(struct mnt_namespace *ns);
-- 
cgit v1.2.3


From a5166169f9b920cae3c503910cb66a3ac5dd846d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 12 Dec 2011 22:53:00 -0500
Subject: vfs: convert fs_supers to hlist

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/filesystems.c   |  1 -
 fs/super.c         | 26 ++++++++++++++------------
 include/linux/fs.h |  4 ++--
 3 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/filesystems.c b/fs/filesystems.c
index 0845f84f2a5f..96f24286667a 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -74,7 +74,6 @@ int register_filesystem(struct file_system_type * fs)
 	BUG_ON(strchr(fs->name, '.'));
 	if (fs->next)
 		return -EBUSY;
-	INIT_LIST_HEAD(&fs->fs_supers);
 	write_lock(&file_systems_lock);
 	p = find_filesystem(fs->name, strlen(fs->name));
 	if (*p)
diff --git a/fs/super.c b/fs/super.c
index 66a12f9bfc20..bab11bad13ba 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -136,7 +136,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		INIT_LIST_HEAD(&s->s_files);
 #endif
 		s->s_bdi = &default_backing_dev_info;
-		INIT_LIST_HEAD(&s->s_instances);
+		INIT_HLIST_NODE(&s->s_instances);
 		INIT_HLIST_BL_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
 		INIT_LIST_HEAD(&s->s_dentry_lru);
@@ -328,7 +328,7 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
 bool grab_super_passive(struct super_block *sb)
 {
 	spin_lock(&sb_lock);
-	if (list_empty(&sb->s_instances)) {
+	if (hlist_unhashed(&sb->s_instances)) {
 		spin_unlock(&sb_lock);
 		return false;
 	}
@@ -400,7 +400,7 @@ void generic_shutdown_super(struct super_block *sb)
 	}
 	spin_lock(&sb_lock);
 	/* should be initialized for __put_super_and_need_restart() */
-	list_del_init(&sb->s_instances);
+	hlist_del_init(&sb->s_instances);
 	spin_unlock(&sb_lock);
 	up_write(&sb->s_umount);
 }
@@ -420,13 +420,14 @@ struct super_block *sget(struct file_system_type *type,
 			void *data)
 {
 	struct super_block *s = NULL;
+	struct hlist_node *node;
 	struct super_block *old;
 	int err;
 
 retry:
 	spin_lock(&sb_lock);
 	if (test) {
-		list_for_each_entry(old, &type->fs_supers, s_instances) {
+		hlist_for_each_entry(old, node, &type->fs_supers, s_instances) {
 			if (!test(old, data))
 				continue;
 			if (!grab_super(old))
@@ -462,7 +463,7 @@ retry:
 	s->s_type = type;
 	strlcpy(s->s_id, type->name, sizeof(s->s_id));
 	list_add_tail(&s->s_list, &super_blocks);
-	list_add(&s->s_instances, &type->fs_supers);
+	hlist_add_head(&s->s_instances, &type->fs_supers);
 	spin_unlock(&sb_lock);
 	get_filesystem(type);
 	register_shrinker(&s->s_shrink);
@@ -497,7 +498,7 @@ void sync_supers(void)
 
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (list_empty(&sb->s_instances))
+		if (hlist_unhashed(&sb->s_instances))
 			continue;
 		if (sb->s_op->write_super && sb->s_dirt) {
 			sb->s_count++;
@@ -533,7 +534,7 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (list_empty(&sb->s_instances))
+		if (hlist_unhashed(&sb->s_instances))
 			continue;
 		sb->s_count++;
 		spin_unlock(&sb_lock);
@@ -566,9 +567,10 @@ void iterate_supers_type(struct file_system_type *type,
 	void (*f)(struct super_block *, void *), void *arg)
 {
 	struct super_block *sb, *p = NULL;
+	struct hlist_node *node;
 
 	spin_lock(&sb_lock);
-	list_for_each_entry(sb, &type->fs_supers, s_instances) {
+	hlist_for_each_entry(sb, node, &type->fs_supers, s_instances) {
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 
@@ -607,7 +609,7 @@ struct super_block *get_super(struct block_device *bdev)
 	spin_lock(&sb_lock);
 rescan:
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (list_empty(&sb->s_instances))
+		if (hlist_unhashed(&sb->s_instances))
 			continue;
 		if (sb->s_bdev == bdev) {
 			sb->s_count++;
@@ -647,7 +649,7 @@ struct super_block *get_active_super(struct block_device *bdev)
 restart:
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (list_empty(&sb->s_instances))
+		if (hlist_unhashed(&sb->s_instances))
 			continue;
 		if (sb->s_bdev == bdev) {
 			if (grab_super(sb)) /* drops sb_lock */
@@ -667,7 +669,7 @@ struct super_block *user_get_super(dev_t dev)
 	spin_lock(&sb_lock);
 rescan:
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (list_empty(&sb->s_instances))
+		if (hlist_unhashed(&sb->s_instances))
 			continue;
 		if (sb->s_dev ==  dev) {
 			sb->s_count++;
@@ -756,7 +758,7 @@ static void do_emergency_remount(struct work_struct *work)
 
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (list_empty(&sb->s_instances))
+		if (hlist_unhashed(&sb->s_instances))
 			continue;
 		sb->s_count++;
 		spin_unlock(&sb_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e0bc4ffb8e7f..ed17e54fd204 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1440,7 +1440,7 @@ struct super_block {
 	struct block_device	*s_bdev;
 	struct backing_dev_info *s_bdi;
 	struct mtd_info		*s_mtd;
-	struct list_head	s_instances;
+	struct hlist_node	s_instances;
 	struct quota_info	s_dquot;	/* Diskquota specific options */
 
 	int			s_frozen;
@@ -1864,7 +1864,7 @@ struct file_system_type {
 	void (*kill_sb) (struct super_block *);
 	struct module *owner;
 	struct file_system_type * next;
-	struct list_head fs_supers;
+	struct hlist_head fs_supers;
 
 	struct lock_class_key s_lock_key;
 	struct lock_class_key s_umount_key;
-- 
cgit v1.2.3


From 79e801a906db46cb8efad66c400b01df874b3f12 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 12 Dec 2011 23:07:05 -0500
Subject: vfs: make do_kern_mount() static

the only user outside of fs/namespace.c has died

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c        | 3 +--
 include/linux/mount.h | 3 ---
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index ed21ac4f7c69..7a8f949cec1b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1929,7 +1929,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
 	return ERR_PTR(err);
 }
 
-struct vfsmount *
+static struct vfsmount *
 do_kern_mount(const char *fstype, int flags, const char *name, void *data)
 {
 	struct file_system_type *type = get_fs_type(fstype);
@@ -1943,7 +1943,6 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
 	put_filesystem(type);
 	return mnt;
 }
-EXPORT_SYMBOL_GPL(do_kern_mount);
 
 /*
  * add a mount into a namespace's mount tree
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 33fe53d78110..65c1bb013836 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -100,9 +100,6 @@ extern void mnt_pin(struct vfsmount *mnt);
 extern void mnt_unpin(struct vfsmount *mnt);
 extern int __mnt_is_readonly(struct vfsmount *mnt);
 
-extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
-				      const char *name, void *data);
-
 struct file_system_type;
 extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 				      int flags, const char *name,
-- 
cgit v1.2.3


From 8c9379e972e984d11c2b99121847ba9fa7a0c56c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 8 Dec 2011 20:18:57 -0500
Subject: constify seq_file stuff

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/seq_file.c            | 10 +++++-----
 include/linux/seq_file.h | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index dba43c3ea3af..4023d6be939b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -397,7 +397,7 @@ EXPORT_SYMBOL(seq_printf);
  *      Returns pointer past last written character in @s, or NULL in case of
  *      failure.
  */
-char *mangle_path(char *s, char *p, char *esc)
+char *mangle_path(char *s, const char *p, const char *esc)
 {
 	while (s <= p) {
 		char c = *p++;
@@ -427,7 +427,7 @@ EXPORT_SYMBOL(mangle_path);
  * return the absolute path of 'path', as represented by the
  * dentry / mnt pair in the path parameter.
  */
-int seq_path(struct seq_file *m, struct path *path, char *esc)
+int seq_path(struct seq_file *m, const struct path *path, const char *esc)
 {
 	char *buf;
 	size_t size = seq_get_buf(m, &buf);
@@ -450,8 +450,8 @@ EXPORT_SYMBOL(seq_path);
 /*
  * Same as seq_path, but relative to supplied root.
  */
-int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
-		  char *esc)
+int seq_path_root(struct seq_file *m, const struct path *path,
+		  const struct path *root, const char *esc)
 {
 	char *buf;
 	size_t size = seq_get_buf(m, &buf);
@@ -480,7 +480,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 /*
  * returns the path of the 'dentry' from the root of its filesystem.
  */
-int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
+int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc)
 {
 	char *buf;
 	size_t size = seq_get_buf(m, &buf);
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 0b69a4684216..44f1514b00ba 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -74,7 +74,7 @@ static inline void seq_commit(struct seq_file *m, int num)
 	}
 }
 
-char *mangle_path(char *s, char *p, char *esc);
+char *mangle_path(char *s, const char *p, const char *esc);
 int seq_open(struct file *, const struct seq_operations *);
 ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
 loff_t seq_lseek(struct file *, loff_t, int);
@@ -86,10 +86,10 @@ int seq_write(struct seq_file *seq, const void *data, size_t len);
 
 __printf(2, 3) int seq_printf(struct seq_file *, const char *, ...);
 
-int seq_path(struct seq_file *, struct path *, char *);
-int seq_dentry(struct seq_file *, struct dentry *, char *);
-int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
-		  char *esc);
+int seq_path(struct seq_file *, const struct path *, const char *);
+int seq_dentry(struct seq_file *, struct dentry *, const char *);
+int seq_path_root(struct seq_file *m, const struct path *path,
+		  const struct path *root, const char *esc);
 int seq_bitmap(struct seq_file *m, const unsigned long *bits,
 				   unsigned int nr_bits);
 static inline int seq_cpumask(struct seq_file *m, const struct cpumask *mask)
-- 
cgit v1.2.3


From 2a79f17e4a641a2f463cb512cb0ec349844a147b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 9 Dec 2011 08:06:57 -0500
Subject: vfs: mnt_drop_write_file()

new helper (wrapper around mnt_drop_write()) to be used in pair with
mnt_want_write_file().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/ioctl.c        | 12 ++++++------
 fs/ext2/ioctl.c         |  6 +++---
 fs/ext3/ioctl.c         | 10 +++++-----
 fs/ext4/ioctl.c         | 14 +++++++-------
 fs/fat/file.c           |  2 +-
 fs/gfs2/file.c          |  2 +-
 fs/hfsplus/ioctl.c      |  2 +-
 fs/inode.c              |  2 +-
 fs/jfs/ioctl.c          |  2 +-
 fs/namespace.c          |  6 ++++++
 fs/ncpfs/ioctl.c        |  2 +-
 fs/nfsd/nfs4recover.c   |  6 +++---
 fs/nilfs2/ioctl.c       | 12 ++++++------
 fs/ocfs2/ioctl.c        |  2 +-
 fs/ocfs2/move_extents.c |  2 +-
 fs/open.c               |  2 +-
 fs/reiserfs/ioctl.c     |  4 ++--
 fs/ubifs/ioctl.c        |  2 +-
 fs/xattr.c              |  4 ++--
 fs/xfs/xfs_ioctl.c      |  4 ++--
 fs/xfs/xfs_ioctl32.c    |  4 ++--
 include/linux/mount.h   |  1 +
 22 files changed, 55 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 20dd8f3b6c72..5441ff1480fd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -259,7 +259,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 
 	btrfs_end_transaction(trans, root);
 
-	mnt_drop_write(file->f_path.mnt);
+	mnt_drop_write_file(file);
 
 	ret = 0;
  out_unlock:
@@ -1971,7 +1971,7 @@ out_dput:
 	dput(dentry);
 out_unlock_dir:
 	mutex_unlock(&dir->i_mutex);
-	mnt_drop_write(file->f_path.mnt);
+	mnt_drop_write_file(file);
 out:
 	kfree(vol_args);
 	return err;
@@ -2040,7 +2040,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
 		ret = -EINVAL;
 	}
 out:
-	mnt_drop_write(file->f_path.mnt);
+	mnt_drop_write_file(file);
 	return ret;
 }
 
@@ -2510,7 +2510,7 @@ out_unlock:
 out_fput:
 	fput(src_file);
 out_drop_write:
-	mnt_drop_write(file->f_path.mnt);
+	mnt_drop_write_file(file);
 	return ret;
 }
 
@@ -2565,7 +2565,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
 
 out_drop:
 	atomic_dec(&root->fs_info->open_ioctl_trans);
-	mnt_drop_write(file->f_path.mnt);
+	mnt_drop_write_file(file);
 out:
 	return ret;
 }
@@ -2800,7 +2800,7 @@ long btrfs_ioctl_trans_end(struct file *file)
 
 	atomic_dec(&root->fs_info->open_ioctl_trans);
 
-	mnt_drop_write(file->f_path.mnt);
+	mnt_drop_write_file(file);
 	return 0;
 }
 
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 61a3f9661728..1089f760c847 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -83,7 +83,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		inode->i_ctime = CURRENT_TIME_SEC;
 		mark_inode_dirty(inode);
 setflags_out:
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return ret;
 	}
 	case EXT2_IOC_GETVERSION:
@@ -100,7 +100,7 @@ setflags_out:
 			inode->i_ctime = CURRENT_TIME_SEC;
 			mark_inode_dirty(inode);
 		}
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return ret;
 	case EXT2_IOC_GETRSVSZ:
 		if (test_opt(inode->i_sb, RESERVATION)
@@ -145,7 +145,7 @@ setflags_out:
 			rsv->rsv_goal_size = rsv_window_size;
 		}
 		mutex_unlock(&ei->truncate_mutex);
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return 0;
 	}
 	default:
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index a02863a080d3..8e37c41a071b 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -110,7 +110,7 @@ flags_err:
 			err = ext3_change_inode_journal_flag(inode, jflag);
 flags_out:
 		mutex_unlock(&inode->i_mutex);
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return err;
 	}
 	case EXT3_IOC_GETVERSION:
@@ -147,7 +147,7 @@ flags_out:
 		}
 		ext3_journal_stop(handle);
 setversion_out:
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return err;
 	}
 	case EXT3_IOC_GETRSVSZ:
@@ -195,7 +195,7 @@ setversion_out:
 		}
 		mutex_unlock(&ei->truncate_mutex);
 setrsvsz_out:
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return err;
 	}
 	case EXT3_IOC_GROUP_EXTEND: {
@@ -221,7 +221,7 @@ setrsvsz_out:
 		if (err == 0)
 			err = err2;
 group_extend_out:
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return err;
 	}
 	case EXT3_IOC_GROUP_ADD: {
@@ -249,7 +249,7 @@ group_extend_out:
 		if (err == 0)
 			err = err2;
 group_add_out:
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return err;
 	}
 	case FITRIM: {
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 9a49760b554d..d37b3bb2a3b8 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -134,7 +134,7 @@ flags_err:
 			err = ext4_ext_migrate(inode);
 flags_out:
 		mutex_unlock(&inode->i_mutex);
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return err;
 	}
 	case EXT4_IOC_GETVERSION:
@@ -171,7 +171,7 @@ flags_out:
 		}
 		ext4_journal_stop(handle);
 setversion_out:
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return err;
 	}
 	case EXT4_IOC_GROUP_EXTEND: {
@@ -204,7 +204,7 @@ setversion_out:
 		}
 		if (err == 0)
 			err = err2;
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		ext4_resize_end(sb);
 
 		return err;
@@ -246,7 +246,7 @@ setversion_out:
 
 		err = ext4_move_extents(filp, donor_filp, me.orig_start,
 					me.donor_start, me.len, &me.moved_len);
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		if (me.moved_len > 0)
 			file_remove_suid(donor_filp);
 
@@ -289,7 +289,7 @@ mext_out:
 		}
 		if (err == 0)
 			err = err2;
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		ext4_resize_end(sb);
 
 		return err;
@@ -313,7 +313,7 @@ mext_out:
 		mutex_lock(&(inode->i_mutex));
 		err = ext4_ext_migrate(inode);
 		mutex_unlock(&(inode->i_mutex));
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return err;
 	}
 
@@ -327,7 +327,7 @@ mext_out:
 		if (err)
 			return err;
 		err = ext4_alloc_da_blocks(inode);
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return err;
 	}
 
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 50746a1a0789..d81d01a99b2c 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -108,7 +108,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
 	fat_save_attrs(inode, attr);
 	mark_inode_dirty(inode);
 out_drop_write:
-	mnt_drop_write(file->f_path.mnt);
+	mnt_drop_write_file(file);
 out_unlock_inode:
 	mutex_unlock(&inode->i_mutex);
 out:
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 28fc6e3855f3..b8927d4f3bf2 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -285,7 +285,7 @@ out_trans_end:
 out:
 	gfs2_glock_dq_uninit(&gh);
 out_drop_write:
-	mnt_drop_write(filp->f_path.mnt);
+	mnt_drop_write_file(filp);
 	return error;
 }
 
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 31d3fe576429..f66c7655b3f7 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -94,7 +94,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
 out_unlock_inode:
 	mutex_unlock(&inode->i_mutex);
 out_drop_write:
-	mnt_drop_write(file->f_path.mnt);
+	mnt_drop_write_file(file);
 out:
 	return err;
 }
diff --git a/fs/inode.c b/fs/inode.c
index ee4e66b998f4..4fda5ee85518 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1508,7 +1508,7 @@ void file_update_time(struct file *file)
 	if (sync_it & S_MTIME)
 		inode->i_mtime = now;
 	mark_inode_dirty_sync(inode);
-	mnt_drop_write(file->f_path.mnt);
+	mnt_drop_write_file(file);
 }
 EXPORT_SYMBOL(file_update_time);
 
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 73d9eaa91c05..f19d1e04a374 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -120,7 +120,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		inode->i_ctime = CURRENT_TIME_SEC;
 		mark_inode_dirty(inode);
 setflags_out:
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return err;
 	}
 	default:
diff --git a/fs/namespace.c b/fs/namespace.c
index 7a8f949cec1b..86b4f6406470 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -392,6 +392,12 @@ void mnt_drop_write(struct vfsmount *mnt)
 }
 EXPORT_SYMBOL_GPL(mnt_drop_write);
 
+void mnt_drop_write_file(struct file *file)
+{
+	mnt_drop_write(file->f_path.mnt);
+}
+EXPORT_SYMBOL(mnt_drop_write_file);
+
 static int mnt_make_readonly(struct vfsmount *mnt)
 {
 	int ret = 0;
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 790e92a9ec63..6958adfaff08 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -901,7 +901,7 @@ long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	ret = __ncp_ioctl(inode, cmd, arg);
 outDropWrite:
 	if (need_drop_write)
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 out:
 	return ret;
 }
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index a9aa2f161262..80a0be9ed008 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -151,7 +151,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 	if (status)
 		goto out_put;
 	status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU);
-	mnt_drop_write(rec_file->f_path.mnt);
+	mnt_drop_write_file(rec_file);
 out_put:
 	dput(dentry);
 out_unlock:
@@ -281,7 +281,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 	nfs4_reset_creds(original_cred);
 	if (status == 0)
 		vfs_fsync(rec_file, 0);
-	mnt_drop_write(rec_file->f_path.mnt);
+	mnt_drop_write_file(rec_file);
 out:
 	if (status)
 		printk("NFSD: Failed to remove expired client state directory"
@@ -317,7 +317,7 @@ nfsd4_recdir_purge_old(void) {
 	status = nfsd4_list_rec_dir(purge_old);
 	if (status == 0)
 		vfs_fsync(rec_file, 0);
-	mnt_drop_write(rec_file->f_path.mnt);
+	mnt_drop_write_file(rec_file);
 out:
 	if (status)
 		printk("nfsd4: failed to purge old clients from recovery"
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index b7697d1ccd61..886649627c3d 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -27,7 +27,7 @@
 #include <linux/uaccess.h>	/* copy_from_user(), copy_to_user() */
 #include <linux/vmalloc.h>
 #include <linux/compat.h>	/* compat_ptr() */
-#include <linux/mount.h>	/* mnt_want_write_file(), mnt_drop_write() */
+#include <linux/mount.h>	/* mnt_want_write_file(), mnt_drop_write_file() */
 #include <linux/buffer_head.h>
 #include <linux/nilfs2_fs.h>
 #include "nilfs.h"
@@ -154,7 +154,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
 	ret = nilfs_transaction_commit(inode->i_sb);
 out:
 	mutex_unlock(&inode->i_mutex);
-	mnt_drop_write(filp->f_path.mnt);
+	mnt_drop_write_file(filp);
 	return ret;
 }
 
@@ -194,7 +194,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
 
 	up_read(&inode->i_sb->s_umount);
 out:
-	mnt_drop_write(filp->f_path.mnt);
+	mnt_drop_write_file(filp);
 	return ret;
 }
 
@@ -225,7 +225,7 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
 	else
 		nilfs_transaction_commit(inode->i_sb); /* never fails */
 out:
-	mnt_drop_write(filp->f_path.mnt);
+	mnt_drop_write_file(filp);
 	return ret;
 }
 
@@ -675,7 +675,7 @@ out_free:
 		vfree(kbufs[n]);
 	kfree(kbufs[4]);
 out:
-	mnt_drop_write(filp->f_path.mnt);
+	mnt_drop_write_file(filp);
 	return ret;
 }
 
@@ -721,7 +721,7 @@ static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
 	ret = nilfs_resize_fs(inode->i_sb, newsize);
 
 out_drop_write:
-	mnt_drop_write(filp->f_path.mnt);
+	mnt_drop_write_file(filp);
 out:
 	return ret;
 }
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 892ace253f97..a6fda3c188aa 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -911,7 +911,7 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			return status;
 		status = ocfs2_set_inode_attr(inode, flags,
 			OCFS2_FL_MODIFIABLE);
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return status;
 	case OCFS2_IOC_RESVSP:
 	case OCFS2_IOC_RESVSP64:
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 1d3bf83f8b85..b1e3fce72ea4 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -1145,7 +1145,7 @@ out:
 
 	kfree(context);
 
-	mnt_drop_write(filp->f_path.mnt);
+	mnt_drop_write_file(filp);
 
 	return status;
 }
diff --git a/fs/open.c b/fs/open.c
index 22c41b543f2d..4ef8d868a448 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -608,7 +608,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
 	dentry = file->f_path.dentry;
 	audit_inode(NULL, dentry);
 	error = chown_common(&file->f_path, user, group);
-	mnt_drop_write(file->f_path.mnt);
+	mnt_drop_write_file(file);
 out_fput:
 	fput(file);
 out:
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 0b94d7b2b11f..950e3d1b5c9e 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -96,7 +96,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			inode->i_ctime = CURRENT_TIME_SEC;
 			mark_inode_dirty(inode);
 setflags_out:
-			mnt_drop_write(filp->f_path.mnt);
+			mnt_drop_write_file(filp);
 			break;
 		}
 	case REISERFS_IOC_GETVERSION:
@@ -117,7 +117,7 @@ setflags_out:
 		inode->i_ctime = CURRENT_TIME_SEC;
 		mark_inode_dirty(inode);
 setversion_out:
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		break;
 	default:
 		err = -ENOTTY;
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index e52c84598feb..1a7e2d8bdbe9 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -178,7 +178,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			return err;
 		dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags);
 		err = setflags(inode, flags);
-		mnt_drop_write(file->f_path.mnt);
+		mnt_drop_write_file(file);
 		return err;
 	}
 
diff --git a/fs/xattr.c b/fs/xattr.c
index 67583de8218c..82f43376c7cd 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -397,7 +397,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
 	error = mnt_want_write_file(f);
 	if (!error) {
 		error = setxattr(dentry, name, value, size, flags);
-		mnt_drop_write(f->f_path.mnt);
+		mnt_drop_write_file(f);
 	}
 	fput(f);
 	return error;
@@ -624,7 +624,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
 	error = mnt_want_write_file(f);
 	if (!error) {
 		error = removexattr(dentry, name);
-		mnt_drop_write(f->f_path.mnt);
+		mnt_drop_write_file(f);
 	}
 	fput(f);
 	return error;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index b436e17c753e..76f3ca5cfc36 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -566,7 +566,7 @@ xfs_attrmulti_by_handle(
 					dentry->d_inode, attr_name,
 					ops[i].am_attrvalue, ops[i].am_length,
 					ops[i].am_flags);
-			mnt_drop_write(parfilp->f_path.mnt);
+			mnt_drop_write_file(parfilp);
 			break;
 		case ATTR_OP_REMOVE:
 			ops[i].am_error = mnt_want_write_file(parfilp);
@@ -575,7 +575,7 @@ xfs_attrmulti_by_handle(
 			ops[i].am_error = xfs_attrmulti_attr_remove(
 					dentry->d_inode, attr_name,
 					ops[i].am_flags);
-			mnt_drop_write(parfilp->f_path.mnt);
+			mnt_drop_write_file(parfilp);
 			break;
 		default:
 			ops[i].am_error = EINVAL;
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index dd4ba1d4c582..f9ccb7b7c043 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -461,7 +461,7 @@ xfs_compat_attrmulti_by_handle(
 					dentry->d_inode, attr_name,
 					compat_ptr(ops[i].am_attrvalue),
 					ops[i].am_length, ops[i].am_flags);
-			mnt_drop_write(parfilp->f_path.mnt);
+			mnt_drop_write_file(parfilp);
 			break;
 		case ATTR_OP_REMOVE:
 			ops[i].am_error = mnt_want_write_file(parfilp);
@@ -470,7 +470,7 @@ xfs_compat_attrmulti_by_handle(
 			ops[i].am_error = xfs_attrmulti_attr_remove(
 					dentry->d_inode, attr_name,
 					ops[i].am_flags);
-			mnt_drop_write(parfilp->f_path.mnt);
+			mnt_drop_write_file(parfilp);
 			break;
 		default:
 			ops[i].am_error = EINVAL;
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 65c1bb013836..00f5c4f2160b 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -94,6 +94,7 @@ extern int mnt_want_write(struct vfsmount *mnt);
 extern int mnt_want_write_file(struct file *file);
 extern int mnt_clone_write(struct vfsmount *mnt);
 extern void mnt_drop_write(struct vfsmount *mnt);
+extern void mnt_drop_write_file(struct file *file);
 extern void mntput(struct vfsmount *mnt);
 extern struct vfsmount *mntget(struct vfsmount *mnt);
 extern void mnt_pin(struct vfsmount *mnt);
-- 
cgit v1.2.3


From cf31e70d6cf93f19fe9bf1144966ef40991ac723 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 2 Jan 2012 22:28:36 -0500
Subject: vfs: new helper - vfs_ustat()

... and bury user_get_super()/statfs_by_dentry() - they are
purely internal now.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/parisc/hpux/sys_hpux.c |  9 +--------
 fs/compat.c                 |  9 +--------
 fs/internal.h               |  1 +
 fs/statfs.c                 | 21 +++++++++++++--------
 include/linux/fs.h          |  3 +--
 5 files changed, 17 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index 6ab9580b0b00..d9dc6cd3b7d2 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -136,16 +136,9 @@ struct hpux_ustat {
  */
 static int hpux_ustat(dev_t dev, struct hpux_ustat __user *ubuf)
 {
-	struct super_block *s;
 	struct hpux_ustat tmp;  /* Changed to hpux_ustat */
 	struct kstatfs sbuf;
-	int err = -EINVAL;
-
-	s = user_get_super(dev);
-	if (s == NULL)
-		goto out;
-	err = statfs_by_dentry(s->s_root, &sbuf);
-	drop_super(s);
+	int err = vfs_ustat(dev, &sbuf);
 	if (err)
 		goto out;
 
diff --git a/fs/compat.c b/fs/compat.c
index c98787536bb8..9db5a6076610 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -342,16 +342,9 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
  */
 asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u)
 {
-	struct super_block *sb;
 	struct compat_ustat tmp;
 	struct kstatfs sbuf;
-	int err;
-
-	sb = user_get_super(new_decode_dev(dev));
-	if (!sb)
-		return -EINVAL;
-	err = statfs_by_dentry(sb->s_root, &sbuf);
-	drop_super(sb);
+	int err = vfs_ustat(new_decode_dev(dev), &sbuf);
 	if (err)
 		return err;
 
diff --git a/fs/internal.h b/fs/internal.h
index 839d3f9e9173..7b1cb1528ac2 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -78,6 +78,7 @@ extern int do_remount_sb(struct super_block *, int, void *, int);
 extern bool grab_super_passive(struct super_block *sb);
 extern struct dentry *mount_fs(struct file_system_type *,
 			       int, const char *, void *);
+extern struct super_block *user_get_super(dev_t);
 
 /*
  * open.c
diff --git a/fs/statfs.c b/fs/statfs.c
index 9cf04a118965..2aa6a22e0be2 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -7,6 +7,7 @@
 #include <linux/statfs.h>
 #include <linux/security.h>
 #include <linux/uaccess.h>
+#include "internal.h"
 
 static int flags_by_mnt(int mnt_flags)
 {
@@ -45,7 +46,7 @@ static int calculate_f_flags(struct vfsmount *mnt)
 		flags_by_sb(mnt->mnt_sb->s_flags);
 }
 
-int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf)
+static int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf)
 {
 	int retval;
 
@@ -205,19 +206,23 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user
 	return error;
 }
 
-SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf)
+int vfs_ustat(dev_t dev, struct kstatfs *sbuf)
 {
-	struct super_block *s;
-	struct ustat tmp;
-	struct kstatfs sbuf;
+	struct super_block *s = user_get_super(dev);
 	int err;
-
-	s = user_get_super(new_decode_dev(dev));
 	if (!s)
 		return -EINVAL;
 
-	err = statfs_by_dentry(s->s_root, &sbuf);
+	err = statfs_by_dentry(s->s_root, sbuf);
 	drop_super(s);
+	return err;
+}
+
+SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf)
+{
+	struct ustat tmp;
+	struct kstatfs sbuf;
+	int err = vfs_ustat(new_decode_dev(dev), &sbuf);
 	if (err)
 		return err;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed17e54fd204..cec429d76ab0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1939,7 +1939,7 @@ extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
 extern int vfs_statfs(struct path *, struct kstatfs *);
 extern int user_statfs(const char __user *, struct kstatfs *);
 extern int fd_statfs(int, struct kstatfs *);
-extern int statfs_by_dentry(struct dentry *, struct kstatfs *);
+extern int vfs_ustat(dev_t, struct kstatfs *);
 extern int freeze_super(struct super_block *super);
 extern int thaw_super(struct super_block *super);
 extern bool our_mnt(struct vfsmount *mnt);
@@ -2531,7 +2531,6 @@ extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern struct super_block *get_super(struct block_device *);
 extern struct super_block *get_active_super(struct block_device *bdev);
-extern struct super_block *user_get_super(dev_t);
 extern void drop_super(struct super_block *sb);
 extern void iterate_supers(void (*)(struct super_block *, void *), void *);
 extern void iterate_supers_type(struct file_system_type *,
-- 
cgit v1.2.3


From ff01bb4832651c6d25ac509a06a10fcbd75c461c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 16 Sep 2011 02:31:11 -0400
Subject: fs: move code out of buffer.c

Move invalidate_bdev, block_sync_page into fs/block_dev.c.  Export
kill_bdev as well, so brd doesn't have to open code it.  Reduce
buffer_head.h requirement accordingly.

Removed a rather large comment from invalidate_bdev, as it looked a bit
obsolete to bother moving.  The small comment replacing it says enough.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/sysdev/axonram.c   |  1 -
 block/genhd.c                   |  1 -
 block/ioctl.c                   |  2 +-
 drivers/block/amiflop.c         |  2 +-
 drivers/block/brd.c             |  9 ++++----
 drivers/block/floppy.c          |  1 -
 drivers/block/loop.c            |  1 -
 drivers/cdrom/cdrom.c           |  1 -
 drivers/md/dm.c                 |  1 -
 drivers/md/md.c                 |  3 +--
 drivers/mtd/devices/block2mtd.c |  1 -
 drivers/s390/block/dasd.c       |  1 -
 drivers/scsi/scsicam.c          |  1 -
 drivers/tty/sysrq.c             |  2 +-
 fs/block_dev.c                  | 30 ++++++++++++++++++++++---
 fs/buffer.c                     | 50 -----------------------------------------
 fs/cachefiles/interface.c       |  1 -
 fs/cramfs/inode.c               |  1 -
 fs/fs-writeback.c               |  1 -
 fs/libfs.c                      |  2 +-
 fs/quota/dquot.c                |  1 -
 fs/quota/quota.c                |  1 -
 fs/splice.c                     |  1 -
 fs/sync.c                       |  1 -
 include/linux/fs.h              |  3 +++
 kernel/power/swap.c             |  1 -
 mm/page-writeback.c             |  2 +-
 mm/swap_state.c                 |  1 -
 28 files changed, 40 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index ba4271919062..1c16141c031c 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -25,7 +25,6 @@
 
 #include <linux/bio.h>
 #include <linux/blkdev.h>
-#include <linux/buffer_head.h>
 #include <linux/device.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
diff --git a/block/genhd.c b/block/genhd.c
index bf443a71b93e..b7d1a0e42686 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 #include <linux/kmod.h>
 #include <linux/kobj_map.h>
-#include <linux/buffer_head.h>
 #include <linux/mutex.h>
 #include <linux/idr.h>
 #include <linux/log2.h>
diff --git a/block/ioctl.c b/block/ioctl.c
index ca939fc1030f..91e7b19c86f4 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -5,7 +5,7 @@
 #include <linux/blkpg.h>
 #include <linux/hdreg.h>
 #include <linux/backing-dev.h>
-#include <linux/buffer_head.h>
+#include <linux/fs.h>
 #include <linux/blktrace_api.h>
 #include <asm/uaccess.h>
 
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 8eba86bba599..386146d792d1 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -63,7 +63,7 @@
 #include <linux/mutex.h>
 #include <linux/amifdreg.h>
 #include <linux/amifd.h>
-#include <linux/buffer_head.h>
+#include <linux/fs.h>
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
 #include <linux/interrupt.h>
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index d22119d49e53..ec246437f5a4 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -17,7 +17,7 @@
 #include <linux/highmem.h>
 #include <linux/mutex.h>
 #include <linux/radix-tree.h>
-#include <linux/buffer_head.h> /* invalidate_bh_lrus() */
+#include <linux/fs.h>
 #include <linux/slab.h>
 
 #include <asm/uaccess.h>
@@ -402,14 +402,13 @@ static int brd_ioctl(struct block_device *bdev, fmode_t mode,
 	error = -EBUSY;
 	if (bdev->bd_openers <= 1) {
 		/*
-		 * Invalidate the cache first, so it isn't written
-		 * back to the device.
+		 * Kill the cache first, so it isn't written back to the
+		 * device.
 		 *
 		 * Another thread might instantiate more buffercache here,
 		 * but there is not much we can do to close that race.
 		 */
-		invalidate_bh_lrus();
-		truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
+		kill_bdev(bdev);
 		brd_free_pages(brd);
 		error = 0;
 	}
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 9955a53733b2..510fb10ec45a 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -188,7 +188,6 @@ static int print_unex = 1;
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/mod_devicetable.h>
-#include <linux/buffer_head.h>	/* for invalidate_buffers() */
 #include <linux/mutex.h>
 #include <linux/io.h>
 #include <linux/uaccess.h>
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1e888c9e85b3..f00257782fcc 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -69,7 +69,6 @@
 #include <linux/freezer.h>
 #include <linux/mutex.h>
 #include <linux/writeback.h>
-#include <linux/buffer_head.h>		/* for invalidate_bdev() */
 #include <linux/completion.h>
 #include <linux/highmem.h>
 #include <linux/kthread.h>
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index f997c27d79e2..2118211aff99 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -267,7 +267,6 @@
 
 #include <linux/module.h>
 #include <linux/fs.h>
-#include <linux/buffer_head.h>
 #include <linux/major.h>
 #include <linux/types.h>
 #include <linux/errno.h>
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4720f68f817e..b89c548ec3f8 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -14,7 +14,6 @@
 #include <linux/moduleparam.h>
 #include <linux/blkpg.h>
 #include <linux/bio.h>
-#include <linux/buffer_head.h>
 #include <linux/mempool.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f47f1f8ac44b..5d1b6762f108 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -36,8 +36,7 @@
 #include <linux/blkdev.h>
 #include <linux/sysctl.h>
 #include <linux/seq_file.h>
-#include <linux/mutex.h>
-#include <linux/buffer_head.h> /* for invalidate_bdev */
+#include <linux/fs.h>
 #include <linux/poll.h>
 #include <linux/ctype.h>
 #include <linux/string.h>
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index b78f23169d4e..ebeabc727f70 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -14,7 +14,6 @@
 #include <linux/list.h>
 #include <linux/init.h>
 #include <linux/mtd/mtd.h>
-#include <linux/buffer_head.h>
 #include <linux/mutex.h>
 #include <linux/mount.h>
 #include <linux/slab.h>
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 65894f05a801..2de5b60ee8c8 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -17,7 +17,6 @@
 #include <linux/ctype.h>
 #include <linux/major.h>
 #include <linux/slab.h>
-#include <linux/buffer_head.h>
 #include <linux/hdreg.h>
 #include <linux/async.h>
 #include <linux/mutex.h>
diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c
index 6803b1e26ecc..92d24d6dcb39 100644
--- a/drivers/scsi/scsicam.c
+++ b/drivers/scsi/scsicam.c
@@ -16,7 +16,6 @@
 #include <linux/genhd.h>
 #include <linux/kernel.h>
 #include <linux/blkdev.h>
-#include <linux/buffer_head.h>
 #include <asm/unaligned.h>
 
 #include <scsi/scsicam.h>
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 43db715f1502..7867b7c4538e 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -32,7 +32,6 @@
 #include <linux/module.h>
 #include <linux/suspend.h>
 #include <linux/writeback.h>
-#include <linux/buffer_head.h>		/* for fsync_bdev() */
 #include <linux/swap.h>
 #include <linux/spinlock.h>
 #include <linux/vt_kern.h>
@@ -41,6 +40,7 @@
 #include <linux/oom.h>
 #include <linux/slab.h>
 #include <linux/input.h>
+#include <linux/uaccess.h>
 
 #include <asm/ptrace.h>
 #include <asm/irq_regs.h>
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7866cdd9fe70..69a5b6fbee2b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/blkpg.h>
 #include <linux/buffer_head.h>
+#include <linux/swap.h>
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 #include <linux/mpage.h>
@@ -25,6 +26,7 @@
 #include <linux/namei.h>
 #include <linux/log2.h>
 #include <linux/kmemleak.h>
+#include <linux/cleancache.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -82,13 +84,35 @@ static sector_t max_block(struct block_device *bdev)
 }
 
 /* Kill _all_ buffers and pagecache , dirty or not.. */
-static void kill_bdev(struct block_device *bdev)
+void kill_bdev(struct block_device *bdev)
 {
-	if (bdev->bd_inode->i_mapping->nrpages == 0)
+	struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+	if (mapping->nrpages == 0)
 		return;
+
 	invalidate_bh_lrus();
-	truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
+	truncate_inode_pages(mapping, 0);
 }	
+EXPORT_SYMBOL(kill_bdev);
+
+/* Invalidate clean unused buffers and pagecache. */
+void invalidate_bdev(struct block_device *bdev)
+{
+	struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+	if (mapping->nrpages == 0)
+		return;
+
+	invalidate_bh_lrus();
+	lru_add_drain_all();	/* make sure all lru add caches are flushed */
+	invalidate_mapping_pages(mapping, 0, -1);
+	/* 99% of the time, we don't need to flush the cleancache on the bdev.
+	 * But, for the strange corners, lets be cautious
+	 */
+	cleancache_flush_inode(mapping);
+}
+EXPORT_SYMBOL(invalidate_bdev);
 
 int set_blocksize(struct block_device *bdev, int size)
 {
diff --git a/fs/buffer.c b/fs/buffer.c
index 19d8eb7fdc81..1a30db77af32 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,7 +41,6 @@
 #include <linux/bitops.h>
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
-#include <linux/cleancache.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 
@@ -231,55 +230,6 @@ out:
 	return ret;
 }
 
-/* If invalidate_buffers() will trash dirty buffers, it means some kind
-   of fs corruption is going on. Trashing dirty data always imply losing
-   information that was supposed to be just stored on the physical layer
-   by the user.
-
-   Thus invalidate_buffers in general usage is not allwowed to trash
-   dirty buffers. For example ioctl(FLSBLKBUF) expects dirty data to
-   be preserved.  These buffers are simply skipped.
-  
-   We also skip buffers which are still in use.  For example this can
-   happen if a userspace program is reading the block device.
-
-   NOTE: In the case where the user removed a removable-media-disk even if
-   there's still dirty data not synced on disk (due a bug in the device driver
-   or due an error of the user), by not destroying the dirty buffers we could
-   generate corruption also on the next media inserted, thus a parameter is
-   necessary to handle this case in the most safe way possible (trying
-   to not corrupt also the new disk inserted with the data belonging to
-   the old now corrupted disk). Also for the ramdisk the natural thing
-   to do in order to release the ramdisk memory is to destroy dirty buffers.
-
-   These are two special cases. Normal usage imply the device driver
-   to issue a sync on the device (without waiting I/O completion) and
-   then an invalidate_buffers call that doesn't trash dirty buffers.
-
-   For handling cache coherency with the blkdev pagecache the 'update' case
-   is been introduced. It is needed to re-read from disk any pinned
-   buffer. NOTE: re-reading from disk is destructive so we can do it only
-   when we assume nobody is changing the buffercache under our I/O and when
-   we think the disk contains more recent information than the buffercache.
-   The update == 1 pass marks the buffers we need to update, the update == 2
-   pass does the actual I/O. */
-void invalidate_bdev(struct block_device *bdev)
-{
-	struct address_space *mapping = bdev->bd_inode->i_mapping;
-
-	if (mapping->nrpages == 0)
-		return;
-
-	invalidate_bh_lrus();
-	lru_add_drain_all();	/* make sure all lru add caches are flushed */
-	invalidate_mapping_pages(mapping, 0, -1);
-	/* 99% of the time, we don't need to flush the cleancache on the bdev.
-	 * But, for the strange corners, lets be cautious
-	 */
-	cleancache_flush_inode(mapping);
-}
-EXPORT_SYMBOL(invalidate_bdev);
-
 /*
  * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
  */
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 1064805e653b..67bef6d01484 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -11,7 +11,6 @@
 
 #include <linux/slab.h>
 #include <linux/mount.h>
-#include <linux/buffer_head.h>
 #include "internal.h"
 
 #define list_to_page(head) (list_entry((head)->prev, struct page, lru))
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 739fb59bcdc2..c37adb222113 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -20,7 +20,6 @@
 #include <linux/cramfs_fs.h>
 #include <linux/slab.h>
 #include <linux/cramfs_fs_sb.h>
-#include <linux/buffer_head.h>
 #include <linux/vfs.h>
 #include <linux/mutex.h>
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 517f211a3bd4..80a4574028f1 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -25,7 +25,6 @@
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
-#include <linux/buffer_head.h>
 #include <linux/tracepoint.h>
 #include "internal.h"
 
diff --git a/fs/libfs.c b/fs/libfs.c
index f6d411eef1e7..5b2dbb3ba4fc 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -12,7 +12,7 @@
 #include <linux/mutex.h>
 #include <linux/exportfs.h>
 #include <linux/writeback.h>
-#include <linux/buffer_head.h>
+#include <linux/buffer_head.h> /* sync_mapping_buffers */
 
 #include <asm/uaccess.h>
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 5b572c89e6c4..5d81e92daf83 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -73,7 +73,6 @@
 #include <linux/security.h>
 #include <linux/kmod.h>
 #include <linux/namei.h>
-#include <linux/buffer_head.h>
 #include <linux/capability.h>
 #include <linux/quotaops.h>
 #include "../internal.h" /* ugh */
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 35f4b0ecdeb3..7898cd688a00 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -13,7 +13,6 @@
 #include <linux/kernel.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
-#include <linux/buffer_head.h>
 #include <linux/capability.h>
 #include <linux/quotaops.h>
 #include <linux/types.h>
diff --git a/fs/splice.c b/fs/splice.c
index fa2defa8afcf..1ec0493266b3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -25,7 +25,6 @@
 #include <linux/mm_inline.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
-#include <linux/buffer_head.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/uio.h>
diff --git a/fs/sync.c b/fs/sync.c
index 101b8ef901d7..f3501ef39235 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -14,7 +14,6 @@
 #include <linux/linkage.h>
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
-#include <linux/buffer_head.h>
 #include <linux/backing-dev.h>
 #include "internal.h"
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cec429d76ab0..e853ba5eddd4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2092,6 +2092,7 @@ extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
 extern void invalidate_bdev(struct block_device *);
 extern int sync_blockdev(struct block_device *bdev);
+extern void kill_bdev(struct block_device *);
 extern struct super_block *freeze_bdev(struct block_device *);
 extern void emergency_thaw_all(void);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
@@ -2099,6 +2100,7 @@ extern int fsync_bdev(struct block_device *);
 #else
 static inline void bd_forget(struct inode *inode) {}
 static inline int sync_blockdev(struct block_device *bdev) { return 0; }
+static inline void kill_bdev(struct block_device *bdev) {}
 static inline void invalidate_bdev(struct block_device *bdev) {}
 
 static inline struct super_block *freeze_bdev(struct block_device *sb)
@@ -2415,6 +2417,7 @@ extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
 				unsigned long nr_segs, loff_t pos);
 extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
 			int datasync);
+extern void block_sync_page(struct page *page);
 
 /* fs/splice.c */
 extern ssize_t generic_file_splice_read(struct file *, loff_t *,
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 11a594c4ba25..3739ecced085 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -18,7 +18,6 @@
 #include <linux/bitops.h>
 #include <linux/genhd.h>
 #include <linux/device.h>
-#include <linux/buffer_head.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/swap.h>
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 50f08241f981..8616ef3025a4 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -32,7 +32,7 @@
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
-#include <linux/buffer_head.h>
+#include <linux/buffer_head.h> /* __set_page_dirty_buffers */
 #include <linux/pagevec.h>
 #include <trace/events/writeback.h>
 
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 78cc4d1f6cce..ea6b32d61873 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -13,7 +13,6 @@
 #include <linux/swapops.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
-#include <linux/buffer_head.h>
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
 #include <linux/migrate.h>
-- 
cgit v1.2.3


From 8208a22bb8bd3c52ef634b4ff194f14892ab1713 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 25 Jul 2011 17:32:17 -0400
Subject: switch sys_mknodat(2) to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c               | 4 ++--
 include/linux/syscalls.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 5008f01787f5..f6b3c73e862c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2489,7 +2489,7 @@ static int may_mknod(mode_t mode)
 	}
 }
 
-SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode,
+SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
 		unsigned, dev)
 {
 	struct dentry *dentry;
@@ -2536,7 +2536,7 @@ out_dput:
 	return error;
 }
 
-SYSCALL_DEFINE3(mknod, const char __user *, filename, int, mode, unsigned, dev)
+SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
 {
 	return sys_mknodat(AT_FDCWD, filename, mode, dev);
 }
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 86a24b1166d1..b3c16d8a6383 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -475,7 +475,7 @@ asmlinkage long sys_mincore(unsigned long start, size_t len,
 asmlinkage long sys_pivot_root(const char __user *new_root,
 				const char __user *put_old);
 asmlinkage long sys_chroot(const char __user *filename);
-asmlinkage long sys_mknod(const char __user *filename, int mode,
+asmlinkage long sys_mknod(const char __user *filename, umode_t mode,
 				unsigned dev);
 asmlinkage long sys_link(const char __user *oldname,
 				const char __user *newname);
@@ -755,7 +755,7 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc,
 asmlinkage long sys_spu_create(const char __user *name,
 		unsigned int flags, mode_t mode, int fd);
 
-asmlinkage long sys_mknodat(int dfd, const char __user * filename, int mode,
+asmlinkage long sys_mknodat(int dfd, const char __user * filename, umode_t mode,
 			    unsigned dev);
 asmlinkage long sys_mkdirat(int dfd, const char __user * pathname, int mode);
 asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag);
-- 
cgit v1.2.3


From 18bb1db3e7607e4a997d50991a6f9fa5b0f8722c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 01:41:39 -0400
Subject: switch vfs_mkdir() and ->mkdir() to umode_t

vfs_mkdir() gets int, but immediately drops everything that might not
fit into umode_t and that's the only caller of ->mkdir()...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking | 2 +-
 Documentation/filesystems/vfs.txt | 2 +-
 drivers/staging/pohmelfs/dir.c    | 2 +-
 fs/9p/vfs_inode.c                 | 2 +-
 fs/9p/vfs_inode_dotl.c            | 2 +-
 fs/affs/affs.h                    | 2 +-
 fs/affs/namei.c                   | 4 ++--
 fs/afs/dir.c                      | 6 +++---
 fs/autofs4/root.c                 | 4 ++--
 fs/bad_inode.c                    | 2 +-
 fs/btrfs/inode.c                  | 2 +-
 fs/ceph/dir.c                     | 4 ++--
 fs/cifs/cifsfs.h                  | 2 +-
 fs/cifs/inode.c                   | 4 ++--
 fs/coda/dir.c                     | 4 ++--
 fs/configfs/dir.c                 | 2 +-
 fs/ecryptfs/inode.c               | 2 +-
 fs/exofs/namei.c                  | 2 +-
 fs/ext2/namei.c                   | 2 +-
 fs/ext3/namei.c                   | 2 +-
 fs/ext4/namei.c                   | 2 +-
 fs/fat/namei_msdos.c              | 2 +-
 fs/fat/namei_vfat.c               | 2 +-
 fs/fuse/dir.c                     | 2 +-
 fs/gfs2/inode.c                   | 2 +-
 fs/hfs/dir.c                      | 2 +-
 fs/hfsplus/dir.c                  | 2 +-
 fs/hostfs/hostfs_kern.c           | 2 +-
 fs/hpfs/namei.c                   | 2 +-
 fs/hugetlbfs/inode.c              | 2 +-
 fs/jffs2/dir.c                    | 4 ++--
 fs/jfs/namei.c                    | 2 +-
 fs/logfs/dir.c                    | 2 +-
 fs/minix/namei.c                  | 2 +-
 fs/namei.c                        | 2 +-
 fs/ncpfs/dir.c                    | 4 ++--
 fs/nfs/dir.c                      | 4 ++--
 fs/nilfs2/namei.c                 | 2 +-
 fs/ocfs2/dlmfs/dlmfs.c            | 2 +-
 fs/ocfs2/namei.c                  | 2 +-
 fs/omfs/dir.c                     | 2 +-
 fs/ramfs/inode.c                  | 2 +-
 fs/reiserfs/namei.c               | 2 +-
 fs/reiserfs/xattr.c               | 2 +-
 fs/sysv/namei.c                   | 2 +-
 fs/ubifs/dir.c                    | 4 ++--
 fs/udf/namei.c                    | 2 +-
 fs/ufs/namei.c                    | 2 +-
 fs/xfs/xfs_iops.c                 | 2 +-
 include/linux/fs.h                | 4 ++--
 include/linux/security.h          | 4 ++--
 kernel/cgroup.c                   | 4 ++--
 mm/shmem.c                        | 2 +-
 security/capability.c             | 2 +-
 security/security.c               | 2 +-
 security/selinux/hooks.c          | 2 +-
 56 files changed, 70 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index d819ba16a0c7..6c7676d9c0ea 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -43,7 +43,7 @@ ata *);
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
 	int (*unlink) (struct inode *,struct dentry *);
 	int (*symlink) (struct inode *,struct dentry *,const char *);
-	int (*mkdir) (struct inode *,struct dentry *,int);
+	int (*mkdir) (struct inode *,struct dentry *,umode_t);
 	int (*rmdir) (struct inode *,struct dentry *);
 	int (*mknod) (struct inode *,struct dentry *,int,dev_t);
 	int (*rename) (struct inode *, struct dentry *,
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 43cbd0821721..0c147c79cdd8 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -346,7 +346,7 @@ struct inode_operations {
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
 	int (*unlink) (struct inode *,struct dentry *);
 	int (*symlink) (struct inode *,struct dentry *,const char *);
-	int (*mkdir) (struct inode *,struct dentry *,int);
+	int (*mkdir) (struct inode *,struct dentry *,umode_t);
 	int (*rmdir) (struct inode *,struct dentry *);
 	int (*mknod) (struct inode *,struct dentry *,int,dev_t);
 	int (*rename) (struct inode *, struct dentry *,
diff --git a/drivers/staging/pohmelfs/dir.c b/drivers/staging/pohmelfs/dir.c
index 7598e77672a5..d3ad4dde991f 100644
--- a/drivers/staging/pohmelfs/dir.c
+++ b/drivers/staging/pohmelfs/dir.c
@@ -667,7 +667,7 @@ static int pohmelfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	return pohmelfs_create_entry(dir, dentry, 0, mode);
 }
 
-static int pohmelfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int pohmelfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	int err;
 
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 2310cc9eb402..3e54900f3b7e 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -785,7 +785,7 @@ error:
  *
  */
 
-static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	int err;
 	u32 perm;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 0b5745e21946..87e46b19b21b 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -395,7 +395,7 @@ err_clunk_old_fid:
  */
 
 static int v9fs_vfs_mkdir_dotl(struct inode *dir,
-			       struct dentry *dentry, int omode)
+			       struct dentry *dentry, umode_t omode)
 {
 	int err;
 	struct v9fs_session_info *v9ses;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index c2b9c79eb64e..8abcad7c935f 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -157,7 +157,7 @@ extern int	affs_hash_name(struct super_block *sb, const u8 *name, unsigned int l
 extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *);
 extern int	affs_unlink(struct inode *dir, struct dentry *dentry);
 extern int	affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *);
-extern int	affs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+extern int	affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 extern int	affs_rmdir(struct inode *dir, struct dentry *dentry);
 extern int	affs_link(struct dentry *olddentry, struct inode *dir,
 			  struct dentry *dentry);
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 780a11dc6318..7bb7660f805d 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -285,12 +285,12 @@ affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata
 }
 
 int
-affs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct inode		*inode;
 	int			 error;
 
-	pr_debug("AFFS: mkdir(%lu,\"%.*s\",0%o)\n",dir->i_ino,
+	pr_debug("AFFS: mkdir(%lu,\"%.*s\",0%ho)\n",dir->i_ino,
 		 (int)dentry->d_name.len,dentry->d_name.name,mode);
 
 	inode = affs_new_inode(dir);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 1b0b19550015..e6ea58abde3b 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -30,7 +30,7 @@ static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
 				  loff_t fpos, u64 ino, unsigned dtype);
 static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
 		      struct nameidata *nd);
-static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 static int afs_rmdir(struct inode *dir, struct dentry *dentry);
 static int afs_unlink(struct inode *dir, struct dentry *dentry);
 static int afs_link(struct dentry *from, struct inode *dir,
@@ -764,7 +764,7 @@ static void afs_d_release(struct dentry *dentry)
 /*
  * create a directory on an AFS filesystem
  */
-static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct afs_file_status status;
 	struct afs_callback cb;
@@ -777,7 +777,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	dvnode = AFS_FS_I(dir);
 
-	_enter("{%x:%u},{%s},%o",
+	_enter("{%x:%u},{%s},%ho",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
 
 	ret = -ENAMETOOLONG;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index f55ae23b137e..75e5f1c8e028 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -26,7 +26,7 @@
 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
 static int autofs4_dir_unlink(struct inode *,struct dentry *);
 static int autofs4_dir_rmdir(struct inode *,struct dentry *);
-static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
+static int autofs4_dir_mkdir(struct inode *,struct dentry *,umode_t);
 static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
 #ifdef CONFIG_COMPAT
 static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
@@ -699,7 +699,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 	return 0;
 }
 
-static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 9205cf25f1c6..5a2738c1f315 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -202,7 +202,7 @@ static int bad_inode_symlink (struct inode *dir, struct dentry *dentry,
 }
 
 static int bad_inode_mkdir(struct inode *dir, struct dentry *dentry,
-			int mode)
+			umode_t mode)
 {
 	return -EIO;
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f8ff9738558a..e30de56e6b62 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4792,7 +4792,7 @@ fail:
 	return err;
 }
 
-static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct inode *inode = NULL;
 	struct btrfs_trans_handle *trans;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 98954003a8d3..96141ae3d8be 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -753,7 +753,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
 	return err;
 }
 
-static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -767,7 +767,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		dout("mksnap dir %p snap '%.*s' dn %p\n", dir,
 		     dentry->d_name.len, dentry->d_name.name, dentry);
 	} else if (ceph_snap(dir) == CEPH_NOSNAP) {
-		dout("mkdir dir %p dn %p mode 0%o\n", dir, dentry, mode);
+		dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode);
 		op = CEPH_MDS_OP_MKDIR;
 	} else {
 		goto out;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 30ff56005d8f..add64454fd51 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -51,7 +51,7 @@ extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
 extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
 extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *);
 extern int cifs_mknod(struct inode *, struct dentry *, int, dev_t);
-extern int cifs_mkdir(struct inode *, struct dentry *, int);
+extern int cifs_mkdir(struct inode *, struct dentry *, umode_t);
 extern int cifs_rmdir(struct inode *, struct dentry *);
 extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
 		       struct dentry *);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index e851d5b8931e..a5f54b7d9822 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1264,7 +1264,7 @@ unlink_out:
 	return rc;
 }
 
-int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
+int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
 {
 	int rc = 0, tmprc;
 	int xid;
@@ -1275,7 +1275,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 	struct inode *newinode = NULL;
 	struct cifs_fattr fattr;
 
-	cFYI(1, "In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode);
+	cFYI(1, "In cifs_mkdir, mode = 0x%hx inode = 0x%p", mode, inode);
 
 	cifs_sb = CIFS_SB(inode->i_sb);
 	tlink = cifs_sb_tlink(cifs_sb);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 28e7e135cfab..a74ae6fcfb7e 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -37,7 +37,7 @@ static int coda_link(struct dentry *old_dentry, struct inode *dir_inode,
 static int coda_unlink(struct inode *dir_inode, struct dentry *entry);
 static int coda_symlink(struct inode *dir_inode, struct dentry *entry,
 			const char *symname);
-static int coda_mkdir(struct inode *dir_inode, struct dentry *entry, int mode);
+static int coda_mkdir(struct inode *dir_inode, struct dentry *entry, umode_t mode);
 static int coda_rmdir(struct inode *dir_inode, struct dentry *entry);
 static int coda_rename(struct inode *old_inode, struct dentry *old_dentry, 
                        struct inode *new_inode, struct dentry *new_dentry);
@@ -223,7 +223,7 @@ err_out:
 	return error;
 }
 
-static int coda_mkdir(struct inode *dir, struct dentry *de, int mode)
+static int coda_mkdir(struct inode *dir, struct dentry *de, umode_t mode)
 {
 	struct inode *inode;
 	struct coda_vattr attrs;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 1c5296911104..5ddd7ebd9dcd 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1170,7 +1170,7 @@ void configfs_undepend_item(struct configfs_subsystem *subsys,
 }
 EXPORT_SYMBOL(configfs_undepend_item);
 
-static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	int ret = 0;
 	int module_got = 0;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 32f90a3ae63e..ebf8726482b6 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -559,7 +559,7 @@ out_lock:
 	return rc;
 }
 
-static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	int rc;
 	struct dentry *lower_dentry;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index b54c43775f17..ff1c8286cd69 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -153,7 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
 	return exofs_add_nondir(dentry, inode);
 }
 
-static int exofs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct inode *inode;
 	int err = -EMLINK;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 761fde807fc9..e3f3672b2020 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -214,7 +214,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
 	return err;
 }
 
-static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 {
 	struct inode * inode;
 	int err = -EMLINK;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 642dc6d66dfd..08ecb53a33ea 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1768,7 +1768,7 @@ retry:
 	return err;
 }
 
-static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 {
 	handle_t *handle;
 	struct inode * inode;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index aa4c782c9dd7..e506746724cf 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1806,7 +1806,7 @@ retry:
 	return err;
 }
 
-static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	handle_t *handle;
 	struct inode *inode;
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 216b419f30e2..d1f53cae897c 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -346,7 +346,7 @@ out:
 }
 
 /***** Make a directory */
-static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int msdos_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct super_block *sb = dir->i_sb;
 	struct fat_slot_info sinfo;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index a87a65663c25..fde2eda6332e 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -870,7 +870,7 @@ out:
 	return err;
 }
 
-static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct super_block *sb = dir->i_sb;
 	struct inode *inode;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 9f63e493a9b6..4848a1acb3bb 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -585,7 +585,7 @@ static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
 	return fuse_mknod(dir, entry, mode, 0);
 }
 
-static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
+static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
 {
 	struct fuse_mkdir_in inarg;
 	struct fuse_conn *fc = get_fuse_conn(dir);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index cfd4959b218c..eecfc39c07e6 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1129,7 +1129,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
  * Returns: errno
  */
 
-static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0, 0);
 }
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index bce4eef91a06..06dc161e911c 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -216,7 +216,7 @@ static int hfs_create(struct inode *dir, struct dentry *dentry, int mode,
  * in a directory, given the inode for the parent directory and the
  * name (and its length) of the new directory.
  */
-static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int hfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct inode *inode;
 	int res;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 4536cd3f15ae..ed321f0384d7 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -459,7 +459,7 @@ static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode,
 	return hfsplus_mknod(dir, dentry, mode, 0);
 }
 
-static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	return hfsplus_mknod(dir, dentry, mode | S_IFDIR, 0);
 }
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 343ea632b97c..d35240fbbd73 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -676,7 +676,7 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
 	return err;
 }
 
-int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
+int hostfs_mkdir(struct inode *ino, struct dentry *dentry, umode_t mode)
 {
 	char *file;
 	int err;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index ea91fcb0ef9b..a2f89f2b9503 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -8,7 +8,7 @@
 #include <linux/sched.h>
 #include "hpfs_fn.h"
 
-static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	const unsigned char *name = dentry->d_name.name;
 	unsigned len = dentry->d_name.len;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9c4ec538725b..ba269706e798 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -523,7 +523,7 @@ static int hugetlbfs_mknod(struct inode *dir,
 	return error;
 }
 
-static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);
 	if (!retval)
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index be6169bd8acd..5dc458f19bc9 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -29,7 +29,7 @@ static struct dentry *jffs2_lookup (struct inode *,struct dentry *,
 static int jffs2_link (struct dentry *,struct inode *,struct dentry *);
 static int jffs2_unlink (struct inode *,struct dentry *);
 static int jffs2_symlink (struct inode *,struct dentry *,const char *);
-static int jffs2_mkdir (struct inode *,struct dentry *,int);
+static int jffs2_mkdir (struct inode *,struct dentry *,umode_t);
 static int jffs2_rmdir (struct inode *,struct dentry *);
 static int jffs2_mknod (struct inode *,struct dentry *,int,dev_t);
 static int jffs2_rename (struct inode *, struct dentry *,
@@ -450,7 +450,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
 }
 
 
-static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
+static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode)
 {
 	struct jffs2_inode_info *f, *dir_f;
 	struct jffs2_sb_info *c;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index a112ad96e474..17ea85835715 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -205,7 +205,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
  * note:
  * EACCESS: user needs search+write permission on the parent directory
  */
-static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
+static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
 {
 	int rc = 0;
 	tid_t tid;		/* transaction id */
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index b7d7f67cee5a..25c5cbf8c123 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -482,7 +482,7 @@ out:
 	return ret;
 }
 
-static int logfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int logfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct inode *inode;
 
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 6e6777f1b4b2..0e7a1a22e554 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -103,7 +103,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
 	return add_nondir(dentry, inode);
 }
 
-static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode)
+static int minix_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
 {
 	struct inode * inode;
 	int err = -EMLINK;
diff --git a/fs/namei.c b/fs/namei.c
index f6b3c73e862c..443c703249b3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2541,7 +2541,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
 	return sys_mknodat(AT_FDCWD, filename, mode, dev);
 }
 
-int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	int error = may_create(dir, dentry);
 
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 9c51f621e901..dfb51f084407 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -33,7 +33,7 @@ static int ncp_readdir(struct file *, void *, filldir_t);
 static int ncp_create(struct inode *, struct dentry *, int, struct nameidata *);
 static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *);
 static int ncp_unlink(struct inode *, struct dentry *);
-static int ncp_mkdir(struct inode *, struct dentry *, int);
+static int ncp_mkdir(struct inode *, struct dentry *, umode_t);
 static int ncp_rmdir(struct inode *, struct dentry *);
 static int ncp_rename(struct inode *, struct dentry *,
 	  	      struct inode *, struct dentry *);
@@ -985,7 +985,7 @@ static int ncp_create(struct inode *dir, struct dentry *dentry, int mode,
 	return ncp_create_new(dir, dentry, mode, 0, 0);
 }
 
-static int ncp_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct ncp_entry_info finfo;
 	struct ncp_server *server = NCP_SERVER(dir);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 23be134b3193..5d67d92a4248 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -48,7 +48,7 @@ static int nfs_closedir(struct inode *, struct file *);
 static int nfs_readdir(struct file *, void *, filldir_t);
 static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
 static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
-static int nfs_mkdir(struct inode *, struct dentry *, int);
+static int nfs_mkdir(struct inode *, struct dentry *, umode_t);
 static int nfs_rmdir(struct inode *, struct dentry *);
 static int nfs_unlink(struct inode *, struct dentry *);
 static int nfs_symlink(struct inode *, struct dentry *, const char *);
@@ -1719,7 +1719,7 @@ out_err:
 /*
  * See comments for nfs_proc_create regarding failed operations.
  */
-static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct iattr attr;
 	int error;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 768982de10e4..e5e7311f1b92 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -213,7 +213,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
 	return err;
 }
 
-static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct inode *inode;
 	struct nilfs_transaction_info ti;
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index a9f007de1da8..77c8d8069461 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -488,7 +488,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
 /* SMP-safe */
 static int dlmfs_mkdir(struct inode * dir,
 		       struct dentry * dentry,
-		       int mode)
+		       umode_t mode)
 {
 	int status;
 	struct inode *inode = NULL;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index a8b2bfea574e..c779f8bfc8a6 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -602,7 +602,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 
 static int ocfs2_mkdir(struct inode *dir,
 		       struct dentry *dentry,
-		       int mode)
+		       umode_t mode)
 {
 	int ret;
 
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 98e544274390..667dc7ff28c0 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -279,7 +279,7 @@ out_free_inode:
 	return err;
 }
 
-static int omfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int omfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	return omfs_add_node(dir, dentry, mode | S_IFDIR);
 }
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 462ceb38fec6..61972bee0561 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -106,7 +106,7 @@ ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	return error;
 }
 
-static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 {
 	int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0);
 	if (!retval)
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 80058e8ce361..763239a7e8dd 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -721,7 +721,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
 	return retval;
 }
 
-static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	int retval;
 	struct inode *inode;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 6bc346c160e7..c24deda8a8bc 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -66,7 +66,7 @@ static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
 }
 #endif
 
-static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	BUG_ON(!mutex_is_locked(&dir->i_mutex));
 	return dir->i_op->mkdir(dir, dentry, mode);
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index e474fbcf8bde..3368425a4ce2 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -131,7 +131,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
 	return add_nondir(dentry, inode);
 }
 
-static int sysv_mkdir(struct inode * dir, struct dentry *dentry, int mode)
+static int sysv_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
 {
 	struct inode * inode;
 	int err = -EMLINK;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 683492043317..f5102f368160 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -712,7 +712,7 @@ out_cancel:
 	return err;
 }
 
-static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct inode *inode;
 	struct ubifs_inode *dir_ui = ubifs_inode(dir);
@@ -725,7 +725,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	 * directory inode.
 	 */
 
-	dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
+	dbg_gen("dent '%.*s', mode %#hx in dir ino %lu",
 		dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
 
 	err = ubifs_budget_space(c, &req);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 4639e137222f..7f8ee32842be 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -640,7 +640,7 @@ out:
 	return err;
 }
 
-static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct inode *inode;
 	struct udf_fileident_bh fibh;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 639d49162241..fa743aaa327c 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -180,7 +180,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
 	return error;
 }
 
-static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 {
 	struct inode * inode;
 	int err = -EMLINK;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 23ce927973a4..99b324d43c98 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -241,7 +241,7 @@ STATIC int
 xfs_vn_mkdir(
 	struct inode	*dir,
 	struct dentry	*dentry,
-	int		mode)
+	umode_t		mode)
 {
 	return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cec429d76ab0..3f7bd8b12e37 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1517,7 +1517,7 @@ extern void unlock_super(struct super_block *);
  * VFS helper functions..
  */
 extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *);
-extern int vfs_mkdir(struct inode *, struct dentry *, int);
+extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
 extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t);
 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
 extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
@@ -1623,7 +1623,7 @@ struct inode_operations {
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
 	int (*unlink) (struct inode *,struct dentry *);
 	int (*symlink) (struct inode *,struct dentry *,const char *);
-	int (*mkdir) (struct inode *,struct dentry *,int);
+	int (*mkdir) (struct inode *,struct dentry *,umode_t);
 	int (*rmdir) (struct inode *,struct dentry *);
 	int (*mknod) (struct inode *,struct dentry *,int,dev_t);
 	int (*rename) (struct inode *, struct dentry *,
diff --git a/include/linux/security.h b/include/linux/security.h
index e8c619d39291..16cbc58cb13b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1453,7 +1453,7 @@ struct security_operations {
 	int (*inode_unlink) (struct inode *dir, struct dentry *dentry);
 	int (*inode_symlink) (struct inode *dir,
 			      struct dentry *dentry, const char *old_name);
-	int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, int mode);
+	int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, umode_t mode);
 	int (*inode_rmdir) (struct inode *dir, struct dentry *dentry);
 	int (*inode_mknod) (struct inode *dir, struct dentry *dentry,
 			    int mode, dev_t dev);
@@ -1722,7 +1722,7 @@ int security_inode_link(struct dentry *old_dentry, struct inode *dir,
 int security_inode_unlink(struct inode *dir, struct dentry *dentry);
 int security_inode_symlink(struct inode *dir, struct dentry *dentry,
 			   const char *old_name);
-int security_inode_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+int security_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 int security_inode_rmdir(struct inode *dir, struct dentry *dentry);
 int security_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev);
 int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a184470cf9b5..b37a0ea55114 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -760,7 +760,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock);
  * -> cgroup_mkdir.
  */
 
-static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
 static int cgroup_populate_dir(struct cgroup *cgrp);
@@ -3846,7 +3846,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	return err;
 }
 
-static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct cgroup *c_parent = dentry->d_parent->d_fsdata;
 
diff --git a/mm/shmem.c b/mm/shmem.c
index c58594c06569..b8a8ddf069d0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1489,7 +1489,7 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	return error;
 }
 
-static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	int error;
 
diff --git a/security/capability.c b/security/capability.c
index 2984ea4f776f..ddd17892826a 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -148,7 +148,7 @@ static int cap_inode_symlink(struct inode *inode, struct dentry *dentry,
 }
 
 static int cap_inode_mkdir(struct inode *inode, struct dentry *dentry,
-			   int mask)
+			   umode_t mask)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index e2f684aeb70c..be49eb5768bc 100644
--- a/security/security.c
+++ b/security/security.c
@@ -506,7 +506,7 @@ int security_inode_symlink(struct inode *dir, struct dentry *dentry,
 	return security_ops->inode_symlink(dir, dentry, old_name);
 }
 
-int security_inode_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+int security_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	if (unlikely(IS_PRIVATE(dir)))
 		return 0;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 1126c10a5e82..ad74ad24ce2a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2618,7 +2618,7 @@ static int selinux_inode_symlink(struct inode *dir, struct dentry *dentry, const
 	return may_create(dir, dentry, SECCLASS_LNK_FILE);
 }
 
-static int selinux_inode_mkdir(struct inode *dir, struct dentry *dentry, int mask)
+static int selinux_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mask)
 {
 	return may_create(dir, dentry, SECCLASS_DIR);
 }
-- 
cgit v1.2.3


From 4acdaf27ebe2034c342f3be57ef49aed1ad885ef Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 01:42:34 -0400
Subject: switch ->create() to umode_t

vfs_create() ignores everything outside of 16bit subset of its
mode argument; switching it to umode_t is obviously equivalent
and it's the only caller of the method

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |  2 +-
 Documentation/filesystems/vfs.txt |  2 +-
 drivers/staging/pohmelfs/dir.c    |  2 +-
 fs/9p/vfs_inode.c                 |  2 +-
 fs/9p/vfs_inode_dotl.c            |  4 ++--
 fs/affs/affs.h                    |  2 +-
 fs/affs/namei.c                   |  4 ++--
 fs/afs/dir.c                      |  6 +++---
 fs/bad_inode.c                    |  2 +-
 fs/bfs/dir.c                      |  2 +-
 fs/btrfs/inode.c                  |  2 +-
 fs/ceph/dir.c                     |  2 +-
 fs/cifs/cifsfs.h                  |  2 +-
 fs/cifs/dir.c                     |  2 +-
 fs/coda/dir.c                     |  4 ++--
 fs/ecryptfs/inode.c               |  2 +-
 fs/exofs/namei.c                  |  2 +-
 fs/ext2/namei.c                   |  2 +-
 fs/ext3/namei.c                   |  2 +-
 fs/ext4/namei.c                   |  2 +-
 fs/fat/namei_msdos.c              |  2 +-
 fs/fat/namei_vfat.c               |  2 +-
 fs/fuse/dir.c                     |  2 +-
 fs/gfs2/inode.c                   |  2 +-
 fs/hfs/dir.c                      |  2 +-
 fs/hfsplus/dir.c                  |  2 +-
 fs/hostfs/hostfs_kern.c           |  2 +-
 fs/hpfs/namei.c                   |  2 +-
 fs/hugetlbfs/inode.c              |  2 +-
 fs/jffs2/dir.c                    |  6 +++---
 fs/jfs/namei.c                    |  2 +-
 fs/logfs/dir.c                    |  2 +-
 fs/minix/namei.c                  |  2 +-
 fs/namei.c                        |  2 +-
 fs/ncpfs/dir.c                    |  4 ++--
 fs/nfs/dir.c                      | 12 ++++++------
 fs/nilfs2/namei.c                 |  2 +-
 fs/ocfs2/dlmfs/dlmfs.c            |  2 +-
 fs/ocfs2/namei.c                  |  2 +-
 fs/omfs/dir.c                     |  2 +-
 fs/ramfs/inode.c                  |  2 +-
 fs/reiserfs/namei.c               |  2 +-
 fs/sysv/namei.c                   |  2 +-
 fs/ubifs/dir.c                    |  4 ++--
 fs/udf/namei.c                    |  2 +-
 fs/ufs/namei.c                    |  2 +-
 fs/xfs/xfs_iops.c                 |  2 +-
 include/linux/fs.h                |  4 ++--
 include/linux/security.h          |  6 +++---
 ipc/mqueue.c                      |  4 ++--
 mm/shmem.c                        |  2 +-
 security/capability.c             |  2 +-
 security/security.c               |  2 +-
 security/selinux/hooks.c          |  2 +-
 54 files changed, 72 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 6c7676d9c0ea..38d00c8898b9 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -37,7 +37,7 @@ d_manage:	no		no		yes (ref-walk)	maybe
 
 --------------------------- inode_operations --------------------------- 
 prototypes:
-	int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
+	int (*create) (struct inode *,struct dentry *,umode_t, struct nameidata *);
 	struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameid
 ata *);
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 0c147c79cdd8..e7b900bc6285 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -341,7 +341,7 @@ This describes how the VFS can manipulate an inode in your
 filesystem. As of kernel 2.6.22, the following members are defined:
 
 struct inode_operations {
-	int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
+	int (*create) (struct inode *,struct dentry *, umode_t, struct nameidata *);
 	struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
 	int (*unlink) (struct inode *,struct dentry *);
diff --git a/drivers/staging/pohmelfs/dir.c b/drivers/staging/pohmelfs/dir.c
index d3ad4dde991f..c33e959b6efe 100644
--- a/drivers/staging/pohmelfs/dir.c
+++ b/drivers/staging/pohmelfs/dir.c
@@ -661,7 +661,7 @@ static int pohmelfs_create_entry(struct inode *dir, struct dentry *dentry, u64 s
 /*
  * VFS create and mkdir callbacks.
  */
-static int pohmelfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int pohmelfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		struct nameidata *nd)
 {
 	return pohmelfs_create_entry(dir, dentry, 0, mode);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 3e54900f3b7e..15cd5cef4485 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -702,7 +702,7 @@ error:
  */
 
 static int
-v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
+v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		struct nameidata *nd)
 {
 	int err;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 87e46b19b21b..c4731381f0c5 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -253,7 +253,7 @@ int v9fs_open_to_dotl_flags(int flags)
  */
 
 static int
-v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
+v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 		struct nameidata *nd)
 {
 	int err = 0;
@@ -284,7 +284,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 
 	name = (char *) dentry->d_name.name;
 	P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
-			"mode:0x%x\n", name, flags, omode);
+			"mode:0x%hx\n", name, flags, omode);
 
 	dfid = v9fs_fid_lookup(dentry->d_parent);
 	if (IS_ERR(dfid)) {
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 8abcad7c935f..9cad9b4a9af7 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -156,7 +156,7 @@ extern void	affs_free_bitmap(struct super_block *sb);
 extern int	affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
 extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *);
 extern int	affs_unlink(struct inode *dir, struct dentry *dentry);
-extern int	affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *);
+extern int	affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *);
 extern int	affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 extern int	affs_rmdir(struct inode *dir, struct dentry *dentry);
 extern int	affs_link(struct dentry *olddentry, struct inode *dir,
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 7bb7660f805d..47806940aac0 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -255,13 +255,13 @@ affs_unlink(struct inode *dir, struct dentry *dentry)
 }
 
 int
-affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
+affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
 {
 	struct super_block *sb = dir->i_sb;
 	struct inode	*inode;
 	int		 error;
 
-	pr_debug("AFFS: create(%lu,\"%.*s\",0%o)\n",dir->i_ino,(int)dentry->d_name.len,
+	pr_debug("AFFS: create(%lu,\"%.*s\",0%ho)\n",dir->i_ino,(int)dentry->d_name.len,
 		 dentry->d_name.name,mode);
 
 	inode = affs_new_inode(dir);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index e6ea58abde3b..e22dc4b4a503 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -28,7 +28,7 @@ static int afs_d_delete(const struct dentry *dentry);
 static void afs_d_release(struct dentry *dentry);
 static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
 				  loff_t fpos, u64 ino, unsigned dtype);
-static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		      struct nameidata *nd);
 static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 static int afs_rmdir(struct inode *dir, struct dentry *dentry);
@@ -948,7 +948,7 @@ error:
 /*
  * create a regular file on an AFS filesystem
  */
-static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		      struct nameidata *nd)
 {
 	struct afs_file_status status;
@@ -962,7 +962,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
 
 	dvnode = AFS_FS_I(dir);
 
-	_enter("{%x:%u},{%s},%o,",
+	_enter("{%x:%u},{%s},%ho,",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
 
 	ret = -ENAMETOOLONG;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 5a2738c1f315..8087fbc35f43 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -173,7 +173,7 @@ static const struct file_operations bad_file_ops =
 };
 
 static int bad_inode_create (struct inode *dir, struct dentry *dentry,
-		int mode, struct nameidata *nd)
+		umode_t mode, struct nameidata *nd)
 {
 	return -EIO;
 }
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 9cc074019479..d12c7966db27 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -84,7 +84,7 @@ const struct file_operations bfs_dir_operations = {
 
 extern void dump_imap(const char *, struct super_block *);
 
-static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 						struct nameidata *nd)
 {
 	int err;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e30de56e6b62..19630aacb320 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4665,7 +4665,7 @@ out_unlock:
 }
 
 static int btrfs_create(struct inode *dir, struct dentry *dentry,
-			int mode, struct nameidata *nd)
+			umode_t mode, struct nameidata *nd)
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 96141ae3d8be..9848d686591c 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -699,7 +699,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
 	return err;
 }
 
-static int ceph_create(struct inode *dir, struct dentry *dentry, int mode,
+static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		       struct nameidata *nd)
 {
 	dout("create in dir %p dentry %p name '%.*s'\n",
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index add64454fd51..358724df558b 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -44,7 +44,7 @@ extern const struct address_space_operations cifs_addr_ops_smallbuf;
 /* Functions related to inodes */
 extern const struct inode_operations cifs_dir_inode_ops;
 extern struct inode *cifs_root_iget(struct super_block *);
-extern int cifs_create(struct inode *, struct dentry *, int,
+extern int cifs_create(struct inode *, struct dentry *, umode_t,
 		       struct nameidata *);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
 				  struct nameidata *);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index d7eeb9d3ed6f..2dc8be86be09 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -136,7 +136,7 @@ cifs_bp_rename_retry:
 /* Inode operations in similar order to how they appear in Linux file fs.h */
 
 int
-cifs_create(struct inode *inode, struct dentry *direntry, int mode,
+cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
 		struct nameidata *nd)
 {
 	int rc = -ENOENT;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index a74ae6fcfb7e..83d2fd8ec24b 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -30,7 +30,7 @@
 #include "coda_int.h"
 
 /* dir inode-ops */
-static int coda_create(struct inode *dir, struct dentry *new, int mode, struct nameidata *nd);
+static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, struct nameidata *nd);
 static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd);
 static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, 
 		     struct dentry *entry);
@@ -191,7 +191,7 @@ static inline void coda_dir_drop_nlink(struct inode *dir)
 }
 
 /* creation routines: create, mknod, mkdir, link, symlink */
-static int coda_create(struct inode *dir, struct dentry *de, int mode, struct nameidata *nd)
+static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, struct nameidata *nd)
 {
 	int error;
 	const char *name=de->d_name.name;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index ebf8726482b6..81e6542ab20f 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -267,7 +267,7 @@ out:
  */
 static int
 ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
-		int mode, struct nameidata *nd)
+		umode_t mode, struct nameidata *nd)
 {
 	struct inode *ecryptfs_inode;
 	int rc;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index ff1c8286cd69..58644544849d 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -59,7 +59,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
 	return d_splice_alias(inode, dentry);
 }
 
-static int exofs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int exofs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 			 struct nameidata *nd)
 {
 	struct inode *inode = exofs_new_inode(dir, mode);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index e3f3672b2020..cb759e661b15 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -94,7 +94,7 @@ struct dentry *ext2_get_parent(struct dentry *child)
  * If the create succeeds, we fill in the inode information
  * with d_instantiate(). 
  */
-static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd)
+static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd)
 {
 	struct inode *inode;
 
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 08ecb53a33ea..6047d121f537 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1698,7 +1698,7 @@ static int ext3_add_nondir(handle_t *handle,
  * If the create succeeds, we fill in the inode information
  * with d_instantiate().
  */
-static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
+static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode,
 		struct nameidata *nd)
 {
 	handle_t *handle;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index e506746724cf..77306f36a610 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1736,7 +1736,7 @@ static int ext4_add_nondir(handle_t *handle,
  * If the create succeeds, we fill in the inode information
  * with d_instantiate().
  */
-static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
+static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		       struct nameidata *nd)
 {
 	handle_t *handle;
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index d1f53cae897c..c5938c9084b9 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -264,7 +264,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
 }
 
 /***** Create a file */
-static int msdos_create(struct inode *dir, struct dentry *dentry, int mode,
+static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 			struct nameidata *nd)
 {
 	struct super_block *sb = dir->i_sb;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index fde2eda6332e..3a444b4e2368 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -781,7 +781,7 @@ error:
 	return ERR_PTR(err);
 }
 
-static int vfat_create(struct inode *dir, struct dentry *dentry, int mode,
+static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		       struct nameidata *nd)
 {
 	struct super_block *sb = dir->i_sb;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 4848a1acb3bb..603bb8a9b8ca 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -573,7 +573,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
 	return create_new_entry(fc, req, dir, entry, mode);
 }
 
-static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
+static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
 		       struct nameidata *nd)
 {
 	if (nd) {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index eecfc39c07e6..aadf792be750 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -760,7 +760,7 @@ fail:
  */
 
 static int gfs2_create(struct inode *dir, struct dentry *dentry,
-		       int mode, struct nameidata *nd)
+		       umode_t mode, struct nameidata *nd)
 {
 	int excl = 0;
 	if (nd && (nd->flags & LOOKUP_EXCL))
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 06dc161e911c..62fc14ea4b73 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -186,7 +186,7 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
  * a directory and return a corresponding inode, given the inode for
  * the directory and the name (and its length) of the new file.
  */
-static int hfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		      struct nameidata *nd)
 {
 	struct inode *inode;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index ed321f0384d7..ef6547ca4214 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -453,7 +453,7 @@ out:
 	return res;
 }
 
-static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode,
+static int hfsplus_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 			  struct nameidata *nd)
 {
 	return hfsplus_mknod(dir, dentry, mode, 0);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index d35240fbbd73..3a3a530f5bad 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -551,7 +551,7 @@ static int read_name(struct inode *ino, char *name)
 	return 0;
 }
 
-int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
+int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		  struct nameidata *nd)
 {
 	struct inode *inode;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index a2f89f2b9503..769f76c7303a 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -115,7 +115,7 @@ bail:
 	return err;
 }
 
-static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
+static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
 {
 	const unsigned char *name = dentry->d_name.name;
 	unsigned len = dentry->d_name.len;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ba269706e798..57996c3d8d0c 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -531,7 +531,7 @@ static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mod
 	return retval;
 }
 
-static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
+static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
 {
 	return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
 }
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 5dc458f19bc9..16a75e9a038d 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -22,7 +22,7 @@
 
 static int jffs2_readdir (struct file *, void *, filldir_t);
 
-static int jffs2_create (struct inode *,struct dentry *,int,
+static int jffs2_create (struct inode *,struct dentry *,umode_t,
 			 struct nameidata *);
 static struct dentry *jffs2_lookup (struct inode *,struct dentry *,
 				    struct nameidata *);
@@ -169,8 +169,8 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
 /***********************************************************************/
 
 
-static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
-			struct nameidata *nd)
+static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
+			umode_t mode, struct nameidata *nd)
 {
 	struct jffs2_raw_inode *ri;
 	struct jffs2_inode_info *f, *dir_f;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 17ea85835715..6c0b1ab8107d 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -72,7 +72,7 @@ static inline void free_ea_wmap(struct inode *inode)
  * RETURN:	Errors from subroutines
  *
  */
-static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
+static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
 		struct nameidata *nd)
 {
 	int rc = 0;
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 25c5cbf8c123..a74aa461d53c 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -501,7 +501,7 @@ static int logfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	return __logfs_create(dir, dentry, inode, NULL, 0);
 }
 
-static int logfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int logfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		struct nameidata *nd)
 {
 	struct inode *inode;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 0e7a1a22e554..c652650bf5a3 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -54,7 +54,7 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, int mode, dev_
 	return error;
 }
 
-static int minix_create(struct inode * dir, struct dentry *dentry, int mode,
+static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		struct nameidata *nd)
 {
 	return minix_mknod(dir, dentry, mode, 0);
diff --git a/fs/namei.c b/fs/namei.c
index 443c703249b3..05d1c2ceb131 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1976,7 +1976,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
 	}
 }
 
-int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
+int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		struct nameidata *nd)
 {
 	int error = may_create(dir, dentry);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index dfb51f084407..98d1b8c6fd8c 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -30,7 +30,7 @@ static void ncp_do_readdir(struct file *, void *, filldir_t,
 
 static int ncp_readdir(struct file *, void *, filldir_t);
 
-static int ncp_create(struct inode *, struct dentry *, int, struct nameidata *);
+static int ncp_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
 static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *);
 static int ncp_unlink(struct inode *, struct dentry *);
 static int ncp_mkdir(struct inode *, struct dentry *, umode_t);
@@ -979,7 +979,7 @@ out:
 	return error;
 }
 
-static int ncp_create(struct inode *dir, struct dentry *dentry, int mode,
+static int ncp_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		struct nameidata *nd)
 {
 	return ncp_create_new(dir, dentry, mode, 0, 0);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5d67d92a4248..7cdee1d4160f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -47,7 +47,7 @@ static int nfs_opendir(struct inode *, struct file *);
 static int nfs_closedir(struct inode *, struct file *);
 static int nfs_readdir(struct file *, void *, filldir_t);
 static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
-static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
+static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
 static int nfs_mkdir(struct inode *, struct dentry *, umode_t);
 static int nfs_rmdir(struct inode *, struct dentry *);
 static int nfs_unlink(struct inode *, struct dentry *);
@@ -112,7 +112,7 @@ const struct inode_operations nfs3_dir_inode_operations = {
 #ifdef CONFIG_NFS_V4
 
 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
-static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd);
+static int nfs_open_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd);
 const struct inode_operations nfs4_dir_inode_operations = {
 	.create		= nfs_open_create,
 	.lookup		= nfs_atomic_lookup,
@@ -1573,8 +1573,8 @@ no_open:
 	return nfs_lookup_revalidate(dentry, nd);
 }
 
-static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode,
-		struct nameidata *nd)
+static int nfs_open_create(struct inode *dir, struct dentry *dentry,
+		umode_t mode, struct nameidata *nd)
 {
 	struct nfs_open_context *ctx = NULL;
 	struct iattr attr;
@@ -1664,8 +1664,8 @@ out_error:
  * that the operation succeeded on the server, but an error in the
  * reply path made it appear to have failed.
  */
-static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
-		struct nameidata *nd)
+static int nfs_create(struct inode *dir, struct dentry *dentry,
+		umode_t mode, struct nameidata *nd)
 {
 	struct iattr attr;
 	int error;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index e5e7311f1b92..fcd86c38f968 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -84,7 +84,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
  * If the create succeeds, we fill in the inode information
  * with d_instantiate().
  */
-static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 			struct nameidata *nd)
 {
 	struct inode *inode;
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 77c8d8069461..ccb33289c29a 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -536,7 +536,7 @@ bail:
 
 static int dlmfs_create(struct inode *dir,
 			struct dentry *dentry,
-			int mode,
+			umode_t mode,
 			struct nameidata *nd)
 {
 	int status = 0;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index c779f8bfc8a6..46f46ffe77c5 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -617,7 +617,7 @@ static int ocfs2_mkdir(struct inode *dir,
 
 static int ocfs2_create(struct inode *dir,
 			struct dentry *dentry,
-			int mode,
+			umode_t mode,
 			struct nameidata *nd)
 {
 	int ret;
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 667dc7ff28c0..d82599f49f6d 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -284,7 +284,7 @@ static int omfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	return omfs_add_node(dir, dentry, mode | S_IFDIR);
 }
 
-static int omfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		struct nameidata *nd)
 {
 	return omfs_add_node(dir, dentry, mode | S_IFREG);
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 61972bee0561..c2ed2a36094e 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -114,7 +114,7 @@ static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 	return retval;
 }
 
-static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
+static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
 {
 	return ramfs_mknod(dir, dentry, mode | S_IFREG, 0);
 }
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 763239a7e8dd..46db3b9fa7cf 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -572,7 +572,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
 	return 0;
 }
 
-static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 			   struct nameidata *nd)
 {
 	int retval;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 3368425a4ce2..d306eebeb6c1 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -80,7 +80,7 @@ static int sysv_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_
 	return err;
 }
 
-static int sysv_create(struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd)
+static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd)
 {
 	return sysv_mknod(dir, dentry, mode, 0);
 }
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index f5102f368160..f332878ce4de 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -253,7 +253,7 @@ out:
 	return ERR_PTR(err);
 }
 
-static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 			struct nameidata *nd)
 {
 	struct inode *inode;
@@ -268,7 +268,7 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode,
 	 * parent directory inode.
 	 */
 
-	dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
+	dbg_gen("dent '%.*s', mode %#hx in dir ino %lu",
 		dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
 
 	err = ubifs_budget_space(c, &req);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 7f8ee32842be..135a4ca01038 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -552,7 +552,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
 	return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL);
 }
 
-static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
+static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		      struct nameidata *nd)
 {
 	struct udf_fileident_bh fibh;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index fa743aaa327c..ba2a9d6c0314 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -70,7 +70,7 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
  * If the create succeeds, we fill in the inode information
  * with d_instantiate(). 
  */
-static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
+static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode,
 		struct nameidata *nd)
 {
 	struct inode *inode;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 99b324d43c98..0efa4e51bebf 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -231,7 +231,7 @@ STATIC int
 xfs_vn_create(
 	struct inode	*dir,
 	struct dentry	*dentry,
-	int		mode,
+	umode_t		mode,
 	struct nameidata *nd)
 {
 	return xfs_vn_mknod(dir, dentry, mode, 0);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3f7bd8b12e37..e40321a6e239 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1516,7 +1516,7 @@ extern void unlock_super(struct super_block *);
 /*
  * VFS helper functions..
  */
-extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *);
+extern int vfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
 extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
 extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t);
 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
@@ -1619,7 +1619,7 @@ struct inode_operations {
 	int (*readlink) (struct dentry *, char __user *,int);
 	void (*put_link) (struct dentry *, struct nameidata *, void *);
 
-	int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
+	int (*create) (struct inode *,struct dentry *,umode_t,struct nameidata *);
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
 	int (*unlink) (struct inode *,struct dentry *);
 	int (*symlink) (struct inode *,struct dentry *,const char *);
diff --git a/include/linux/security.h b/include/linux/security.h
index 16cbc58cb13b..8fc22373db34 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1447,7 +1447,7 @@ struct security_operations {
 				    const struct qstr *qstr, char **name,
 				    void **value, size_t *len);
 	int (*inode_create) (struct inode *dir,
-			     struct dentry *dentry, int mode);
+			     struct dentry *dentry, umode_t mode);
 	int (*inode_link) (struct dentry *old_dentry,
 			   struct inode *dir, struct dentry *new_dentry);
 	int (*inode_unlink) (struct inode *dir, struct dentry *dentry);
@@ -1716,7 +1716,7 @@ int security_inode_init_security(struct inode *inode, struct inode *dir,
 int security_old_inode_init_security(struct inode *inode, struct inode *dir,
 				     const struct qstr *qstr, char **name,
 				     void **value, size_t *len);
-int security_inode_create(struct inode *dir, struct dentry *dentry, int mode);
+int security_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode);
 int security_inode_link(struct dentry *old_dentry, struct inode *dir,
 			 struct dentry *new_dentry);
 int security_inode_unlink(struct inode *dir, struct dentry *dentry);
@@ -2061,7 +2061,7 @@ static inline int security_old_inode_init_security(struct inode *inode,
 
 static inline int security_inode_create(struct inode *dir,
 					 struct dentry *dentry,
-					 int mode)
+					 umode_t mode)
 {
 	return 0;
 }
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 4e0be364aa36..57ed704d2ca7 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -295,7 +295,7 @@ static void mqueue_evict_inode(struct inode *inode)
 }
 
 static int mqueue_create(struct inode *dir, struct dentry *dentry,
-				int mode, struct nameidata *nd)
+				umode_t mode, struct nameidata *nd)
 {
 	struct inode *inode;
 	struct mq_attr *attr = dentry->d_fsdata;
@@ -610,7 +610,7 @@ static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr)
  * Invoked when creating a new queue via sys_mq_open
  */
 static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
-			struct dentry *dentry, int oflag, mode_t mode,
+			struct dentry *dentry, int oflag, umode_t mode,
 			struct mq_attr *attr)
 {
 	const struct cred *cred = current_cred();
diff --git a/mm/shmem.c b/mm/shmem.c
index b8a8ddf069d0..542aad28928d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1499,7 +1499,7 @@ static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	return 0;
 }
 
-static int shmem_create(struct inode *dir, struct dentry *dentry, int mode,
+static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		struct nameidata *nd)
 {
 	return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
diff --git a/security/capability.c b/security/capability.c
index ddd17892826a..ff18d0ca30bf 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -125,7 +125,7 @@ static int cap_inode_init_security(struct inode *inode, struct inode *dir,
 }
 
 static int cap_inode_create(struct inode *inode, struct dentry *dentry,
-			    int mask)
+			    umode_t mask)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index be49eb5768bc..2420eed87639 100644
--- a/security/security.c
+++ b/security/security.c
@@ -475,7 +475,7 @@ int security_path_chroot(struct path *path)
 }
 #endif
 
-int security_inode_create(struct inode *dir, struct dentry *dentry, int mode)
+int security_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	if (unlikely(IS_PRIVATE(dir)))
 		return 0;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index ad74ad24ce2a..a1eba2b9ea5c 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2598,7 +2598,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
 	return 0;
 }
 
-static int selinux_inode_create(struct inode *dir, struct dentry *dentry, int mask)
+static int selinux_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	return may_create(dir, dentry, SECCLASS_FILE);
 }
-- 
cgit v1.2.3


From 1a67aafb5f72a436ca044293309fa7e6351d6a35 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 01:52:52 -0400
Subject: switch ->mknod() to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking | 2 +-
 Documentation/filesystems/vfs.txt | 2 +-
 fs/9p/vfs_inode.c                 | 2 +-
 fs/9p/vfs_inode_dotl.c            | 6 +++---
 fs/bad_inode.c                    | 2 +-
 fs/btrfs/inode.c                  | 2 +-
 fs/ceph/dir.c                     | 4 ++--
 fs/cifs/cifsfs.h                  | 2 +-
 fs/cifs/dir.c                     | 2 +-
 fs/ecryptfs/inode.c               | 2 +-
 fs/exofs/namei.c                  | 2 +-
 fs/ext2/namei.c                   | 2 +-
 fs/ext3/namei.c                   | 2 +-
 fs/ext4/namei.c                   | 2 +-
 fs/fuse/dir.c                     | 2 +-
 fs/gfs2/inode.c                   | 2 +-
 fs/hfsplus/dir.c                  | 2 +-
 fs/hostfs/hostfs_kern.c           | 2 +-
 fs/hpfs/namei.c                   | 2 +-
 fs/hugetlbfs/inode.c              | 2 +-
 fs/jffs2/dir.c                    | 4 ++--
 fs/jfs/namei.c                    | 2 +-
 fs/logfs/dir.c                    | 2 +-
 fs/minix/namei.c                  | 2 +-
 fs/namei.c                        | 2 +-
 fs/ncpfs/dir.c                    | 6 +++---
 fs/nfs/dir.c                      | 4 ++--
 fs/nilfs2/namei.c                 | 2 +-
 fs/ocfs2/namei.c                  | 2 +-
 fs/ramfs/inode.c                  | 2 +-
 fs/reiserfs/namei.c               | 2 +-
 fs/sysv/namei.c                   | 2 +-
 fs/ubifs/dir.c                    | 2 +-
 fs/udf/namei.c                    | 2 +-
 fs/ufs/namei.c                    | 2 +-
 fs/xfs/xfs_iops.c                 | 2 +-
 include/linux/fs.h                | 4 ++--
 include/linux/security.h          | 4 ++--
 mm/shmem.c                        | 2 +-
 security/capability.c             | 2 +-
 security/security.c               | 2 +-
 security/selinux/hooks.c          | 2 +-
 42 files changed, 51 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 38d00c8898b9..9e9f30b9f46b 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -45,7 +45,7 @@ ata *);
 	int (*symlink) (struct inode *,struct dentry *,const char *);
 	int (*mkdir) (struct inode *,struct dentry *,umode_t);
 	int (*rmdir) (struct inode *,struct dentry *);
-	int (*mknod) (struct inode *,struct dentry *,int,dev_t);
+	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *);
 	int (*readlink) (struct dentry *, char __user *,int);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index e7b900bc6285..4b9f0d092a79 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -348,7 +348,7 @@ struct inode_operations {
 	int (*symlink) (struct inode *,struct dentry *,const char *);
 	int (*mkdir) (struct inode *,struct dentry *,umode_t);
 	int (*rmdir) (struct inode *,struct dentry *);
-	int (*mknod) (struct inode *,struct dentry *,int,dev_t);
+	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *);
 	int (*readlink) (struct dentry *, char __user *,int);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 15cd5cef4485..f54a26859fcc 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1397,7 +1397,7 @@ clunk_fid:
  */
 
 static int
-v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
 	int retval;
 	char *name;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index c4731381f0c5..259f0cd248c8 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -48,7 +48,7 @@
 #include "acl.h"
 
 static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 		    dev_t rdev);
 
 /**
@@ -799,7 +799,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
  *
  */
 static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 		dev_t rdev)
 {
 	int err;
@@ -814,7 +814,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 	struct posix_acl *dacl = NULL, *pacl = NULL;
 
 	P9_DPRINTK(P9_DEBUG_VFS,
-		" %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+		" %lu,%s mode: %hx MAJOR: %u MINOR: %u\n", dir->i_ino,
 		dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));
 
 	if (!new_valid_dev(rdev))
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 8087fbc35f43..22e9a78872ff 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -213,7 +213,7 @@ static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry)
 }
 
 static int bad_inode_mknod (struct inode *dir, struct dentry *dentry,
-			int mode, dev_t rdev)
+			umode_t mode, dev_t rdev)
 {
 	return -EIO;
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 19630aacb320..0060875d6af6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4596,7 +4596,7 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
 }
 
 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
-			int mode, dev_t rdev)
+			umode_t mode, dev_t rdev)
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 9848d686591c..f011ed295bf7 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -666,7 +666,7 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
 }
 
 static int ceph_mknod(struct inode *dir, struct dentry *dentry,
-		      int mode, dev_t rdev)
+		      umode_t mode, dev_t rdev)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -676,7 +676,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
 	if (ceph_snap(dir) != CEPH_NOSNAP)
 		return -EROFS;
 
-	dout("mknod in dir %p dentry %p mode 0%o rdev %d\n",
+	dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
 	     dir, dentry, mode, rdev);
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
 	if (IS_ERR(req)) {
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 358724df558b..fe5ecf1b422a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -50,7 +50,7 @@ extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
 				  struct nameidata *);
 extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
 extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *);
-extern int cifs_mknod(struct inode *, struct dentry *, int, dev_t);
+extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 extern int cifs_mkdir(struct inode *, struct dentry *, umode_t);
 extern int cifs_rmdir(struct inode *, struct dentry *);
 extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 2dc8be86be09..df8fecb5b993 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -355,7 +355,7 @@ cifs_create_out:
 	return rc;
 }
 
-int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
+int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
 		dev_t device_number)
 {
 	int rc = -EPERM;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 81e6542ab20f..be20cbfca7e9 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -607,7 +607,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
 }
 
 static int
-ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+ecryptfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 {
 	int rc;
 	struct dentry *lower_dentry;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 58644544849d..9dbf0c301030 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -74,7 +74,7 @@ static int exofs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	return err;
 }
 
-static int exofs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+static int exofs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
 		       dev_t rdev)
 {
 	struct inode *inode;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index cb759e661b15..080419814bae 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -119,7 +119,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
 	return ext2_add_nondir(dentry, inode);
 }
 
-static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev)
+static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
 	struct inode * inode;
 	int err;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 6047d121f537..4f35b2f315d4 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1732,7 +1732,7 @@ retry:
 }
 
 static int ext3_mknod (struct inode * dir, struct dentry *dentry,
-			int mode, dev_t rdev)
+			umode_t mode, dev_t rdev)
 {
 	handle_t *handle;
 	struct inode *inode;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 77306f36a610..86edc45b52a4 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1770,7 +1770,7 @@ retry:
 }
 
 static int ext4_mknod(struct inode *dir, struct dentry *dentry,
-		      int mode, dev_t rdev)
+		      umode_t mode, dev_t rdev)
 {
 	handle_t *handle;
 	struct inode *inode;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 603bb8a9b8ca..b4c09c5ed8dc 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -547,7 +547,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
 	return err;
 }
 
-static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
+static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
 		      dev_t rdev)
 {
 	struct fuse_mknod_in inarg;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index aadf792be750..ea4edf510559 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1143,7 +1143,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  *
  */
 
-static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
+static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
 		      dev_t dev)
 {
 	return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0);
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index ef6547ca4214..88e155f895c6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -424,7 +424,7 @@ out:
 }
 
 static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
-			 int mode, dev_t rdev)
+			 umode_t mode, dev_t rdev)
 {
 	struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
 	struct inode *inode;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 3a3a530f5bad..a7340e710a90 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -700,7 +700,7 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
 	return err;
 }
 
-int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 {
 	struct inode *inode;
 	char *name;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 769f76c7303a..30dd7b10b507 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -201,7 +201,7 @@ bail:
 	return err;
 }
 
-static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+static int hpfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
 	const unsigned char *name = dentry->d_name.name;
 	unsigned len = dentry->d_name.len;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 57996c3d8d0c..698485ce5f3f 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -500,7 +500,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
  * File creation. Allocate an inode, and we're done..
  */
 static int hugetlbfs_mknod(struct inode *dir,
-			struct dentry *dentry, int mode, dev_t dev)
+			struct dentry *dentry, umode_t mode, dev_t dev)
 {
 	struct inode *inode;
 	int error = -ENOSPC;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 16a75e9a038d..973ac5822bd7 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -31,7 +31,7 @@ static int jffs2_unlink (struct inode *,struct dentry *);
 static int jffs2_symlink (struct inode *,struct dentry *,const char *);
 static int jffs2_mkdir (struct inode *,struct dentry *,umode_t);
 static int jffs2_rmdir (struct inode *,struct dentry *);
-static int jffs2_mknod (struct inode *,struct dentry *,int,dev_t);
+static int jffs2_mknod (struct inode *,struct dentry *,umode_t,dev_t);
 static int jffs2_rename (struct inode *, struct dentry *,
 			 struct inode *, struct dentry *);
 
@@ -618,7 +618,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
 	return ret;
 }
 
-static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, dev_t rdev)
+static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
 	struct jffs2_inode_info *f, *dir_f;
 	struct jffs2_sb_info *c;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 6c0b1ab8107d..5f7c160ea64f 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1353,7 +1353,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
  * FUNCTION:	Create a special file (device)
  */
 static int jfs_mknod(struct inode *dir, struct dentry *dentry,
-		int mode, dev_t rdev)
+		umode_t mode, dev_t rdev)
 {
 	struct jfs_inode_info *jfs_ip;
 	struct btstack btstack;
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index a74aa461d53c..501043e8966c 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -517,7 +517,7 @@ static int logfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	return __logfs_create(dir, dentry, inode, NULL, 0);
 }
 
-static int logfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+static int logfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
 		dev_t rdev)
 {
 	struct inode *inode;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index c652650bf5a3..2f76e38c2065 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -36,7 +36,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st
 	return NULL;
 }
 
-static int minix_mknod(struct inode * dir, struct dentry *dentry, int mode, dev_t rdev)
+static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
 	int error;
 	struct inode *inode;
diff --git a/fs/namei.c b/fs/namei.c
index 05d1c2ceb131..85bb44f222c9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2444,7 +2444,7 @@ struct dentry *user_path_create(int dfd, const char __user *pathname, struct pat
 }
 EXPORT_SYMBOL(user_path_create);
 
-int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 {
 	int error = may_create(dir, dentry);
 
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 98d1b8c6fd8c..a2d50f803a17 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -38,7 +38,7 @@ static int ncp_rmdir(struct inode *, struct dentry *);
 static int ncp_rename(struct inode *, struct dentry *,
 	  	      struct inode *, struct dentry *);
 static int ncp_mknod(struct inode * dir, struct dentry *dentry,
-		     int mode, dev_t rdev);
+		     umode_t mode, dev_t rdev);
 #if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
 extern int ncp_symlink(struct inode *, struct dentry *, const char *);
 #else
@@ -1201,12 +1201,12 @@ out:
 }
 
 static int ncp_mknod(struct inode * dir, struct dentry *dentry,
-		     int mode, dev_t rdev)
+		     umode_t mode, dev_t rdev)
 {
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 	if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) {
-		DPRINTK(KERN_DEBUG "ncp_mknod: mode = 0%o\n", mode);
+		DPRINTK(KERN_DEBUG "ncp_mknod: mode = 0%ho\n", mode);
 		return ncp_create_new(dir, dentry, mode, rdev, 0);
 	}
 	return -EPERM; /* Strange, but true */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7cdee1d4160f..fd9a872fada0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -53,7 +53,7 @@ static int nfs_rmdir(struct inode *, struct dentry *);
 static int nfs_unlink(struct inode *, struct dentry *);
 static int nfs_symlink(struct inode *, struct dentry *, const char *);
 static int nfs_link(struct dentry *, struct inode *, struct dentry *);
-static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
+static int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 static int nfs_rename(struct inode *, struct dentry *,
 		      struct inode *, struct dentry *);
 static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
@@ -1693,7 +1693,7 @@ out_err:
  * See comments for nfs_proc_create regarding failed operations.
  */
 static int
-nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
 	struct iattr attr;
 	int status;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index fcd86c38f968..1cd3f624dffc 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -112,7 +112,7 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 }
 
 static int
-nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
 	struct inode *inode;
 	struct nilfs_transaction_info ti;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 46f46ffe77c5..11c62e20054c 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -207,7 +207,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
 
 static int ocfs2_mknod(struct inode *dir,
 		       struct dentry *dentry,
-		       int mode,
+		       umode_t mode,
 		       dev_t dev)
 {
 	int status = 0;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index c2ed2a36094e..145680e9d581 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -92,7 +92,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
  */
 /* SMP-safe */
 static int
-ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+ramfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 {
 	struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev);
 	int error = -ENOSPC;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 46db3b9fa7cf..a8614bd7cc8d 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -643,7 +643,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
 	return retval;
 }
 
-static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
 			  dev_t rdev)
 {
 	int retval;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index d306eebeb6c1..b217797e621b 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -61,7 +61,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st
 	return NULL;
 }
 
-static int sysv_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_t rdev)
+static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode, dev_t rdev)
 {
 	struct inode * inode;
 	int err;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index f332878ce4de..d9aec2fc90a6 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -769,7 +769,7 @@ out_budg:
 }
 
 static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
-		       int mode, dev_t rdev)
+		       umode_t mode, dev_t rdev)
 {
 	struct inode *inode;
 	struct ubifs_inode *ui;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 135a4ca01038..08bf46edf9c4 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -596,7 +596,7 @@ static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	return 0;
 }
 
-static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
+static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
 		     dev_t rdev)
 {
 	struct inode *inode;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index ba2a9d6c0314..38cac199edff 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -94,7 +94,7 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode,
 	return err;
 }
 
-static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev)
+static int ufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
 	struct inode *inode;
 	int err;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 0efa4e51bebf..c2cf9bb60863 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -168,7 +168,7 @@ STATIC int
 xfs_vn_mknod(
 	struct inode	*dir,
 	struct dentry	*dentry,
-	int		mode,
+	umode_t		mode,
 	dev_t		rdev)
 {
 	struct inode	*inode;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e40321a6e239..b89eef1d1752 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1518,7 +1518,7 @@ extern void unlock_super(struct super_block *);
  */
 extern int vfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
 extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
-extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t);
+extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
 extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
 extern int vfs_rmdir(struct inode *, struct dentry *);
@@ -1625,7 +1625,7 @@ struct inode_operations {
 	int (*symlink) (struct inode *,struct dentry *,const char *);
 	int (*mkdir) (struct inode *,struct dentry *,umode_t);
 	int (*rmdir) (struct inode *,struct dentry *);
-	int (*mknod) (struct inode *,struct dentry *,int,dev_t);
+	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *);
 	void (*truncate) (struct inode *);
diff --git a/include/linux/security.h b/include/linux/security.h
index 8fc22373db34..0e5aeb86dfc4 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1456,7 +1456,7 @@ struct security_operations {
 	int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, umode_t mode);
 	int (*inode_rmdir) (struct inode *dir, struct dentry *dentry);
 	int (*inode_mknod) (struct inode *dir, struct dentry *dentry,
-			    int mode, dev_t dev);
+			    umode_t mode, dev_t dev);
 	int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry,
 			     struct inode *new_dir, struct dentry *new_dentry);
 	int (*inode_readlink) (struct dentry *dentry);
@@ -1724,7 +1724,7 @@ int security_inode_symlink(struct inode *dir, struct dentry *dentry,
 			   const char *old_name);
 int security_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 int security_inode_rmdir(struct inode *dir, struct dentry *dentry);
-int security_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev);
+int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev);
 int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
 			  struct inode *new_dir, struct dentry *new_dentry);
 int security_inode_readlink(struct dentry *dentry);
diff --git a/mm/shmem.c b/mm/shmem.c
index 542aad28928d..4000f370948c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1456,7 +1456,7 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
  * File creation. Allocate an inode, and we're done..
  */
 static int
-shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 {
 	struct inode *inode;
 	int error = -ENOSPC;
diff --git a/security/capability.c b/security/capability.c
index ff18d0ca30bf..9def035cd572 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -159,7 +159,7 @@ static int cap_inode_rmdir(struct inode *inode, struct dentry *dentry)
 }
 
 static int cap_inode_mknod(struct inode *inode, struct dentry *dentry,
-			   int mode, dev_t dev)
+			   umode_t mode, dev_t dev)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index 2420eed87639..8cc0f0caa640 100644
--- a/security/security.c
+++ b/security/security.c
@@ -521,7 +521,7 @@ int security_inode_rmdir(struct inode *dir, struct dentry *dentry)
 	return security_ops->inode_rmdir(dir, dentry);
 }
 
-int security_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 {
 	if (unlikely(IS_PRIVATE(dir)))
 		return 0;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index a1eba2b9ea5c..8878370c13bf 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2628,7 +2628,7 @@ static int selinux_inode_rmdir(struct inode *dir, struct dentry *dentry)
 	return may_link(dir, dentry, MAY_RMDIR);
 }
 
-static int selinux_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+static int selinux_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 {
 	return may_create(dir, dentry, inode_mode_to_security_class(mode));
 }
-- 
cgit v1.2.3


From 2c9ede55ecec58099b72e4bb8eab719f32f72c31 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Jul 2011 20:24:48 -0400
Subject: switch device_get_devnode() and ->devnode() to umode_t *

both callers of device_get_devnode() are only interested in lower 16bits
and nobody tries to return anything wider than 16bit anyway.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/kernel/cpuid.c                    | 2 +-
 arch/x86/kernel/msr.c                      | 2 +-
 block/bsg.c                                | 2 +-
 block/genhd.c                              | 2 +-
 drivers/base/core.c                        | 4 ++--
 drivers/base/devtmpfs.c                    | 2 +-
 drivers/block/aoe/aoechr.c                 | 2 +-
 drivers/block/pktcdvd.c                    | 2 +-
 drivers/char/mem.c                         | 4 ++--
 drivers/char/misc.c                        | 2 +-
 drivers/char/raw.c                         | 2 +-
 drivers/char/tile-srom.c                   | 2 +-
 drivers/gpu/drm/drm_sysfs.c                | 2 +-
 drivers/hid/usbhid/hiddev.c                | 2 +-
 drivers/infiniband/core/cm.c               | 2 +-
 drivers/infiniband/core/user_mad.c         | 2 +-
 drivers/infiniband/core/uverbs_main.c      | 2 +-
 drivers/input/input.c                      | 2 +-
 drivers/media/dvb/ddbridge/ddbridge-core.c | 2 +-
 drivers/media/dvb/dvb-core/dvbdev.c        | 2 +-
 drivers/media/rc/rc-main.c                 | 2 +-
 drivers/tty/tty_io.c                       | 2 +-
 drivers/usb/class/usblp.c                  | 2 +-
 drivers/usb/core/file.c                    | 2 +-
 drivers/usb/core/usb.c                     | 2 +-
 drivers/usb/misc/iowarrior.c               | 2 +-
 drivers/usb/misc/legousbtower.c            | 2 +-
 include/linux/device.h                     | 6 +++---
 include/linux/genhd.h                      | 2 +-
 include/linux/usb.h                        | 2 +-
 sound/sound_core.c                         | 2 +-
 31 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 212a6a42527c..a524353d93f2 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -177,7 +177,7 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier =
 	.notifier_call = cpuid_class_cpu_callback,
 };
 
-static char *cpuid_devnode(struct device *dev, mode_t *mode)
+static char *cpuid_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt));
 }
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 12fcbe2c143e..96356762a51d 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -236,7 +236,7 @@ static struct notifier_block __refdata msr_class_cpu_notifier = {
 	.notifier_call = msr_class_cpu_callback,
 };
 
-static char *msr_devnode(struct device *dev, mode_t *mode)
+static char *msr_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
 }
diff --git a/block/bsg.c b/block/bsg.c
index 702f1316bb8f..9651ec7b87c2 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -1070,7 +1070,7 @@ EXPORT_SYMBOL_GPL(bsg_register_queue);
 
 static struct cdev bsg_cdev;
 
-static char *bsg_devnode(struct device *dev, mode_t *mode)
+static char *bsg_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev));
 }
diff --git a/block/genhd.c b/block/genhd.c
index 02e9fca80825..80578f3176ef 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1109,7 +1109,7 @@ struct class block_class = {
 	.name		= "block",
 };
 
-static char *block_devnode(struct device *dev, mode_t *mode)
+static char *block_devnode(struct device *dev, umode_t *mode)
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 919daa7cd5b1..1dfa1d616fa5 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -198,7 +198,7 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj,
 	if (MAJOR(dev->devt)) {
 		const char *tmp;
 		const char *name;
-		mode_t mode = 0;
+		umode_t mode = 0;
 
 		add_uevent_var(env, "MAJOR=%u", MAJOR(dev->devt));
 		add_uevent_var(env, "MINOR=%u", MINOR(dev->devt));
@@ -1182,7 +1182,7 @@ static struct device *next_device(struct klist_iter *i)
  * freed by the caller.
  */
 const char *device_get_devnode(struct device *dev,
-			       mode_t *mode, const char **tmp)
+			       umode_t *mode, const char **tmp)
 {
 	char *s;
 
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index a4760e095ff5..3990f682e690 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -40,7 +40,7 @@ static struct req {
 	struct completion done;
 	int err;
 	const char *name;
-	mode_t mode;	/* 0 => delete */
+	umode_t mode;	/* 0 => delete */
 	struct device *dev;
 } *requests;
 
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index 5f8e39c43ae5..e86d2062a164 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -270,7 +270,7 @@ static const struct file_operations aoe_fops = {
 	.llseek = noop_llseek,
 };
 
-static char *aoe_devnode(struct device *dev, mode_t *mode)
+static char *aoe_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "etherd/%s", dev_name(dev));
 }
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index a63b0a2b7805..d59edeabd93f 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2817,7 +2817,7 @@ static const struct block_device_operations pktcdvd_ops = {
 	.check_events =		pkt_check_events,
 };
 
-static char *pktcdvd_devnode(struct gendisk *gd, mode_t *mode)
+static char *pktcdvd_devnode(struct gendisk *gd, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "pktcdvd/%s", gd->disk_name);
 }
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 145179033716..d6e9d081c8b1 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -847,7 +847,7 @@ static const struct file_operations kmsg_fops = {
 
 static const struct memdev {
 	const char *name;
-	mode_t mode;
+	umode_t mode;
 	const struct file_operations *fops;
 	struct backing_dev_info *dev_info;
 } devlist[] = {
@@ -901,7 +901,7 @@ static const struct file_operations memory_fops = {
 	.llseek = noop_llseek,
 };
 
-static char *mem_devnode(struct device *dev, mode_t *mode)
+static char *mem_devnode(struct device *dev, umode_t *mode)
 {
 	if (mode && devlist[MINOR(dev->devt)].mode)
 		*mode = devlist[MINOR(dev->devt)].mode;
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index 778273c93242..522136d40843 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -258,7 +258,7 @@ int misc_deregister(struct miscdevice *misc)
 EXPORT_SYMBOL(misc_register);
 EXPORT_SYMBOL(misc_deregister);
 
-static char *misc_devnode(struct device *dev, mode_t *mode)
+static char *misc_devnode(struct device *dev, umode_t *mode)
 {
 	struct miscdevice *c = dev_get_drvdata(dev);
 
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index b6de2c047145..54a3a6d09819 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -308,7 +308,7 @@ static const struct file_operations raw_ctl_fops = {
 
 static struct cdev raw_cdev;
 
-static char *raw_devnode(struct device *dev, mode_t *mode)
+static char *raw_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "raw/%s", dev_name(dev));
 }
diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c
index cf3ee008dca2..4dc019408fac 100644
--- a/drivers/char/tile-srom.c
+++ b/drivers/char/tile-srom.c
@@ -329,7 +329,7 @@ static struct device_attribute srom_dev_attrs[] = {
 	__ATTR_NULL
 };
 
-static char *srom_devnode(struct device *dev, mode_t *mode)
+static char *srom_devnode(struct device *dev, umode_t *mode)
 {
 	*mode = S_IRUGO | S_IWUSR;
 	return kasprintf(GFP_KERNEL, "srom/%s", dev_name(dev));
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 0f9ef9bf6730..62c3675045ac 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -72,7 +72,7 @@ static int drm_class_resume(struct device *dev)
 	return 0;
 }
 
-static char *drm_devnode(struct device *dev, mode_t *mode)
+static char *drm_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "dri/%s", dev_name(dev));
 }
diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index 4ef02b269a71..7c297d305d5d 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -859,7 +859,7 @@ static const struct file_operations hiddev_fops = {
 	.llseek		= noop_llseek,
 };
 
-static char *hiddev_devnode(struct device *dev, mode_t *mode)
+static char *hiddev_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
 }
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 8b72f39202fb..c889aaef3416 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3659,7 +3659,7 @@ static struct kobj_type cm_port_obj_type = {
 	.release = cm_release_port_obj
 };
 
-static char *cm_devnode(struct device *dev, mode_t *mode)
+static char *cm_devnode(struct device *dev, umode_t *mode)
 {
 	if (mode)
 		*mode = 0666;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 07db22997e97..f0d588f8859e 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1175,7 +1175,7 @@ static void ib_umad_remove_one(struct ib_device *device)
 	kref_put(&umad_dev->ref, ib_umad_release_dev);
 }
 
-static char *umad_devnode(struct device *dev, mode_t *mode)
+static char *umad_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
 }
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 879636746373..604556d73d25 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -846,7 +846,7 @@ static void ib_uverbs_remove_one(struct ib_device *device)
 	kfree(uverbs_dev);
 }
 
-static char *uverbs_devnode(struct device *dev, mode_t *mode)
+static char *uverbs_devnode(struct device *dev, umode_t *mode)
 {
 	if (mode)
 		*mode = 0666;
diff --git a/drivers/input/input.c b/drivers/input/input.c
index da38d97a51b1..1f78c957a75a 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1624,7 +1624,7 @@ static struct device_type input_dev_type = {
 #endif
 };
 
-static char *input_devnode(struct device *dev, mode_t *mode)
+static char *input_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "input/%s", dev_name(dev));
 }
diff --git a/drivers/media/dvb/ddbridge/ddbridge-core.c b/drivers/media/dvb/ddbridge/ddbridge-core.c
index ba9a643b9c6a..d1e91bc80e78 100644
--- a/drivers/media/dvb/ddbridge/ddbridge-core.c
+++ b/drivers/media/dvb/ddbridge/ddbridge-core.c
@@ -1480,7 +1480,7 @@ static const struct file_operations ddb_fops = {
 	.open           = ddb_open,
 };
 
-static char *ddb_devnode(struct device *device, mode_t *mode)
+static char *ddb_devnode(struct device *device, umode_t *mode)
 {
 	struct ddb *dev = dev_get_drvdata(device);
 
diff --git a/drivers/media/dvb/dvb-core/dvbdev.c b/drivers/media/dvb/dvb-core/dvbdev.c
index f73287775953..00a67326c193 100644
--- a/drivers/media/dvb/dvb-core/dvbdev.c
+++ b/drivers/media/dvb/dvb-core/dvbdev.c
@@ -450,7 +450,7 @@ static int dvb_uevent(struct device *dev, struct kobj_uevent_env *env)
 	return 0;
 }
 
-static char *dvb_devnode(struct device *dev, mode_t *mode)
+static char *dvb_devnode(struct device *dev, umode_t *mode)
 {
 	struct dvb_device *dvbdev = dev_get_drvdata(dev);
 
diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index 29f900065d8a..f5db8b949bc3 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -715,7 +715,7 @@ static void ir_close(struct input_dev *idev)
 }
 
 /* class for /sys/class/rc */
-static char *ir_devnode(struct device *dev, mode_t *mode)
+static char *ir_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "rc/%s", dev_name(dev));
 }
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 05085beb83db..3fdebd306b94 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -3267,7 +3267,7 @@ void __init console_init(void)
 	}
 }
 
-static char *tty_devnode(struct device *dev, mode_t *mode)
+static char *tty_devnode(struct device *dev, umode_t *mode)
 {
 	if (!mode)
 		return NULL;
diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c
index cb3a93243a05..bc5089f76cec 100644
--- a/drivers/usb/class/usblp.c
+++ b/drivers/usb/class/usblp.c
@@ -1045,7 +1045,7 @@ static const struct file_operations usblp_fops = {
 	.llseek =	noop_llseek,
 };
 
-static char *usblp_devnode(struct device *dev, mode_t *mode)
+static char *usblp_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
 }
diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c
index 99458c843d60..d95760de9e8b 100644
--- a/drivers/usb/core/file.c
+++ b/drivers/usb/core/file.c
@@ -66,7 +66,7 @@ static struct usb_class {
 	struct class *class;
 } *usb_class;
 
-static char *usb_devnode(struct device *dev, mode_t *mode)
+static char *usb_devnode(struct device *dev, umode_t *mode)
 {
 	struct usb_class_driver *drv;
 
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 73cd90012ec5..1382c90d0834 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -326,7 +326,7 @@ static const struct dev_pm_ops usb_device_pm_ops = {
 #endif	/* CONFIG_PM */
 
 
-static char *usb_devnode(struct device *dev, mode_t *mode)
+static char *usb_devnode(struct device *dev, umode_t *mode)
 {
 	struct usb_device *usb_dev;
 
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index 81457904d6ba..5bd4b0526de5 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -734,7 +734,7 @@ static const struct file_operations iowarrior_fops = {
 	.llseek = noop_llseek,
 };
 
-static char *iowarrior_devnode(struct device *dev, mode_t *mode)
+static char *iowarrior_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
 }
diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c
index a989356f693e..94f6566b99f8 100644
--- a/drivers/usb/misc/legousbtower.c
+++ b/drivers/usb/misc/legousbtower.c
@@ -269,7 +269,7 @@ static const struct file_operations tower_fops = {
 	.llseek =	tower_llseek,
 };
 
-static char *legousbtower_devnode(struct device *dev, mode_t *mode)
+static char *legousbtower_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index 3136ede5a1e1..2fe0005543ed 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -294,7 +294,7 @@ struct class {
 	struct kobject			*dev_kobj;
 
 	int (*dev_uevent)(struct device *dev, struct kobj_uevent_env *env);
-	char *(*devnode)(struct device *dev, mode_t *mode);
+	char *(*devnode)(struct device *dev, umode_t *mode);
 
 	void (*class_release)(struct class *class);
 	void (*dev_release)(struct device *dev);
@@ -423,7 +423,7 @@ struct device_type {
 	const char *name;
 	const struct attribute_group **groups;
 	int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
-	char *(*devnode)(struct device *dev, mode_t *mode);
+	char *(*devnode)(struct device *dev, umode_t *mode);
 	void (*release)(struct device *dev);
 
 	const struct dev_pm_ops *pm;
@@ -720,7 +720,7 @@ extern int device_rename(struct device *dev, const char *new_name);
 extern int device_move(struct device *dev, struct device *new_parent,
 		       enum dpm_order dpm_order);
 extern const char *device_get_devnode(struct device *dev,
-				      mode_t *mode, const char **tmp);
+				      umode_t *mode, const char **tmp);
 extern void *dev_get_drvdata(const struct device *dev);
 extern int dev_set_drvdata(struct device *dev, void *data);
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6d18f3531f18..fe23ee768589 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -163,7 +163,7 @@ struct gendisk {
                                          * disks that can't be partitioned. */
 
 	char disk_name[DISK_NAME_LEN];	/* name of major driver */
-	char *(*devnode)(struct gendisk *gd, mode_t *mode);
+	char *(*devnode)(struct gendisk *gd, umode_t *mode);
 
 	unsigned int events;		/* supported events */
 	unsigned int async_events;	/* async events, subset of all */
diff --git a/include/linux/usb.h b/include/linux/usb.h
index d3d0c1374334..a59321779f8b 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -935,7 +935,7 @@ extern struct bus_type usb_bus_type;
  */
 struct usb_class_driver {
 	char *name;
-	char *(*devnode)(struct device *dev, mode_t *mode);
+	char *(*devnode)(struct device *dev, umode_t *mode);
 	const struct file_operations *fops;
 	int minor_base;
 };
diff --git a/sound/sound_core.c b/sound/sound_core.c
index 6ce277860fd7..c6e81fb928e9 100644
--- a/sound/sound_core.c
+++ b/sound/sound_core.c
@@ -29,7 +29,7 @@ MODULE_DESCRIPTION("Core sound module");
 MODULE_AUTHOR("Alan Cox");
 MODULE_LICENSE("GPL");
 
-static char *sound_devnode(struct device *dev, mode_t *mode)
+static char *sound_devnode(struct device *dev, umode_t *mode)
 {
 	if (MAJOR(dev->devt) == SOUND_MAJOR)
 		return NULL;
-- 
cgit v1.2.3


From 9104e427f3e21ddb380ddc39752624365b5bffea Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Jul 2011 23:10:46 -0400
Subject: switch sysfs attr->mode to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/platform/x86/intel_menlow.c | 2 +-
 include/linux/sysfs.h               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/intel_menlow.c b/drivers/platform/x86/intel_menlow.c
index abddc83e9fd7..3271ac85115e 100644
--- a/drivers/platform/x86/intel_menlow.c
+++ b/drivers/platform/x86/intel_menlow.c
@@ -389,7 +389,7 @@ static ssize_t bios_enabled_show(struct device *dev,
 	return sprintf(buf, "%s\n", bios_enabled ? "enabled" : "disabled");
 }
 
-static int intel_menlow_add_one_attribute(char *name, int mode, void *show,
+static int intel_menlow_add_one_attribute(char *name, umode_t mode, void *show,
 					  void *store, struct device *dev,
 					  acpi_handle handle)
 {
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index dac0859e6440..d1994ec02c89 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -25,7 +25,7 @@ enum kobj_ns_type;
 
 struct attribute {
 	const char		*name;
-	mode_t			mode;
+	umode_t			mode;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lock_class_key	*key;
 	struct lock_class_key	skey;
-- 
cgit v1.2.3


From 587a1f1659e8b330b8738ef4901832a2b63f0bed Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Jul 2011 23:11:19 -0400
Subject: switch ->is_visible() to returning umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/firmware/iscsi_ibft.c            | 12 ++++++------
 drivers/hwmon/jc42.c                     |  2 +-
 drivers/hwmon/max1668.c                  |  4 ++--
 drivers/hwmon/max6650.c                  |  2 +-
 drivers/hwmon/tmp421.c                   |  2 +-
 drivers/infiniband/ulp/iser/iscsi_iser.c |  2 +-
 drivers/input/touchscreen/ad7877.c       |  4 ++--
 drivers/input/touchscreen/tsc2005.c      |  4 ++--
 drivers/pci/pci-label.c                  |  4 ++--
 drivers/platform/x86/asus-laptop.c       |  2 +-
 drivers/platform/x86/asus-wmi.c          |  4 ++--
 drivers/platform/x86/ideapad-laptop.c    |  2 +-
 drivers/power/power_supply_sysfs.c       |  4 ++--
 drivers/scsi/be2iscsi/be_iscsi.c         |  2 +-
 drivers/scsi/be2iscsi/be_iscsi.h         |  2 +-
 drivers/scsi/be2iscsi/be_main.c          | 12 ++++++------
 drivers/scsi/bnx2i/bnx2i_iscsi.c         |  2 +-
 drivers/scsi/cxgbi/libcxgbi.c            |  2 +-
 drivers/scsi/cxgbi/libcxgbi.h            |  2 +-
 drivers/scsi/iscsi_boot_sysfs.c          | 14 +++++++-------
 drivers/scsi/iscsi_tcp.c                 |  2 +-
 drivers/scsi/qla4xxx/ql4_os.c            | 10 +++++-----
 drivers/scsi/scsi_transport_iscsi.c      |  8 ++++----
 drivers/scsi/scsi_transport_spi.c        |  2 +-
 drivers/staging/iio/adc/ad7192.c         |  4 ++--
 drivers/staging/iio/adc/ad7606_core.c    |  4 ++--
 drivers/staging/iio/dac/ad5446.c         |  4 ++--
 drivers/staging/iio/dds/ad9834.c         |  4 ++--
 drivers/usb/core/sysfs.c                 |  4 ++--
 fs/sysfs/group.c                         |  2 +-
 include/linux/iscsi_boot_sysfs.h         |  8 ++++----
 include/linux/sysfs.h                    |  2 +-
 include/scsi/scsi_transport_iscsi.h      |  2 +-
 33 files changed, 70 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index 2cce44a1d7d0..3ee852c9925b 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -433,11 +433,11 @@ static int __init ibft_check_device(void)
  * Helper routiners to check to determine if the entry is valid
  * in the proper iBFT structure.
  */
-static mode_t ibft_check_nic_for(void *data, int type)
+static umode_t ibft_check_nic_for(void *data, int type)
 {
 	struct ibft_kobject *entry = data;
 	struct ibft_nic *nic = entry->nic;
-	mode_t rc = 0;
+	umode_t rc = 0;
 
 	switch (type) {
 	case ISCSI_BOOT_ETH_INDEX:
@@ -488,11 +488,11 @@ static mode_t ibft_check_nic_for(void *data, int type)
 	return rc;
 }
 
-static mode_t __init ibft_check_tgt_for(void *data, int type)
+static umode_t __init ibft_check_tgt_for(void *data, int type)
 {
 	struct ibft_kobject *entry = data;
 	struct ibft_tgt *tgt = entry->tgt;
-	mode_t rc = 0;
+	umode_t rc = 0;
 
 	switch (type) {
 	case ISCSI_BOOT_TGT_INDEX:
@@ -524,11 +524,11 @@ static mode_t __init ibft_check_tgt_for(void *data, int type)
 	return rc;
 }
 
-static mode_t __init ibft_check_initiator_for(void *data, int type)
+static umode_t __init ibft_check_initiator_for(void *data, int type)
 {
 	struct ibft_kobject *entry = data;
 	struct ibft_initiator *init = entry->initiator;
-	mode_t rc = 0;
+	umode_t rc = 0;
 
 	switch (type) {
 	case ISCSI_BOOT_INI_INDEX:
diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c
index 2d3d72805ff4..1a92951f4031 100644
--- a/drivers/hwmon/jc42.c
+++ b/drivers/hwmon/jc42.c
@@ -413,7 +413,7 @@ static struct attribute *jc42_attributes[] = {
 	NULL
 };
 
-static mode_t jc42_attribute_mode(struct kobject *kobj,
+static umode_t jc42_attribute_mode(struct kobject *kobj,
 				  struct attribute *attr, int index)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
diff --git a/drivers/hwmon/max1668.c b/drivers/hwmon/max1668.c
index 20d1b2ddffb6..6914195cfd35 100644
--- a/drivers/hwmon/max1668.c
+++ b/drivers/hwmon/max1668.c
@@ -335,10 +335,10 @@ static struct attribute *max1668_attribute_unique[] = {
 	NULL
 };
 
-static mode_t max1668_attribute_mode(struct kobject *kobj,
+static umode_t max1668_attribute_mode(struct kobject *kobj,
 				     struct attribute *attr, int index)
 {
-	int ret = S_IRUGO;
+	umode_t ret = S_IRUGO;
 	if (read_only)
 		return ret;
 	if (attr == &sensor_dev_attr_temp1_max.dev_attr.attr ||
diff --git a/drivers/hwmon/max6650.c b/drivers/hwmon/max6650.c
index ece3aafa54b3..2fc034aeca09 100644
--- a/drivers/hwmon/max6650.c
+++ b/drivers/hwmon/max6650.c
@@ -464,7 +464,7 @@ static SENSOR_DEVICE_ATTR(gpio1_alarm, S_IRUGO, get_alarm, NULL,
 static SENSOR_DEVICE_ATTR(gpio2_alarm, S_IRUGO, get_alarm, NULL,
 			  MAX6650_ALRM_GPIO2);
 
-static mode_t max6650_attrs_visible(struct kobject *kobj, struct attribute *a,
+static umode_t max6650_attrs_visible(struct kobject *kobj, struct attribute *a,
 				    int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c
index 0517a8f09d35..c48381f2cd02 100644
--- a/drivers/hwmon/tmp421.c
+++ b/drivers/hwmon/tmp421.c
@@ -157,7 +157,7 @@ static ssize_t show_fault(struct device *dev,
 		return sprintf(buf, "0\n");
 }
 
-static mode_t tmp421_is_visible(struct kobject *kobj, struct attribute *a,
+static umode_t tmp421_is_visible(struct kobject *kobj, struct attribute *a,
 				int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 7e7373a700e6..9a43cb07f294 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -638,7 +638,7 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
 	iser_conn_terminate(ib_conn);
 }
 
-static mode_t iser_attr_is_visible(int param_type, int param)
+static umode_t iser_attr_is_visible(int param_type, int param)
 {
 	switch (param_type) {
 	case ISCSI_HOST_PARAM:
diff --git a/drivers/input/touchscreen/ad7877.c b/drivers/input/touchscreen/ad7877.c
index 400131df677b..baa43df6502d 100644
--- a/drivers/input/touchscreen/ad7877.c
+++ b/drivers/input/touchscreen/ad7877.c
@@ -612,10 +612,10 @@ static struct attribute *ad7877_attributes[] = {
 	NULL
 };
 
-static mode_t ad7877_attr_is_visible(struct kobject *kobj,
+static umode_t ad7877_attr_is_visible(struct kobject *kobj,
 				     struct attribute *attr, int n)
 {
-	mode_t mode = attr->mode;
+	umode_t mode = attr->mode;
 
 	if (attr == &dev_attr_aux3.attr) {
 		if (gpio3)
diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c
index cbf0ff322676..067d95662997 100644
--- a/drivers/input/touchscreen/tsc2005.c
+++ b/drivers/input/touchscreen/tsc2005.c
@@ -450,13 +450,13 @@ static struct attribute *tsc2005_attrs[] = {
 	NULL
 };
 
-static mode_t tsc2005_attr_is_visible(struct kobject *kobj,
+static umode_t tsc2005_attr_is_visible(struct kobject *kobj,
 				      struct attribute *attr, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct spi_device *spi = to_spi_device(dev);
 	struct tsc2005 *ts = spi_get_drvdata(spi);
-	mode_t mode = attr->mode;
+	umode_t mode = attr->mode;
 
 	if (attr == &dev_attr_selftest.attr) {
 		if (!ts->set_reset)
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
index 81525ae5d869..edaed6f4da6c 100644
--- a/drivers/pci/pci-label.c
+++ b/drivers/pci/pci-label.c
@@ -89,7 +89,7 @@ find_smbios_instance_string(struct pci_dev *pdev, char *buf,
 	return 0;
 }
 
-static mode_t
+static umode_t
 smbios_instance_string_exist(struct kobject *kobj, struct attribute *attr,
 			     int n)
 {
@@ -275,7 +275,7 @@ device_has_dsm(struct device *dev)
 	return FALSE;
 }
 
-static mode_t
+static umode_t
 acpi_index_string_exist(struct kobject *kobj, struct attribute *attr, int n)
 {
 	struct device *dev;
diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index edaccad9b5bf..b7944f903886 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -1477,7 +1477,7 @@ static struct attribute *asus_attributes[] = {
 	NULL
 };
 
-static mode_t asus_sysfs_is_visible(struct kobject *kobj,
+static umode_t asus_sysfs_is_visible(struct kobject *kobj,
 				    struct attribute *attr,
 				    int idx)
 {
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index d1049ee3c9e8..72d731c21d45 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -992,7 +992,7 @@ static struct attribute *hwmon_attributes[] = {
 	NULL
 };
 
-static mode_t asus_hwmon_sysfs_is_visible(struct kobject *kobj,
+static umode_t asus_hwmon_sysfs_is_visible(struct kobject *kobj,
 					  struct attribute *attr, int idx)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -1357,7 +1357,7 @@ static struct attribute *platform_attributes[] = {
 	NULL
 };
 
-static mode_t asus_sysfs_is_visible(struct kobject *kobj,
+static umode_t asus_sysfs_is_visible(struct kobject *kobj,
 				    struct attribute *attr, int idx)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index a36addf106a0..ac902f7a9baa 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -368,7 +368,7 @@ static struct attribute *ideapad_attributes[] = {
 	NULL
 };
 
-static mode_t ideapad_is_visible(struct kobject *kobj,
+static umode_t ideapad_is_visible(struct kobject *kobj,
 				 struct attribute *attr,
 				 int idx)
 {
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index e15d4c9d3988..e95cd657dac2 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -176,13 +176,13 @@ static struct device_attribute power_supply_attrs[] = {
 static struct attribute *
 __power_supply_attrs[ARRAY_SIZE(power_supply_attrs) + 1];
 
-static mode_t power_supply_attr_is_visible(struct kobject *kobj,
+static umode_t power_supply_attr_is_visible(struct kobject *kobj,
 					   struct attribute *attr,
 					   int attrno)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct power_supply *psy = dev_get_drvdata(dev);
-	mode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
+	umode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
 	int i;
 
 	if (attrno == POWER_SUPPLY_PROP_TYPE)
diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c
index 8b002f6db6ca..33c8f09c7ac1 100644
--- a/drivers/scsi/be2iscsi/be_iscsi.c
+++ b/drivers/scsi/be2iscsi/be_iscsi.c
@@ -733,7 +733,7 @@ void beiscsi_ep_disconnect(struct iscsi_endpoint *ep)
 	iscsi_destroy_endpoint(beiscsi_ep->openiscsi_ep);
 }
 
-mode_t be2iscsi_attr_is_visible(int param_type, int param)
+umode_t be2iscsi_attr_is_visible(int param_type, int param)
 {
 	switch (param_type) {
 	case ISCSI_HOST_PARAM:
diff --git a/drivers/scsi/be2iscsi/be_iscsi.h b/drivers/scsi/be2iscsi/be_iscsi.h
index 4a1f2e393f31..5c45be134501 100644
--- a/drivers/scsi/be2iscsi/be_iscsi.h
+++ b/drivers/scsi/be2iscsi/be_iscsi.h
@@ -26,7 +26,7 @@
 #define BE2_IPV4  0x1
 #define BE2_IPV6  0x10
 
-mode_t be2iscsi_attr_is_visible(int param_type, int param);
+umode_t be2iscsi_attr_is_visible(int param_type, int param);
 
 void beiscsi_offload_connection(struct beiscsi_conn *beiscsi_conn,
 				struct beiscsi_offload_params *params);
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index 379c696dac19..797a43994b55 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -325,9 +325,9 @@ static ssize_t beiscsi_show_boot_eth_info(void *data, int type, char *buf)
 }
 
 
-static mode_t beiscsi_tgt_get_attr_visibility(void *data, int type)
+static umode_t beiscsi_tgt_get_attr_visibility(void *data, int type)
 {
-	int rc;
+	umode_t rc;
 
 	switch (type) {
 	case ISCSI_BOOT_TGT_NAME:
@@ -348,9 +348,9 @@ static mode_t beiscsi_tgt_get_attr_visibility(void *data, int type)
 	return rc;
 }
 
-static mode_t beiscsi_ini_get_attr_visibility(void *data, int type)
+static umode_t beiscsi_ini_get_attr_visibility(void *data, int type)
 {
-	int rc;
+	umode_t rc;
 
 	switch (type) {
 	case ISCSI_BOOT_INI_INITIATOR_NAME:
@@ -364,9 +364,9 @@ static mode_t beiscsi_ini_get_attr_visibility(void *data, int type)
 }
 
 
-static mode_t beiscsi_eth_get_attr_visibility(void *data, int type)
+static umode_t beiscsi_eth_get_attr_visibility(void *data, int type)
 {
-	int rc;
+	umode_t rc;
 
 	switch (type) {
 	case ISCSI_BOOT_ETH_FLAGS:
diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c
index d1e697190970..1a44b45e7bef 100644
--- a/drivers/scsi/bnx2i/bnx2i_iscsi.c
+++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c
@@ -2177,7 +2177,7 @@ static int bnx2i_nl_set_path(struct Scsi_Host *shost, struct iscsi_path *params)
 	return 0;
 }
 
-static mode_t bnx2i_attr_is_visible(int param_type, int param)
+static umode_t bnx2i_attr_is_visible(int param_type, int param)
 {
 	switch (param_type) {
 	case ISCSI_HOST_PARAM:
diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c
index c10f74a566f2..997fa36999be 100644
--- a/drivers/scsi/cxgbi/libcxgbi.c
+++ b/drivers/scsi/cxgbi/libcxgbi.c
@@ -2569,7 +2569,7 @@ void cxgbi_iscsi_cleanup(struct iscsi_transport *itp,
 }
 EXPORT_SYMBOL_GPL(cxgbi_iscsi_cleanup);
 
-mode_t cxgbi_attr_is_visible(int param_type, int param)
+umode_t cxgbi_attr_is_visible(int param_type, int param)
 {
 	switch (param_type) {
 	case ISCSI_HOST_PARAM:
diff --git a/drivers/scsi/cxgbi/libcxgbi.h b/drivers/scsi/cxgbi/libcxgbi.h
index 20c88279c7a6..80fa99b3d384 100644
--- a/drivers/scsi/cxgbi/libcxgbi.h
+++ b/drivers/scsi/cxgbi/libcxgbi.h
@@ -709,7 +709,7 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *);
 
 void cxgbi_cleanup_task(struct iscsi_task *task);
 
-mode_t cxgbi_attr_is_visible(int param_type, int param);
+umode_t cxgbi_attr_is_visible(int param_type, int param);
 void cxgbi_get_conn_stats(struct iscsi_cls_conn *, struct iscsi_stats *);
 int cxgbi_set_conn_param(struct iscsi_cls_conn *,
 			enum iscsi_param, char *, int);
diff --git a/drivers/scsi/iscsi_boot_sysfs.c b/drivers/scsi/iscsi_boot_sysfs.c
index 89700cbca16e..14c1c8f6a95e 100644
--- a/drivers/scsi/iscsi_boot_sysfs.c
+++ b/drivers/scsi/iscsi_boot_sysfs.c
@@ -112,7 +112,7 @@ static struct attribute *target_attrs[] = {
 	NULL
 };
 
-static mode_t iscsi_boot_tgt_attr_is_visible(struct kobject *kobj,
+static umode_t iscsi_boot_tgt_attr_is_visible(struct kobject *kobj,
 					     struct attribute *attr, int i)
 {
 	struct iscsi_boot_kobj *boot_kobj =
@@ -193,7 +193,7 @@ static struct attribute *ethernet_attrs[] = {
 	NULL
 };
 
-static mode_t iscsi_boot_eth_attr_is_visible(struct kobject *kobj,
+static umode_t iscsi_boot_eth_attr_is_visible(struct kobject *kobj,
 					     struct attribute *attr, int i)
 {
 	struct iscsi_boot_kobj *boot_kobj =
@@ -265,7 +265,7 @@ static struct attribute *initiator_attrs[] = {
 	NULL
 };
 
-static mode_t iscsi_boot_ini_attr_is_visible(struct kobject *kobj,
+static umode_t iscsi_boot_ini_attr_is_visible(struct kobject *kobj,
 					     struct attribute *attr, int i)
 {
 	struct iscsi_boot_kobj *boot_kobj =
@@ -306,7 +306,7 @@ iscsi_boot_create_kobj(struct iscsi_boot_kset *boot_kset,
 		       struct attribute_group *attr_group,
 		       const char *name, int index, void *data,
 		       ssize_t (*show) (void *data, int type, char *buf),
-		       mode_t (*is_visible) (void *data, int type),
+		       umode_t (*is_visible) (void *data, int type),
 		       void (*release) (void *data))
 {
 	struct iscsi_boot_kobj *boot_kobj;
@@ -369,7 +369,7 @@ struct iscsi_boot_kobj *
 iscsi_boot_create_target(struct iscsi_boot_kset *boot_kset, int index,
 			 void *data,
 			 ssize_t (*show) (void *data, int type, char *buf),
-			 mode_t (*is_visible) (void *data, int type),
+			 umode_t (*is_visible) (void *data, int type),
 			 void (*release) (void *data))
 {
 	return iscsi_boot_create_kobj(boot_kset, &iscsi_boot_target_attr_group,
@@ -394,7 +394,7 @@ struct iscsi_boot_kobj *
 iscsi_boot_create_initiator(struct iscsi_boot_kset *boot_kset, int index,
 			    void *data,
 			    ssize_t (*show) (void *data, int type, char *buf),
-			    mode_t (*is_visible) (void *data, int type),
+			    umode_t (*is_visible) (void *data, int type),
 			    void (*release) (void *data))
 {
 	return iscsi_boot_create_kobj(boot_kset,
@@ -420,7 +420,7 @@ struct iscsi_boot_kobj *
 iscsi_boot_create_ethernet(struct iscsi_boot_kset *boot_kset, int index,
 			   void *data,
 			   ssize_t (*show) (void *data, int type, char *buf),
-			   mode_t (*is_visible) (void *data, int type),
+			   umode_t (*is_visible) (void *data, int type),
 			   void (*release) (void *data))
 {
 	return iscsi_boot_create_kobj(boot_kset,
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 7c34d8e7cc75..db47158e0dde 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -873,7 +873,7 @@ static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session)
 	iscsi_host_free(shost);
 }
 
-static mode_t iscsi_sw_tcp_attr_is_visible(int param_type, int param)
+static umode_t iscsi_sw_tcp_attr_is_visible(int param_type, int param)
 {
 	switch (param_type) {
 	case ISCSI_HOST_PARAM:
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 4169c8baa112..78bf700b365f 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -128,7 +128,7 @@ static int qla4xxx_eh_host_reset(struct scsi_cmnd *cmd);
 static int qla4xxx_slave_alloc(struct scsi_device *device);
 static int qla4xxx_slave_configure(struct scsi_device *device);
 static void qla4xxx_slave_destroy(struct scsi_device *sdev);
-static mode_t ql4_attr_is_visible(int param_type, int param);
+static umode_t ql4_attr_is_visible(int param_type, int param);
 static int qla4xxx_host_reset(struct Scsi_Host *shost, int reset_type);
 
 static struct qla4_8xxx_legacy_intr_set legacy_intr[] =
@@ -197,7 +197,7 @@ static struct iscsi_transport qla4xxx_iscsi_transport = {
 
 static struct scsi_transport_template *qla4xxx_scsi_transport;
 
-static mode_t ql4_attr_is_visible(int param_type, int param)
+static umode_t ql4_attr_is_visible(int param_type, int param)
 {
 	switch (param_type) {
 	case ISCSI_HOST_PARAM:
@@ -3039,7 +3039,7 @@ static ssize_t qla4xxx_show_boot_eth_info(void *data, int type, char *buf)
 	return rc;
 }
 
-static mode_t qla4xxx_eth_get_attr_visibility(void *data, int type)
+static umode_t qla4xxx_eth_get_attr_visibility(void *data, int type)
 {
 	int rc;
 
@@ -3073,7 +3073,7 @@ static ssize_t qla4xxx_show_boot_ini_info(void *data, int type, char *buf)
 	return rc;
 }
 
-static mode_t qla4xxx_ini_get_attr_visibility(void *data, int type)
+static umode_t qla4xxx_ini_get_attr_visibility(void *data, int type)
 {
 	int rc;
 
@@ -3160,7 +3160,7 @@ static ssize_t qla4xxx_show_boot_tgt_sec_info(void *data, int type, char *buf)
 	return qla4xxx_show_boot_tgt_info(boot_sess, type, buf);
 }
 
-static mode_t qla4xxx_tgt_get_attr_visibility(void *data, int type)
+static umode_t qla4xxx_tgt_get_attr_visibility(void *data, int type)
 {
 	int rc;
 
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 96029e6d027f..e8447fbc31f3 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -328,7 +328,7 @@ iscsi_iface_net_attr(iface, vlan_enabled, ISCSI_NET_PARAM_VLAN_ENABLED);
 iscsi_iface_net_attr(iface, mtu, ISCSI_NET_PARAM_MTU);
 iscsi_iface_net_attr(iface, port, ISCSI_NET_PARAM_PORT);
 
-static mode_t iscsi_iface_attr_is_visible(struct kobject *kobj,
+static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj,
 					  struct attribute *attr, int i)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -2199,7 +2199,7 @@ static struct attribute *iscsi_conn_attrs[] = {
 	NULL,
 };
 
-static mode_t iscsi_conn_attr_is_visible(struct kobject *kobj,
+static umode_t iscsi_conn_attr_is_visible(struct kobject *kobj,
 					 struct attribute *attr, int i)
 {
 	struct device *cdev = container_of(kobj, struct device, kobj);
@@ -2370,7 +2370,7 @@ static struct attribute *iscsi_session_attrs[] = {
 	NULL,
 };
 
-static mode_t iscsi_session_attr_is_visible(struct kobject *kobj,
+static umode_t iscsi_session_attr_is_visible(struct kobject *kobj,
 					    struct attribute *attr, int i)
 {
 	struct device *cdev = container_of(kobj, struct device, kobj);
@@ -2468,7 +2468,7 @@ static struct attribute *iscsi_host_attrs[] = {
 	NULL,
 };
 
-static mode_t iscsi_host_attr_is_visible(struct kobject *kobj,
+static umode_t iscsi_host_attr_is_visible(struct kobject *kobj,
 					 struct attribute *attr, int i)
 {
 	struct device *cdev = container_of(kobj, struct device, kobj);
diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index 5fbeadd96819..a2715c31e754 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -1434,7 +1434,7 @@ static int spi_host_configure(struct transport_container *tc,
 	(si->f->show_##name ? S_IRUGO : 0) | \
 	(si->f->set_##name ? S_IWUSR : 0)
 
-static mode_t target_attribute_is_visible(struct kobject *kobj,
+static umode_t target_attribute_is_visible(struct kobject *kobj,
 					  struct attribute *attr, int i)
 {
 	struct device *cdev = container_of(kobj, struct device, kobj);
diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c
index 31c376b9d5eb..e7bf32461736 100644
--- a/drivers/staging/iio/adc/ad7192.c
+++ b/drivers/staging/iio/adc/ad7192.c
@@ -838,14 +838,14 @@ static struct attribute *ad7192_attributes[] = {
 	NULL
 };
 
-static mode_t ad7192_attr_is_visible(struct kobject *kobj,
+static umode_t ad7192_attr_is_visible(struct kobject *kobj,
 				     struct attribute *attr, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct iio_dev *indio_dev = dev_get_drvdata(dev);
 	struct ad7192_state *st = iio_priv(indio_dev);
 
-	mode_t mode = attr->mode;
+	umode_t mode = attr->mode;
 
 	if ((st->devid != ID_AD7195) &&
 		(attr == &iio_dev_attr_ac_excitation_en.dev_attr.attr))
diff --git a/drivers/staging/iio/adc/ad7606_core.c b/drivers/staging/iio/adc/ad7606_core.c
index 54423ab196fe..e3ecd3d2ef3a 100644
--- a/drivers/staging/iio/adc/ad7606_core.c
+++ b/drivers/staging/iio/adc/ad7606_core.c
@@ -205,14 +205,14 @@ static struct attribute *ad7606_attributes[] = {
 	NULL,
 };
 
-static mode_t ad7606_attr_is_visible(struct kobject *kobj,
+static umode_t ad7606_attr_is_visible(struct kobject *kobj,
 				     struct attribute *attr, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct iio_dev *indio_dev = dev_get_drvdata(dev);
 	struct ad7606_state *st = iio_priv(indio_dev);
 
-	mode_t mode = attr->mode;
+	umode_t mode = attr->mode;
 
 	if (!(gpio_is_valid(st->pdata->gpio_os0) &&
 	      gpio_is_valid(st->pdata->gpio_os1) &&
diff --git a/drivers/staging/iio/dac/ad5446.c b/drivers/staging/iio/dac/ad5446.c
index e1c204d51d8c..dc46b6d6eaa3 100644
--- a/drivers/staging/iio/dac/ad5446.c
+++ b/drivers/staging/iio/dac/ad5446.c
@@ -197,14 +197,14 @@ static struct attribute *ad5446_attributes[] = {
 	NULL,
 };
 
-static mode_t ad5446_attr_is_visible(struct kobject *kobj,
+static umode_t ad5446_attr_is_visible(struct kobject *kobj,
 				     struct attribute *attr, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct iio_dev *indio_dev = dev_get_drvdata(dev);
 	struct ad5446_state *st = iio_priv(indio_dev);
 
-	mode_t mode = attr->mode;
+	umode_t mode = attr->mode;
 
 	if (!st->chip_info->store_pwr_down &&
 		(attr == &iio_dev_attr_out_voltage0_powerdown.dev_attr.attr ||
diff --git a/drivers/staging/iio/dds/ad9834.c b/drivers/staging/iio/dds/ad9834.c
index c468f696fe25..cc3293a9f496 100644
--- a/drivers/staging/iio/dds/ad9834.c
+++ b/drivers/staging/iio/dds/ad9834.c
@@ -281,14 +281,14 @@ static struct attribute *ad9834_attributes[] = {
 	NULL,
 };
 
-static mode_t ad9834_attr_is_visible(struct kobject *kobj,
+static umode_t ad9834_attr_is_visible(struct kobject *kobj,
 				     struct attribute *attr, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct iio_dev *indio_dev = dev_get_drvdata(dev);
 	struct ad9834_state *st = iio_priv(indio_dev);
 
-	mode_t mode = attr->mode;
+	umode_t mode = attr->mode;
 
 	if (((st->devid == ID_AD9833) || (st->devid == ID_AD9837)) &&
 		((attr == &iio_dev_attr_dds0_out1_enable.dev_attr.attr) ||
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index 662c0cf3a3e1..9e491ca2e5c4 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -642,7 +642,7 @@ static struct attribute *dev_string_attrs[] = {
 	NULL
 };
 
-static mode_t dev_string_attrs_are_visible(struct kobject *kobj,
+static umode_t dev_string_attrs_are_visible(struct kobject *kobj,
 		struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -877,7 +877,7 @@ static struct attribute *intf_assoc_attrs[] = {
 	NULL,
 };
 
-static mode_t intf_assoc_attrs_are_visible(struct kobject *kobj,
+static umode_t intf_assoc_attrs_are_visible(struct kobject *kobj,
 		struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 194414f8298c..dd1701caecc9 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -33,7 +33,7 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
 	int error = 0, i;
 
 	for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
-		mode_t mode = 0;
+		umode_t mode = 0;
 
 		/* in update mode, we're changing the permissions or
 		 * visibility.  Do this by first removing then
diff --git a/include/linux/iscsi_boot_sysfs.h b/include/linux/iscsi_boot_sysfs.h
index f0a2f8b0aa13..2a8b1659bf35 100644
--- a/include/linux/iscsi_boot_sysfs.h
+++ b/include/linux/iscsi_boot_sysfs.h
@@ -91,7 +91,7 @@ struct iscsi_boot_kobj {
 	 * The enum of the type. This can be any value of the above
 	 * properties.
 	 */
-	mode_t (*is_visible) (void *data, int type);
+	umode_t (*is_visible) (void *data, int type);
 
 	/*
 	 * Driver specific release function.
@@ -110,20 +110,20 @@ struct iscsi_boot_kobj *
 iscsi_boot_create_initiator(struct iscsi_boot_kset *boot_kset, int index,
 			    void *data,
 			    ssize_t (*show) (void *data, int type, char *buf),
-			    mode_t (*is_visible) (void *data, int type),
+			    umode_t (*is_visible) (void *data, int type),
 			    void (*release) (void *data));
 
 struct iscsi_boot_kobj *
 iscsi_boot_create_ethernet(struct iscsi_boot_kset *boot_kset, int index,
 			   void *data,
 			   ssize_t (*show) (void *data, int type, char *buf),
-			   mode_t (*is_visible) (void *data, int type),
+			   umode_t (*is_visible) (void *data, int type),
 			   void (*release) (void *data));
 struct iscsi_boot_kobj *
 iscsi_boot_create_target(struct iscsi_boot_kset *boot_kset, int index,
 			 void *data,
 			 ssize_t (*show) (void *data, int type, char *buf),
-			 mode_t (*is_visible) (void *data, int type),
+			 umode_t (*is_visible) (void *data, int type),
 			 void (*release) (void *data));
 
 struct iscsi_boot_kset *iscsi_boot_create_kset(const char *set_name);
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index d1994ec02c89..e90eea7afb4e 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -55,7 +55,7 @@ do {							\
 
 struct attribute_group {
 	const char		*name;
-	mode_t			(*is_visible)(struct kobject *,
+	umode_t			(*is_visible)(struct kobject *,
 					      struct attribute *, int);
 	struct attribute	**attrs;
 };
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 5994bcc1b017..87f34c3d447d 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -142,7 +142,7 @@ struct iscsi_transport {
 	int (*get_iface_param) (struct iscsi_iface *iface,
 				enum iscsi_param_type param_type,
 				int param, char *buf);
-	mode_t (*attr_is_visible)(int param_type, int param);
+	umode_t (*attr_is_visible)(int param_type, int param);
 	int (*bsg_request)(struct bsg_job *job);
 };
 
-- 
cgit v1.2.3


From d161a13f974c72fd7ff0069d39a3ae57cb5694ff Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 24 Jul 2011 03:36:29 -0400
Subject: switch procfs to umode_t use

both proc_dir_entry ->mode and populating functions

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/kernel/lparcfg.c        |  2 +-
 drivers/acpi/battery.c               |  2 +-
 drivers/message/i2o/i2o_proc.c       |  2 +-
 drivers/misc/sgi-gru/gruprocfs.c     |  2 +-
 drivers/platform/x86/asus_acpi.c     |  4 ++--
 drivers/platform/x86/thinkpad_acpi.c |  4 ++--
 drivers/scsi/sg.c                    |  7 +++----
 fs/proc/base.c                       |  2 +-
 fs/proc/generic.c                    |  8 ++++----
 fs/proc/proc_net.c                   |  2 +-
 include/linux/ide.h                  |  2 +-
 include/linux/proc_fs.h              | 24 ++++++++++++------------
 include/sound/info.h                 |  2 +-
 13 files changed, 31 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 84daabe2fcba..578f35f18723 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -783,7 +783,7 @@ static const struct file_operations lparcfg_fops = {
 static int __init lparcfg_init(void)
 {
 	struct proc_dir_entry *ent;
-	mode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
+	umode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
 
 	/* Allow writing if we have FW_FEATURE_SPLPAR */
 	if (firmware_has_feature(FW_FEATURE_SPLPAR) &&
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 7711d94a0409..86933ca8b472 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -873,7 +873,7 @@ DECLARE_FILE_FUNCTIONS(alarm);
 
 static const struct battery_file {
 	struct file_operations ops;
-	mode_t mode;
+	umode_t mode;
 	const char *name;
 } acpi_battery_file[] = {
 	FILE_DESCRIPTION_RO(info),
diff --git a/drivers/message/i2o/i2o_proc.c b/drivers/message/i2o/i2o_proc.c
index 07dbeaf9df99..6d115c7208ab 100644
--- a/drivers/message/i2o/i2o_proc.c
+++ b/drivers/message/i2o/i2o_proc.c
@@ -56,7 +56,7 @@
 /* Structure used to define /proc entries */
 typedef struct _i2o_proc_entry_t {
 	char *name;		/* entry name */
-	mode_t mode;		/* mode */
+	umode_t mode;		/* mode */
 	const struct file_operations *fops;	/* open function */
 } i2o_proc_entry;
 
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index 7768b87d995b..950dbe9ecb36 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -324,7 +324,7 @@ static const struct file_operations gru_fops = {
 
 static struct proc_entry {
 	char *name;
-	int mode;
+	umode_t mode;
 	const struct file_operations *fops;
 	struct proc_dir_entry *entry;
 } proc_files[] = {
diff --git a/drivers/platform/x86/asus_acpi.c b/drivers/platform/x86/asus_acpi.c
index d9312b3073e5..6f966d6c062b 100644
--- a/drivers/platform/x86/asus_acpi.c
+++ b/drivers/platform/x86/asus_acpi.c
@@ -1053,7 +1053,7 @@ static const struct file_operations disp_proc_fops = {
 };
 
 static int
-asus_proc_add(char *name, const struct file_operations *proc_fops, mode_t mode,
+asus_proc_add(char *name, const struct file_operations *proc_fops, umode_t mode,
 		     struct acpi_device *device)
 {
 	struct proc_dir_entry *proc;
@@ -1072,7 +1072,7 @@ asus_proc_add(char *name, const struct file_operations *proc_fops, mode_t mode,
 static int asus_hotk_add_fs(struct acpi_device *device)
 {
 	struct proc_dir_entry *proc;
-	mode_t mode;
+	umode_t mode;
 
 	if ((asus_uid == 0) && (asus_gid == 0)) {
 		mode = S_IFREG | S_IRUGO | S_IWUSR | S_IWGRP;
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 7b828680b21d..455e1522253e 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -297,7 +297,7 @@ struct ibm_init_struct {
 	char param[32];
 
 	int (*init) (struct ibm_init_struct *);
-	mode_t base_procfs_mode;
+	umode_t base_procfs_mode;
 	struct ibm_struct *data;
 };
 
@@ -8542,7 +8542,7 @@ static int __init ibm_init(struct ibm_init_struct *iibm)
 		"%s installed\n", ibm->name);
 
 	if (ibm->read) {
-		mode_t mode = iibm->base_procfs_mode;
+		umode_t mode = iibm->base_procfs_mode;
 
 		if (!mode)
 			mode = S_IRUGO;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 441a1c5b8974..02d99982a74d 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -2325,16 +2325,15 @@ static struct sg_proc_leaf sg_proc_leaf_arr[] = {
 static int
 sg_proc_init(void)
 {
-	int k, mask;
 	int num_leaves = ARRAY_SIZE(sg_proc_leaf_arr);
-	struct sg_proc_leaf * leaf;
+	int k;
 
 	sg_proc_sgp = proc_mkdir(sg_proc_sg_dirname, NULL);
 	if (!sg_proc_sgp)
 		return 1;
 	for (k = 0; k < num_leaves; ++k) {
-		leaf = &sg_proc_leaf_arr[k];
-		mask = leaf->fops->write ? S_IRUGO | S_IWUSR : S_IRUGO;
+		struct sg_proc_leaf *leaf = &sg_proc_leaf_arr[k];
+		umode_t mask = leaf->fops->write ? S_IRUGO | S_IWUSR : S_IRUGO;
 		proc_create(leaf->name, mask, sg_proc_sgp, leaf->fops);
 	}
 	return 0;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 851ba3dcdc29..65054d38ca23 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -101,7 +101,7 @@
 struct pid_entry {
 	char *name;
 	int len;
-	mode_t mode;
+	umode_t mode;
 	const struct inode_operations *iop;
 	const struct file_operations *fop;
 	union proc_op op;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 10090d9c7ad5..2edf34f2eb61 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -597,7 +597,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
 
 static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
 					  const char *name,
-					  mode_t mode,
+					  umode_t mode,
 					  nlink_t nlink)
 {
 	struct proc_dir_entry *ent = NULL;
@@ -659,7 +659,7 @@ struct proc_dir_entry *proc_symlink(const char *name,
 }
 EXPORT_SYMBOL(proc_symlink);
 
-struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
+struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
 		struct proc_dir_entry *parent)
 {
 	struct proc_dir_entry *ent;
@@ -699,7 +699,7 @@ struct proc_dir_entry *proc_mkdir(const char *name,
 }
 EXPORT_SYMBOL(proc_mkdir);
 
-struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
+struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode,
 					 struct proc_dir_entry *parent)
 {
 	struct proc_dir_entry *ent;
@@ -728,7 +728,7 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 }
 EXPORT_SYMBOL(create_proc_entry);
 
-struct proc_dir_entry *proc_create_data(const char *name, mode_t mode,
+struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
 					struct proc_dir_entry *parent,
 					const struct file_operations *proc_fops,
 					void *data)
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index f738024ccc8e..06e1cc17caf6 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -179,7 +179,7 @@ const struct file_operations proc_net_operations = {
 
 
 struct proc_dir_entry *proc_net_fops_create(struct net *net,
-	const char *name, mode_t mode, const struct file_operations *fops)
+	const char *name, umode_t mode, const struct file_operations *fops)
 {
 	return proc_create(name, mode, net->proc_net, fops);
 }
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 42557851b12e..501370b61ee5 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -920,7 +920,7 @@ __IDE_PROC_DEVSET(_name, _min, _max, NULL, NULL)
 
 typedef struct {
 	const char	*name;
-	mode_t		mode;
+	umode_t		mode;
 	const struct file_operations *proc_fops;
 } ide_proc_entry_t;
 
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 643b96c7a94f..6d9e575519cc 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -50,7 +50,7 @@ typedef	int (write_proc_t)(struct file *file, const char __user *buffer,
 
 struct proc_dir_entry {
 	unsigned int low_ino;
-	mode_t mode;
+	umode_t mode;
 	nlink_t nlink;
 	uid_t uid;
 	gid_t gid;
@@ -106,9 +106,9 @@ extern void proc_root_init(void);
 
 void proc_flush_task(struct task_struct *task);
 
-extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
+extern struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode,
 						struct proc_dir_entry *parent);
-struct proc_dir_entry *proc_create_data(const char *name, mode_t mode,
+struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
 				struct proc_dir_entry *parent,
 				const struct file_operations *proc_fops,
 				void *data);
@@ -146,17 +146,17 @@ extern void proc_device_tree_update_prop(struct proc_dir_entry *pde,
 extern struct proc_dir_entry *proc_symlink(const char *,
 		struct proc_dir_entry *, const char *);
 extern struct proc_dir_entry *proc_mkdir(const char *,struct proc_dir_entry *);
-extern struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
+extern struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
 			struct proc_dir_entry *parent);
 
-static inline struct proc_dir_entry *proc_create(const char *name, mode_t mode,
+static inline struct proc_dir_entry *proc_create(const char *name, umode_t mode,
 	struct proc_dir_entry *parent, const struct file_operations *proc_fops)
 {
 	return proc_create_data(name, mode, parent, proc_fops, NULL);
 }
 
 static inline struct proc_dir_entry *create_proc_read_entry(const char *name,
-	mode_t mode, struct proc_dir_entry *base, 
+	umode_t mode, struct proc_dir_entry *base, 
 	read_proc_t *read_proc, void * data)
 {
 	struct proc_dir_entry *res=create_proc_entry(name,mode,base);
@@ -168,7 +168,7 @@ static inline struct proc_dir_entry *create_proc_read_entry(const char *name,
 }
  
 extern struct proc_dir_entry *proc_net_fops_create(struct net *net,
-	const char *name, mode_t mode, const struct file_operations *fops);
+	const char *name, umode_t mode, const struct file_operations *fops);
 extern void proc_net_remove(struct net *net, const char *name);
 extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
 	struct proc_dir_entry *parent);
@@ -185,15 +185,15 @@ static inline void proc_flush_task(struct task_struct *task)
 }
 
 static inline struct proc_dir_entry *create_proc_entry(const char *name,
-	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
+	umode_t mode, struct proc_dir_entry *parent) { return NULL; }
 static inline struct proc_dir_entry *proc_create(const char *name,
-	mode_t mode, struct proc_dir_entry *parent,
+	umode_t mode, struct proc_dir_entry *parent,
 	const struct file_operations *proc_fops)
 {
 	return NULL;
 }
 static inline struct proc_dir_entry *proc_create_data(const char *name,
-	mode_t mode, struct proc_dir_entry *parent,
+	umode_t mode, struct proc_dir_entry *parent,
 	const struct file_operations *proc_fops, void *data)
 {
 	return NULL;
@@ -205,10 +205,10 @@ static inline struct proc_dir_entry *proc_symlink(const char *name,
 static inline struct proc_dir_entry *proc_mkdir(const char *name,
 	struct proc_dir_entry *parent) {return NULL;}
 static inline struct proc_dir_entry *proc_mkdir_mode(const char *name,
-	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
+	umode_t mode, struct proc_dir_entry *parent) { return NULL; }
 
 static inline struct proc_dir_entry *create_proc_read_entry(const char *name,
-	mode_t mode, struct proc_dir_entry *base, 
+	umode_t mode, struct proc_dir_entry *base, 
 	read_proc_t *read_proc, void * data) { return NULL; }
 
 struct tty_driver;
diff --git a/include/sound/info.h b/include/sound/info.h
index 5492cc40dc57..9ca1a493d370 100644
--- a/include/sound/info.h
+++ b/include/sound/info.h
@@ -72,7 +72,7 @@ struct snd_info_entry_ops {
 
 struct snd_info_entry {
 	const char *name;
-	mode_t mode;
+	umode_t mode;
 	long size;
 	unsigned short content;
 	union {
-- 
cgit v1.2.3


From 48176a973d65572e61d0ce95495e5072887e6fb6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 24 Jul 2011 03:40:40 -0400
Subject: switch sysfs_chmod_file() to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/hwmon/dme1737.c | 6 +++---
 fs/sysfs/file.c         | 2 +-
 include/linux/sysfs.h   | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c
index d9c592713919..d9803958e49f 100644
--- a/drivers/hwmon/dme1737.c
+++ b/drivers/hwmon/dme1737.c
@@ -1223,7 +1223,7 @@ static ssize_t show_pwm(struct device *dev, struct device_attribute *attr,
 }
 
 static struct attribute *dme1737_pwm_chmod_attr[];
-static void dme1737_chmod_file(struct device*, struct attribute*, mode_t);
+static void dme1737_chmod_file(struct device*, struct attribute*, umode_t);
 
 static ssize_t set_pwm(struct device *dev, struct device_attribute *attr,
 		       const char *buf, size_t count)
@@ -1961,7 +1961,7 @@ static inline void dme1737_sio_outb(int sio_cip, int reg, int val)
 static int dme1737_i2c_get_features(int, struct dme1737_data*);
 
 static void dme1737_chmod_file(struct device *dev,
-			       struct attribute *attr, mode_t mode)
+			       struct attribute *attr, umode_t mode)
 {
 	if (sysfs_chmod_file(&dev->kobj, attr, mode)) {
 		dev_warn(dev, "Failed to change permissions of %s.\n",
@@ -1971,7 +1971,7 @@ static void dme1737_chmod_file(struct device *dev,
 
 static void dme1737_chmod_group(struct device *dev,
 				const struct attribute_group *group,
-				mode_t mode)
+				umode_t mode)
 {
 	struct attribute **attr;
 
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index d4e6080b4b20..120c3adff6b0 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -618,7 +618,7 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
  *
  */
 int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
-		     mode_t mode)
+		     umode_t mode)
 {
 	struct sysfs_dirent *sd;
 	struct iattr newattrs;
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index e90eea7afb4e..0010009b2f00 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -133,7 +133,7 @@ int __must_check sysfs_create_file(struct kobject *kobj,
 int __must_check sysfs_create_files(struct kobject *kobj,
 				   const struct attribute **attr);
 int __must_check sysfs_chmod_file(struct kobject *kobj,
-				  const struct attribute *attr, mode_t mode);
+				  const struct attribute *attr, umode_t mode);
 void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr);
 void sysfs_remove_files(struct kobject *kobj, const struct attribute **attr);
 
@@ -221,7 +221,7 @@ static inline int sysfs_create_files(struct kobject *kobj,
 }
 
 static inline int sysfs_chmod_file(struct kobject *kobj,
-				   const struct attribute *attr, mode_t mode)
+				   const struct attribute *attr, umode_t mode)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From f4ae40a6a50a98ac23d4b285f739455e926a473e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 24 Jul 2011 04:33:43 -0400
Subject: switch debugfs to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/debugfs.txt              | 24 +++++------
 arch/arm/mach-msm/smd_debug.c                      |  2 +-
 arch/s390/include/asm/debug.h                      |  4 +-
 arch/s390/kernel/debug.c                           |  8 ++--
 arch/x86/xen/debugfs.c                             |  2 +-
 arch/x86/xen/debugfs.h                             |  2 +-
 drivers/acpi/ec_sys.c                              |  2 +-
 drivers/mmc/card/mmc_test.c                        |  2 +-
 .../net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c    |  2 +-
 drivers/net/wireless/ath/carl9170/debug.c          |  2 +-
 drivers/net/wireless/libertas/debugfs.c            |  2 +-
 drivers/s390/block/dasd.c                          |  4 +-
 drivers/scsi/bfa/bfad_debugfs.c                    |  2 +-
 fs/debugfs/file.c                                  | 22 +++++------
 fs/debugfs/inode.c                                 | 14 +++----
 fs/ocfs2/cluster/netdebug.c                        |  2 +-
 include/linux/debugfs.h                            | 46 +++++++++++-----------
 include/linux/relay.h                              |  2 +-
 kernel/relay.c                                     |  2 +-
 kernel/trace/blktrace.c                            |  2 +-
 kernel/trace/trace.c                               |  2 +-
 kernel/trace/trace.h                               |  2 +-
 lib/fault-inject.c                                 |  8 ++--
 mm/failslab.c                                      |  2 +-
 mm/page_alloc.c                                    |  2 +-
 25 files changed, 82 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt
index 742cc06e138f..9281a95d689f 100644
--- a/Documentation/filesystems/debugfs.txt
+++ b/Documentation/filesystems/debugfs.txt
@@ -35,7 +35,7 @@ described below will work.
 
 The most general way to create a file within a debugfs directory is with:
 
-    struct dentry *debugfs_create_file(const char *name, mode_t mode,
+    struct dentry *debugfs_create_file(const char *name, umode_t mode,
 				       struct dentry *parent, void *data,
 				       const struct file_operations *fops);
 
@@ -53,13 +53,13 @@ actually necessary; the debugfs code provides a number of helper functions
 for simple situations.  Files containing a single integer value can be
 created with any of:
 
-    struct dentry *debugfs_create_u8(const char *name, mode_t mode,
+    struct dentry *debugfs_create_u8(const char *name, umode_t mode,
 				     struct dentry *parent, u8 *value);
-    struct dentry *debugfs_create_u16(const char *name, mode_t mode,
+    struct dentry *debugfs_create_u16(const char *name, umode_t mode,
 				      struct dentry *parent, u16 *value);
-    struct dentry *debugfs_create_u32(const char *name, mode_t mode,
+    struct dentry *debugfs_create_u32(const char *name, umode_t mode,
 				      struct dentry *parent, u32 *value);
-    struct dentry *debugfs_create_u64(const char *name, mode_t mode,
+    struct dentry *debugfs_create_u64(const char *name, umode_t mode,
 				      struct dentry *parent, u64 *value);
 
 These files support both reading and writing the given value; if a specific
@@ -67,13 +67,13 @@ file should not be written to, simply set the mode bits accordingly.  The
 values in these files are in decimal; if hexadecimal is more appropriate,
 the following functions can be used instead:
 
-    struct dentry *debugfs_create_x8(const char *name, mode_t mode,
+    struct dentry *debugfs_create_x8(const char *name, umode_t mode,
 				     struct dentry *parent, u8 *value);
-    struct dentry *debugfs_create_x16(const char *name, mode_t mode,
+    struct dentry *debugfs_create_x16(const char *name, umode_t mode,
 				      struct dentry *parent, u16 *value);
-    struct dentry *debugfs_create_x32(const char *name, mode_t mode,
+    struct dentry *debugfs_create_x32(const char *name, umode_t mode,
 				      struct dentry *parent, u32 *value);
-    struct dentry *debugfs_create_x64(const char *name, mode_t mode,
+    struct dentry *debugfs_create_x64(const char *name, umode_t mode,
 				      struct dentry *parent, u64 *value);
 
 These functions are useful as long as the developer knows the size of the
@@ -81,7 +81,7 @@ value to be exported.  Some types can have different widths on different
 architectures, though, complicating the situation somewhat.  There is a
 function meant to help out in one special case:
 
-    struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
+    struct dentry *debugfs_create_size_t(const char *name, umode_t mode,
 				         struct dentry *parent, 
 					 size_t *value);
 
@@ -90,7 +90,7 @@ a variable of type size_t.
 
 Boolean values can be placed in debugfs with:
 
-    struct dentry *debugfs_create_bool(const char *name, mode_t mode,
+    struct dentry *debugfs_create_bool(const char *name, umode_t mode,
 				       struct dentry *parent, u32 *value);
 
 A read on the resulting file will yield either Y (for non-zero values) or
@@ -104,7 +104,7 @@ Finally, a block of arbitrary binary data can be exported with:
 	unsigned long size;
     };
 
-    struct dentry *debugfs_create_blob(const char *name, mode_t mode,
+    struct dentry *debugfs_create_blob(const char *name, umode_t mode,
 				       struct dentry *parent,
 				       struct debugfs_blob_wrapper *blob);
 
diff --git a/arch/arm/mach-msm/smd_debug.c b/arch/arm/mach-msm/smd_debug.c
index 8736afff82f3..0c56a5aaf588 100644
--- a/arch/arm/mach-msm/smd_debug.c
+++ b/arch/arm/mach-msm/smd_debug.c
@@ -215,7 +215,7 @@ static const struct file_operations debug_ops = {
 	.llseek = default_llseek,
 };
 
-static void debug_create(const char *name, mode_t mode,
+static void debug_create(const char *name, umode_t mode,
 			 struct dentry *dent,
 			 int (*fill)(char *buf, int max))
 {
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 18124b75a7ab..9d88db1f55d0 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -73,7 +73,7 @@ typedef struct debug_info {
 	struct dentry* debugfs_entries[DEBUG_MAX_VIEWS];
 	struct debug_view* views[DEBUG_MAX_VIEWS];	
 	char name[DEBUG_MAX_NAME_LEN];
-	mode_t mode;
+	umode_t mode;
 } debug_info_t;
 
 typedef int (debug_header_proc_t) (debug_info_t* id,
@@ -124,7 +124,7 @@ debug_info_t *debug_register(const char *name, int pages, int nr_areas,
                              int buf_size);
 
 debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas,
-				  int buf_size, mode_t mode, uid_t uid,
+				  int buf_size, umode_t mode, uid_t uid,
 				  gid_t gid);
 
 void debug_unregister(debug_info_t* id);
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 5ad6bc078bfd..6848828b962e 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -74,7 +74,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf,
 static int debug_open(struct inode *inode, struct file *file);
 static int debug_close(struct inode *inode, struct file *file);
 static debug_info_t *debug_info_create(const char *name, int pages_per_area,
-			int nr_areas, int buf_size, mode_t mode);
+			int nr_areas, int buf_size, umode_t mode);
 static void debug_info_get(debug_info_t *);
 static void debug_info_put(debug_info_t *);
 static int debug_prolog_level_fn(debug_info_t * id,
@@ -330,7 +330,7 @@ debug_info_free(debug_info_t* db_info){
 
 static debug_info_t*
 debug_info_create(const char *name, int pages_per_area, int nr_areas,
-		  int buf_size, mode_t mode)
+		  int buf_size, umode_t mode)
 {
 	debug_info_t* rc;
 
@@ -688,7 +688,7 @@ debug_close(struct inode *inode, struct file *file)
  */
 
 debug_info_t *debug_register_mode(const char *name, int pages_per_area,
-				  int nr_areas, int buf_size, mode_t mode,
+				  int nr_areas, int buf_size, umode_t mode,
 				  uid_t uid, gid_t gid)
 {
 	debug_info_t *rc = NULL;
@@ -1090,7 +1090,7 @@ debug_register_view(debug_info_t * id, struct debug_view *view)
 	int rc = 0;
 	int i;
 	unsigned long flags;
-	mode_t mode;
+	umode_t mode;
 	struct dentry *pde;
 
 	if (!id)
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index 7c0fedd98ea0..ef1db1900d86 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -109,7 +109,7 @@ static const struct file_operations u32_array_fops = {
 	.llseek = no_llseek,
 };
 
-struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode,
+struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode,
 					    struct dentry *parent,
 					    u32 *array, unsigned elements)
 {
diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h
index e28132084832..78d25499be5b 100644
--- a/arch/x86/xen/debugfs.h
+++ b/arch/x86/xen/debugfs.h
@@ -3,7 +3,7 @@
 
 struct dentry * __init xen_init_debugfs(void);
 
-struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode,
+struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode,
 					    struct dentry *parent,
 					    u32 *array, unsigned elements);
 
diff --git a/drivers/acpi/ec_sys.c b/drivers/acpi/ec_sys.c
index 6c47ae9793a7..b258cab9061c 100644
--- a/drivers/acpi/ec_sys.c
+++ b/drivers/acpi/ec_sys.c
@@ -105,7 +105,7 @@ int acpi_ec_add_debugfs(struct acpi_ec *ec, unsigned int ec_device_count)
 {
 	struct dentry *dev_dir;
 	char name[64];
-	mode_t mode = 0400;
+	umode_t mode = 0400;
 
 	if (ec_device_count == 0) {
 		acpi_ec_debugfs_dir = debugfs_create_dir("ec", NULL);
diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c
index b038c4a9468b..e99bdc18002d 100644
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c
@@ -2949,7 +2949,7 @@ static void mmc_test_free_dbgfs_file(struct mmc_card *card)
 }
 
 static int __mmc_test_register_dbgfs_file(struct mmc_card *card,
-	const char *name, mode_t mode, const struct file_operations *fops)
+	const char *name, umode_t mode, const struct file_operations *fops)
 {
 	struct dentry *file = NULL;
 	struct mmc_test_dbgfs_file *df;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index da9072bfca8b..f5a24d99ef4f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -2000,7 +2000,7 @@ static const struct file_operations interfaces_proc_fops = {
  */
 struct cxgb4vf_debugfs_entry {
 	const char *name;		/* name of debugfs node */
-	mode_t mode;			/* file system mode */
+	umode_t mode;			/* file system mode */
 	const struct file_operations *fops;
 };
 
diff --git a/drivers/net/wireless/ath/carl9170/debug.c b/drivers/net/wireless/ath/carl9170/debug.c
index de57f90e1d5f..3c164226687f 100644
--- a/drivers/net/wireless/ath/carl9170/debug.c
+++ b/drivers/net/wireless/ath/carl9170/debug.c
@@ -56,7 +56,7 @@ static int carl9170_debugfs_open(struct inode *inode, struct file *file)
 
 struct carl9170_debugfs_fops {
 	unsigned int read_bufsize;
-	mode_t attr;
+	umode_t attr;
 	char *(*read)(struct ar9170 *ar, char *buf, size_t bufsize,
 		      ssize_t *len);
 	ssize_t (*write)(struct ar9170 *aru, const char *buf, size_t size);
diff --git a/drivers/net/wireless/libertas/debugfs.c b/drivers/net/wireless/libertas/debugfs.c
index d8d8f0d0899f..c192671610fc 100644
--- a/drivers/net/wireless/libertas/debugfs.c
+++ b/drivers/net/wireless/libertas/debugfs.c
@@ -704,7 +704,7 @@ out_unlock:
 
 struct lbs_debugfs_files {
 	const char *name;
-	int perm;
+	umode_t perm;
 	struct file_operations fops;
 };
 
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 65894f05a801..42986d7bcf9d 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1073,7 +1073,7 @@ static const struct file_operations dasd_stats_global_fops = {
 static void dasd_profile_init(struct dasd_profile *profile,
 			      struct dentry *base_dentry)
 {
-	mode_t mode;
+	umode_t mode;
 	struct dentry *pde;
 
 	if (!base_dentry)
@@ -1112,7 +1112,7 @@ static void dasd_statistics_removeroot(void)
 
 static void dasd_statistics_createroot(void)
 {
-	mode_t mode;
+	umode_t mode;
 	struct dentry *pde;
 
 	dasd_debugfs_root_entry = NULL;
diff --git a/drivers/scsi/bfa/bfad_debugfs.c b/drivers/scsi/bfa/bfad_debugfs.c
index dee1a094c2c2..caca9b7c8309 100644
--- a/drivers/scsi/bfa/bfad_debugfs.c
+++ b/drivers/scsi/bfa/bfad_debugfs.c
@@ -472,7 +472,7 @@ static const struct file_operations bfad_debugfs_op_regwr = {
 
 struct bfad_debugfs_entry {
 	const char *name;
-	mode_t	mode;
+	umode_t	mode;
 	const struct file_operations *fops;
 };
 
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 90f76575c056..d5016606fb27 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -95,7 +95,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n");
  * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
  * code.
  */
-struct dentry *debugfs_create_u8(const char *name, mode_t mode,
+struct dentry *debugfs_create_u8(const char *name, umode_t mode,
 				 struct dentry *parent, u8 *value)
 {
 	/* if there are no write bits set, make read only */
@@ -147,7 +147,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n");
  * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
  * code.
  */
-struct dentry *debugfs_create_u16(const char *name, mode_t mode,
+struct dentry *debugfs_create_u16(const char *name, umode_t mode,
 				  struct dentry *parent, u16 *value)
 {
 	/* if there are no write bits set, make read only */
@@ -199,7 +199,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n");
  * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
  * code.
  */
-struct dentry *debugfs_create_u32(const char *name, mode_t mode,
+struct dentry *debugfs_create_u32(const char *name, umode_t mode,
 				 struct dentry *parent, u32 *value)
 {
 	/* if there are no write bits set, make read only */
@@ -252,7 +252,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
  * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
  * code.
  */
-struct dentry *debugfs_create_u64(const char *name, mode_t mode,
+struct dentry *debugfs_create_u64(const char *name, umode_t mode,
 				 struct dentry *parent, u64 *value)
 {
 	/* if there are no write bits set, make read only */
@@ -298,7 +298,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n
  * @value: a pointer to the variable that the file should read to and write
  *         from.
  */
-struct dentry *debugfs_create_x8(const char *name, mode_t mode,
+struct dentry *debugfs_create_x8(const char *name, umode_t mode,
 				 struct dentry *parent, u8 *value)
 {
 	/* if there are no write bits set, make read only */
@@ -322,7 +322,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x8);
  * @value: a pointer to the variable that the file should read to and write
  *         from.
  */
-struct dentry *debugfs_create_x16(const char *name, mode_t mode,
+struct dentry *debugfs_create_x16(const char *name, umode_t mode,
 				 struct dentry *parent, u16 *value)
 {
 	/* if there are no write bits set, make read only */
@@ -346,7 +346,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x16);
  * @value: a pointer to the variable that the file should read to and write
  *         from.
  */
-struct dentry *debugfs_create_x32(const char *name, mode_t mode,
+struct dentry *debugfs_create_x32(const char *name, umode_t mode,
 				 struct dentry *parent, u32 *value)
 {
 	/* if there are no write bits set, make read only */
@@ -370,7 +370,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x32);
  * @value: a pointer to the variable that the file should read to and write
  *         from.
  */
-struct dentry *debugfs_create_x64(const char *name, mode_t mode,
+struct dentry *debugfs_create_x64(const char *name, umode_t mode,
 				 struct dentry *parent, u64 *value)
 {
 	return debugfs_create_file(name, mode, parent, value, &fops_x64);
@@ -401,7 +401,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_size_t, debugfs_size_t_get, debugfs_size_t_set,
  * @value: a pointer to the variable that the file should read to and write
  *         from.
  */
-struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
+struct dentry *debugfs_create_size_t(const char *name, umode_t mode,
 				     struct dentry *parent, size_t *value)
 {
 	return debugfs_create_file(name, mode, parent, value, &fops_size_t);
@@ -473,7 +473,7 @@ static const struct file_operations fops_bool = {
  * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
  * code.
  */
-struct dentry *debugfs_create_bool(const char *name, mode_t mode,
+struct dentry *debugfs_create_bool(const char *name, umode_t mode,
 				   struct dentry *parent, u32 *value)
 {
 	return debugfs_create_file(name, mode, parent, value, &fops_bool);
@@ -518,7 +518,7 @@ static const struct file_operations fops_blob = {
  * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
  * code.
  */
-struct dentry *debugfs_create_blob(const char *name, mode_t mode,
+struct dentry *debugfs_create_blob(const char *name, umode_t mode,
 				   struct dentry *parent,
 				   struct debugfs_blob_wrapper *blob)
 {
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index c9dc08d0c100..956d5ddddf6e 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -30,7 +30,7 @@ static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
 static bool debugfs_registered;
 
-static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev,
+static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev_t dev,
 				       void *data, const struct file_operations *fops)
 
 {
@@ -69,7 +69,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
 
 /* SMP-safe */
 static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
-			 int mode, dev_t dev, void *data,
+			 umode_t mode, dev_t dev, void *data,
 			 const struct file_operations *fops)
 {
 	struct inode *inode;
@@ -87,7 +87,7 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
 	return error;
 }
 
-static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode,
+static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode,
 			 void *data, const struct file_operations *fops)
 {
 	int res;
@@ -101,14 +101,14 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode,
 	return res;
 }
 
-static int debugfs_link(struct inode *dir, struct dentry *dentry, int mode,
+static int debugfs_link(struct inode *dir, struct dentry *dentry, umode_t mode,
 			void *data, const struct file_operations *fops)
 {
 	mode = (mode & S_IALLUGO) | S_IFLNK;
 	return debugfs_mknod(dir, dentry, mode, 0, data, fops);
 }
 
-static int debugfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int debugfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 			  void *data, const struct file_operations *fops)
 {
 	int res;
@@ -146,7 +146,7 @@ static struct file_system_type debug_fs_type = {
 	.kill_sb =	kill_litter_super,
 };
 
-static int debugfs_create_by_name(const char *name, mode_t mode,
+static int debugfs_create_by_name(const char *name, umode_t mode,
 				  struct dentry *parent,
 				  struct dentry **dentry,
 				  void *data,
@@ -214,7 +214,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
  * If debugfs is not enabled in the kernel, the value -%ENODEV will be
  * returned.
  */
-struct dentry *debugfs_create_file(const char *name, mode_t mode,
+struct dentry *debugfs_create_file(const char *name, umode_t mode,
 				   struct dentry *parent, void *data,
 				   const struct file_operations *fops)
 {
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index dc45deb19e68..73ba81928bce 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -553,7 +553,7 @@ void o2net_debugfs_exit(void)
 
 int o2net_debugfs_init(void)
 {
-	mode_t mode = S_IFREG|S_IRUSR;
+	umode_t mode = S_IFREG|S_IRUSR;
 
 	o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL);
 	if (o2net_dentry)
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index e7d9b20ddc5b..d1ac841e8dc7 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -34,7 +34,7 @@ extern struct dentry *arch_debugfs_dir;
 extern const struct file_operations debugfs_file_operations;
 extern const struct inode_operations debugfs_link_operations;
 
-struct dentry *debugfs_create_file(const char *name, mode_t mode,
+struct dentry *debugfs_create_file(const char *name, umode_t mode,
 				   struct dentry *parent, void *data,
 				   const struct file_operations *fops);
 
@@ -49,28 +49,28 @@ void debugfs_remove_recursive(struct dentry *dentry);
 struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
                 struct dentry *new_dir, const char *new_name);
 
-struct dentry *debugfs_create_u8(const char *name, mode_t mode,
+struct dentry *debugfs_create_u8(const char *name, umode_t mode,
 				 struct dentry *parent, u8 *value);
-struct dentry *debugfs_create_u16(const char *name, mode_t mode,
+struct dentry *debugfs_create_u16(const char *name, umode_t mode,
 				  struct dentry *parent, u16 *value);
-struct dentry *debugfs_create_u32(const char *name, mode_t mode,
+struct dentry *debugfs_create_u32(const char *name, umode_t mode,
 				  struct dentry *parent, u32 *value);
-struct dentry *debugfs_create_u64(const char *name, mode_t mode,
+struct dentry *debugfs_create_u64(const char *name, umode_t mode,
 				  struct dentry *parent, u64 *value);
-struct dentry *debugfs_create_x8(const char *name, mode_t mode,
+struct dentry *debugfs_create_x8(const char *name, umode_t mode,
 				 struct dentry *parent, u8 *value);
-struct dentry *debugfs_create_x16(const char *name, mode_t mode,
+struct dentry *debugfs_create_x16(const char *name, umode_t mode,
 				  struct dentry *parent, u16 *value);
-struct dentry *debugfs_create_x32(const char *name, mode_t mode,
+struct dentry *debugfs_create_x32(const char *name, umode_t mode,
 				  struct dentry *parent, u32 *value);
-struct dentry *debugfs_create_x64(const char *name, mode_t mode,
+struct dentry *debugfs_create_x64(const char *name, umode_t mode,
 				  struct dentry *parent, u64 *value);
-struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
+struct dentry *debugfs_create_size_t(const char *name, umode_t mode,
 				     struct dentry *parent, size_t *value);
-struct dentry *debugfs_create_bool(const char *name, mode_t mode,
+struct dentry *debugfs_create_bool(const char *name, umode_t mode,
 				  struct dentry *parent, u32 *value);
 
-struct dentry *debugfs_create_blob(const char *name, mode_t mode,
+struct dentry *debugfs_create_blob(const char *name, umode_t mode,
 				  struct dentry *parent,
 				  struct debugfs_blob_wrapper *blob);
 
@@ -86,7 +86,7 @@ bool debugfs_initialized(void);
  * want to duplicate the design decision mistakes of procfs and devfs again.
  */
 
-static inline struct dentry *debugfs_create_file(const char *name, mode_t mode,
+static inline struct dentry *debugfs_create_file(const char *name, umode_t mode,
 					struct dentry *parent, void *data,
 					const struct file_operations *fops)
 {
@@ -118,70 +118,70 @@ static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentr
 	return ERR_PTR(-ENODEV);
 }
 
-static inline struct dentry *debugfs_create_u8(const char *name, mode_t mode,
+static inline struct dentry *debugfs_create_u8(const char *name, umode_t mode,
 					       struct dentry *parent,
 					       u8 *value)
 {
 	return ERR_PTR(-ENODEV);
 }
 
-static inline struct dentry *debugfs_create_u16(const char *name, mode_t mode,
+static inline struct dentry *debugfs_create_u16(const char *name, umode_t mode,
 						struct dentry *parent,
 						u16 *value)
 {
 	return ERR_PTR(-ENODEV);
 }
 
-static inline struct dentry *debugfs_create_u32(const char *name, mode_t mode,
+static inline struct dentry *debugfs_create_u32(const char *name, umode_t mode,
 						struct dentry *parent,
 						u32 *value)
 {
 	return ERR_PTR(-ENODEV);
 }
 
-static inline struct dentry *debugfs_create_u64(const char *name, mode_t mode,
+static inline struct dentry *debugfs_create_u64(const char *name, umode_t mode,
 						struct dentry *parent,
 						u64 *value)
 {
 	return ERR_PTR(-ENODEV);
 }
 
-static inline struct dentry *debugfs_create_x8(const char *name, mode_t mode,
+static inline struct dentry *debugfs_create_x8(const char *name, umode_t mode,
 					       struct dentry *parent,
 					       u8 *value)
 {
 	return ERR_PTR(-ENODEV);
 }
 
-static inline struct dentry *debugfs_create_x16(const char *name, mode_t mode,
+static inline struct dentry *debugfs_create_x16(const char *name, umode_t mode,
 						struct dentry *parent,
 						u16 *value)
 {
 	return ERR_PTR(-ENODEV);
 }
 
-static inline struct dentry *debugfs_create_x32(const char *name, mode_t mode,
+static inline struct dentry *debugfs_create_x32(const char *name, umode_t mode,
 						struct dentry *parent,
 						u32 *value)
 {
 	return ERR_PTR(-ENODEV);
 }
 
-static inline struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
+static inline struct dentry *debugfs_create_size_t(const char *name, umode_t mode,
 				     struct dentry *parent,
 				     size_t *value)
 {
 	return ERR_PTR(-ENODEV);
 }
 
-static inline struct dentry *debugfs_create_bool(const char *name, mode_t mode,
+static inline struct dentry *debugfs_create_bool(const char *name, umode_t mode,
 						 struct dentry *parent,
 						 u32 *value)
 {
 	return ERR_PTR(-ENODEV);
 }
 
-static inline struct dentry *debugfs_create_blob(const char *name, mode_t mode,
+static inline struct dentry *debugfs_create_blob(const char *name, umode_t mode,
 				  struct dentry *parent,
 				  struct debugfs_blob_wrapper *blob)
 {
diff --git a/include/linux/relay.h b/include/linux/relay.h
index 14a86bc7102b..a822fd71fd64 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -144,7 +144,7 @@ struct rchan_callbacks
 	 */
 	struct dentry *(*create_buf_file)(const char *filename,
 					  struct dentry *parent,
-					  int mode,
+					  umode_t mode,
 					  struct rchan_buf *buf,
 					  int *is_global);
 
diff --git a/kernel/relay.c b/kernel/relay.c
index 226fade4d727..4335e1d7ee2d 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -302,7 +302,7 @@ static void buf_unmapped_default_callback(struct rchan_buf *buf,
  */
 static struct dentry *create_buf_file_default_callback(const char *filename,
 						       struct dentry *parent,
-						       int mode,
+						       umode_t mode,
 						       struct rchan_buf *buf,
 						       int *is_global)
 {
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 16fc34a0806f..cdea7b56b0c9 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -402,7 +402,7 @@ static int blk_remove_buf_file_callback(struct dentry *dentry)
 
 static struct dentry *blk_create_buf_file_callback(const char *filename,
 						   struct dentry *parent,
-						   int mode,
+						   umode_t mode,
 						   struct rchan_buf *buf,
 						   int *is_global)
 {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f2bd275bb60f..660b069a0f99 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4385,7 +4385,7 @@ static const struct file_operations trace_options_core_fops = {
 };
 
 struct dentry *trace_create_file(const char *name,
-				 mode_t mode,
+				 umode_t mode,
 				 struct dentry *parent,
 				 void *data,
 				 const struct file_operations *fops)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 092e1f8d18dc..0154c0b850de 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -312,7 +312,7 @@ void tracing_reset_current(int cpu);
 void tracing_reset_current_online_cpus(void);
 int tracing_open_generic(struct inode *inode, struct file *filp);
 struct dentry *trace_create_file(const char *name,
-				 mode_t mode,
+				 umode_t mode,
 				 struct dentry *parent,
 				 void *data,
 				 const struct file_operations *fops);
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 4f7554025e30..b4801f51b607 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -149,7 +149,7 @@ static int debugfs_ul_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_ul, debugfs_ul_get, debugfs_ul_set, "%llu\n");
 
-static struct dentry *debugfs_create_ul(const char *name, mode_t mode,
+static struct dentry *debugfs_create_ul(const char *name, umode_t mode,
 				struct dentry *parent, unsigned long *value)
 {
 	return debugfs_create_file(name, mode, parent, value, &fops_ul);
@@ -169,7 +169,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_stacktrace_depth, debugfs_ul_get,
 			debugfs_stacktrace_depth_set, "%llu\n");
 
 static struct dentry *debugfs_create_stacktrace_depth(
-	const char *name, mode_t mode,
+	const char *name, umode_t mode,
 	struct dentry *parent, unsigned long *value)
 {
 	return debugfs_create_file(name, mode, parent, value,
@@ -193,7 +193,7 @@ static int debugfs_atomic_t_get(void *data, u64 *val)
 DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get,
 			debugfs_atomic_t_set, "%lld\n");
 
-static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode,
+static struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode,
 				struct dentry *parent, atomic_t *value)
 {
 	return debugfs_create_file(name, mode, parent, value, &fops_atomic_t);
@@ -202,7 +202,7 @@ static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode,
 struct dentry *fault_create_debugfs_attr(const char *name,
 			struct dentry *parent, struct fault_attr *attr)
 {
-	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
 	struct dentry *dir;
 
 	dir = debugfs_create_dir(name, parent);
diff --git a/mm/failslab.c b/mm/failslab.c
index 0dd7b8fec71c..fefaabaab76d 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -35,7 +35,7 @@ __setup("failslab=", setup_failslab);
 static int __init failslab_debugfs_init(void)
 {
 	struct dentry *dir;
-	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
 
 	dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr);
 	if (IS_ERR(dir))
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2b8ba3aebf6e..99930ec7d140 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1408,7 +1408,7 @@ static int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 
 static int __init fail_page_alloc_debugfs(void)
 {
-	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
 	struct dentry *dir;
 
 	dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
-- 
cgit v1.2.3


From 439475140bed762c04567c325d48409862341ae4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 25 Jul 2011 00:05:26 -0400
Subject: configfs: convert to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/configfs/configfs.txt | 2 +-
 fs/configfs/configfs_internal.h                 | 4 ++--
 fs/configfs/inode.c                             | 6 +++---
 include/linux/configfs.h                        | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt
index dd57bb6bb390..b40fec9d3f53 100644
--- a/Documentation/filesystems/configfs/configfs.txt
+++ b/Documentation/filesystems/configfs/configfs.txt
@@ -192,7 +192,7 @@ attribute value uses the store_attribute() method.
 	struct configfs_attribute {
 		char                    *ca_name;
 		struct module           *ca_owner;
-		mode_t                  ca_mode;
+		umode_t                  ca_mode;
 	};
 
 When a config_item wants an attribute to appear as a file in the item's
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 82bda8fdfc1c..ede857d20a04 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -63,8 +63,8 @@ extern struct kmem_cache *configfs_dir_cachep;
 
 extern int configfs_is_root(struct config_item *item);
 
-extern struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent *);
-extern int configfs_create(struct dentry *, int mode, int (*init)(struct inode *));
+extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *);
+extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *));
 extern int configfs_inode_init(void);
 extern void configfs_inode_exit(void);
 
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 9d8715c45f25..3ee36d418863 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -116,7 +116,7 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
 	return error;
 }
 
-static inline void set_default_inode_attr(struct inode * inode, mode_t mode)
+static inline void set_default_inode_attr(struct inode * inode, umode_t mode)
 {
 	inode->i_mode = mode;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -132,7 +132,7 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
 	inode->i_ctime = iattr->ia_ctime;
 }
 
-struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
+struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent * sd)
 {
 	struct inode * inode = new_inode(configfs_sb);
 	if (inode) {
@@ -185,7 +185,7 @@ static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
 
 #endif /* CONFIG_LOCKDEP */
 
-int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
+int configfs_create(struct dentry * dentry, umode_t mode, int (*init)(struct inode *))
 {
 	int error = 0;
 	struct inode * inode = NULL;
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 3081c58d696e..34025df61829 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -124,7 +124,7 @@ extern struct config_item *config_group_find_item(struct config_group *,
 struct configfs_attribute {
 	const char		*ca_name;
 	struct module 		*ca_owner;
-	mode_t			ca_mode;
+	umode_t			ca_mode;
 };
 
 /*
-- 
cgit v1.2.3


From 69b34f3ab30836bb736b5108f40bf76de9f656f3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 02:46:57 -0400
Subject: ext3: propagate umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext3/ialloc.c        | 2 +-
 include/linux/ext3_fs.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 5c866e06e7ab..92cc86dfa23d 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -371,7 +371,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
  * group to find a free inode.
  */
 struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
-			     const struct qstr *qstr, int mode)
+			     const struct qstr *qstr, umode_t mode)
 {
 	struct super_block *sb;
 	struct buffer_head *bitmap_bh = NULL;
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index dec99116a0e4..f957085d40ed 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -884,7 +884,7 @@ extern int ext3fs_dirhash(const char *name, int len, struct
 
 /* ialloc.c */
 extern struct inode * ext3_new_inode (handle_t *, struct inode *,
-				      const struct qstr *, int);
+				      const struct qstr *, umode_t);
 extern void ext3_free_inode (handle_t *, struct inode *);
 extern struct inode * ext3_orphan_get (struct super_block *, unsigned long);
 extern unsigned long ext3_count_free_inodes (struct super_block *);
-- 
cgit v1.2.3


From 8e0718924e7d7eaf6104e54aeaeda477570e1e06 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 02:50:53 -0400
Subject: reiserfs: propagate umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/reiserfs/inode.c         | 2 +-
 fs/reiserfs/namei.c         | 2 +-
 include/linux/reiserfs_fs.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 950f13af0951..9e8cd5acd79c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1766,7 +1766,7 @@ static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct i
    for the fresh inode.  This can only be done outside a transaction, so
    if we return non-zero, we also end the transaction.  */
 int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
-		       struct inode *dir, int mode, const char *symname,
+		       struct inode *dir, umode_t mode, const char *symname,
 		       /* 0 for regular, EMTRY_DIR_SIZE for dirs,
 		          strlen (symname) for symlinks) */
 		       loff_t i_size, struct dentry *dentry,
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index a8614bd7cc8d..146378865239 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -559,7 +559,7 @@ static int drop_new_inode(struct inode *inode)
 ** outside of a transaction, so we had to pull some bits of
 ** reiserfs_new_inode out into this func.
 */
-static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
+static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode)
 {
 	/* Make inode invalid - just in case we are going to drop it before
 	 * the initialization happens */
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 96d465f8d3e6..26be28fd7b76 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -2056,7 +2056,7 @@ struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key);
 
 struct reiserfs_security_handle;
 int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
-		       struct inode *dir, int mode,
+		       struct inode *dir, umode_t mode,
 		       const char *symname, loff_t i_size,
 		       struct dentry *dentry, struct inode *inode,
 		       struct reiserfs_security_handle *security);
-- 
cgit v1.2.3


From a5e7ed3287e45f2eafbcf9e7e6fdc5a0191acf40 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 01:55:55 -0400
Subject: cgroup: propagate mode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/cgroup.h |  2 +-
 kernel/cgroup.c        | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1b7f9d525013..a17becc36ca1 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -319,7 +319,7 @@ struct cftype {
 	 * If not 0, file mode is set to this value, otherwise it will
 	 * be figured out automatically
 	 */
-	mode_t mode;
+	umode_t mode;
 
 	/*
 	 * If non-zero, defines the maximum length of string that can
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b37a0ea55114..86ebacfd9431 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -775,7 +775,7 @@ static struct backing_dev_info cgroup_backing_dev_info = {
 static int alloc_css_id(struct cgroup_subsys *ss,
 			struct cgroup *parent, struct cgroup *child);
 
-static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
+static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
 {
 	struct inode *inode = new_inode(sb);
 
@@ -2585,7 +2585,7 @@ static inline struct cftype *__file_cft(struct file *file)
 	return __d_cft(file->f_dentry);
 }
 
-static int cgroup_create_file(struct dentry *dentry, mode_t mode,
+static int cgroup_create_file(struct dentry *dentry, umode_t mode,
 				struct super_block *sb)
 {
 	struct inode *inode;
@@ -2626,7 +2626,7 @@ static int cgroup_create_file(struct dentry *dentry, mode_t mode,
  * @mode: mode to set on new directory.
  */
 static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
-				mode_t mode)
+				umode_t mode)
 {
 	struct dentry *parent;
 	int error = 0;
@@ -2653,9 +2653,9 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
  * returns S_IRUGO if it has only a read handler
  * returns S_IWUSR if it has only a write hander
  */
-static mode_t cgroup_file_mode(const struct cftype *cft)
+static umode_t cgroup_file_mode(const struct cftype *cft)
 {
-	mode_t mode = 0;
+	umode_t mode = 0;
 
 	if (cft->mode)
 		return cft->mode;
@@ -2678,7 +2678,7 @@ int cgroup_add_file(struct cgroup *cgrp,
 	struct dentry *dir = cgrp->dentry;
 	struct dentry *dentry;
 	int error;
-	mode_t mode;
+	umode_t mode;
 
 	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
 	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
@@ -3752,7 +3752,7 @@ static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
  * Must be called with the mutex on the parent inode held
  */
 static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
-			     mode_t mode)
+			     umode_t mode)
 {
 	struct cgroup *cgrp;
 	struct cgroupfs_root *root = parent->root;
-- 
cgit v1.2.3


From 64f1426f3c4f8dde9ac9bf3f3b19b88d17f2bae6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 25 Jul 2011 00:35:13 -0400
Subject: sunrpc: propagate umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/sunrpc/cache.h       | 2 +-
 include/linux/sunrpc/rpc_pipe_fs.h | 2 +-
 net/sunrpc/cache.c                 | 2 +-
 net/sunrpc/rpc_pipe.c              | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 5efd8cef389e..57531f8e5956 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -203,7 +203,7 @@ extern void cache_unregister(struct cache_detail *cd);
 extern void cache_unregister_net(struct cache_detail *cd, struct net *net);
 
 extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *,
-					mode_t, struct cache_detail *);
+					umode_t, struct cache_detail *);
 extern void sunrpc_cache_unregister_pipefs(struct cache_detail *);
 
 extern void qword_add(char **bpp, int *lp, char *str);
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index e4ea43058d8f..2bb03d77375a 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -55,7 +55,7 @@ extern int rpc_remove_client_dir(struct dentry *);
 struct cache_detail;
 extern struct dentry *rpc_create_cache_dir(struct dentry *,
 					   struct qstr *,
-					   mode_t umode,
+					   umode_t umode,
 					   struct cache_detail *);
 extern void rpc_remove_cache_dir(struct dentry *);
 
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 72ad836e4fe0..03b56bc3b659 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1778,7 +1778,7 @@ const struct file_operations cache_flush_operations_pipefs = {
 };
 
 int sunrpc_cache_register_pipefs(struct dentry *parent,
-				 const char *name, mode_t umode,
+				 const char *name, umode_t umode,
 				 struct cache_detail *cd)
 {
 	struct qstr q;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 60564bcb8067..63a7a7add21e 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -953,7 +953,7 @@ static void rpc_cachedir_depopulate(struct dentry *dentry)
 }
 
 struct dentry *rpc_create_cache_dir(struct dentry *parent, struct qstr *name,
-				    mode_t umode, struct cache_detail *cd)
+				    umode_t umode, struct cache_detail *cd)
 {
 	return rpc_mkdir_populate(parent, name, umode, NULL,
 			rpc_cachedir_populate, cd);
-- 
cgit v1.2.3


From 09208d150b5cda009b666238a7102cb45ecec2ee Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 03:15:03 -0400
Subject: shmem, ramfs: propagate umode_t, open-coded S_ISREG

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/shmem_fs.h | 2 +-
 mm/shmem.c               | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 9291ac3cc627..e4c711c6f321 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -30,7 +30,7 @@ struct shmem_sb_info {
 	spinlock_t stat_lock;	    /* Serialize shmem_sb_info changes */
 	uid_t uid;		    /* Mount uid for root directory */
 	gid_t gid;		    /* Mount gid for root directory */
-	mode_t mode;		    /* Mount mode for root directory */
+	umode_t mode;		    /* Mount mode for root directory */
 	struct mempolicy *mpol;     /* default memory policy for mappings */
 };
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 4000f370948c..86a19efc36fb 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1092,7 +1092,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 }
 
 static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
-				     int mode, dev_t dev, unsigned long flags)
+				     umode_t mode, dev_t dev, unsigned long flags)
 {
 	struct inode *inode;
 	struct shmem_inode_info *info;
@@ -2128,7 +2128,7 @@ static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	if (sbinfo->max_inodes != shmem_default_max_inodes())
 		seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
 	if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
-		seq_printf(seq, ",mode=%03o", sbinfo->mode);
+		seq_printf(seq, ",mode=%03ho", sbinfo->mode);
 	if (sbinfo->uid != 0)
 		seq_printf(seq, ",uid=%u", sbinfo->uid);
 	if (sbinfo->gid != 0)
@@ -2239,7 +2239,7 @@ static void shmem_destroy_callback(struct rcu_head *head)
 
 static void shmem_destroy_inode(struct inode *inode)
 {
-	if ((inode->i_mode & S_IFMT) == S_IFREG)
+	if (S_ISREG(inode->i_mode))
 		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
 	call_rcu(&inode->i_rcu, shmem_destroy_callback);
 }
-- 
cgit v1.2.3


From 632861f05a8e5878a267d173000880ceb608b56e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 03:16:55 -0400
Subject: pohmelfs: propagate umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/staging/pohmelfs/dir.c   | 7 ++++---
 drivers/staging/pohmelfs/netfs.h | 2 +-
 fs/ramfs/inode.c                 | 2 +-
 include/linux/ramfs.h            | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/pohmelfs/dir.c b/drivers/staging/pohmelfs/dir.c
index c33e959b6efe..2ee4491b7136 100644
--- a/drivers/staging/pohmelfs/dir.c
+++ b/drivers/staging/pohmelfs/dir.c
@@ -590,13 +590,13 @@ out:
  * during writeback for given inode.
  */
 struct pohmelfs_inode *pohmelfs_create_entry_local(struct pohmelfs_sb *psb,
-	struct pohmelfs_inode *parent, struct qstr *str, u64 start, int mode)
+	struct pohmelfs_inode *parent, struct qstr *str, u64 start, umode_t mode)
 {
 	struct pohmelfs_inode *npi;
 	int err = -ENOMEM;
 	struct netfs_inode_info info;
 
-	dprintk("%s: name: '%s', mode: %o, start: %llu.\n",
+	dprintk("%s: name: '%s', mode: %ho, start: %llu.\n",
 			__func__, str->name, mode, start);
 
 	info.mode = mode;
@@ -630,7 +630,8 @@ err_out_unlock:
 /*
  * Create local object and bind it to dentry.
  */
-static int pohmelfs_create_entry(struct inode *dir, struct dentry *dentry, u64 start, int mode)
+static int pohmelfs_create_entry(struct inode *dir, struct dentry *dentry,
+				 u64 start, umode_t mode)
 {
 	struct pohmelfs_sb *psb = POHMELFS_SB(dir->i_sb);
 	struct pohmelfs_inode *npi, *parent;
diff --git a/drivers/staging/pohmelfs/netfs.h b/drivers/staging/pohmelfs/netfs.h
index 985b6b755d5d..f26894f2a57f 100644
--- a/drivers/staging/pohmelfs/netfs.h
+++ b/drivers/staging/pohmelfs/netfs.h
@@ -776,7 +776,7 @@ struct pohmelfs_name *pohmelfs_search_hash(struct pohmelfs_inode *pi, u32 hash);
 void pohmelfs_inode_del_inode(struct pohmelfs_sb *psb, struct pohmelfs_inode *pi);
 
 struct pohmelfs_inode *pohmelfs_create_entry_local(struct pohmelfs_sb *psb,
-	struct pohmelfs_inode *parent, struct qstr *str, u64 start, int mode);
+	struct pohmelfs_inode *parent, struct qstr *str, u64 start, umode_t mode);
 
 int pohmelfs_write_create_inode(struct pohmelfs_inode *pi);
 
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 145680e9d581..aec766abe3af 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -52,7 +52,7 @@ static struct backing_dev_info ramfs_backing_dev_info = {
 };
 
 struct inode *ramfs_get_inode(struct super_block *sb,
-				const struct inode *dir, int mode, dev_t dev)
+				const struct inode *dir, umode_t mode, dev_t dev)
 {
 	struct inode * inode = new_inode(sb);
 
diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h
index 3a8f0c9b2933..5bf5500db83d 100644
--- a/include/linux/ramfs.h
+++ b/include/linux/ramfs.h
@@ -2,7 +2,7 @@
 #define _LINUX_RAMFS_H
 
 struct inode *ramfs_get_inode(struct super_block *sb, const struct inode *dir,
-	 int mode, dev_t dev);
+	 umode_t mode, dev_t dev);
 extern struct dentry *ramfs_mount(struct file_system_type *fs_type,
 	 int flags, const char *dev_name, void *data);
 
-- 
cgit v1.2.3


From 62bb109170375f82eb3c51c8080b72954f02dca7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 24 Jul 2011 23:20:18 -0400
Subject: switch inode_init_owner() to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c         | 2 +-
 include/linux/fs.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 24d02907e196..961355d00e38 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1646,7 +1646,7 @@ EXPORT_SYMBOL(init_special_inode);
  * @mode: mode of the new inode
  */
 void inode_init_owner(struct inode *inode, const struct inode *dir,
-			mode_t mode)
+			umode_t mode)
 {
 	inode->i_uid = current_fsuid();
 	if (dir && dir->i_mode & S_ISGID) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b89eef1d1752..9db9f6e6c98b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1534,7 +1534,7 @@ extern void dentry_unhash(struct dentry *dentry);
  * VFS file helper functions.
  */
 extern void inode_init_owner(struct inode *inode, const struct inode *dir,
-			mode_t mode);
+			umode_t mode);
 /*
  * VFS FS_IOC_FIEMAP helper definitions.
  */
-- 
cgit v1.2.3


From 8d334acdd2c1f57c7a574c6f24d08e4c95582ff0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 24 Jul 2011 23:21:59 -0400
Subject: switch is_sxid() to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/attr.c          | 4 ++--
 include/linux/fs.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/attr.c b/fs/attr.c
index 7ee7ba488313..95053ad8abcc 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -166,7 +166,7 @@ EXPORT_SYMBOL(setattr_copy);
 int notify_change(struct dentry * dentry, struct iattr * attr)
 {
 	struct inode *inode = dentry->d_inode;
-	mode_t mode = inode->i_mode;
+	umode_t mode = inode->i_mode;
 	int error;
 	struct timespec now;
 	unsigned int ia_valid = attr->ia_valid;
@@ -177,7 +177,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
 	}
 
 	if ((ia_valid & ATTR_MODE)) {
-		mode_t amode = attr->ia_mode;
+		umode_t amode = attr->ia_mode;
 		/* Flag setting protected by i_mutex */
 		if (is_sxid(amode))
 			inode->i_flags &= ~S_NOSEC;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9db9f6e6c98b..9d02fab420c6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2690,7 +2690,7 @@ int __init get_filesystem_list(char *buf);
 #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
 					    (flag & __FMODE_NONOTIFY)))
 
-static inline int is_sxid(mode_t mode)
+static inline int is_sxid(umode_t mode)
 {
 	return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
 }
-- 
cgit v1.2.3


From 36fcb589e752fa9c71f8a447db94126d102fd937 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 03:47:31 -0400
Subject: sysctl: use umode_t for table permissions

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/sysctl.h | 2 +-
 kernel/sched.c         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 703cfa33a3ca..bb9127dd814b 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -1038,7 +1038,7 @@ struct ctl_table
 	const char *procname;		/* Text ID for /proc/sys, or zero */
 	void *data;
 	int maxlen;
-	mode_t mode;
+	umode_t mode;
 	struct ctl_table *child;
 	struct ctl_table *parent;	/* Automatically set */
 	proc_handler *proc_handler;	/* Callback for text formatting */
diff --git a/kernel/sched.c b/kernel/sched.c
index d6b149ccf925..e64f45741e0e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6480,7 +6480,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep)
 static void
 set_table_entry(struct ctl_table *entry,
 		const char *procname, void *data, int maxlen,
-		mode_t mode, proc_handler *proc_handler)
+		umode_t mode, proc_handler *proc_handler)
 {
 	entry->procname = procname;
 	entry->data = data;
-- 
cgit v1.2.3


From 49f0a0767211d3076974e59a26f36b567cbe8621 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 04:22:01 -0400
Subject: switch sys_chmod()/sys_fchmod()/sys_fchmodat() to umode_t

SYSCALLx magic should take care of things, according to Linus...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c                | 6 +++---
 include/linux/syscalls.h | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/open.c b/fs/open.c
index 4ef8d868a448..834e3e1adeb9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -468,7 +468,7 @@ out_unlock:
 	return error;
 }
 
-SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
+SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
 {
 	struct file * file;
 	int err = -EBADF;
@@ -482,7 +482,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
 	return err;
 }
 
-SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
+SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode)
 {
 	struct path path;
 	int error;
@@ -495,7 +495,7 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
 	return error;
 }
 
-SYSCALL_DEFINE2(chmod, const char __user *, filename, mode_t, mode)
+SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
 {
 	return sys_fchmodat(AT_FDCWD, filename, mode);
 }
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b3c16d8a6383..e1a4b9b81cf2 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -483,8 +483,8 @@ asmlinkage long sys_symlink(const char __user *old, const char __user *new);
 asmlinkage long sys_unlink(const char __user *pathname);
 asmlinkage long sys_rename(const char __user *oldname,
 				const char __user *newname);
-asmlinkage long sys_chmod(const char __user *filename, mode_t mode);
-asmlinkage long sys_fchmod(unsigned int fd, mode_t mode);
+asmlinkage long sys_chmod(const char __user *filename, umode_t mode);
+asmlinkage long sys_fchmod(unsigned int fd, umode_t mode);
 
 asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg);
 #if BITS_PER_LONG == 32
@@ -769,7 +769,7 @@ asmlinkage long sys_futimesat(int dfd, const char __user *filename,
 			      struct timeval __user *utimes);
 asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode);
 asmlinkage long sys_fchmodat(int dfd, const char __user * filename,
-			     mode_t mode);
+			     umode_t mode);
 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
 			     gid_t group, int flag);
 asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
-- 
cgit v1.2.3


From 910f4ecef3f67714ebff69d0bc34313e48afaed2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 04:25:58 -0400
Subject: switch security_path_chmod() to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/security.h | 6 +++---
 security/apparmor/lsm.c  | 2 +-
 security/capability.c    | 2 +-
 security/security.c      | 2 +-
 security/tomoyo/tomoyo.c | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 0e5aeb86dfc4..f2c1fd7978a5 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1436,7 +1436,7 @@ struct security_operations {
 	int (*path_rename) (struct path *old_dir, struct dentry *old_dentry,
 			    struct path *new_dir, struct dentry *new_dentry);
 	int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt,
-			   mode_t mode);
+			   umode_t mode);
 	int (*path_chown) (struct path *path, uid_t uid, gid_t gid);
 	int (*path_chroot) (struct path *path);
 #endif
@@ -2867,7 +2867,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
 int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
 			 struct path *new_dir, struct dentry *new_dentry);
 int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
-			mode_t mode);
+			umode_t mode);
 int security_path_chown(struct path *path, uid_t uid, gid_t gid);
 int security_path_chroot(struct path *path);
 #else	/* CONFIG_SECURITY_PATH */
@@ -2921,7 +2921,7 @@ static inline int security_path_rename(struct path *old_dir,
 
 static inline int security_path_chmod(struct dentry *dentry,
 				      struct vfsmount *mnt,
-				      mode_t mode)
+				      umode_t mode)
 {
 	return 0;
 }
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 37832026e58a..afbe49822bed 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -345,7 +345,7 @@ static int apparmor_path_rename(struct path *old_dir, struct dentry *old_dentry,
 }
 
 static int apparmor_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
-			       mode_t mode)
+			       umode_t mode)
 {
 	if (!mediated_filesystem(dentry->d_inode))
 		return 0;
diff --git a/security/capability.c b/security/capability.c
index 9def035cd572..4f24bee49f26 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -280,7 +280,7 @@ static int cap_path_truncate(struct path *path)
 }
 
 static int cap_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
-			  mode_t mode)
+			  umode_t mode)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index 8cc0f0caa640..3635a13cd4ab 100644
--- a/security/security.c
+++ b/security/security.c
@@ -455,7 +455,7 @@ int security_path_truncate(struct path *path)
 }
 
 int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
-			mode_t mode)
+			umode_t mode)
 {
 	if (unlikely(IS_PRIVATE(dentry->d_inode)))
 		return 0;
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 4b327b691745..a4b840ea0078 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -360,7 +360,7 @@ static int tomoyo_file_ioctl(struct file *file, unsigned int cmd,
  * Returns 0 on success, negative value otherwise.
  */
 static int tomoyo_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
-			     mode_t mode)
+			     umode_t mode)
 {
 	struct path path = { mnt, dentry };
 	return tomoyo_path_number_perm(TOMOYO_TYPE_CHMOD, &path,
-- 
cgit v1.2.3


From 52ef0c042bf06f6aef382fade175075627beebc1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 04:30:04 -0400
Subject: switch securityfs_create_file() to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/security.h        |  4 ++--
 security/apparmor/apparmorfs.c  |  2 +-
 security/inode.c                | 12 ++++++------
 security/tomoyo/securityfs_if.c |  2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index f2c1fd7978a5..fab659edf11a 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -3010,7 +3010,7 @@ static inline void security_audit_rule_free(void *lsmrule)
 
 #ifdef CONFIG_SECURITYFS
 
-extern struct dentry *securityfs_create_file(const char *name, mode_t mode,
+extern struct dentry *securityfs_create_file(const char *name, umode_t mode,
 					     struct dentry *parent, void *data,
 					     const struct file_operations *fops);
 extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent);
@@ -3025,7 +3025,7 @@ static inline struct dentry *securityfs_create_dir(const char *name,
 }
 
 static inline struct dentry *securityfs_create_file(const char *name,
-						    mode_t mode,
+						    umode_t mode,
 						    struct dentry *parent,
 						    void *data,
 						    const struct file_operations *fops)
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 69ddb47787b2..e39df6d43779 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -165,7 +165,7 @@ static void __init aafs_remove(const char *name)
  *
  * Used aafs_remove to remove entries created with this fn.
  */
-static int __init aafs_create(const char *name, int mask,
+static int __init aafs_create(const char *name, umode_t mask,
 			      const struct file_operations *fops)
 {
 	struct dentry *dentry;
diff --git a/security/inode.c b/security/inode.c
index bfe02e68f92e..90a70a67d835 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -56,7 +56,7 @@ static const struct file_operations default_file_ops = {
 	.llseek =	noop_llseek,
 };
 
-static struct inode *get_inode(struct super_block *sb, int mode, dev_t dev)
+static struct inode *get_inode(struct super_block *sb, umode_t mode, dev_t dev)
 {
 	struct inode *inode = new_inode(sb);
 
@@ -85,7 +85,7 @@ static struct inode *get_inode(struct super_block *sb, int mode, dev_t dev)
 
 /* SMP-safe */
 static int mknod(struct inode *dir, struct dentry *dentry,
-			 int mode, dev_t dev)
+			 umode_t mode, dev_t dev)
 {
 	struct inode *inode;
 	int error = -ENOMEM;
@@ -102,7 +102,7 @@ static int mknod(struct inode *dir, struct dentry *dentry,
 	return error;
 }
 
-static int mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	int res;
 
@@ -113,7 +113,7 @@ static int mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	return res;
 }
 
-static int create(struct inode *dir, struct dentry *dentry, int mode)
+static int create(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	mode = (mode & S_IALLUGO) | S_IFREG;
 	return mknod(dir, dentry, mode, 0);
@@ -145,7 +145,7 @@ static struct file_system_type fs_type = {
 	.kill_sb =	kill_litter_super,
 };
 
-static int create_by_name(const char *name, mode_t mode,
+static int create_by_name(const char *name, umode_t mode,
 			  struct dentry *parent,
 			  struct dentry **dentry)
 {
@@ -205,7 +205,7 @@ static int create_by_name(const char *name, mode_t mode,
  * If securityfs is not enabled in the kernel, the value %-ENODEV is
  * returned.
  */
-struct dentry *securityfs_create_file(const char *name, mode_t mode,
+struct dentry *securityfs_create_file(const char *name, umode_t mode,
 				   struct dentry *parent, void *data,
 				   const struct file_operations *fops)
 {
diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c
index 2672ac4f3beb..482b2a5f48f0 100644
--- a/security/tomoyo/securityfs_if.c
+++ b/security/tomoyo/securityfs_if.c
@@ -224,7 +224,7 @@ static const struct file_operations tomoyo_operations = {
  *
  * Returns nothing.
  */
-static void __init tomoyo_create_entry(const char *name, const mode_t mode,
+static void __init tomoyo_create_entry(const char *name, const umode_t mode,
 				       struct dentry *parent, const u8 key)
 {
 	securityfs_create_file(name, mode, parent, ((u8 *) NULL) + key,
-- 
cgit v1.2.3


From a85cfdaec935ede36be6c54f98878624b0d9fbad Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 04:47:38 -0400
Subject: switch miscdevice to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/miscdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index c41d7270c6c6..32085249e9cb 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -54,7 +54,7 @@ struct miscdevice  {
 	struct device *parent;
 	struct device *this_device;
 	const char *nodename;
-	mode_t mode;
+	umode_t mode;
 };
 
 extern int misc_register(struct miscdevice * misc);
-- 
cgit v1.2.3


From df0a42837b86567a130c44515ab620d23e7f182b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 05:26:10 -0400
Subject: switch mq_open() to umode_t

---
 include/linux/audit.h    | 4 ++--
 include/linux/syscalls.h | 2 +-
 ipc/mqueue.c             | 2 +-
 kernel/auditsc.c         | 6 +++---
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 2f81c6f3b630..75ed193b11f8 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -474,7 +474,7 @@ extern void audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
 extern void __audit_fd_pair(int fd1, int fd2);
 extern int audit_set_macxattr(const char *name);
-extern void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr);
+extern void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr);
 extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout);
 extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification);
 extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
@@ -499,7 +499,7 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid
 	if (unlikely(!audit_dummy_context()))
 		__audit_ipc_set_perm(qbytes, uid, gid, mode);
 }
-static inline void audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr)
+static inline void audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr)
 {
 	if (unlikely(!audit_dummy_context()))
 		__audit_mq_open(oflag, mode, attr);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e1a4b9b81cf2..d86e5253f84f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -679,7 +679,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
 asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second,
 		unsigned long third, void __user *ptr, long fifth);
 
-asmlinkage long sys_mq_open(const char __user *name, int oflag, mode_t mode, struct mq_attr __user *attr);
+asmlinkage long sys_mq_open(const char __user *name, int oflag, umode_t mode, struct mq_attr __user *attr);
 asmlinkage long sys_mq_unlink(const char __user *name);
 asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec __user *abs_timeout);
 asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout);
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 5eaecf4fc04e..9a142a290749 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -679,7 +679,7 @@ err:
 	return ERR_PTR(ret);
 }
 
-SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
+SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
 		struct mq_attr __user *, u_attr)
 {
 	struct dentry *dentry;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 47b7fc1ea893..9849213e501c 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -234,7 +234,7 @@ struct audit_context {
 		} mq_sendrecv;
 		struct {
 			int			oflag;
-			mode_t			mode;
+			umode_t			mode;
 			struct mq_attr		attr;
 		} mq_open;
 		struct {
@@ -1278,7 +1278,7 @@ static void show_special(struct audit_context *context, int *call_panic)
 		break; }
 	case AUDIT_MQ_OPEN: {
 		audit_log_format(ab,
-			"oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld "
+			"oflag=0x%x mode=%#ho mq_flags=0x%lx mq_maxmsg=%ld "
 			"mq_msgsize=%ld mq_curmsgs=%ld",
 			context->mq_open.oflag, context->mq_open.mode,
 			context->mq_open.attr.mq_flags,
@@ -2160,7 +2160,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
  * @attr: queue attributes
  *
  */
-void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr)
+void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr)
 {
 	struct audit_context *context = current->audit_context;
 
-- 
cgit v1.2.3


From 1bc94226d5c642b78cf6b2e3e843ef24eb740df0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 16:50:23 -0400
Subject: switch spu_create(2) to use of SYSCALL_DEFINE4, make it use umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/platforms/cell/spu_syscalls.c | 4 ++--
 include/linux/syscalls.h                   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
index 75530d99eda6..714bbfc3162c 100644
--- a/arch/powerpc/platforms/cell/spu_syscalls.c
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -65,8 +65,8 @@ static inline void spufs_calls_put(struct spufs_calls *calls) { }
 
 #endif /* CONFIG_SPU_FS_MODULE */
 
-asmlinkage long sys_spu_create(const char __user *name,
-		unsigned int flags, mode_t mode, int neighbor_fd)
+SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags,
+	umode_t, mode, int, neighbor_fd)
 {
 	long ret;
 	struct file *neighbor;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d86e5253f84f..b25621476316 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -753,7 +753,7 @@ asmlinkage long sys_inotify_rm_watch(int fd, __s32 wd);
 asmlinkage long sys_spu_run(int fd, __u32 __user *unpc,
 				 __u32 __user *ustatus);
 asmlinkage long sys_spu_create(const char __user *name,
-		unsigned int flags, mode_t mode, int fd);
+		unsigned int flags, umode_t mode, int fd);
 
 asmlinkage long sys_mknodat(int dfd, const char __user * filename, umode_t mode,
 			    unsigned dev);
-- 
cgit v1.2.3


From 0583fcc96bb117763c0fa74c123573c0112dec65 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jul 2011 17:04:15 -0400
Subject: consolidate umode_t declarations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/include/asm/types.h       |  5 -----
 arch/arm/include/asm/types.h         |  6 ------
 arch/avr32/include/asm/types.h       |  6 ------
 arch/cris/include/asm/types.h        |  6 ------
 arch/frv/include/asm/types.h         |  6 ------
 arch/h8300/include/asm/types.h       | 17 -----------------
 arch/ia64/include/asm/types.h        |  2 --
 arch/m32r/include/asm/types.h        |  6 ------
 arch/m68k/include/asm/types.h        |  6 ------
 arch/mips/include/asm/types.h        |  6 ------
 arch/mn10300/include/asm/types.h     |  6 ------
 arch/parisc/include/asm/types.h      |  6 ------
 arch/powerpc/include/asm/types.h     |  6 ------
 arch/s390/include/asm/types.h        |  2 --
 arch/sparc/include/asm/posix_types.h |  2 --
 arch/sparc/include/asm/types.h       |  6 ------
 arch/xtensa/include/asm/types.h      |  2 --
 include/asm-generic/types.h          |  6 ------
 include/linux/types.h                |  1 +
 19 files changed, 1 insertion(+), 102 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h
index 881544339c21..0a0579076f4a 100644
--- a/arch/alpha/include/asm/types.h
+++ b/arch/alpha/include/asm/types.h
@@ -15,9 +15,4 @@
 #include <asm-generic/int-l64.h>
 #endif
 
-#ifndef __ASSEMBLY__
-
-typedef unsigned int umode_t;
-
-#endif /* __ASSEMBLY__ */
 #endif /* _ALPHA_TYPES_H */
diff --git a/arch/arm/include/asm/types.h b/arch/arm/include/asm/types.h
index 48192ac3a23a..28beab917ffc 100644
--- a/arch/arm/include/asm/types.h
+++ b/arch/arm/include/asm/types.h
@@ -3,12 +3,6 @@
 
 #include <asm-generic/int-ll64.h>
 
-#ifndef __ASSEMBLY__
-
-typedef unsigned short umode_t;
-
-#endif /* __ASSEMBLY__ */
-
 /*
  * These aren't exported outside the kernel to avoid name space clashes
  */
diff --git a/arch/avr32/include/asm/types.h b/arch/avr32/include/asm/types.h
index 72667a3b1af7..9bb2d8b2e6ca 100644
--- a/arch/avr32/include/asm/types.h
+++ b/arch/avr32/include/asm/types.h
@@ -10,12 +10,6 @@
 
 #include <asm-generic/int-ll64.h>
 
-#ifndef __ASSEMBLY__
-
-typedef unsigned short umode_t;
-
-#endif /* __ASSEMBLY__ */
-
 /*
  * These aren't exported outside the kernel to avoid name space clashes
  */
diff --git a/arch/cris/include/asm/types.h b/arch/cris/include/asm/types.h
index 551a12c0aa01..adaf82780bb4 100644
--- a/arch/cris/include/asm/types.h
+++ b/arch/cris/include/asm/types.h
@@ -3,12 +3,6 @@
 
 #include <asm-generic/int-ll64.h>
 
-#ifndef __ASSEMBLY__
-
-typedef unsigned short umode_t;
-
-#endif /* __ASSEMBLY__ */
-
 /*
  * These aren't exported outside the kernel to avoid name space clashes
  */
diff --git a/arch/frv/include/asm/types.h b/arch/frv/include/asm/types.h
index aa3e7fdc7f29..390a612f3a58 100644
--- a/arch/frv/include/asm/types.h
+++ b/arch/frv/include/asm/types.h
@@ -14,12 +14,6 @@
 
 #include <asm-generic/int-ll64.h>
 
-#ifndef __ASSEMBLY__
-
-typedef unsigned short umode_t;
-
-#endif /* __ASSEMBLY__ */
-
 /*
  * These aren't exported outside the kernel to avoid name space clashes
  */
diff --git a/arch/h8300/include/asm/types.h b/arch/h8300/include/asm/types.h
index bb2c91a3522e..07257d9487d8 100644
--- a/arch/h8300/include/asm/types.h
+++ b/arch/h8300/include/asm/types.h
@@ -3,27 +3,10 @@
 
 #include <asm-generic/int-ll64.h>
 
-#if !defined(__ASSEMBLY__)
-
-/*
- * This file is never included by application software unless
- * explicitly requested (e.g., via linux/types.h) in which case the
- * application is Linux specific so (user-) name space pollution is
- * not a major issue.  However, for interoperability, libraries still
- * need to be careful to avoid a name clashes.
- */
-
-typedef unsigned short umode_t;
-
-/*
- * These aren't exported outside the kernel to avoid name space clashes
- */
 #ifdef __KERNEL__
 
 #define BITS_PER_LONG 32
 
 #endif /* __KERNEL__ */
 
-#endif /* __ASSEMBLY__ */
-
 #endif /* _H8300_TYPES_H */
diff --git a/arch/ia64/include/asm/types.h b/arch/ia64/include/asm/types.h
index 82b3939d2718..3f5b122d9975 100644
--- a/arch/ia64/include/asm/types.h
+++ b/arch/ia64/include/asm/types.h
@@ -28,8 +28,6 @@
 # define __IA64_UL(x)		((unsigned long)(x))
 # define __IA64_UL_CONST(x)	x##UL
 
-typedef unsigned int umode_t;
-
 /*
  * These aren't exported outside the kernel to avoid name space clashes
  */
diff --git a/arch/m32r/include/asm/types.h b/arch/m32r/include/asm/types.h
index bd0035597b3b..bb2eeadecf99 100644
--- a/arch/m32r/include/asm/types.h
+++ b/arch/m32r/include/asm/types.h
@@ -3,12 +3,6 @@
 
 #include <asm-generic/int-ll64.h>
 
-#ifndef __ASSEMBLY__
-
-typedef unsigned short umode_t;
-
-#endif /* __ASSEMBLY__ */
-
 /*
  * These aren't exported outside the kernel to avoid name space clashes
  */
diff --git a/arch/m68k/include/asm/types.h b/arch/m68k/include/asm/types.h
index b17fd115a4e7..89705adcbd52 100644
--- a/arch/m68k/include/asm/types.h
+++ b/arch/m68k/include/asm/types.h
@@ -10,12 +10,6 @@
  */
 #include <asm-generic/int-ll64.h>
 
-#ifndef __ASSEMBLY__
-
-typedef unsigned short umode_t;
-
-#endif /* __ASSEMBLY__ */
-
 /*
  * These aren't exported outside the kernel to avoid name space clashes
  */
diff --git a/arch/mips/include/asm/types.h b/arch/mips/include/asm/types.h
index 533812b61881..43bf70ebd3a2 100644
--- a/arch/mips/include/asm/types.h
+++ b/arch/mips/include/asm/types.h
@@ -21,12 +21,6 @@
 # include <asm-generic/int-ll64.h>
 #endif
 
-#ifndef __ASSEMBLY__
-
-typedef unsigned short umode_t;
-
-#endif /* __ASSEMBLY__ */
-
 /*
  * These aren't exported outside the kernel to avoid name space clashes
  */
diff --git a/arch/mn10300/include/asm/types.h b/arch/mn10300/include/asm/types.h
index c1833eb192e3..713d4ba108a5 100644
--- a/arch/mn10300/include/asm/types.h
+++ b/arch/mn10300/include/asm/types.h
@@ -13,12 +13,6 @@
 
 #include <asm-generic/int-ll64.h>
 
-#ifndef __ASSEMBLY__
-
-typedef unsigned short umode_t;
-
-#endif /* __ASSEMBLY__ */
-
 /*
  * These aren't exported outside the kernel to avoid name space clashes
  */
diff --git a/arch/parisc/include/asm/types.h b/arch/parisc/include/asm/types.h
index 80e415c9936d..8866f9bbdeaf 100644
--- a/arch/parisc/include/asm/types.h
+++ b/arch/parisc/include/asm/types.h
@@ -3,10 +3,4 @@
 
 #include <asm-generic/int-ll64.h>
 
-#ifndef __ASSEMBLY__
-
-typedef unsigned short umode_t;
-
-#endif /* __ASSEMBLY__ */
-
 #endif
diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h
index 8947b9827bc4..b15a52e84b37 100644
--- a/arch/powerpc/include/asm/types.h
+++ b/arch/powerpc/include/asm/types.h
@@ -27,12 +27,6 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifdef __powerpc64__
-typedef unsigned int umode_t;
-#else
-typedef unsigned short umode_t;
-#endif
-
 typedef struct {
 	__u32 u[4];
 } __attribute__((aligned(16))) __vector128;
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
index eeb52ccf499f..05ebbcdbbf6b 100644
--- a/arch/s390/include/asm/types.h
+++ b/arch/s390/include/asm/types.h
@@ -13,8 +13,6 @@
 
 #ifndef __ASSEMBLY__
 
-typedef unsigned short umode_t;
-
 /* A address type so that arithmetic can be done on it & it can be upgraded to
    64 bit when necessary 
 */
diff --git a/arch/sparc/include/asm/posix_types.h b/arch/sparc/include/asm/posix_types.h
index 98d6ebb922fb..dbfc1a34b3a2 100644
--- a/arch/sparc/include/asm/posix_types.h
+++ b/arch/sparc/include/asm/posix_types.h
@@ -20,7 +20,6 @@ typedef unsigned int           __kernel_uid_t;
 typedef unsigned int           __kernel_gid_t;
 typedef unsigned long          __kernel_ino_t;
 typedef unsigned int           __kernel_mode_t;
-typedef unsigned short         __kernel_umode_t;
 typedef unsigned int           __kernel_nlink_t;
 typedef int                    __kernel_daddr_t;
 typedef long                   __kernel_off_t;
@@ -55,7 +54,6 @@ typedef unsigned short         __kernel_uid_t;
 typedef unsigned short         __kernel_gid_t;
 typedef unsigned long          __kernel_ino_t;
 typedef unsigned short         __kernel_mode_t;
-typedef unsigned short         __kernel_umode_t;
 typedef short                  __kernel_nlink_t;
 typedef long                   __kernel_daddr_t;
 typedef long                   __kernel_off_t;
diff --git a/arch/sparc/include/asm/types.h b/arch/sparc/include/asm/types.h
index 91e5a034f987..383d156cde9c 100644
--- a/arch/sparc/include/asm/types.h
+++ b/arch/sparc/include/asm/types.h
@@ -12,12 +12,6 @@
 
 #include <asm-generic/int-ll64.h>
 
-#ifndef __ASSEMBLY__
-
-typedef unsigned short umode_t;
-
-#endif /* __ASSEMBLY__ */
-
 #endif /* defined(__sparc__) */
 
 #endif /* defined(_SPARC_TYPES_H) */
diff --git a/arch/xtensa/include/asm/types.h b/arch/xtensa/include/asm/types.h
index b1c981e39b52..6d4db7e8ffac 100644
--- a/arch/xtensa/include/asm/types.h
+++ b/arch/xtensa/include/asm/types.h
@@ -23,8 +23,6 @@
 
 #ifndef __ASSEMBLY__
 
-typedef unsigned short umode_t;
-
 /*
  * These aren't exported outside the kernel to avoid name space clashes
  */
diff --git a/include/asm-generic/types.h b/include/asm-generic/types.h
index 7a0f69e6c618..bd39806013b5 100644
--- a/include/asm-generic/types.h
+++ b/include/asm-generic/types.h
@@ -6,10 +6,4 @@
  */
 #include <asm-generic/int-ll64.h>
 
-#ifndef __ASSEMBLY__
-
-typedef unsigned short umode_t;
-
-#endif /* __ASSEMBLY__ */
-
 #endif /* _ASM_GENERIC_TYPES_H */
diff --git a/include/linux/types.h b/include/linux/types.h
index 57a97234bec1..f0ac9bda0f78 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -24,6 +24,7 @@ typedef __kernel_fd_set		fd_set;
 typedef __kernel_dev_t		dev_t;
 typedef __kernel_ino_t		ino_t;
 typedef __kernel_mode_t		mode_t;
+typedef unsigned short		umode_t;
 typedef __kernel_nlink_t	nlink_t;
 typedef __kernel_off_t		off_t;
 typedef __kernel_pid_t		pid_t;
-- 
cgit v1.2.3


From 2570ebbd1f1ce1ef31f568b0660354fc59424be2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Jul 2011 14:03:22 -0400
Subject: switch kern_ipc_perm to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  4 ++--
 include/linux/ipc.h   |  2 +-
 kernel/auditsc.c      | 10 +++++-----
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 75ed193b11f8..426ab9f4dd85 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -468,7 +468,7 @@ extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 #define audit_get_sessionid(t) ((t)->sessionid)
 extern void audit_log_task_context(struct audit_buffer *ab);
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
-extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
+extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
 extern int audit_bprm(struct linux_binprm *bprm);
 extern void audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
@@ -494,7 +494,7 @@ static inline void audit_fd_pair(int fd1, int fd2)
 	if (unlikely(!audit_dummy_context()))
 		__audit_fd_pair(fd1, fd2);
 }
-static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
+static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode)
 {
 	if (unlikely(!audit_dummy_context()))
 		__audit_ipc_set_perm(qbytes, uid, gid, mode);
diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index 3b1594d662b0..30e816148df4 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -93,7 +93,7 @@ struct kern_ipc_perm
 	gid_t		gid;
 	uid_t		cuid;
 	gid_t		cgid;
-	mode_t		mode; 
+	umode_t		mode; 
 	unsigned long	seq;
 	void		*security;
 };
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 9849213e501c..7a074d65fff4 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -210,12 +210,12 @@ struct audit_context {
 		struct {
 			uid_t			uid;
 			gid_t			gid;
-			mode_t			mode;
+			umode_t			mode;
 			u32			osid;
 			int			has_perm;
 			uid_t			perm_uid;
 			gid_t			perm_gid;
-			mode_t			perm_mode;
+			umode_t			perm_mode;
 			unsigned long		qbytes;
 		} ipc;
 		struct {
@@ -1249,7 +1249,7 @@ static void show_special(struct audit_context *context, int *call_panic)
 	case AUDIT_IPC: {
 		u32 osid = context->ipc.osid;
 
-		audit_log_format(ab, "ouid=%u ogid=%u mode=%#o",
+		audit_log_format(ab, "ouid=%u ogid=%u mode=%#ho",
 			 context->ipc.uid, context->ipc.gid, context->ipc.mode);
 		if (osid) {
 			char *ctx = NULL;
@@ -1267,7 +1267,7 @@ static void show_special(struct audit_context *context, int *call_panic)
 			ab = audit_log_start(context, GFP_KERNEL,
 					     AUDIT_IPC_SET_PERM);
 			audit_log_format(ab,
-				"qbytes=%lx ouid=%u ogid=%u mode=%#o",
+				"qbytes=%lx ouid=%u ogid=%u mode=%#ho",
 				context->ipc.qbytes,
 				context->ipc.perm_uid,
 				context->ipc.perm_gid,
@@ -2260,7 +2260,7 @@ void __audit_ipc_obj(struct kern_ipc_perm *ipcp)
  *
  * Called only after audit_ipc_obj().
  */
-void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
+void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode)
 {
 	struct audit_context *context = current->audit_context;
 
-- 
cgit v1.2.3


From 4572befe248fd0d94aedc98775e3f0ddc8a26651 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 21 Nov 2011 14:56:21 -0500
Subject: switch ->path_mkdir() to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/security.h | 6 +++---
 security/apparmor/lsm.c  | 2 +-
 security/capability.c    | 2 +-
 security/security.c      | 2 +-
 security/tomoyo/tomoyo.c | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index fab659edf11a..24cd7cf48564 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1424,7 +1424,7 @@ struct security_operations {
 
 #ifdef CONFIG_SECURITY_PATH
 	int (*path_unlink) (struct path *dir, struct dentry *dentry);
-	int (*path_mkdir) (struct path *dir, struct dentry *dentry, int mode);
+	int (*path_mkdir) (struct path *dir, struct dentry *dentry, umode_t mode);
 	int (*path_rmdir) (struct path *dir, struct dentry *dentry);
 	int (*path_mknod) (struct path *dir, struct dentry *dentry, int mode,
 			   unsigned int dev);
@@ -2855,7 +2855,7 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi
 
 #ifdef CONFIG_SECURITY_PATH
 int security_path_unlink(struct path *dir, struct dentry *dentry);
-int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode);
+int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode);
 int security_path_rmdir(struct path *dir, struct dentry *dentry);
 int security_path_mknod(struct path *dir, struct dentry *dentry, int mode,
 			unsigned int dev);
@@ -2877,7 +2877,7 @@ static inline int security_path_unlink(struct path *dir, struct dentry *dentry)
 }
 
 static inline int security_path_mkdir(struct path *dir, struct dentry *dentry,
-				      int mode)
+				      umode_t mode)
 {
 	return 0;
 }
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index afbe49822bed..3271bd38d860 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -262,7 +262,7 @@ static int apparmor_path_unlink(struct path *dir, struct dentry *dentry)
 }
 
 static int apparmor_path_mkdir(struct path *dir, struct dentry *dentry,
-			       int mode)
+			       umode_t mode)
 {
 	return common_perm_create(OP_MKDIR, dir, dentry, AA_MAY_CREATE,
 				  S_IFDIR);
diff --git a/security/capability.c b/security/capability.c
index 4f24bee49f26..2e1fe45d1486 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -241,7 +241,7 @@ static int cap_path_mknod(struct path *dir, struct dentry *dentry, int mode,
 	return 0;
 }
 
-static int cap_path_mkdir(struct path *dir, struct dentry *dentry, int mode)
+static int cap_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index 3635a13cd4ab..e9724e058b43 100644
--- a/security/security.c
+++ b/security/security.c
@@ -397,7 +397,7 @@ int security_path_mknod(struct path *dir, struct dentry *dentry, int mode,
 }
 EXPORT_SYMBOL(security_path_mknod);
 
-int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode)
+int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode)
 {
 	if (unlikely(IS_PRIVATE(dir->dentry->d_inode)))
 		return 0;
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index a4b840ea0078..95e4a7db8b86 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -186,7 +186,7 @@ static int tomoyo_path_unlink(struct path *parent, struct dentry *dentry)
  * Returns 0 on success, negative value otherwise.
  */
 static int tomoyo_path_mkdir(struct path *parent, struct dentry *dentry,
-			     int mode)
+			     umode_t mode)
 {
 	struct path path = { parent->mnt, dentry };
 	return tomoyo_path_number_perm(TOMOYO_TYPE_MKDIR, &path,
-- 
cgit v1.2.3


From 04fc66e789a896e684bfdca30208e57eb832dd96 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 21 Nov 2011 14:58:38 -0500
Subject: switch ->path_mknod() to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/security.h | 6 +++---
 net/unix/af_unix.c       | 2 +-
 security/apparmor/lsm.c  | 2 +-
 security/capability.c    | 2 +-
 security/security.c      | 2 +-
 security/tomoyo/tomoyo.c | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 24cd7cf48564..535721cc374a 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1426,7 +1426,7 @@ struct security_operations {
 	int (*path_unlink) (struct path *dir, struct dentry *dentry);
 	int (*path_mkdir) (struct path *dir, struct dentry *dentry, umode_t mode);
 	int (*path_rmdir) (struct path *dir, struct dentry *dentry);
-	int (*path_mknod) (struct path *dir, struct dentry *dentry, int mode,
+	int (*path_mknod) (struct path *dir, struct dentry *dentry, umode_t mode,
 			   unsigned int dev);
 	int (*path_truncate) (struct path *path);
 	int (*path_symlink) (struct path *dir, struct dentry *dentry,
@@ -2857,7 +2857,7 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi
 int security_path_unlink(struct path *dir, struct dentry *dentry);
 int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode);
 int security_path_rmdir(struct path *dir, struct dentry *dentry);
-int security_path_mknod(struct path *dir, struct dentry *dentry, int mode,
+int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode,
 			unsigned int dev);
 int security_path_truncate(struct path *path);
 int security_path_symlink(struct path *dir, struct dentry *dentry,
@@ -2888,7 +2888,7 @@ static inline int security_path_rmdir(struct path *dir, struct dentry *dentry)
 }
 
 static inline int security_path_mknod(struct path *dir, struct dentry *dentry,
-				      int mode, unsigned int dev)
+				      umode_t mode, unsigned int dev)
 {
 	return 0;
 }
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index b595a3d8679f..412a99f4a3f7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -847,7 +847,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	atomic_set(&addr->refcnt, 1);
 
 	if (sun_path[0]) {
-		unsigned int mode;
+		umode_t mode;
 		err = 0;
 		/*
 		 * Get the parent directory, calculate the hash for last
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 3271bd38d860..c0a399ec1df9 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -274,7 +274,7 @@ static int apparmor_path_rmdir(struct path *dir, struct dentry *dentry)
 }
 
 static int apparmor_path_mknod(struct path *dir, struct dentry *dentry,
-			       int mode, unsigned int dev)
+			       umode_t mode, unsigned int dev)
 {
 	return common_perm_create(OP_MKNOD, dir, dentry, AA_MAY_CREATE, mode);
 }
diff --git a/security/capability.c b/security/capability.c
index 2e1fe45d1486..156816d451ba 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -235,7 +235,7 @@ static void cap_inode_getsecid(const struct inode *inode, u32 *secid)
 }
 
 #ifdef CONFIG_SECURITY_PATH
-static int cap_path_mknod(struct path *dir, struct dentry *dentry, int mode,
+static int cap_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode,
 			  unsigned int dev)
 {
 	return 0;
diff --git a/security/security.c b/security/security.c
index e9724e058b43..151152de1a0a 100644
--- a/security/security.c
+++ b/security/security.c
@@ -388,7 +388,7 @@ int security_old_inode_init_security(struct inode *inode, struct inode *dir,
 EXPORT_SYMBOL(security_old_inode_init_security);
 
 #ifdef CONFIG_SECURITY_PATH
-int security_path_mknod(struct path *dir, struct dentry *dentry, int mode,
+int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode,
 			unsigned int dev)
 {
 	if (unlikely(IS_PRIVATE(dir->dentry->d_inode)))
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 95e4a7db8b86..75c956a51e75 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -234,7 +234,7 @@ static int tomoyo_path_symlink(struct path *parent, struct dentry *dentry,
  * Returns 0 on success, negative value otherwise.
  */
 static int tomoyo_path_mknod(struct path *parent, struct dentry *dentry,
-			     int mode, unsigned int dev)
+			     umode_t mode, unsigned int dev)
 {
 	struct path path = { parent->mnt, dentry };
 	int type = TOMOYO_TYPE_CREATE;
-- 
cgit v1.2.3


From a218d0fdc5f9004164ff151d274487f6799907d0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 21 Nov 2011 14:59:34 -0500
Subject: switch open and mkdir syscalls to umode_t

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/compat.c              |  4 ++--
 fs/internal.h            |  2 +-
 fs/namei.c               |  6 +++---
 fs/open.c                | 12 ++++++------
 include/linux/compat.h   |  4 ++--
 include/linux/fs.h       |  4 ++--
 include/linux/syscalls.h | 10 +++++-----
 7 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/fs/compat.c b/fs/compat.c
index 9db5a6076610..fa9d721ecfee 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1281,7 +1281,7 @@ compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
  * O_LARGEFILE flag.
  */
 asmlinkage long
-compat_sys_open(const char __user *filename, int flags, int mode)
+compat_sys_open(const char __user *filename, int flags, umode_t mode)
 {
 	return do_sys_open(AT_FDCWD, filename, flags, mode);
 }
@@ -1291,7 +1291,7 @@ compat_sys_open(const char __user *filename, int flags, int mode)
  * O_LARGEFILE flag.
  */
 asmlinkage long
-compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int mode)
+compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, umode_t mode)
 {
 	return do_sys_open(dfd, filename, flags, mode);
 }
diff --git a/fs/internal.h b/fs/internal.h
index 7b1cb1528ac2..23599f88d1a5 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -88,7 +88,7 @@ extern struct file *nameidata_to_filp(struct nameidata *);
 extern void release_open_intent(struct nameidata *);
 struct open_flags {
 	int open_flag;
-	int mode;
+	umode_t mode;
 	int acc_mode;
 	int intent;
 };
diff --git a/fs/namei.c b/fs/namei.c
index e275dc36d7c5..afd5876cd072 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2177,7 +2177,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 
 	/* Negative dentry, just create the file */
 	if (!dentry->d_inode) {
-		int mode = op->mode;
+		umode_t mode = op->mode;
 		if (!IS_POSIXACL(dir->d_inode))
 			mode &= ~current_umask();
 		/*
@@ -2562,7 +2562,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	return error;
 }
 
-SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode)
+SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
 {
 	struct dentry *dentry;
 	struct path path;
@@ -2590,7 +2590,7 @@ out_dput:
 	return error;
 }
 
-SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode)
+SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
 {
 	return sys_mkdirat(AT_FDCWD, pathname, mode);
 }
diff --git a/fs/open.c b/fs/open.c
index 834e3e1adeb9..2659f596f4c5 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -877,7 +877,7 @@ void fd_install(unsigned int fd, struct file *file)
 
 EXPORT_SYMBOL(fd_install);
 
-static inline int build_open_flags(int flags, int mode, struct open_flags *op)
+static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
 {
 	int lookup_flags = 0;
 	int acc_mode;
@@ -948,7 +948,7 @@ static inline int build_open_flags(int flags, int mode, struct open_flags *op)
  * have to.  But in generally you should not do this, so please move
  * along, nothing to see here..
  */
-struct file *filp_open(const char *filename, int flags, int mode)
+struct file *filp_open(const char *filename, int flags, umode_t mode)
 {
 	struct open_flags op;
 	int lookup = build_open_flags(flags, mode, &op);
@@ -970,7 +970,7 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
 }
 EXPORT_SYMBOL(file_open_root);
 
-long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
+long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
 {
 	struct open_flags op;
 	int lookup = build_open_flags(flags, mode, &op);
@@ -994,7 +994,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
 	return fd;
 }
 
-SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
+SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
 {
 	long ret;
 
@@ -1008,7 +1008,7 @@ SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
 }
 
 SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
-		int, mode)
+		umode_t, mode)
 {
 	long ret;
 
@@ -1027,7 +1027,7 @@ SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
  * For backward compatibility?  Maybe this should be moved
  * into arch/i386 instead?
  */
-SYSCALL_DEFINE2(creat, const char __user *, pathname, int, mode)
+SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
 {
 	return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
 }
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 66ed067fb729..41c9f6515f46 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -422,9 +422,9 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
 asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *,
 				    unsigned int nr_segs, unsigned int flags);
 asmlinkage long compat_sys_open(const char __user *filename, int flags,
-				int mode);
+				umode_t mode);
 asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
-				  int flags, int mode);
+				  int flags, umode_t mode);
 asmlinkage long compat_sys_open_by_handle_at(int mountdirfd,
 					     struct file_handle __user *handle,
 					     int flags);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9d02fab420c6..f0e57b7e4297 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2054,8 +2054,8 @@ extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
 extern int do_fallocate(struct file *file, int mode, loff_t offset,
 			loff_t len);
 extern long do_sys_open(int dfd, const char __user *filename, int flags,
-			int mode);
-extern struct file *filp_open(const char *, int, int);
+			umode_t mode);
+extern struct file *filp_open(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
 				   const char *, int);
 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b25621476316..515669fa3c1d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -517,9 +517,9 @@ asmlinkage long sys_sendfile64(int out_fd, int in_fd,
 			       loff_t __user *offset, size_t count);
 asmlinkage long sys_readlink(const char __user *path,
 				char __user *buf, int bufsiz);
-asmlinkage long sys_creat(const char __user *pathname, int mode);
+asmlinkage long sys_creat(const char __user *pathname, umode_t mode);
 asmlinkage long sys_open(const char __user *filename,
-				int flags, int mode);
+				int flags, umode_t mode);
 asmlinkage long sys_close(unsigned int fd);
 asmlinkage long sys_access(const char __user *filename, int mode);
 asmlinkage long sys_vhangup(void);
@@ -582,7 +582,7 @@ asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec,
 asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec,
 			    unsigned long vlen, unsigned long pos_l, unsigned long pos_h);
 asmlinkage long sys_getcwd(char __user *buf, unsigned long size);
-asmlinkage long sys_mkdir(const char __user *pathname, int mode);
+asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode);
 asmlinkage long sys_chdir(const char __user *filename);
 asmlinkage long sys_fchdir(unsigned int fd);
 asmlinkage long sys_rmdir(const char __user *pathname);
@@ -757,7 +757,7 @@ asmlinkage long sys_spu_create(const char __user *name,
 
 asmlinkage long sys_mknodat(int dfd, const char __user * filename, umode_t mode,
 			    unsigned dev);
-asmlinkage long sys_mkdirat(int dfd, const char __user * pathname, int mode);
+asmlinkage long sys_mkdirat(int dfd, const char __user * pathname, umode_t mode);
 asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag);
 asmlinkage long sys_symlinkat(const char __user * oldname,
 			      int newdfd, const char __user * newname);
@@ -773,7 +773,7 @@ asmlinkage long sys_fchmodat(int dfd, const char __user * filename,
 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
 			     gid_t group, int flag);
 asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
-			   int mode);
+			   umode_t mode);
 asmlinkage long sys_newfstatat(int dfd, const char __user *filename,
 			       struct stat __user *statbuf, int flag);
 asmlinkage long sys_fstatat64(int dfd, const char __user *filename,
-- 
cgit v1.2.3


From 1b8e5564b9d34cbeb3047dd2be8ec9cd5e2785e2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 24 Nov 2011 21:01:32 -0500
Subject: vfs: the first spoils - mnt_hash moved

taken out of struct vfsmount into struct mount

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h            |  1 +
 fs/namespace.c        | 24 ++++++++++++------------
 fs/pnode.c            | 10 +++++-----
 include/linux/mount.h |  1 -
 4 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index 44e5b6f54b7e..831e7c86835b 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -1,6 +1,7 @@
 #include <linux/mount.h>
 
 struct mount {
+	struct list_head mnt_hash;
 	struct vfsmount mnt;
 };
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 76412348d5be..121e0032c9de 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -199,7 +199,7 @@ static struct mount *alloc_vfsmnt(const char *name)
 		mnt->mnt_writers = 0;
 #endif
 
-		INIT_LIST_HEAD(&p->mnt.mnt_hash);
+		INIT_LIST_HEAD(&p->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
 		INIT_LIST_HEAD(&mnt->mnt_mounts);
 		INIT_LIST_HEAD(&mnt->mnt_list);
@@ -475,7 +475,7 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
 		p = NULL;
 		if (tmp == head)
 			break;
-		p = list_entry(tmp, struct mount, mnt.mnt_hash);
+		p = list_entry(tmp, struct mount, mnt_hash);
 		if (p->mnt.mnt_parent == mnt && p->mnt.mnt_mountpoint == dentry) {
 			found = p;
 			break;
@@ -542,7 +542,7 @@ static void dentry_reset_mounted(struct dentry *dentry)
 	for (u = 0; u < HASH_SIZE; u++) {
 		struct mount *p;
 
-		list_for_each_entry(p, &mount_hashtable[u], mnt.mnt_hash) {
+		list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
 			if (p->mnt.mnt_mountpoint == dentry)
 				return;
 		}
@@ -562,7 +562,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
 	mnt->mnt.mnt_parent = &mnt->mnt;
 	mnt->mnt.mnt_mountpoint = mnt->mnt.mnt_root;
 	list_del_init(&mnt->mnt.mnt_child);
-	list_del_init(&mnt->mnt.mnt_hash);
+	list_del_init(&mnt->mnt_hash);
 	dentry_reset_mounted(old_path->dentry);
 }
 
@@ -585,7 +585,7 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
 static void attach_mnt(struct mount *mnt, struct path *path)
 {
 	mnt_set_mountpoint(path->mnt, path->dentry, &mnt->mnt);
-	list_add_tail(&mnt->mnt.mnt_hash, mount_hashtable +
+	list_add_tail(&mnt->mnt_hash, mount_hashtable +
 			hash(path->mnt, path->dentry));
 	list_add_tail(&mnt->mnt.mnt_child, &path->mnt->mnt_mounts);
 }
@@ -625,7 +625,7 @@ static void commit_tree(struct mount *mnt)
 
 	list_splice(&head, n->list.prev);
 
-	list_add_tail(&mnt->mnt.mnt_hash, mount_hashtable +
+	list_add_tail(&mnt->mnt_hash, mount_hashtable +
 				hash(parent, mnt->mnt.mnt_mountpoint));
 	list_add_tail(&mnt->mnt.mnt_child, &parent->mnt_mounts);
 	touch_mnt_namespace(n);
@@ -1193,8 +1193,8 @@ void release_mounts(struct list_head *head)
 {
 	struct mount *mnt;
 	while (!list_empty(head)) {
-		mnt = list_first_entry(head, struct mount, mnt.mnt_hash);
-		list_del_init(&mnt->mnt.mnt_hash);
+		mnt = list_first_entry(head, struct mount, mnt_hash);
+		list_del_init(&mnt->mnt_hash);
 		if (mnt_has_parent(&mnt->mnt)) {
 			struct dentry *dentry;
 			struct vfsmount *m;
@@ -1223,12 +1223,12 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
 	struct mount *p;
 
 	for (p = real_mount(mnt); p; p = next_mnt(p, mnt))
-		list_move(&p->mnt.mnt_hash, &tmp_list);
+		list_move(&p->mnt_hash, &tmp_list);
 
 	if (propagate)
 		propagate_umount(&tmp_list);
 
-	list_for_each_entry(p, &tmp_list, mnt.mnt_hash) {
+	list_for_each_entry(p, &tmp_list, mnt_hash) {
 		list_del_init(&p->mnt.mnt_expire);
 		list_del_init(&p->mnt.mnt_list);
 		__touch_mnt_namespace(p->mnt.mnt_ns);
@@ -1620,8 +1620,8 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 		commit_tree(source_mnt);
 	}
 
-	list_for_each_entry_safe(child, p, &tree_list, mnt.mnt_hash) {
-		list_del_init(&child->mnt.mnt_hash);
+	list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
+		list_del_init(&child->mnt_hash);
 		commit_tree(child);
 	}
 	br_write_unlock(vfsmount_lock);
diff --git a/fs/pnode.c b/fs/pnode.c
index 916c8e87cf4e..a2f0f3e0e127 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -247,13 +247,13 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
 
 		if (is_subdir(dest_dentry, m->mnt_root)) {
 			mnt_set_mountpoint(m, dest_dentry, &child->mnt);
-			list_add_tail(&child->mnt.mnt_hash, tree_list);
+			list_add_tail(&child->mnt_hash, tree_list);
 		} else {
 			/*
 			 * This can happen if the parent mount was bind mounted
 			 * on some subdirectory of a shared/slave mount.
 			 */
-			list_add_tail(&child->mnt.mnt_hash, &tmp_list);
+			list_add_tail(&child->mnt_hash, &tmp_list);
 		}
 		prev_dest_mnt = m;
 		prev_src_mnt  = &child->mnt;
@@ -261,7 +261,7 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
 out:
 	br_write_lock(vfsmount_lock);
 	while (!list_empty(&tmp_list)) {
-		child = list_first_entry(&tmp_list, struct mount, mnt.mnt_hash);
+		child = list_first_entry(&tmp_list, struct mount, mnt_hash);
 		umount_tree(&child->mnt, 0, &umount_list);
 	}
 	br_write_unlock(vfsmount_lock);
@@ -337,7 +337,7 @@ static void __propagate_umount(struct mount *mnt)
 		 * other children
 		 */
 		if (child && list_empty(&child->mnt.mnt_mounts))
-			list_move_tail(&child->mnt.mnt_hash, &mnt->mnt.mnt_hash);
+			list_move_tail(&child->mnt_hash, &mnt->mnt_hash);
 	}
 }
 
@@ -352,7 +352,7 @@ int propagate_umount(struct list_head *list)
 {
 	struct mount *mnt;
 
-	list_for_each_entry(mnt, list, mnt.mnt_hash)
+	list_for_each_entry(mnt, list, mnt_hash)
 		__propagate_umount(mnt);
 	return 0;
 }
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 00f5c4f2160b..77c913dc7397 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -53,7 +53,6 @@ struct mnt_pcp {
 };
 
 struct vfsmount {
-	struct list_head mnt_hash;
 	struct vfsmount *mnt_parent;	/* fs we are mounted on */
 	struct dentry *mnt_mountpoint;	/* dentry of mountpoint */
 	struct dentry *mnt_root;	/* root of the mounted tree */
-- 
cgit v1.2.3


From 3376f34fff5be9954fd9a9c4fd68f4a0a36d480e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 24 Nov 2011 22:05:19 -0500
Subject: vfs: mnt_parent moved to struct mount

the second victim...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c           |  2 +-
 fs/mount.h            |  3 ++-
 fs/namei.c            |  4 ++--
 fs/namespace.c        | 45 +++++++++++++++++++++++----------------------
 fs/pnode.c            |  4 ++--
 include/linux/mount.h |  1 -
 6 files changed, 30 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 1834e715f814..eef2d5472f9c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2465,7 +2465,7 @@ static int prepend_path(const struct path *path,
 			if (!mnt_has_parent(mnt))
 				goto global_root;
 			dentry = vfsmnt->mnt_mountpoint;
-			vfsmnt = vfsmnt->mnt_parent;
+			vfsmnt = mnt->mnt_parent;
 			continue;
 		}
 		parent = dentry->d_parent;
diff --git a/fs/mount.h b/fs/mount.h
index 541daf568f63..5126c0861102 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -2,6 +2,7 @@
 
 struct mount {
 	struct list_head mnt_hash;
+	struct vfsmount *mnt_parent;
 	struct vfsmount mnt;
 };
 
@@ -12,7 +13,7 @@ static inline struct mount *real_mount(struct vfsmount *mnt)
 
 static inline int mnt_has_parent(struct mount *mnt)
 {
-	return &mnt->mnt != mnt->mnt.mnt_parent;
+	return &mnt->mnt != mnt->mnt_parent;
 }
 
 extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
diff --git a/fs/namei.c b/fs/namei.c
index d1c6a559f8f0..89248bf1b906 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -680,7 +680,7 @@ static int follow_up_rcu(struct path *path)
 	struct vfsmount *parent;
 	struct dentry *mountpoint;
 
-	parent = path->mnt->mnt_parent;
+	parent = real_mount(path->mnt)->mnt_parent;
 	if (parent == path->mnt)
 		return 0;
 	mountpoint = path->mnt->mnt_mountpoint;
@@ -695,7 +695,7 @@ int follow_up(struct path *path)
 	struct dentry *mountpoint;
 
 	br_read_lock(vfsmount_lock);
-	parent = path->mnt->mnt_parent;
+	parent = real_mount(path->mnt)->mnt_parent;
 	if (parent == path->mnt) {
 		br_read_unlock(vfsmount_lock);
 		return 0;
diff --git a/fs/namespace.c b/fs/namespace.c
index b117d94fcdc1..c6384bc39db1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -476,7 +476,7 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
 		if (tmp == head)
 			break;
 		p = list_entry(tmp, struct mount, mnt_hash);
-		if (p->mnt.mnt_parent == mnt && p->mnt.mnt_mountpoint == dentry) {
+		if (p->mnt_parent == mnt && p->mnt.mnt_mountpoint == dentry) {
 			found = p;
 			break;
 		}
@@ -558,8 +558,8 @@ static void dentry_reset_mounted(struct dentry *dentry)
 static void detach_mnt(struct mount *mnt, struct path *old_path)
 {
 	old_path->dentry = mnt->mnt.mnt_mountpoint;
-	old_path->mnt = mnt->mnt.mnt_parent;
-	mnt->mnt.mnt_parent = &mnt->mnt;
+	old_path->mnt = mnt->mnt_parent;
+	mnt->mnt_parent = &mnt->mnt;
 	mnt->mnt.mnt_mountpoint = mnt->mnt.mnt_root;
 	list_del_init(&mnt->mnt.mnt_child);
 	list_del_init(&mnt->mnt_hash);
@@ -572,7 +572,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
 void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
 			struct mount *child_mnt)
 {
-	child_mnt->mnt.mnt_parent = mntget(mnt);
+	child_mnt->mnt_parent = mntget(mnt);
 	child_mnt->mnt.mnt_mountpoint = dget(dentry);
 	spin_lock(&dentry->d_lock);
 	dentry->d_flags |= DCACHE_MOUNTED;
@@ -610,7 +610,7 @@ static inline void __mnt_make_shortterm(struct vfsmount *mnt)
  */
 static void commit_tree(struct mount *mnt)
 {
-	struct vfsmount *parent = mnt->mnt.mnt_parent;
+	struct vfsmount *parent = mnt->mnt_parent;
 	struct vfsmount *m;
 	LIST_HEAD(head);
 	struct mnt_namespace *n = parent->mnt_ns;
@@ -639,9 +639,9 @@ static struct mount *next_mnt(struct mount *p, struct vfsmount *root)
 			if (&p->mnt == root)
 				return NULL;
 			next = p->mnt.mnt_child.next;
-			if (next != &p->mnt.mnt_parent->mnt_mounts)
+			if (next != &p->mnt_parent->mnt_mounts)
 				break;
-			p = real_mount(p->mnt.mnt_parent);
+			p = real_mount(p->mnt_parent);
 		}
 	}
 	return list_entry(next, struct mount, mnt.mnt_child);
@@ -682,7 +682,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 	mnt->mnt.mnt_root = root;
 	mnt->mnt.mnt_sb = root->d_sb;
 	mnt->mnt.mnt_mountpoint = mnt->mnt.mnt_root;
-	mnt->mnt.mnt_parent = &mnt->mnt;
+	mnt->mnt_parent = &mnt->mnt;
 	return &mnt->mnt;
 }
 EXPORT_SYMBOL_GPL(vfs_kern_mount);
@@ -710,7 +710,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 		mnt->mnt.mnt_sb = sb;
 		mnt->mnt.mnt_root = dget(root);
 		mnt->mnt.mnt_mountpoint = mnt->mnt.mnt_root;
-		mnt->mnt.mnt_parent = &mnt->mnt;
+		mnt->mnt_parent = &mnt->mnt;
 
 		if (flag & CL_SLAVE) {
 			list_add(&mnt->mnt.mnt_slave, &old->mnt.mnt_slave_list);
@@ -1021,12 +1021,13 @@ static int show_mountinfo(struct seq_file *m, void *v)
 {
 	struct proc_mounts *p = m->private;
 	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
+	struct mount *r = real_mount(mnt);
 	struct super_block *sb = mnt->mnt_sb;
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
 	struct path root = p->root;
 	int err = 0;
 
-	seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
+	seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, r->mnt_parent->mnt_id,
 		   MAJOR(sb->s_dev), MINOR(sb->s_dev));
 	if (sb->s_op->show_path)
 		err = sb->s_op->show_path(m, mnt);
@@ -1201,9 +1202,9 @@ void release_mounts(struct list_head *head)
 
 			br_write_lock(vfsmount_lock);
 			dentry = mnt->mnt.mnt_mountpoint;
-			m = mnt->mnt.mnt_parent;
+			m = mnt->mnt_parent;
 			mnt->mnt.mnt_mountpoint = mnt->mnt.mnt_root;
-			mnt->mnt.mnt_parent = &mnt->mnt;
+			mnt->mnt_parent = &mnt->mnt;
 			m->mnt_ghosts--;
 			br_write_unlock(vfsmount_lock);
 			dput(dentry);
@@ -1236,7 +1237,7 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
 		__mnt_make_shortterm(&p->mnt);
 		list_del_init(&p->mnt.mnt_child);
 		if (mnt_has_parent(p)) {
-			p->mnt.mnt_parent->mnt_ghosts++;
+			p->mnt_parent->mnt_ghosts++;
 			dentry_reset_mounted(p->mnt.mnt_mountpoint);
 		}
 		change_mnt_propagation(p, MS_PRIVATE);
@@ -1434,9 +1435,9 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 				s = skip_mnt_tree(s);
 				continue;
 			}
-			while (p != real_mount(s->mnt.mnt_parent)) {
-				p = real_mount(p->mnt.mnt_parent);
-				q = real_mount(q->mnt.mnt_parent);
+			while (p != real_mount(s->mnt_parent)) {
+				p = real_mount(p->mnt_parent);
+				q = real_mount(q->mnt_parent);
 			}
 			p = s;
 			path.mnt = &q->mnt;
@@ -1898,7 +1899,7 @@ static int do_move_mount(struct path *path, char *old_name)
 	/*
 	 * Don't move a mount residing in a shared parent.
 	 */
-	if (IS_MNT_SHARED(old_path.mnt->mnt_parent))
+	if (IS_MNT_SHARED(old->mnt_parent))
 		goto out1;
 	/*
 	 * Don't move a mount tree containing unbindable mounts to a destination
@@ -1908,7 +1909,7 @@ static int do_move_mount(struct path *path, char *old_name)
 	    tree_contains_unbindable(old))
 		goto out1;
 	err = -ELOOP;
-	for (p = real_mount(path->mnt); mnt_has_parent(p); p = real_mount(p->mnt.mnt_parent))
+	for (p = real_mount(path->mnt); mnt_has_parent(p); p = real_mount(p->mnt_parent))
 		if (p == old)
 			goto out1;
 
@@ -2158,7 +2159,7 @@ resume:
 	 */
 	if (this_parent != parent) {
 		next = this_parent->mnt.mnt_child.next;
-		this_parent = real_mount(this_parent->mnt.mnt_parent);
+		this_parent = real_mount(this_parent->mnt_parent);
 		goto resume;
 	}
 	return found;
@@ -2564,7 +2565,7 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
 {
 	while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
 		dentry = mnt->mnt.mnt_mountpoint;
-		mnt = real_mount(mnt->mnt.mnt_parent);
+		mnt = real_mount(mnt->mnt_parent);
 	}
 	return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
 }
@@ -2635,8 +2636,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	new_mnt = real_mount(new.mnt);
 	root_mnt = real_mount(root.mnt);
 	if (IS_MNT_SHARED(old.mnt) ||
-		IS_MNT_SHARED(new.mnt->mnt_parent) ||
-		IS_MNT_SHARED(root.mnt->mnt_parent))
+		IS_MNT_SHARED(new_mnt->mnt_parent) ||
+		IS_MNT_SHARED(root_mnt->mnt_parent))
 		goto out4;
 	if (!check_mnt(root.mnt) || !check_mnt(new.mnt))
 		goto out4;
diff --git a/fs/pnode.c b/fs/pnode.c
index 25f74b53dea6..2ff4dfa018e1 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -292,7 +292,7 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
 {
 	struct vfsmount *m;
 	struct mount *child;
-	struct vfsmount *parent = mnt->mnt.mnt_parent;
+	struct vfsmount *parent = mnt->mnt_parent;
 	int ret = 0;
 
 	if (&mnt->mnt == parent)
@@ -322,7 +322,7 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
  */
 static void __propagate_umount(struct mount *mnt)
 {
-	struct vfsmount *parent = mnt->mnt.mnt_parent;
+	struct vfsmount *parent = mnt->mnt_parent;
 	struct vfsmount *m;
 
 	BUG_ON(parent == &mnt->mnt);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 77c913dc7397..b69362d2b5b2 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -53,7 +53,6 @@ struct mnt_pcp {
 };
 
 struct vfsmount {
-	struct vfsmount *mnt_parent;	/* fs we are mounted on */
 	struct dentry *mnt_mountpoint;	/* dentry of mountpoint */
 	struct dentry *mnt_root;	/* root of the mounted tree */
 	struct super_block *mnt_sb;	/* pointer to superblock */
-- 
cgit v1.2.3


From a73324da7af4052e1d1ddec6a5980f552420e58b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 24 Nov 2011 22:25:07 -0500
Subject: vfs: move mnt_mountpoint to struct mount

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c           |  2 +-
 fs/mount.h            |  1 +
 fs/namei.c            |  4 ++--
 fs/namespace.c        | 35 +++++++++++++++++------------------
 fs/pnode.c            |  4 ++--
 include/linux/mount.h |  1 -
 6 files changed, 23 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 98b48753f77b..24790041ea76 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2464,7 +2464,7 @@ static int prepend_path(const struct path *path,
 			/* Global root? */
 			if (!mnt_has_parent(mnt))
 				goto global_root;
-			dentry = mnt->mnt.mnt_mountpoint;
+			dentry = mnt->mnt_mountpoint;
 			mnt = mnt->mnt_parent;
 			vfsmnt = &mnt->mnt;
 			continue;
diff --git a/fs/mount.h b/fs/mount.h
index 201dd616e6c4..853738f5897f 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -3,6 +3,7 @@
 struct mount {
 	struct list_head mnt_hash;
 	struct mount *mnt_parent;
+	struct dentry *mnt_mountpoint;
 	struct vfsmount mnt;
 };
 
diff --git a/fs/namei.c b/fs/namei.c
index 2e9110a37c0e..87363aab43f0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -684,7 +684,7 @@ static int follow_up_rcu(struct path *path)
 	parent = mnt->mnt_parent;
 	if (&parent->mnt == path->mnt)
 		return 0;
-	mountpoint = mnt->mnt.mnt_mountpoint;
+	mountpoint = mnt->mnt_mountpoint;
 	path->dentry = mountpoint;
 	path->mnt = &parent->mnt;
 	return 1;
@@ -703,7 +703,7 @@ int follow_up(struct path *path)
 		return 0;
 	}
 	mntget(&parent->mnt);
-	mountpoint = dget(mnt->mnt.mnt_mountpoint);
+	mountpoint = dget(mnt->mnt_mountpoint);
 	br_read_unlock(vfsmount_lock);
 	dput(path->dentry);
 	path->dentry = mountpoint;
diff --git a/fs/namespace.c b/fs/namespace.c
index 5e700c6df579..ec798e77b726 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -476,7 +476,7 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
 		if (tmp == head)
 			break;
 		p = list_entry(tmp, struct mount, mnt_hash);
-		if (&p->mnt_parent->mnt == mnt && p->mnt.mnt_mountpoint == dentry) {
+		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) {
 			found = p;
 			break;
 		}
@@ -543,7 +543,7 @@ static void dentry_reset_mounted(struct dentry *dentry)
 		struct mount *p;
 
 		list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
-			if (p->mnt.mnt_mountpoint == dentry)
+			if (p->mnt_mountpoint == dentry)
 				return;
 		}
 	}
@@ -557,10 +557,10 @@ static void dentry_reset_mounted(struct dentry *dentry)
  */
 static void detach_mnt(struct mount *mnt, struct path *old_path)
 {
-	old_path->dentry = mnt->mnt.mnt_mountpoint;
+	old_path->dentry = mnt->mnt_mountpoint;
 	old_path->mnt = &mnt->mnt_parent->mnt;
 	mnt->mnt_parent = mnt;
-	mnt->mnt.mnt_mountpoint = mnt->mnt.mnt_root;
+	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 	list_del_init(&mnt->mnt.mnt_child);
 	list_del_init(&mnt->mnt_hash);
 	dentry_reset_mounted(old_path->dentry);
@@ -573,7 +573,7 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
 			struct mount *child_mnt)
 {
 	child_mnt->mnt_parent = real_mount(mntget(mnt));
-	child_mnt->mnt.mnt_mountpoint = dget(dentry);
+	child_mnt->mnt_mountpoint = dget(dentry);
 	spin_lock(&dentry->d_lock);
 	dentry->d_flags |= DCACHE_MOUNTED;
 	spin_unlock(&dentry->d_lock);
@@ -626,7 +626,7 @@ static void commit_tree(struct mount *mnt)
 	list_splice(&head, n->list.prev);
 
 	list_add_tail(&mnt->mnt_hash, mount_hashtable +
-				hash(&parent->mnt, mnt->mnt.mnt_mountpoint));
+				hash(&parent->mnt, mnt->mnt_mountpoint));
 	list_add_tail(&mnt->mnt.mnt_child, &parent->mnt.mnt_mounts);
 	touch_mnt_namespace(n);
 }
@@ -681,7 +681,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 
 	mnt->mnt.mnt_root = root;
 	mnt->mnt.mnt_sb = root->d_sb;
-	mnt->mnt.mnt_mountpoint = mnt->mnt.mnt_root;
+	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 	mnt->mnt_parent = mnt;
 	return &mnt->mnt;
 }
@@ -709,7 +709,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 		atomic_inc(&sb->s_active);
 		mnt->mnt.mnt_sb = sb;
 		mnt->mnt.mnt_root = dget(root);
-		mnt->mnt.mnt_mountpoint = mnt->mnt.mnt_root;
+		mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 		mnt->mnt_parent = mnt;
 
 		if (flag & CL_SLAVE) {
@@ -1201,9 +1201,9 @@ void release_mounts(struct list_head *head)
 			struct vfsmount *m;
 
 			br_write_lock(vfsmount_lock);
-			dentry = mnt->mnt.mnt_mountpoint;
+			dentry = mnt->mnt_mountpoint;
 			m = &mnt->mnt_parent->mnt;
-			mnt->mnt.mnt_mountpoint = mnt->mnt.mnt_root;
+			mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 			mnt->mnt_parent = mnt;
 			m->mnt_ghosts--;
 			br_write_unlock(vfsmount_lock);
@@ -1238,7 +1238,7 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
 		list_del_init(&p->mnt.mnt_child);
 		if (mnt_has_parent(p)) {
 			p->mnt_parent->mnt.mnt_ghosts++;
-			dentry_reset_mounted(p->mnt.mnt_mountpoint);
+			dentry_reset_mounted(p->mnt_mountpoint);
 		}
 		change_mnt_propagation(p, MS_PRIVATE);
 	}
@@ -1412,8 +1412,7 @@ static int mount_is_safe(struct path *path)
 struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 					int flag)
 {
-	struct mount *res, *p, *q;
-	struct vfsmount *r;
+	struct mount *res, *p, *q, *r;
 	struct path path;
 
 	if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(&mnt->mnt))
@@ -1422,15 +1421,15 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 	res = q = clone_mnt(mnt, dentry, flag);
 	if (!q)
 		goto Enomem;
-	q->mnt.mnt_mountpoint = mnt->mnt.mnt_mountpoint;
+	q->mnt_mountpoint = mnt->mnt_mountpoint;
 
 	p = mnt;
-	list_for_each_entry(r, &mnt->mnt.mnt_mounts, mnt_child) {
+	list_for_each_entry(r, &mnt->mnt.mnt_mounts, mnt.mnt_child) {
 		struct mount *s;
 		if (!is_subdir(r->mnt_mountpoint, dentry))
 			continue;
 
-		for (s = real_mount(r); s; s = next_mnt(s, r)) {
+		for (s = r; s; s = next_mnt(s, &r->mnt)) {
 			if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(&s->mnt)) {
 				s = skip_mnt_tree(s);
 				continue;
@@ -1441,7 +1440,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 			}
 			p = s;
 			path.mnt = &q->mnt;
-			path.dentry = p->mnt.mnt_mountpoint;
+			path.dentry = p->mnt_mountpoint;
 			q = clone_mnt(p, p->mnt.mnt_root, flag);
 			if (!q)
 				goto Enomem;
@@ -2564,7 +2563,7 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
 			 const struct path *root)
 {
 	while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
-		dentry = mnt->mnt.mnt_mountpoint;
+		dentry = mnt->mnt_mountpoint;
 		mnt = mnt->mnt_parent;
 	}
 	return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
diff --git a/fs/pnode.c b/fs/pnode.c
index 7fddc671f729..bd280200bd37 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -308,7 +308,7 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
 
 	for (m = propagation_next(&parent->mnt, &parent->mnt); m;
 	     		m = propagation_next(m, &parent->mnt)) {
-		child = __lookup_mnt(m, mnt->mnt.mnt_mountpoint, 0);
+		child = __lookup_mnt(m, mnt->mnt_mountpoint, 0);
 		if (child && list_empty(&child->mnt.mnt_mounts) &&
 		    (ret = do_refcount_check(child, 1)))
 			break;
@@ -331,7 +331,7 @@ static void __propagate_umount(struct mount *mnt)
 			m = propagation_next(m, &parent->mnt)) {
 
 		struct mount *child = __lookup_mnt(m,
-					mnt->mnt.mnt_mountpoint, 0);
+					mnt->mnt_mountpoint, 0);
 		/*
 		 * umount the child only if the child has no
 		 * other children
diff --git a/include/linux/mount.h b/include/linux/mount.h
index b69362d2b5b2..e3f005993d0f 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -53,7 +53,6 @@ struct mnt_pcp {
 };
 
 struct vfsmount {
-	struct dentry *mnt_mountpoint;	/* dentry of mountpoint */
 	struct dentry *mnt_root;	/* root of the mounted tree */
 	struct super_block *mnt_sb;	/* pointer to superblock */
 #ifdef CONFIG_SMP
-- 
cgit v1.2.3


From 68e8a9feab251f9d3c8fd9e9893c97843bcd4bd0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 24 Nov 2011 22:53:09 -0500
Subject: vfs: all counters taken to struct mount

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h            | 12 ++++++++++++
 fs/namespace.c        | 40 ++++++++++++++++++++--------------------
 include/linux/mount.h | 12 ------------
 3 files changed, 32 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index 853738f5897f..452ae41e0131 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -1,10 +1,22 @@
 #include <linux/mount.h>
 
+struct mnt_pcp {
+	int mnt_count;
+	int mnt_writers;
+};
+
 struct mount {
 	struct list_head mnt_hash;
 	struct mount *mnt_parent;
 	struct dentry *mnt_mountpoint;
 	struct vfsmount mnt;
+#ifdef CONFIG_SMP
+	struct mnt_pcp __percpu *mnt_pcp;
+	atomic_t mnt_longterm;		/* how many of the refs are longterm */
+#else
+	int mnt_count;
+	int mnt_writers;
+#endif
 };
 
 static inline struct mount *real_mount(struct vfsmount *mnt)
diff --git a/fs/namespace.c b/fs/namespace.c
index a13165c871c2..3fdd30add4f9 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -144,10 +144,10 @@ void mnt_release_group_id(struct mount *mnt)
 static inline void mnt_add_count(struct mount *mnt, int n)
 {
 #ifdef CONFIG_SMP
-	this_cpu_add(mnt->mnt.mnt_pcp->mnt_count, n);
+	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
 #else
 	preempt_disable();
-	mnt->mnt.mnt_count += n;
+	mnt->mnt_count += n;
 	preempt_enable();
 #endif
 }
@@ -162,12 +162,12 @@ unsigned int mnt_get_count(struct mount *mnt)
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		count += per_cpu_ptr(mnt->mnt.mnt_pcp, cpu)->mnt_count;
+		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
 	}
 
 	return count;
 #else
-	return mnt->mnt.mnt_count;
+	return mnt->mnt_count;
 #endif
 }
 
@@ -189,14 +189,14 @@ static struct mount *alloc_vfsmnt(const char *name)
 		}
 
 #ifdef CONFIG_SMP
-		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
-		if (!mnt->mnt_pcp)
+		p->mnt_pcp = alloc_percpu(struct mnt_pcp);
+		if (!p->mnt_pcp)
 			goto out_free_devname;
 
-		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
+		this_cpu_add(p->mnt_pcp->mnt_count, 1);
 #else
-		mnt->mnt_count = 1;
-		mnt->mnt_writers = 0;
+		p->mnt_count = 1;
+		p->mnt_writers = 0;
 #endif
 
 		INIT_LIST_HEAD(&p->mnt_hash);
@@ -256,18 +256,18 @@ EXPORT_SYMBOL_GPL(__mnt_is_readonly);
 static inline void mnt_inc_writers(struct mount *mnt)
 {
 #ifdef CONFIG_SMP
-	this_cpu_inc(mnt->mnt.mnt_pcp->mnt_writers);
+	this_cpu_inc(mnt->mnt_pcp->mnt_writers);
 #else
-	mnt->mnt.mnt_writers++;
+	mnt->mnt_writers++;
 #endif
 }
 
 static inline void mnt_dec_writers(struct mount *mnt)
 {
 #ifdef CONFIG_SMP
-	this_cpu_dec(mnt->mnt.mnt_pcp->mnt_writers);
+	this_cpu_dec(mnt->mnt_pcp->mnt_writers);
 #else
-	mnt->mnt.mnt_writers--;
+	mnt->mnt_writers--;
 #endif
 }
 
@@ -278,7 +278,7 @@ static unsigned int mnt_get_writers(struct mount *mnt)
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		count += per_cpu_ptr(mnt->mnt.mnt_pcp, cpu)->mnt_writers;
+		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
 	}
 
 	return count;
@@ -454,7 +454,7 @@ static void free_vfsmnt(struct mount *mnt)
 	kfree(mnt->mnt.mnt_devname);
 	mnt_free_id(mnt);
 #ifdef CONFIG_SMP
-	free_percpu(mnt->mnt.mnt_pcp);
+	free_percpu(mnt->mnt_pcp);
 #endif
 	kmem_cache_free(mnt_cache, mnt);
 }
@@ -594,7 +594,7 @@ static void attach_mnt(struct mount *mnt, struct path *path)
 static inline void __mnt_make_longterm(struct mount *mnt)
 {
 #ifdef CONFIG_SMP
-	atomic_inc(&mnt->mnt.mnt_longterm);
+	atomic_inc(&mnt->mnt_longterm);
 #endif
 }
 
@@ -602,7 +602,7 @@ static inline void __mnt_make_longterm(struct mount *mnt)
 static inline void __mnt_make_shortterm(struct mount *mnt)
 {
 #ifdef CONFIG_SMP
-	atomic_dec(&mnt->mnt.mnt_longterm);
+	atomic_dec(&mnt->mnt_longterm);
 #endif
 }
 
@@ -769,7 +769,7 @@ static void mntput_no_expire(struct vfsmount *m)
 put_again:
 #ifdef CONFIG_SMP
 	br_read_lock(vfsmount_lock);
-	if (likely(atomic_read(&mnt->mnt.mnt_longterm))) {
+	if (likely(atomic_read(&mnt->mnt_longterm))) {
 		mnt_add_count(mnt, -1);
 		br_read_unlock(vfsmount_lock);
 		return;
@@ -2375,10 +2375,10 @@ void mnt_make_shortterm(struct vfsmount *m)
 {
 #ifdef CONFIG_SMP
 	struct mount *mnt = real_mount(m);
-	if (atomic_add_unless(&mnt->mnt.mnt_longterm, -1, 1))
+	if (atomic_add_unless(&mnt->mnt_longterm, -1, 1))
 		return;
 	br_write_lock(vfsmount_lock);
-	atomic_dec(&mnt->mnt.mnt_longterm);
+	atomic_dec(&mnt->mnt_longterm);
 	br_write_unlock(vfsmount_lock);
 #endif
 }
diff --git a/include/linux/mount.h b/include/linux/mount.h
index e3f005993d0f..cc01ed1bc719 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -47,21 +47,9 @@ struct mnt_namespace;
 
 #define MNT_INTERNAL	0x4000
 
-struct mnt_pcp {
-	int mnt_count;
-	int mnt_writers;
-};
-
 struct vfsmount {
 	struct dentry *mnt_root;	/* root of the mounted tree */
 	struct super_block *mnt_sb;	/* pointer to superblock */
-#ifdef CONFIG_SMP
-	struct mnt_pcp __percpu *mnt_pcp;
-	atomic_t mnt_longterm;		/* how many of the refs are longterm */
-#else
-	int mnt_count;
-	int mnt_writers;
-#endif
 	struct list_head mnt_mounts;	/* list of children, anchored here */
 	struct list_head mnt_child;	/* and going through their mnt_child */
 	int mnt_flags;
-- 
cgit v1.2.3


From 6b41d536f7c84e7cb1c1462073150277e46f6ea8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 24 Nov 2011 23:24:33 -0500
Subject: vfs: take mnt_child/mnt_mounts to struct mount

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h            |  2 ++
 fs/namespace.c        | 42 +++++++++++++++++++++---------------------
 fs/pnode.c            |  6 +++---
 include/linux/mount.h |  2 --
 4 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index 452ae41e0131..e4ecf59c9353 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -17,6 +17,8 @@ struct mount {
 	int mnt_count;
 	int mnt_writers;
 #endif
+	struct list_head mnt_mounts;	/* list of children, anchored here */
+	struct list_head mnt_child;	/* and going through their mnt_child */
 };
 
 static inline struct mount *real_mount(struct vfsmount *mnt)
diff --git a/fs/namespace.c b/fs/namespace.c
index 3fdd30add4f9..9ceb03fe176f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -200,8 +200,8 @@ static struct mount *alloc_vfsmnt(const char *name)
 #endif
 
 		INIT_LIST_HEAD(&p->mnt_hash);
-		INIT_LIST_HEAD(&mnt->mnt_child);
-		INIT_LIST_HEAD(&mnt->mnt_mounts);
+		INIT_LIST_HEAD(&p->mnt_child);
+		INIT_LIST_HEAD(&p->mnt_mounts);
 		INIT_LIST_HEAD(&mnt->mnt_list);
 		INIT_LIST_HEAD(&mnt->mnt_expire);
 		INIT_LIST_HEAD(&mnt->mnt_share);
@@ -562,7 +562,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
 	old_path->mnt = &mnt->mnt_parent->mnt;
 	mnt->mnt_parent = mnt;
 	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
-	list_del_init(&mnt->mnt.mnt_child);
+	list_del_init(&mnt->mnt_child);
 	list_del_init(&mnt->mnt_hash);
 	dentry_reset_mounted(old_path->dentry);
 }
@@ -588,7 +588,7 @@ static void attach_mnt(struct mount *mnt, struct path *path)
 	mnt_set_mountpoint(path->mnt, path->dentry, mnt);
 	list_add_tail(&mnt->mnt_hash, mount_hashtable +
 			hash(path->mnt, path->dentry));
-	list_add_tail(&mnt->mnt.mnt_child, &path->mnt->mnt_mounts);
+	list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts);
 }
 
 static inline void __mnt_make_longterm(struct mount *mnt)
@@ -628,32 +628,32 @@ static void commit_tree(struct mount *mnt)
 
 	list_add_tail(&mnt->mnt_hash, mount_hashtable +
 				hash(&parent->mnt, mnt->mnt_mountpoint));
-	list_add_tail(&mnt->mnt.mnt_child, &parent->mnt.mnt_mounts);
+	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
 	touch_mnt_namespace(n);
 }
 
 static struct mount *next_mnt(struct mount *p, struct vfsmount *root)
 {
-	struct list_head *next = p->mnt.mnt_mounts.next;
-	if (next == &p->mnt.mnt_mounts) {
+	struct list_head *next = p->mnt_mounts.next;
+	if (next == &p->mnt_mounts) {
 		while (1) {
 			if (&p->mnt == root)
 				return NULL;
-			next = p->mnt.mnt_child.next;
-			if (next != &p->mnt_parent->mnt.mnt_mounts)
+			next = p->mnt_child.next;
+			if (next != &p->mnt_parent->mnt_mounts)
 				break;
 			p = p->mnt_parent;
 		}
 	}
-	return list_entry(next, struct mount, mnt.mnt_child);
+	return list_entry(next, struct mount, mnt_child);
 }
 
 static struct mount *skip_mnt_tree(struct mount *p)
 {
-	struct list_head *prev = p->mnt.mnt_mounts.prev;
-	while (prev != &p->mnt.mnt_mounts) {
-		p = list_entry(prev, struct mount, mnt.mnt_child);
-		prev = p->mnt.mnt_mounts.prev;
+	struct list_head *prev = p->mnt_mounts.prev;
+	while (prev != &p->mnt_mounts) {
+		p = list_entry(prev, struct mount, mnt_child);
+		prev = p->mnt_mounts.prev;
 	}
 	return p;
 }
@@ -1238,7 +1238,7 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
 		__touch_mnt_namespace(p->mnt.mnt_ns);
 		p->mnt.mnt_ns = NULL;
 		__mnt_make_shortterm(p);
-		list_del_init(&p->mnt.mnt_child);
+		list_del_init(&p->mnt_child);
 		if (mnt_has_parent(p)) {
 			p->mnt_parent->mnt.mnt_ghosts++;
 			dentry_reset_mounted(p->mnt_mountpoint);
@@ -1427,7 +1427,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 	q->mnt_mountpoint = mnt->mnt_mountpoint;
 
 	p = mnt;
-	list_for_each_entry(r, &mnt->mnt.mnt_mounts, mnt.mnt_child) {
+	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
 		struct mount *s;
 		if (!is_subdir(r->mnt_mountpoint, dentry))
 			continue;
@@ -2134,11 +2134,11 @@ static int select_submounts(struct mount *parent, struct list_head *graveyard)
 	int found = 0;
 
 repeat:
-	next = this_parent->mnt.mnt_mounts.next;
+	next = this_parent->mnt_mounts.next;
 resume:
-	while (next != &this_parent->mnt.mnt_mounts) {
+	while (next != &this_parent->mnt_mounts) {
 		struct list_head *tmp = next;
-		struct mount *mnt = list_entry(tmp, struct mount, mnt.mnt_child);
+		struct mount *mnt = list_entry(tmp, struct mount, mnt_child);
 
 		next = tmp->next;
 		if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
@@ -2146,7 +2146,7 @@ resume:
 		/*
 		 * Descend a level if the d_mounts list is non-empty.
 		 */
-		if (!list_empty(&mnt->mnt.mnt_mounts)) {
+		if (!list_empty(&mnt->mnt_mounts)) {
 			this_parent = mnt;
 			goto repeat;
 		}
@@ -2160,7 +2160,7 @@ resume:
 	 * All done at this level ... ascend and resume the search
 	 */
 	if (this_parent != parent) {
-		next = this_parent->mnt.mnt_child.next;
+		next = this_parent->mnt_child.next;
 		this_parent = this_parent->mnt_parent;
 		goto resume;
 	}
diff --git a/fs/pnode.c b/fs/pnode.c
index 50fdb29eebfe..8cd90d2ec05e 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -303,13 +303,13 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
 	 * If not, we don't have to go checking for all other
 	 * mounts
 	 */
-	if (!list_empty(&mnt->mnt.mnt_mounts) || do_refcount_check(mnt, refcnt))
+	if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
 		return 1;
 
 	for (m = propagation_next(&parent->mnt, &parent->mnt); m;
 	     		m = propagation_next(m, &parent->mnt)) {
 		child = __lookup_mnt(m, mnt->mnt_mountpoint, 0);
-		if (child && list_empty(&child->mnt.mnt_mounts) &&
+		if (child && list_empty(&child->mnt_mounts) &&
 		    (ret = do_refcount_check(child, 1)))
 			break;
 	}
@@ -336,7 +336,7 @@ static void __propagate_umount(struct mount *mnt)
 		 * umount the child only if the child has no
 		 * other children
 		 */
-		if (child && list_empty(&child->mnt.mnt_mounts))
+		if (child && list_empty(&child->mnt_mounts))
 			list_move_tail(&child->mnt_hash, &mnt->mnt_hash);
 	}
 }
diff --git a/include/linux/mount.h b/include/linux/mount.h
index cc01ed1bc719..e9990254d4d0 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -50,8 +50,6 @@ struct mnt_namespace;
 struct vfsmount {
 	struct dentry *mnt_root;	/* root of the mounted tree */
 	struct super_block *mnt_sb;	/* pointer to superblock */
-	struct list_head mnt_mounts;	/* list of children, anchored here */
-	struct list_head mnt_child;	/* and going through their mnt_child */
 	int mnt_flags;
 	/* 4 bytes hole on 64bits arches without fsnotify */
 #ifdef CONFIG_FSNOTIFY
-- 
cgit v1.2.3


From d10e8def07fc87488c396d2eff2c26c43bb541dd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 25 Nov 2011 00:07:16 -0500
Subject: vfs: take mnt_master to struct mount

make IS_MNT_SLAVE take struct mount * at the same time

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h            |  2 ++
 fs/namespace.c        | 10 +++++-----
 fs/pnode.c            | 26 +++++++++++++-------------
 include/linux/mount.h |  1 -
 4 files changed, 20 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index e4ecf59c9353..7071d8fa9307 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -19,6 +19,8 @@ struct mount {
 #endif
 	struct list_head mnt_mounts;	/* list of children, anchored here */
 	struct list_head mnt_child;	/* and going through their mnt_child */
+	/* yet to be moved - up to mnt_slave */
+	struct vfsmount *mnt_master;	/* slave is on master->mnt_slave_list */
 };
 
 static inline struct mount *real_mount(struct vfsmount *mnt)
diff --git a/fs/namespace.c b/fs/namespace.c
index ee42e671afdc..3439042fc9f2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -715,14 +715,14 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 
 		if (flag & CL_SLAVE) {
 			list_add(&mnt->mnt.mnt_slave, &old->mnt.mnt_slave_list);
-			mnt->mnt.mnt_master = &old->mnt;
+			mnt->mnt_master = &old->mnt;
 			CLEAR_MNT_SHARED(&mnt->mnt);
 		} else if (!(flag & CL_PRIVATE)) {
 			if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(&old->mnt))
 				list_add(&mnt->mnt.mnt_share, &old->mnt.mnt_share);
-			if (IS_MNT_SLAVE(&old->mnt))
+			if (IS_MNT_SLAVE(old))
 				list_add(&mnt->mnt.mnt_slave, &old->mnt.mnt_slave);
-			mnt->mnt.mnt_master = old->mnt.mnt_master;
+			mnt->mnt_master = old->mnt_master;
 		}
 		if (flag & CL_MAKE_SHARED)
 			set_mnt_shared(mnt);
@@ -1051,8 +1051,8 @@ static int show_mountinfo(struct seq_file *m, void *v)
 	/* Tagged fields ("foo:X" or "bar") */
 	if (IS_MNT_SHARED(mnt))
 		seq_printf(m, " shared:%i", mnt->mnt_group_id);
-	if (IS_MNT_SLAVE(mnt)) {
-		int master = mnt->mnt_master->mnt_group_id;
+	if (IS_MNT_SLAVE(r)) {
+		int master = r->mnt_master->mnt_group_id;
 		int dom = get_dominating_id(r, &p->root);
 		seq_printf(m, " master:%i", master);
 		if (dom && dom != master)
diff --git a/fs/pnode.c b/fs/pnode.c
index 0e1de28b1b2e..3ac44d15fe58 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -55,7 +55,7 @@ int get_dominating_id(struct mount *mnt, const struct path *root)
 {
 	struct mount *m;
 
-	for (m = real_mount(mnt->mnt.mnt_master); m != NULL; m = real_mount(m->mnt.mnt_master)) {
+	for (m = real_mount(mnt->mnt_master); m != NULL; m = real_mount(m->mnt_master)) {
 		struct mount *d = get_peer_under_root(m, mnt->mnt.mnt_ns, root);
 		if (d)
 			return d->mnt.mnt_group_id;
@@ -66,8 +66,8 @@ int get_dominating_id(struct mount *mnt, const struct path *root)
 
 static int do_make_slave(struct mount *mnt)
 {
-	struct mount *peer_mnt = mnt, *master = real_mount(mnt->mnt.mnt_master);
-	struct vfsmount *slave_mnt;
+	struct mount *peer_mnt = mnt, *master = real_mount(mnt->mnt_master);
+	struct mount *slave_mnt;
 
 	/*
 	 * slave 'mnt' to a peer mount that has the
@@ -92,7 +92,7 @@ static int do_make_slave(struct mount *mnt)
 		master = peer_mnt;
 
 	if (master) {
-		list_for_each_entry(slave_mnt, &mnt->mnt.mnt_slave_list, mnt_slave)
+		list_for_each_entry(slave_mnt, &mnt->mnt.mnt_slave_list, mnt.mnt_slave)
 			slave_mnt->mnt_master = &master->mnt;
 		list_move(&mnt->mnt.mnt_slave, &master->mnt.mnt_slave_list);
 		list_splice(&mnt->mnt.mnt_slave_list, master->mnt.mnt_slave_list.prev);
@@ -101,12 +101,12 @@ static int do_make_slave(struct mount *mnt)
 		struct list_head *p = &mnt->mnt.mnt_slave_list;
 		while (!list_empty(p)) {
                         slave_mnt = list_first_entry(p,
-					struct vfsmount, mnt_slave);
-			list_del_init(&slave_mnt->mnt_slave);
+					struct mount, mnt.mnt_slave);
+			list_del_init(&slave_mnt->mnt.mnt_slave);
 			slave_mnt->mnt_master = NULL;
 		}
 	}
-	mnt->mnt.mnt_master = &master->mnt;
+	mnt->mnt_master = &master->mnt;
 	CLEAR_MNT_SHARED(&mnt->mnt);
 	return 0;
 }
@@ -123,7 +123,7 @@ void change_mnt_propagation(struct mount *mnt, int type)
 	do_make_slave(mnt);
 	if (type != MS_SLAVE) {
 		list_del_init(&mnt->mnt.mnt_slave);
-		mnt->mnt.mnt_master = NULL;
+		mnt->mnt_master = NULL;
 		if (type == MS_UNBINDABLE)
 			mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
 		else
@@ -149,9 +149,9 @@ static struct mount *propagation_next(struct mount *m,
 		return first_slave(m);
 
 	while (1) {
-		struct mount *master = real_mount(m->mnt.mnt_master);
+		struct mount *master = real_mount(m->mnt_master);
 
-		if (&master->mnt == origin->mnt.mnt_master) {
+		if (&master->mnt == origin->mnt_master) {
 			struct mount *next = next_peer(m);
 			return (next == origin) ? NULL : next;
 		} else if (m->mnt.mnt_slave.next != &master->mnt.mnt_slave_list)
@@ -179,11 +179,11 @@ static struct mount *get_source(struct mount *dest,
 	struct mount *p_last_src = NULL;
 	struct mount *p_last_dest = NULL;
 
-	while (&last_dest->mnt != dest->mnt.mnt_master) {
+	while (&last_dest->mnt != dest->mnt_master) {
 		p_last_dest = last_dest;
 		p_last_src = last_src;
-		last_dest = real_mount(last_dest->mnt.mnt_master);
-		last_src = real_mount(last_src->mnt.mnt_master);
+		last_dest = real_mount(last_dest->mnt_master);
+		last_src = real_mount(last_src->mnt_master);
 	}
 
 	if (p_last_dest) {
diff --git a/include/linux/mount.h b/include/linux/mount.h
index e9990254d4d0..2d5beb5e3a8c 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -62,7 +62,6 @@ struct vfsmount {
 	struct list_head mnt_share;	/* circular list of shared mounts */
 	struct list_head mnt_slave_list;/* list of slave mounts */
 	struct list_head mnt_slave;	/* slave list entry */
-	struct vfsmount *mnt_master;	/* slave is on master->mnt_slave_list */
 	struct mnt_namespace *mnt_ns;	/* containing namespace */
 	int mnt_id;			/* mount identifier */
 	int mnt_group_id;		/* peer group identifier */
-- 
cgit v1.2.3


From 6776db3d32b2a59198ec7ac6d32be0b9fdbd8a68 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 25 Nov 2011 00:22:05 -0500
Subject: vfs: take mnt_share/mnt_slave/mnt_slave_list and mnt_expire to struct
 mount

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h            |  6 +++++-
 fs/namespace.c        | 41 +++++++++++++++++++++--------------------
 fs/pnode.c            | 30 +++++++++++++++---------------
 include/linux/mount.h |  4 ----
 4 files changed, 41 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index d4db4c7e1815..eb62ad232e4d 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -19,7 +19,11 @@ struct mount {
 #endif
 	struct list_head mnt_mounts;	/* list of children, anchored here */
 	struct list_head mnt_child;	/* and going through their mnt_child */
-	/* yet to be moved - up to mnt_slave */
+	/* yet to be moved - up to mnt_list */
+	struct list_head mnt_expire;	/* link in fs-specific expiry list */
+	struct list_head mnt_share;	/* circular list of shared mounts */
+	struct list_head mnt_slave_list;/* list of slave mounts */
+	struct list_head mnt_slave;	/* slave list entry */
 	struct mount *mnt_master;	/* slave is on master->mnt_slave_list */
 };
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 847b7240c512..a14750be7a70 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -203,10 +203,10 @@ static struct mount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&p->mnt_child);
 		INIT_LIST_HEAD(&p->mnt_mounts);
 		INIT_LIST_HEAD(&mnt->mnt_list);
-		INIT_LIST_HEAD(&mnt->mnt_expire);
-		INIT_LIST_HEAD(&mnt->mnt_share);
-		INIT_LIST_HEAD(&mnt->mnt_slave_list);
-		INIT_LIST_HEAD(&mnt->mnt_slave);
+		INIT_LIST_HEAD(&p->mnt_expire);
+		INIT_LIST_HEAD(&p->mnt_share);
+		INIT_LIST_HEAD(&p->mnt_slave_list);
+		INIT_LIST_HEAD(&p->mnt_slave);
 #ifdef CONFIG_FSNOTIFY
 		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
 #endif
@@ -714,14 +714,14 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 		mnt->mnt_parent = mnt;
 
 		if (flag & CL_SLAVE) {
-			list_add(&mnt->mnt.mnt_slave, &old->mnt.mnt_slave_list);
+			list_add(&mnt->mnt_slave, &old->mnt_slave_list);
 			mnt->mnt_master = old;
 			CLEAR_MNT_SHARED(&mnt->mnt);
 		} else if (!(flag & CL_PRIVATE)) {
 			if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(&old->mnt))
-				list_add(&mnt->mnt.mnt_share, &old->mnt.mnt_share);
+				list_add(&mnt->mnt_share, &old->mnt_share);
 			if (IS_MNT_SLAVE(old))
-				list_add(&mnt->mnt.mnt_slave, &old->mnt.mnt_slave);
+				list_add(&mnt->mnt_slave, &old->mnt_slave);
 			mnt->mnt_master = old->mnt_master;
 		}
 		if (flag & CL_MAKE_SHARED)
@@ -730,8 +730,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 		/* stick the duplicate mount on the same expiry list
 		 * as the original if that was on one */
 		if (flag & CL_EXPIRE) {
-			if (!list_empty(&old->mnt.mnt_expire))
-				list_add(&mnt->mnt.mnt_expire, &old->mnt.mnt_expire);
+			if (!list_empty(&old->mnt_expire))
+				list_add(&mnt->mnt_expire, &old->mnt_expire);
 		}
 	}
 	return mnt;
@@ -1233,7 +1233,7 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
 		propagate_umount(&tmp_list);
 
 	list_for_each_entry(p, &tmp_list, mnt_hash) {
-		list_del_init(&p->mnt.mnt_expire);
+		list_del_init(&p->mnt_expire);
 		list_del_init(&p->mnt.mnt_list);
 		__touch_mnt_namespace(p->mnt.mnt_ns);
 		p->mnt.mnt_ns = NULL;
@@ -1921,7 +1921,7 @@ static int do_move_mount(struct path *path, char *old_name)
 
 	/* if the mount is moved, it should no longer be expire
 	 * automatically */
-	list_del_init(&old_path.mnt->mnt_expire);
+	list_del_init(&old->mnt_expire);
 out1:
 	unlock_mount(path);
 out:
@@ -2033,11 +2033,12 @@ static int do_new_mount(struct path *path, char *type, int flags,
 
 int finish_automount(struct vfsmount *m, struct path *path)
 {
+	struct mount *mnt = real_mount(m);
 	int err;
 	/* The new mount record should have at least 2 refs to prevent it being
 	 * expired before we get a chance to add it
 	 */
-	BUG_ON(mnt_get_count(real_mount(m)) < 2);
+	BUG_ON(mnt_get_count(mnt) < 2);
 
 	if (m->mnt_sb == path->mnt->mnt_sb &&
 	    m->mnt_root == path->dentry) {
@@ -2050,10 +2051,10 @@ int finish_automount(struct vfsmount *m, struct path *path)
 		return 0;
 fail:
 	/* remove m from any expiration list it may be on */
-	if (!list_empty(&m->mnt_expire)) {
+	if (!list_empty(&mnt->mnt_expire)) {
 		down_write(&namespace_sem);
 		br_write_lock(vfsmount_lock);
-		list_del_init(&m->mnt_expire);
+		list_del_init(&mnt->mnt_expire);
 		br_write_unlock(vfsmount_lock);
 		up_write(&namespace_sem);
 	}
@@ -2072,7 +2073,7 @@ void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
 	down_write(&namespace_sem);
 	br_write_lock(vfsmount_lock);
 
-	list_add_tail(&mnt->mnt_expire, expiry_list);
+	list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
 
 	br_write_unlock(vfsmount_lock);
 	up_write(&namespace_sem);
@@ -2102,14 +2103,14 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 	 * - still marked for expiry (marked on the last call here; marks are
 	 *   cleared by mntput())
 	 */
-	list_for_each_entry_safe(mnt, next, mounts, mnt.mnt_expire) {
+	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
 		if (!xchg(&mnt->mnt.mnt_expiry_mark, 1) ||
 			propagate_mount_busy(mnt, 1))
 			continue;
-		list_move(&mnt->mnt.mnt_expire, &graveyard);
+		list_move(&mnt->mnt_expire, &graveyard);
 	}
 	while (!list_empty(&graveyard)) {
-		mnt = list_first_entry(&graveyard, struct mount, mnt.mnt_expire);
+		mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
 		touch_mnt_namespace(mnt->mnt.mnt_ns);
 		umount_tree(mnt, 1, &umounts);
 	}
@@ -2152,7 +2153,7 @@ resume:
 		}
 
 		if (!propagate_mount_busy(mnt, 1)) {
-			list_move_tail(&mnt->mnt.mnt_expire, graveyard);
+			list_move_tail(&mnt->mnt_expire, graveyard);
 			found++;
 		}
 	}
@@ -2182,7 +2183,7 @@ static void shrink_submounts(struct mount *mnt, struct list_head *umounts)
 	while (select_submounts(mnt, &graveyard)) {
 		while (!list_empty(&graveyard)) {
 			m = list_first_entry(&graveyard, struct mount,
-						mnt.mnt_expire);
+						mnt_expire);
 			touch_mnt_namespace(m->mnt.mnt_ns);
 			umount_tree(m, 1, umounts);
 		}
diff --git a/fs/pnode.c b/fs/pnode.c
index 9bf22b61f8fb..12cc1518e0cd 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -15,17 +15,17 @@
 /* return the next shared peer mount of @p */
 static inline struct mount *next_peer(struct mount *p)
 {
-	return list_entry(p->mnt.mnt_share.next, struct mount, mnt.mnt_share);
+	return list_entry(p->mnt_share.next, struct mount, mnt_share);
 }
 
 static inline struct mount *first_slave(struct mount *p)
 {
-	return list_entry(p->mnt.mnt_slave_list.next, struct mount, mnt.mnt_slave);
+	return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
 }
 
 static inline struct mount *next_slave(struct mount *p)
 {
-	return list_entry(p->mnt.mnt_slave.next, struct mount, mnt.mnt_slave);
+	return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
 }
 
 static struct mount *get_peer_under_root(struct mount *mnt,
@@ -82,27 +82,27 @@ static int do_make_slave(struct mount *mnt)
 		if (peer_mnt == mnt)
 			peer_mnt = NULL;
 	}
-	if (IS_MNT_SHARED(&mnt->mnt) && list_empty(&mnt->mnt.mnt_share))
+	if (IS_MNT_SHARED(&mnt->mnt) && list_empty(&mnt->mnt_share))
 		mnt_release_group_id(mnt);
 
-	list_del_init(&mnt->mnt.mnt_share);
+	list_del_init(&mnt->mnt_share);
 	mnt->mnt.mnt_group_id = 0;
 
 	if (peer_mnt)
 		master = peer_mnt;
 
 	if (master) {
-		list_for_each_entry(slave_mnt, &mnt->mnt.mnt_slave_list, mnt.mnt_slave)
+		list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
 			slave_mnt->mnt_master = master;
-		list_move(&mnt->mnt.mnt_slave, &master->mnt.mnt_slave_list);
-		list_splice(&mnt->mnt.mnt_slave_list, master->mnt.mnt_slave_list.prev);
-		INIT_LIST_HEAD(&mnt->mnt.mnt_slave_list);
+		list_move(&mnt->mnt_slave, &master->mnt_slave_list);
+		list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
+		INIT_LIST_HEAD(&mnt->mnt_slave_list);
 	} else {
-		struct list_head *p = &mnt->mnt.mnt_slave_list;
+		struct list_head *p = &mnt->mnt_slave_list;
 		while (!list_empty(p)) {
                         slave_mnt = list_first_entry(p,
-					struct mount, mnt.mnt_slave);
-			list_del_init(&slave_mnt->mnt.mnt_slave);
+					struct mount, mnt_slave);
+			list_del_init(&slave_mnt->mnt_slave);
 			slave_mnt->mnt_master = NULL;
 		}
 	}
@@ -122,7 +122,7 @@ void change_mnt_propagation(struct mount *mnt, int type)
 	}
 	do_make_slave(mnt);
 	if (type != MS_SLAVE) {
-		list_del_init(&mnt->mnt.mnt_slave);
+		list_del_init(&mnt->mnt_slave);
 		mnt->mnt_master = NULL;
 		if (type == MS_UNBINDABLE)
 			mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
@@ -145,7 +145,7 @@ static struct mount *propagation_next(struct mount *m,
 					 struct mount *origin)
 {
 	/* are there any slaves of this mount? */
-	if (!IS_MNT_NEW(&m->mnt) && !list_empty(&m->mnt.mnt_slave_list))
+	if (!IS_MNT_NEW(&m->mnt) && !list_empty(&m->mnt_slave_list))
 		return first_slave(m);
 
 	while (1) {
@@ -154,7 +154,7 @@ static struct mount *propagation_next(struct mount *m,
 		if (master == origin->mnt_master) {
 			struct mount *next = next_peer(m);
 			return (next == origin) ? NULL : next;
-		} else if (m->mnt.mnt_slave.next != &master->mnt.mnt_slave_list)
+		} else if (m->mnt_slave.next != &master->mnt_slave_list)
 			return next_slave(m);
 
 		/* back at master */
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 2d5beb5e3a8c..2f5f3ae3bd2d 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -58,10 +58,6 @@ struct vfsmount {
 #endif
 	const char *mnt_devname;	/* Name of device e.g. /dev/dsk/hda1 */
 	struct list_head mnt_list;
-	struct list_head mnt_expire;	/* link in fs-specific expiry list */
-	struct list_head mnt_share;	/* circular list of shared mounts */
-	struct list_head mnt_slave_list;/* list of slave mounts */
-	struct list_head mnt_slave;	/* slave list entry */
 	struct mnt_namespace *mnt_ns;	/* containing namespace */
 	int mnt_id;			/* mount identifier */
 	int mnt_group_id;		/* peer group identifier */
-- 
cgit v1.2.3


From 143c8c91cee7efdd732ec5f61b3471fc46192f20 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 25 Nov 2011 00:46:35 -0500
Subject: vfs: mnt_ns moved to struct mount

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c           |  2 +-
 fs/mount.h            |  1 +
 fs/namespace.c        | 45 +++++++++++++++++++++++----------------------
 fs/pnode.c            | 10 +++++-----
 include/linux/mount.h |  1 -
 5 files changed, 30 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 24790041ea76..9791b1e7eee4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2503,7 +2503,7 @@ global_root:
 	if (!slash)
 		error = prepend(buffer, buflen, "/", 1);
 	if (!error)
-		error = vfsmnt->mnt_ns ? 1 : 2;
+		error = real_mount(vfsmnt)->mnt_ns ? 1 : 2;
 	goto out;
 }
 
diff --git a/fs/mount.h b/fs/mount.h
index eb62ad232e4d..4a5f1dca0c2e 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -25,6 +25,7 @@ struct mount {
 	struct list_head mnt_slave_list;/* list of slave mounts */
 	struct list_head mnt_slave;	/* slave list entry */
 	struct mount *mnt_master;	/* slave is on master->mnt_slave_list */
+	struct mnt_namespace *mnt_ns;	/* containing namespace */
 };
 
 static inline struct mount *real_mount(struct vfsmount *mnt)
diff --git a/fs/namespace.c b/fs/namespace.c
index 3e95cc26dda6..4cdb7f698613 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -505,7 +505,7 @@ struct vfsmount *lookup_mnt(struct path *path)
 	}
 }
 
-static inline int check_mnt(struct vfsmount *mnt)
+static inline int check_mnt(struct mount *mnt)
 {
 	return mnt->mnt_ns == current->nsproxy->mnt_ns;
 }
@@ -614,13 +614,13 @@ static void commit_tree(struct mount *mnt)
 	struct mount *parent = mnt->mnt_parent;
 	struct mount *m;
 	LIST_HEAD(head);
-	struct mnt_namespace *n = parent->mnt.mnt_ns;
+	struct mnt_namespace *n = parent->mnt_ns;
 
 	BUG_ON(parent == mnt);
 
 	list_add_tail(&head, &mnt->mnt.mnt_list);
 	list_for_each_entry(m, &head, mnt.mnt_list) {
-		m->mnt.mnt_ns = n;
+		m->mnt_ns = n;
 		__mnt_make_longterm(m);
 	}
 
@@ -1234,8 +1234,8 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
 	list_for_each_entry(p, &tmp_list, mnt_hash) {
 		list_del_init(&p->mnt_expire);
 		list_del_init(&p->mnt.mnt_list);
-		__touch_mnt_namespace(p->mnt.mnt_ns);
-		p->mnt.mnt_ns = NULL;
+		__touch_mnt_namespace(p->mnt_ns);
+		p->mnt_ns = NULL;
 		__mnt_make_shortterm(p);
 		list_del_init(&p->mnt_child);
 		if (mnt_has_parent(p)) {
@@ -1367,7 +1367,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
 	retval = -EINVAL;
 	if (path.dentry != path.mnt->mnt_root)
 		goto dput_and_out;
-	if (!check_mnt(path.mnt))
+	if (!check_mnt(mnt))
 		goto dput_and_out;
 
 	retval = -EPERM;
@@ -1619,7 +1619,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 	if (parent_path) {
 		detach_mnt(source_mnt, parent_path);
 		attach_mnt(source_mnt, path);
-		touch_mnt_namespace(parent_path->mnt->mnt_ns);
+		touch_mnt_namespace(source_mnt->mnt_ns);
 	} else {
 		mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
 		commit_tree(source_mnt);
@@ -1765,7 +1765,7 @@ static int do_loopback(struct path *path, char *old_name,
 	if (IS_MNT_UNBINDABLE(old_path.mnt))
 		goto out2;
 
-	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
+	if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old))
 		goto out2;
 
 	err = -ENOMEM;
@@ -1818,11 +1818,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 {
 	int err;
 	struct super_block *sb = path->mnt->mnt_sb;
+	struct mount *mnt = real_mount(path->mnt);
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!check_mnt(path->mnt))
+	if (!check_mnt(mnt))
 		return -EINVAL;
 
 	if (path->dentry != path->mnt->mnt_root)
@@ -1839,14 +1840,14 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 		err = do_remount_sb(sb, flags, data, 0);
 	if (!err) {
 		br_write_lock(vfsmount_lock);
-		mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK;
-		path->mnt->mnt_flags = mnt_flags;
+		mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
+		mnt->mnt.mnt_flags = mnt_flags;
 		br_write_unlock(vfsmount_lock);
 	}
 	up_write(&sb->s_umount);
 	if (!err) {
 		br_write_lock(vfsmount_lock);
-		touch_mnt_namespace(path->mnt->mnt_ns);
+		touch_mnt_namespace(mnt->mnt_ns);
 		br_write_unlock(vfsmount_lock);
 	}
 	return err;
@@ -1880,8 +1881,10 @@ static int do_move_mount(struct path *path, char *old_name)
 	if (err < 0)
 		goto out;
 
+	old = real_mount(old_path.mnt);
+
 	err = -EINVAL;
-	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
+	if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old))
 		goto out1;
 
 	if (d_unlinked(path->dentry))
@@ -1891,8 +1894,6 @@ static int do_move_mount(struct path *path, char *old_name)
 	if (old_path.dentry != old_path.mnt->mnt_root)
 		goto out1;
 
-	old = real_mount(old_path.mnt);
-
 	if (!mnt_has_parent(old))
 		goto out1;
 
@@ -1984,7 +1985,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
 		return err;
 
 	err = -EINVAL;
-	if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
+	if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(real_mount(path->mnt)))
 		goto unlock;
 
 	/* Refuse the same filesystem on the same mount point */
@@ -2112,7 +2113,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 	}
 	while (!list_empty(&graveyard)) {
 		mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
-		touch_mnt_namespace(mnt->mnt.mnt_ns);
+		touch_mnt_namespace(mnt->mnt_ns);
 		umount_tree(mnt, 1, &umounts);
 	}
 	br_write_unlock(vfsmount_lock);
@@ -2185,7 +2186,7 @@ static void shrink_submounts(struct mount *mnt, struct list_head *umounts)
 		while (!list_empty(&graveyard)) {
 			m = list_first_entry(&graveyard, struct mount,
 						mnt_expire);
-			touch_mnt_namespace(m->mnt.mnt_ns);
+			touch_mnt_namespace(m->mnt_ns);
 			umount_tree(m, 1, umounts);
 		}
 	}
@@ -2423,7 +2424,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	p = real_mount(mnt_ns->root);
 	q = new;
 	while (p) {
-		q->mnt.mnt_ns = new_ns;
+		q->mnt_ns = new_ns;
 		__mnt_make_longterm(q);
 		if (fs) {
 			if (&p->mnt == fs->root.mnt) {
@@ -2479,7 +2480,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
 
 	new_ns = alloc_mnt_ns();
 	if (!IS_ERR(new_ns)) {
-		mnt->mnt_ns = new_ns;
+		real_mount(mnt)->mnt_ns = new_ns;
 		__mnt_make_longterm(real_mount(mnt));
 		new_ns->root = mnt;
 		list_add(&new_ns->list, &new_ns->root->mnt_list);
@@ -2644,7 +2645,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 		IS_MNT_SHARED(&new_mnt->mnt_parent->mnt) ||
 		IS_MNT_SHARED(&root_mnt->mnt_parent->mnt))
 		goto out4;
-	if (!check_mnt(root.mnt) || !check_mnt(new.mnt))
+	if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
 		goto out4;
 	error = -ENOENT;
 	if (d_unlinked(new.dentry))
@@ -2793,5 +2794,5 @@ EXPORT_SYMBOL(kern_unmount);
 
 bool our_mnt(struct vfsmount *mnt)
 {
-	return check_mnt(mnt);
+	return check_mnt(real_mount(mnt));
 }
diff --git a/fs/pnode.c b/fs/pnode.c
index 12cc1518e0cd..cec329822a16 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -36,7 +36,7 @@ static struct mount *get_peer_under_root(struct mount *mnt,
 
 	do {
 		/* Check the namespace first for optimization */
-		if (m->mnt.mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
+		if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
 			return m;
 
 		m = next_peer(m);
@@ -56,7 +56,7 @@ int get_dominating_id(struct mount *mnt, const struct path *root)
 	struct mount *m;
 
 	for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
-		struct mount *d = get_peer_under_root(m, mnt->mnt.mnt_ns, root);
+		struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
 		if (d)
 			return d->mnt.mnt_group_id;
 	}
@@ -145,7 +145,7 @@ static struct mount *propagation_next(struct mount *m,
 					 struct mount *origin)
 {
 	/* are there any slaves of this mount? */
-	if (!IS_MNT_NEW(&m->mnt) && !list_empty(&m->mnt_slave_list))
+	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
 		return first_slave(m);
 
 	while (1) {
@@ -189,7 +189,7 @@ static struct mount *get_source(struct mount *dest,
 	if (p_last_dest) {
 		do {
 			p_last_dest = next_peer(p_last_dest);
-		} while (IS_MNT_NEW(&p_last_dest->mnt));
+		} while (IS_MNT_NEW(p_last_dest));
 		/* is that a peer of the earlier? */
 		if (dest == p_last_dest) {
 			*type = CL_MAKE_SHARED;
@@ -232,7 +232,7 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
 		int type;
 		struct mount *source;
 
-		if (IS_MNT_NEW(&m->mnt))
+		if (IS_MNT_NEW(m))
 			continue;
 
 		source =  get_source(m, prev_dest_mnt, prev_src_mnt, &type);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 2f5f3ae3bd2d..eb8c1f1be90c 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -58,7 +58,6 @@ struct vfsmount {
 #endif
 	const char *mnt_devname;	/* Name of device e.g. /dev/dsk/hda1 */
 	struct list_head mnt_list;
-	struct mnt_namespace *mnt_ns;	/* containing namespace */
 	int mnt_id;			/* mount identifier */
 	int mnt_group_id;		/* peer group identifier */
 	int mnt_expiry_mark;		/* true if marked for expiry */
-- 
cgit v1.2.3


From 15169fe784a9846b24cdb0840329d41aebc23249 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 25 Nov 2011 00:50:41 -0500
Subject: vfs: mnt_id/mnt_group_id moved

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fhandle.c          |  4 +++-
 fs/mount.h            |  2 ++
 fs/namespace.c        | 30 +++++++++++++++---------------
 fs/pnode.c            |  4 ++--
 include/linux/mount.h |  2 --
 5 files changed, 22 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fhandle.c b/fs/fhandle.c
index 6b088641f5bf..5eff7116951e 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -10,6 +10,7 @@
 #include <linux/personality.h>
 #include <asm/uaccess.h>
 #include "internal.h"
+#include "mount.h"
 
 static long do_sys_name_to_handle(struct path *path,
 				  struct file_handle __user *ufh,
@@ -66,7 +67,8 @@ static long do_sys_name_to_handle(struct path *path,
 	} else
 		retval = 0;
 	/* copy the mount id */
-	if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) ||
+	if (copy_to_user(mnt_id, &real_mount(path->mnt)->mnt_id,
+			 sizeof(*mnt_id)) ||
 	    copy_to_user(ufh, handle,
 			 sizeof(struct file_handle) + handle_bytes))
 		retval = -EFAULT;
diff --git a/fs/mount.h b/fs/mount.h
index 4a5f1dca0c2e..c7bd401960ea 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -26,6 +26,8 @@ struct mount {
 	struct list_head mnt_slave;	/* slave list entry */
 	struct mount *mnt_master;	/* slave is on master->mnt_slave_list */
 	struct mnt_namespace *mnt_ns;	/* containing namespace */
+	int mnt_id;			/* mount identifier */
+	int mnt_group_id;		/* peer group identifier */
 };
 
 static inline struct mount *real_mount(struct vfsmount *mnt)
diff --git a/fs/namespace.c b/fs/namespace.c
index 4cdb7f698613..dfed9a25f204 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -85,9 +85,9 @@ static int mnt_alloc_id(struct mount *mnt)
 retry:
 	ida_pre_get(&mnt_id_ida, GFP_KERNEL);
 	spin_lock(&mnt_id_lock);
-	res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt.mnt_id);
+	res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
 	if (!res)
-		mnt_id_start = mnt->mnt.mnt_id + 1;
+		mnt_id_start = mnt->mnt_id + 1;
 	spin_unlock(&mnt_id_lock);
 	if (res == -EAGAIN)
 		goto retry;
@@ -97,7 +97,7 @@ retry:
 
 static void mnt_free_id(struct mount *mnt)
 {
-	int id = mnt->mnt.mnt_id;
+	int id = mnt->mnt_id;
 	spin_lock(&mnt_id_lock);
 	ida_remove(&mnt_id_ida, id);
 	if (mnt_id_start > id)
@@ -119,9 +119,9 @@ static int mnt_alloc_group_id(struct mount *mnt)
 
 	res = ida_get_new_above(&mnt_group_ida,
 				mnt_group_start,
-				&mnt->mnt.mnt_group_id);
+				&mnt->mnt_group_id);
 	if (!res)
-		mnt_group_start = mnt->mnt.mnt_group_id + 1;
+		mnt_group_start = mnt->mnt_group_id + 1;
 
 	return res;
 }
@@ -131,11 +131,11 @@ static int mnt_alloc_group_id(struct mount *mnt)
  */
 void mnt_release_group_id(struct mount *mnt)
 {
-	int id = mnt->mnt.mnt_group_id;
+	int id = mnt->mnt_group_id;
 	ida_remove(&mnt_group_ida, id);
 	if (mnt_group_start > id)
 		mnt_group_start = id;
-	mnt->mnt.mnt_group_id = 0;
+	mnt->mnt_group_id = 0;
 }
 
 /*
@@ -696,11 +696,11 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 
 	if (mnt) {
 		if (flag & (CL_SLAVE | CL_PRIVATE))
-			mnt->mnt.mnt_group_id = 0; /* not a peer of original */
+			mnt->mnt_group_id = 0; /* not a peer of original */
 		else
-			mnt->mnt.mnt_group_id = old->mnt.mnt_group_id;
+			mnt->mnt_group_id = old->mnt_group_id;
 
-		if ((flag & CL_MAKE_SHARED) && !mnt->mnt.mnt_group_id) {
+		if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
 			int err = mnt_alloc_group_id(mnt);
 			if (err)
 				goto out_free;
@@ -1029,7 +1029,7 @@ static int show_mountinfo(struct seq_file *m, void *v)
 	struct path root = p->root;
 	int err = 0;
 
-	seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, r->mnt_parent->mnt.mnt_id,
+	seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
 		   MAJOR(sb->s_dev), MINOR(sb->s_dev));
 	if (sb->s_op->show_path)
 		err = sb->s_op->show_path(m, mnt);
@@ -1049,9 +1049,9 @@ static int show_mountinfo(struct seq_file *m, void *v)
 
 	/* Tagged fields ("foo:X" or "bar") */
 	if (IS_MNT_SHARED(mnt))
-		seq_printf(m, " shared:%i", mnt->mnt_group_id);
+		seq_printf(m, " shared:%i", r->mnt_group_id);
 	if (IS_MNT_SLAVE(r)) {
-		int master = r->mnt_master->mnt.mnt_group_id;
+		int master = r->mnt_master->mnt_group_id;
 		int dom = get_dominating_id(r, &p->root);
 		seq_printf(m, " master:%i", master);
 		if (dom && dom != master)
@@ -1507,7 +1507,7 @@ static void cleanup_group_ids(struct mount *mnt, struct mount *end)
 	struct mount *p;
 
 	for (p = mnt; p != end; p = next_mnt(p, &mnt->mnt)) {
-		if (p->mnt.mnt_group_id && !IS_MNT_SHARED(&p->mnt))
+		if (p->mnt_group_id && !IS_MNT_SHARED(&p->mnt))
 			mnt_release_group_id(p);
 	}
 }
@@ -1517,7 +1517,7 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
 	struct mount *p;
 
 	for (p = mnt; p; p = recurse ? next_mnt(p, &mnt->mnt) : NULL) {
-		if (!p->mnt.mnt_group_id && !IS_MNT_SHARED(&p->mnt)) {
+		if (!p->mnt_group_id && !IS_MNT_SHARED(&p->mnt)) {
 			int err = mnt_alloc_group_id(p);
 			if (err) {
 				cleanup_group_ids(mnt, p);
diff --git a/fs/pnode.c b/fs/pnode.c
index cec329822a16..001c8b0df379 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -58,7 +58,7 @@ int get_dominating_id(struct mount *mnt, const struct path *root)
 	for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
 		struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
 		if (d)
-			return d->mnt.mnt_group_id;
+			return d->mnt_group_id;
 	}
 
 	return 0;
@@ -86,7 +86,7 @@ static int do_make_slave(struct mount *mnt)
 		mnt_release_group_id(mnt);
 
 	list_del_init(&mnt->mnt_share);
-	mnt->mnt.mnt_group_id = 0;
+	mnt->mnt_group_id = 0;
 
 	if (peer_mnt)
 		master = peer_mnt;
diff --git a/include/linux/mount.h b/include/linux/mount.h
index eb8c1f1be90c..b26dc40bfafc 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -58,8 +58,6 @@ struct vfsmount {
 #endif
 	const char *mnt_devname;	/* Name of device e.g. /dev/dsk/hda1 */
 	struct list_head mnt_list;
-	int mnt_id;			/* mount identifier */
-	int mnt_group_id;		/* peer group identifier */
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	int mnt_pinned;
 	int mnt_ghosts;
-- 
cgit v1.2.3


From 863d684f946eb240c7dd57d265d88315950ca5cc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 25 Nov 2011 00:57:42 -0500
Subject: vfs: move the rest of int fields to struct mount

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h            |  3 +++
 fs/namespace.c        | 32 +++++++++++++++++---------------
 fs/pnode.c            |  2 +-
 include/linux/mount.h |  3 ---
 4 files changed, 21 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index c7bd401960ea..9217e03ba5e7 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -28,6 +28,9 @@ struct mount {
 	struct mnt_namespace *mnt_ns;	/* containing namespace */
 	int mnt_id;			/* mount identifier */
 	int mnt_group_id;		/* peer group identifier */
+	int mnt_expiry_mark;		/* true if marked for expiry */
+	int mnt_pinned;
+	int mnt_ghosts;
 };
 
 static inline struct mount *real_mount(struct vfsmount *mnt)
diff --git a/fs/namespace.c b/fs/namespace.c
index dfed9a25f204..c7b8dbc88fe5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -787,9 +787,9 @@ put_again:
 		return;
 	br_write_lock(vfsmount_lock);
 #endif
-	if (unlikely(mnt->mnt.mnt_pinned)) {
-		mnt_add_count(mnt, mnt->mnt.mnt_pinned + 1);
-		mnt->mnt.mnt_pinned = 0;
+	if (unlikely(mnt->mnt_pinned)) {
+		mnt_add_count(mnt, mnt->mnt_pinned + 1);
+		mnt->mnt_pinned = 0;
 		br_write_unlock(vfsmount_lock);
 		acct_auto_close_mnt(&mnt->mnt);
 		goto put_again;
@@ -801,10 +801,11 @@ put_again:
 void mntput(struct vfsmount *mnt)
 {
 	if (mnt) {
+		struct mount *m = real_mount(mnt);
 		/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
-		if (unlikely(mnt->mnt_expiry_mark))
-			mnt->mnt_expiry_mark = 0;
-		mntput_no_expire(real_mount(mnt));
+		if (unlikely(m->mnt_expiry_mark))
+			m->mnt_expiry_mark = 0;
+		mntput_no_expire(m);
 	}
 }
 EXPORT_SYMBOL(mntput);
@@ -820,16 +821,17 @@ EXPORT_SYMBOL(mntget);
 void mnt_pin(struct vfsmount *mnt)
 {
 	br_write_lock(vfsmount_lock);
-	mnt->mnt_pinned++;
+	real_mount(mnt)->mnt_pinned++;
 	br_write_unlock(vfsmount_lock);
 }
 EXPORT_SYMBOL(mnt_pin);
 
-void mnt_unpin(struct vfsmount *mnt)
+void mnt_unpin(struct vfsmount *m)
 {
+	struct mount *mnt = real_mount(m);
 	br_write_lock(vfsmount_lock);
 	if (mnt->mnt_pinned) {
-		mnt_add_count(real_mount(mnt), 1);
+		mnt_add_count(mnt, 1);
 		mnt->mnt_pinned--;
 	}
 	br_write_unlock(vfsmount_lock);
@@ -1200,17 +1202,17 @@ void release_mounts(struct list_head *head)
 		list_del_init(&mnt->mnt_hash);
 		if (mnt_has_parent(mnt)) {
 			struct dentry *dentry;
-			struct vfsmount *m;
+			struct mount *m;
 
 			br_write_lock(vfsmount_lock);
 			dentry = mnt->mnt_mountpoint;
-			m = &mnt->mnt_parent->mnt;
+			m = mnt->mnt_parent;
 			mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 			mnt->mnt_parent = mnt;
 			m->mnt_ghosts--;
 			br_write_unlock(vfsmount_lock);
 			dput(dentry);
-			mntput(m);
+			mntput(&m->mnt);
 		}
 		mntput(&mnt->mnt);
 	}
@@ -1239,7 +1241,7 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
 		__mnt_make_shortterm(p);
 		list_del_init(&p->mnt_child);
 		if (mnt_has_parent(p)) {
-			p->mnt_parent->mnt.mnt_ghosts++;
+			p->mnt_parent->mnt_ghosts++;
 			dentry_reset_mounted(p->mnt_mountpoint);
 		}
 		change_mnt_propagation(p, MS_PRIVATE);
@@ -1281,7 +1283,7 @@ static int do_umount(struct mount *mnt, int flags)
 		}
 		br_write_unlock(vfsmount_lock);
 
-		if (!xchg(&mnt->mnt.mnt_expiry_mark, 1))
+		if (!xchg(&mnt->mnt_expiry_mark, 1))
 			return -EAGAIN;
 	}
 
@@ -2106,7 +2108,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 	 *   cleared by mntput())
 	 */
 	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
-		if (!xchg(&mnt->mnt.mnt_expiry_mark, 1) ||
+		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
 			propagate_mount_busy(mnt, 1))
 			continue;
 		list_move(&mnt->mnt_expire, &graveyard);
diff --git a/fs/pnode.c b/fs/pnode.c
index 001c8b0df379..a40abf20f35e 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -272,7 +272,7 @@ out:
  */
 static inline int do_refcount_check(struct mount *mnt, int count)
 {
-	int mycount = mnt_get_count(mnt) - mnt->mnt.mnt_ghosts;
+	int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts;
 	return (mycount > count);
 }
 
diff --git a/include/linux/mount.h b/include/linux/mount.h
index b26dc40bfafc..080e3088ca81 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -58,9 +58,6 @@ struct vfsmount {
 #endif
 	const char *mnt_devname;	/* Name of device e.g. /dev/dsk/hda1 */
 	struct list_head mnt_list;
-	int mnt_expiry_mark;		/* true if marked for expiry */
-	int mnt_pinned;
-	int mnt_ghosts;
 };
 
 struct file; /* forward dec */
-- 
cgit v1.2.3


From 1a4eeaf2a8c07404e2d1c3ff99b393fd4c207170 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 25 Nov 2011 02:19:55 -0500
Subject: vfs: move mnt_list to struct mount

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h            |  3 ++-
 fs/namespace.c        | 47 ++++++++++++++++++++++++-----------------------
 include/linux/mount.h |  1 -
 3 files changed, 26 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index 9217e03ba5e7..7060d2a6f802 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -19,7 +19,8 @@ struct mount {
 #endif
 	struct list_head mnt_mounts;	/* list of children, anchored here */
 	struct list_head mnt_child;	/* and going through their mnt_child */
-	/* yet to be moved - up to mnt_list */
+	/* yet to be moved - up to mnt_devname */
+	struct list_head mnt_list;
 	struct list_head mnt_expire;	/* link in fs-specific expiry list */
 	struct list_head mnt_share;	/* circular list of shared mounts */
 	struct list_head mnt_slave_list;/* list of slave mounts */
diff --git a/fs/namespace.c b/fs/namespace.c
index bbe24defcac7..e15125356ac1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -202,7 +202,7 @@ static struct mount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&p->mnt_hash);
 		INIT_LIST_HEAD(&p->mnt_child);
 		INIT_LIST_HEAD(&p->mnt_mounts);
-		INIT_LIST_HEAD(&mnt->mnt_list);
+		INIT_LIST_HEAD(&p->mnt_list);
 		INIT_LIST_HEAD(&p->mnt_expire);
 		INIT_LIST_HEAD(&p->mnt_share);
 		INIT_LIST_HEAD(&p->mnt_slave_list);
@@ -618,8 +618,8 @@ static void commit_tree(struct mount *mnt)
 
 	BUG_ON(parent == mnt);
 
-	list_add_tail(&head, &mnt->mnt.mnt_list);
-	list_for_each_entry(m, &head, mnt.mnt_list) {
+	list_add_tail(&head, &mnt->mnt_list);
+	list_for_each_entry(m, &head, mnt_list) {
 		m->mnt_ns = n;
 		__mnt_make_longterm(m);
 	}
@@ -987,7 +987,8 @@ static void show_type(struct seq_file *m, struct super_block *sb)
 
 static int show_vfsmnt(struct seq_file *m, void *v)
 {
-	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
+	struct mount *r = list_entry(v, struct mount, mnt_list);
+	struct vfsmount *mnt = &r->mnt;
 	int err = 0;
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
 
@@ -1024,8 +1025,8 @@ const struct seq_operations mounts_op = {
 static int show_mountinfo(struct seq_file *m, void *v)
 {
 	struct proc_mounts *p = m->private;
-	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
-	struct mount *r = real_mount(mnt);
+	struct mount *r = list_entry(v, struct mount, mnt_list);
+	struct vfsmount *mnt = &r->mnt;
 	struct super_block *sb = mnt->mnt_sb;
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
 	struct path root = p->root;
@@ -1092,7 +1093,8 @@ const struct seq_operations mountinfo_op = {
 
 static int show_vfsstat(struct seq_file *m, void *v)
 {
-	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
+	struct mount *r = list_entry(v, struct mount, mnt_list);
+	struct vfsmount *mnt = &r->mnt;
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
 	int err = 0;
 
@@ -1235,7 +1237,7 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
 
 	list_for_each_entry(p, &tmp_list, mnt_hash) {
 		list_del_init(&p->mnt_expire);
-		list_del_init(&p->mnt.mnt_list);
+		list_del_init(&p->mnt_list);
 		__touch_mnt_namespace(p->mnt_ns);
 		p->mnt_ns = NULL;
 		__mnt_make_shortterm(p);
@@ -1331,7 +1333,7 @@ static int do_umount(struct mount *mnt, int flags)
 
 	retval = -EBUSY;
 	if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
-		if (!list_empty(&mnt->mnt.mnt_list))
+		if (!list_empty(&mnt->mnt_list))
 			umount_tree(mnt, 1, &umount_list);
 		retval = 0;
 	}
@@ -1451,7 +1453,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 			if (!q)
 				goto Enomem;
 			br_write_lock(vfsmount_lock);
-			list_add_tail(&q->mnt.mnt_list, &res->mnt.mnt_list);
+			list_add_tail(&q->mnt_list, &res->mnt_list);
 			attach_mnt(q, &path);
 			br_write_unlock(vfsmount_lock);
 		}
@@ -1492,12 +1494,12 @@ void drop_collected_mounts(struct vfsmount *mnt)
 int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
 		   struct vfsmount *root)
 {
-	struct vfsmount *mnt;
+	struct mount *mnt;
 	int res = f(root, arg);
 	if (res)
 		return res;
-	list_for_each_entry(mnt, &root->mnt_list, mnt_list) {
-		res = f(mnt, arg);
+	list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
+		res = f(&mnt->mnt, arg);
 		if (res)
 			return res;
 	}
@@ -2415,7 +2417,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	}
 	new_ns->root = &new->mnt;
 	br_write_lock(vfsmount_lock);
-	list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
+	list_add_tail(&new_ns->list, &new->mnt_list);
 	br_write_unlock(vfsmount_lock);
 
 	/*
@@ -2476,18 +2478,17 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
  * create_mnt_ns - creates a private namespace and adds a root filesystem
  * @mnt: pointer to the new root filesystem mountpoint
  */
-static struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
+static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
 {
-	struct mnt_namespace *new_ns;
-
-	new_ns = alloc_mnt_ns();
+	struct mnt_namespace *new_ns = alloc_mnt_ns();
 	if (!IS_ERR(new_ns)) {
-		real_mount(mnt)->mnt_ns = new_ns;
-		__mnt_make_longterm(real_mount(mnt));
-		new_ns->root = mnt;
-		list_add(&new_ns->list, &new_ns->root->mnt_list);
+		struct mount *mnt = real_mount(m);
+		mnt->mnt_ns = new_ns;
+		__mnt_make_longterm(mnt);
+		new_ns->root = m;
+		list_add(&new_ns->list, &mnt->mnt_list);
 	} else {
-		mntput(mnt);
+		mntput(m);
 	}
 	return new_ns;
 }
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 080e3088ca81..16ae3d46b30a 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -57,7 +57,6 @@ struct vfsmount {
 	struct hlist_head mnt_fsnotify_marks;
 #endif
 	const char *mnt_devname;	/* Name of device e.g. /dev/dsk/hda1 */
-	struct list_head mnt_list;
 };
 
 struct file; /* forward dec */
-- 
cgit v1.2.3


From 52ba1621de1479ce7e52b6d167860462e483313c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 25 Nov 2011 02:25:17 -0500
Subject: vfs: move mnt_devname

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h            |  3 ++-
 fs/namespace.c        | 18 +++++++++---------
 include/linux/mount.h |  1 -
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index 7060d2a6f802..c5fc3f7a9580 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -19,7 +19,8 @@ struct mount {
 #endif
 	struct list_head mnt_mounts;	/* list of children, anchored here */
 	struct list_head mnt_child;	/* and going through their mnt_child */
-	/* yet to be moved - up to mnt_devname */
+	/* yet to be moved - fsnotify ones go here */
+	const char *mnt_devname;	/* Name of device e.g. /dev/dsk/hda1 */
 	struct list_head mnt_list;
 	struct list_head mnt_expire;	/* link in fs-specific expiry list */
 	struct list_head mnt_share;	/* circular list of shared mounts */
diff --git a/fs/namespace.c b/fs/namespace.c
index e15125356ac1..b8a30928d0c1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -183,8 +183,8 @@ static struct mount *alloc_vfsmnt(const char *name)
 			goto out_free_cache;
 
 		if (name) {
-			mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
-			if (!mnt->mnt_devname)
+			p->mnt_devname = kstrdup(name, GFP_KERNEL);
+			if (!p->mnt_devname)
 				goto out_free_id;
 		}
 
@@ -215,7 +215,7 @@ static struct mount *alloc_vfsmnt(const char *name)
 
 #ifdef CONFIG_SMP
 out_free_devname:
-	kfree(p->mnt.mnt_devname);
+	kfree(p->mnt_devname);
 #endif
 out_free_id:
 	mnt_free_id(p);
@@ -451,7 +451,7 @@ static void __mnt_unmake_readonly(struct mount *mnt)
 
 static void free_vfsmnt(struct mount *mnt)
 {
-	kfree(mnt->mnt.mnt_devname);
+	kfree(mnt->mnt_devname);
 	mnt_free_id(mnt);
 #ifdef CONFIG_SMP
 	free_percpu(mnt->mnt_pcp);
@@ -692,7 +692,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 					int flag)
 {
 	struct super_block *sb = old->mnt.mnt_sb;
-	struct mount *mnt = alloc_vfsmnt(old->mnt.mnt_devname);
+	struct mount *mnt = alloc_vfsmnt(old->mnt_devname);
 
 	if (mnt) {
 		if (flag & (CL_SLAVE | CL_PRIVATE))
@@ -997,7 +997,7 @@ static int show_vfsmnt(struct seq_file *m, void *v)
 		if (err)
 			goto out;
 	} else {
-		mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
+		mangle(m, r->mnt_devname ? r->mnt_devname : "none");
 	}
 	seq_putc(m, ' ');
 	seq_path(m, &mnt_path, " \t\n\\");
@@ -1070,7 +1070,7 @@ static int show_mountinfo(struct seq_file *m, void *v)
 	if (sb->s_op->show_devname)
 		err = sb->s_op->show_devname(m, mnt);
 	else
-		mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
+		mangle(m, r->mnt_devname ? r->mnt_devname : "none");
 	if (err)
 		goto out;
 	seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
@@ -1103,9 +1103,9 @@ static int show_vfsstat(struct seq_file *m, void *v)
 		seq_puts(m, "device ");
 		err = mnt->mnt_sb->s_op->show_devname(m, mnt);
 	} else {
-		if (mnt->mnt_devname) {
+		if (r->mnt_devname) {
 			seq_puts(m, "device ");
-			mangle(m, mnt->mnt_devname);
+			mangle(m, r->mnt_devname);
 		} else
 			seq_puts(m, "no device");
 	}
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 16ae3d46b30a..f18dd1bfcbda 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -56,7 +56,6 @@ struct vfsmount {
 	__u32 mnt_fsnotify_mask;
 	struct hlist_head mnt_fsnotify_marks;
 #endif
-	const char *mnt_devname;	/* Name of device e.g. /dev/dsk/hda1 */
 };
 
 struct file; /* forward dec */
-- 
cgit v1.2.3


From c63181e6b6df89176b3984c6977bb5ec03d0df23 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 25 Nov 2011 02:35:16 -0500
Subject: vfs: move fsnotify junk to struct mount

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h                         |  5 ++++-
 fs/namespace.c                     | 45 +++++++++++++++++++-------------------
 fs/notify/fanotify/fanotify_user.c |  6 +++--
 fs/notify/fsnotify.c               |  9 ++++----
 fs/notify/vfsmount_mark.c          | 19 ++++++++++------
 include/linux/mount.h              |  5 -----
 6 files changed, 47 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index c5fc3f7a9580..e094c863c8af 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -19,7 +19,6 @@ struct mount {
 #endif
 	struct list_head mnt_mounts;	/* list of children, anchored here */
 	struct list_head mnt_child;	/* and going through their mnt_child */
-	/* yet to be moved - fsnotify ones go here */
 	const char *mnt_devname;	/* Name of device e.g. /dev/dsk/hda1 */
 	struct list_head mnt_list;
 	struct list_head mnt_expire;	/* link in fs-specific expiry list */
@@ -28,6 +27,10 @@ struct mount {
 	struct list_head mnt_slave;	/* slave list entry */
 	struct mount *mnt_master;	/* slave is on master->mnt_slave_list */
 	struct mnt_namespace *mnt_ns;	/* containing namespace */
+#ifdef CONFIG_FSNOTIFY
+	struct hlist_head mnt_fsnotify_marks;
+	__u32 mnt_fsnotify_mask;
+#endif
 	int mnt_id;			/* mount identifier */
 	int mnt_group_id;		/* peer group identifier */
 	int mnt_expiry_mark;		/* true if marked for expiry */
diff --git a/fs/namespace.c b/fs/namespace.c
index b8a30928d0c1..124a12555fe4 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -173,54 +173,53 @@ unsigned int mnt_get_count(struct mount *mnt)
 
 static struct mount *alloc_vfsmnt(const char *name)
 {
-	struct mount *p = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
-	if (p) {
-		struct vfsmount *mnt = &p->mnt;
+	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
+	if (mnt) {
 		int err;
 
-		err = mnt_alloc_id(p);
+		err = mnt_alloc_id(mnt);
 		if (err)
 			goto out_free_cache;
 
 		if (name) {
-			p->mnt_devname = kstrdup(name, GFP_KERNEL);
-			if (!p->mnt_devname)
+			mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
+			if (!mnt->mnt_devname)
 				goto out_free_id;
 		}
 
 #ifdef CONFIG_SMP
-		p->mnt_pcp = alloc_percpu(struct mnt_pcp);
-		if (!p->mnt_pcp)
+		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
+		if (!mnt->mnt_pcp)
 			goto out_free_devname;
 
-		this_cpu_add(p->mnt_pcp->mnt_count, 1);
+		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
 #else
-		p->mnt_count = 1;
-		p->mnt_writers = 0;
+		mnt->mnt_count = 1;
+		mnt->mnt_writers = 0;
 #endif
 
-		INIT_LIST_HEAD(&p->mnt_hash);
-		INIT_LIST_HEAD(&p->mnt_child);
-		INIT_LIST_HEAD(&p->mnt_mounts);
-		INIT_LIST_HEAD(&p->mnt_list);
-		INIT_LIST_HEAD(&p->mnt_expire);
-		INIT_LIST_HEAD(&p->mnt_share);
-		INIT_LIST_HEAD(&p->mnt_slave_list);
-		INIT_LIST_HEAD(&p->mnt_slave);
+		INIT_LIST_HEAD(&mnt->mnt_hash);
+		INIT_LIST_HEAD(&mnt->mnt_child);
+		INIT_LIST_HEAD(&mnt->mnt_mounts);
+		INIT_LIST_HEAD(&mnt->mnt_list);
+		INIT_LIST_HEAD(&mnt->mnt_expire);
+		INIT_LIST_HEAD(&mnt->mnt_share);
+		INIT_LIST_HEAD(&mnt->mnt_slave_list);
+		INIT_LIST_HEAD(&mnt->mnt_slave);
 #ifdef CONFIG_FSNOTIFY
 		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
 #endif
 	}
-	return p;
+	return mnt;
 
 #ifdef CONFIG_SMP
 out_free_devname:
-	kfree(p->mnt_devname);
+	kfree(mnt->mnt_devname);
 #endif
 out_free_id:
-	mnt_free_id(p);
+	mnt_free_id(mnt);
 out_free_cache:
-	kmem_cache_free(mnt_cache, p);
+	kmem_cache_free(mnt_cache, mnt);
 	return NULL;
 }
 
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 9fde1c00a296..3568c8a8b138 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,8 @@
 
 #include <asm/ioctls.h>
 
+#include "../../mount.h"
+
 #define FANOTIFY_DEFAULT_MAX_EVENTS	16384
 #define FANOTIFY_DEFAULT_MAX_MARKS	8192
 #define FANOTIFY_DEFAULT_MAX_LISTENERS	128
@@ -546,7 +548,7 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
 
 	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
 	fsnotify_put_mark(fsn_mark);
-	if (removed & mnt->mnt_fsnotify_mask)
+	if (removed & real_mount(mnt)->mnt_fsnotify_mask)
 		fsnotify_recalc_vfsmount_mask(mnt);
 
 	return 0;
@@ -623,7 +625,7 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
 
-	if (added & ~mnt->mnt_fsnotify_mask)
+	if (added & ~real_mount(mnt)->mnt_fsnotify_mask)
 		fsnotify_recalc_vfsmount_mask(mnt);
 err:
 	fsnotify_put_mark(fsn_mark);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 79b47cbb5cd8..ccb14d3fc0de 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -26,6 +26,7 @@
 
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
+#include "../mount.h"
 
 /*
  * Clear all of the marks on an inode when it is being evicted from core
@@ -205,13 +206,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
 	struct fsnotify_group *inode_group, *vfsmount_group;
 	struct fsnotify_event *event = NULL;
-	struct vfsmount *mnt;
+	struct mount *mnt;
 	int idx, ret = 0;
 	/* global tests shouldn't care about events on child only the specific event */
 	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
 
 	if (data_is == FSNOTIFY_EVENT_PATH)
-		mnt = ((struct path *)data)->mnt;
+		mnt = real_mount(((struct path *)data)->mnt);
 	else
 		mnt = NULL;
 
@@ -262,11 +263,11 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 			/* we didn't use the vfsmount_mark */
 			vfsmount_group = NULL;
 		} else if (vfsmount_group > inode_group) {
-			ret = send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
+			ret = send_to_group(to_tell, &mnt->mnt, NULL, vfsmount_mark, mask, data,
 					    data_is, cookie, file_name, &event);
 			inode_group = NULL;
 		} else {
-			ret = send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
+			ret = send_to_group(to_tell, &mnt->mnt, inode_mark, vfsmount_mark,
 					    mask, data, data_is, cookie, file_name,
 					    &event);
 		}
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 778fe6cae3b0..b7b4b0e8554f 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -28,15 +28,17 @@
 
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
+#include "../mount.h"
 
 void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
 {
 	struct fsnotify_mark *mark, *lmark;
 	struct hlist_node *pos, *n;
+	struct mount *m = real_mount(mnt);
 	LIST_HEAD(free_list);
 
 	spin_lock(&mnt->mnt_root->d_lock);
-	hlist_for_each_entry_safe(mark, pos, n, &mnt->mnt_fsnotify_marks, m.m_list) {
+	hlist_for_each_entry_safe(mark, pos, n, &m->mnt_fsnotify_marks, m.m_list) {
 		list_add(&mark->m.free_m_list, &free_list);
 		hlist_del_init_rcu(&mark->m.m_list);
 		fsnotify_get_mark(mark);
@@ -59,15 +61,16 @@ void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
  */
 static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt)
 {
+	struct mount *m = real_mount(mnt);
 	struct fsnotify_mark *mark;
 	struct hlist_node *pos;
 	__u32 new_mask = 0;
 
 	assert_spin_locked(&mnt->mnt_root->d_lock);
 
-	hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list)
+	hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list)
 		new_mask |= mark->mask;
-	mnt->mnt_fsnotify_mask = new_mask;
+	m->mnt_fsnotify_mask = new_mask;
 }
 
 /*
@@ -101,12 +104,13 @@ void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark)
 static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_group *group,
 								struct vfsmount *mnt)
 {
+	struct mount *m = real_mount(mnt);
 	struct fsnotify_mark *mark;
 	struct hlist_node *pos;
 
 	assert_spin_locked(&mnt->mnt_root->d_lock);
 
-	hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list) {
+	hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) {
 		if (mark->group == group) {
 			fsnotify_get_mark(mark);
 			return mark;
@@ -140,6 +144,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 			       struct fsnotify_group *group, struct vfsmount *mnt,
 			       int allow_dups)
 {
+	struct mount *m = real_mount(mnt);
 	struct fsnotify_mark *lmark;
 	struct hlist_node *node, *last = NULL;
 	int ret = 0;
@@ -154,13 +159,13 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 	mark->m.mnt = mnt;
 
 	/* is mark the first mark? */
-	if (hlist_empty(&mnt->mnt_fsnotify_marks)) {
-		hlist_add_head_rcu(&mark->m.m_list, &mnt->mnt_fsnotify_marks);
+	if (hlist_empty(&m->mnt_fsnotify_marks)) {
+		hlist_add_head_rcu(&mark->m.m_list, &m->mnt_fsnotify_marks);
 		goto out;
 	}
 
 	/* should mark be in the middle of the current list? */
-	hlist_for_each_entry(lmark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
+	hlist_for_each_entry(lmark, node, &m->mnt_fsnotify_marks, m.m_list) {
 		last = node;
 
 		if ((lmark->group == group) && !allow_dups) {
diff --git a/include/linux/mount.h b/include/linux/mount.h
index f18dd1bfcbda..d7029f4a191a 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -51,11 +51,6 @@ struct vfsmount {
 	struct dentry *mnt_root;	/* root of the mounted tree */
 	struct super_block *mnt_sb;	/* pointer to superblock */
 	int mnt_flags;
-	/* 4 bytes hole on 64bits arches without fsnotify */
-#ifdef CONFIG_FSNOTIFY
-	__u32 mnt_fsnotify_mask;
-	struct hlist_head mnt_fsnotify_marks;
-#endif
 };
 
 struct file; /* forward dec */
-- 
cgit v1.2.3


From 0226f4923f6c9b40cfa1c1c1b19a6ac6b3924ead Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 6 Dec 2011 12:21:54 -0500
Subject: vfs: take /proc/*/mounts and friends to fs/proc_namespace.c

rationale: that stuff is far tighter bound to fs/namespace.c than to
the guts of procfs proper.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/Makefile                   |   2 +
 fs/mount.h                    |  24 +++
 fs/namespace.c                | 218 +---------------------------
 fs/proc/base.c                | 114 ---------------
 fs/proc_namespace.c           | 331 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/mnt_namespace.h |  30 +---
 6 files changed, 368 insertions(+), 351 deletions(-)
 create mode 100644 fs/proc_namespace.c

(limited to 'include/linux')

diff --git a/fs/Makefile b/fs/Makefile
index d2c3353d5477..310cfc4e69d3 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -19,6 +19,8 @@ else
 obj-y +=	no-block.o
 endif
 
+obj-$(CONFIG_PROC_FS) += proc_namespace.o
+
 obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
 obj-y				+= notify/
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
diff --git a/fs/mount.h b/fs/mount.h
index e094c863c8af..c6e99e03350a 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -1,4 +1,14 @@
 #include <linux/mount.h>
+#include <linux/seq_file.h>
+#include <linux/poll.h>
+
+struct mnt_namespace {
+	atomic_t		count;
+	struct vfsmount *	root;
+	struct list_head	list;
+	wait_queue_head_t poll;
+	int event;
+};
 
 struct mnt_pcp {
 	int mnt_count;
@@ -49,3 +59,17 @@ static inline int mnt_has_parent(struct mount *mnt)
 }
 
 extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
+
+static inline void get_mnt_ns(struct mnt_namespace *ns)
+{
+	atomic_inc(&ns->count);
+}
+
+struct proc_mounts {
+	struct seq_file m; /* must be the first element */
+	struct mnt_namespace *ns;
+	struct path root;
+	int (*show)(struct seq_file *, struct vfsmount *);
+};
+
+extern const struct seq_operations mounts_op;
diff --git a/fs/namespace.c b/fs/namespace.c
index cd6389387d1f..21a8261256dd 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -898,10 +898,10 @@ void replace_mount_options(struct super_block *sb, char *options)
 EXPORT_SYMBOL(replace_mount_options);
 
 #ifdef CONFIG_PROC_FS
-/* iterator */
+/* iterator; we want it to have access to namespace_sem, thus here... */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
-	struct proc_mounts *p = m->private;
+	struct proc_mounts *p = container_of(m, struct proc_mounts, m);
 
 	down_read(&namespace_sem);
 	return seq_list_start(&p->ns->list, *pos);
@@ -909,7 +909,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct proc_mounts *p = m->private;
+	struct proc_mounts *p = container_of(m, struct proc_mounts, m);
 
 	return seq_list_next(v, &p->ns->list, pos);
 }
@@ -919,222 +919,18 @@ static void m_stop(struct seq_file *m, void *v)
 	up_read(&namespace_sem);
 }
 
-int mnt_had_events(struct proc_mounts *p)
-{
-	struct mnt_namespace *ns = p->ns;
-	int res = 0;
-
-	br_read_lock(vfsmount_lock);
-	if (p->m.poll_event != ns->event) {
-		p->m.poll_event = ns->event;
-		res = 1;
-	}
-	br_read_unlock(vfsmount_lock);
-
-	return res;
-}
-
-struct proc_fs_info {
-	int flag;
-	const char *str;
-};
-
-static int show_sb_opts(struct seq_file *m, struct super_block *sb)
-{
-	static const struct proc_fs_info fs_info[] = {
-		{ MS_SYNCHRONOUS, ",sync" },
-		{ MS_DIRSYNC, ",dirsync" },
-		{ MS_MANDLOCK, ",mand" },
-		{ 0, NULL }
-	};
-	const struct proc_fs_info *fs_infop;
-
-	for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
-		if (sb->s_flags & fs_infop->flag)
-			seq_puts(m, fs_infop->str);
-	}
-
-	return security_sb_show_options(m, sb);
-}
-
-static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
-{
-	static const struct proc_fs_info mnt_info[] = {
-		{ MNT_NOSUID, ",nosuid" },
-		{ MNT_NODEV, ",nodev" },
-		{ MNT_NOEXEC, ",noexec" },
-		{ MNT_NOATIME, ",noatime" },
-		{ MNT_NODIRATIME, ",nodiratime" },
-		{ MNT_RELATIME, ",relatime" },
-		{ 0, NULL }
-	};
-	const struct proc_fs_info *fs_infop;
-
-	for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
-		if (mnt->mnt_flags & fs_infop->flag)
-			seq_puts(m, fs_infop->str);
-	}
-}
-
-static void show_type(struct seq_file *m, struct super_block *sb)
-{
-	mangle(m, sb->s_type->name);
-	if (sb->s_subtype && sb->s_subtype[0]) {
-		seq_putc(m, '.');
-		mangle(m, sb->s_subtype);
-	}
-}
-
-static int show_vfsmnt(struct seq_file *m, void *v)
+static int m_show(struct seq_file *m, void *v)
 {
+	struct proc_mounts *p = container_of(m, struct proc_mounts, m);
 	struct mount *r = list_entry(v, struct mount, mnt_list);
-	struct vfsmount *mnt = &r->mnt;
-	int err = 0;
-	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
-
-	if (mnt->mnt_sb->s_op->show_devname) {
-		err = mnt->mnt_sb->s_op->show_devname(m, mnt);
-		if (err)
-			goto out;
-	} else {
-		mangle(m, r->mnt_devname ? r->mnt_devname : "none");
-	}
-	seq_putc(m, ' ');
-	seq_path(m, &mnt_path, " \t\n\\");
-	seq_putc(m, ' ');
-	show_type(m, mnt->mnt_sb);
-	seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
-	err = show_sb_opts(m, mnt->mnt_sb);
-	if (err)
-		goto out;
-	show_mnt_opts(m, mnt);
-	if (mnt->mnt_sb->s_op->show_options)
-		err = mnt->mnt_sb->s_op->show_options(m, mnt);
-	seq_puts(m, " 0 0\n");
-out:
-	return err;
+	return p->show(m, &r->mnt);
 }
 
 const struct seq_operations mounts_op = {
 	.start	= m_start,
 	.next	= m_next,
 	.stop	= m_stop,
-	.show	= show_vfsmnt
-};
-
-static int show_mountinfo(struct seq_file *m, void *v)
-{
-	struct proc_mounts *p = m->private;
-	struct mount *r = list_entry(v, struct mount, mnt_list);
-	struct vfsmount *mnt = &r->mnt;
-	struct super_block *sb = mnt->mnt_sb;
-	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
-	struct path root = p->root;
-	int err = 0;
-
-	seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
-		   MAJOR(sb->s_dev), MINOR(sb->s_dev));
-	if (sb->s_op->show_path)
-		err = sb->s_op->show_path(m, mnt);
-	else
-		seq_dentry(m, mnt->mnt_root, " \t\n\\");
-	if (err)
-		goto out;
-	seq_putc(m, ' ');
-
-	/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
-	err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
-	if (err)
-		goto out;
-
-	seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
-	show_mnt_opts(m, mnt);
-
-	/* Tagged fields ("foo:X" or "bar") */
-	if (IS_MNT_SHARED(r))
-		seq_printf(m, " shared:%i", r->mnt_group_id);
-	if (IS_MNT_SLAVE(r)) {
-		int master = r->mnt_master->mnt_group_id;
-		int dom = get_dominating_id(r, &p->root);
-		seq_printf(m, " master:%i", master);
-		if (dom && dom != master)
-			seq_printf(m, " propagate_from:%i", dom);
-	}
-	if (IS_MNT_UNBINDABLE(r))
-		seq_puts(m, " unbindable");
-
-	/* Filesystem specific data */
-	seq_puts(m, " - ");
-	show_type(m, sb);
-	seq_putc(m, ' ');
-	if (sb->s_op->show_devname)
-		err = sb->s_op->show_devname(m, mnt);
-	else
-		mangle(m, r->mnt_devname ? r->mnt_devname : "none");
-	if (err)
-		goto out;
-	seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
-	err = show_sb_opts(m, sb);
-	if (err)
-		goto out;
-	if (sb->s_op->show_options)
-		err = sb->s_op->show_options(m, mnt);
-	seq_putc(m, '\n');
-out:
-	return err;
-}
-
-const struct seq_operations mountinfo_op = {
-	.start	= m_start,
-	.next	= m_next,
-	.stop	= m_stop,
-	.show	= show_mountinfo,
-};
-
-static int show_vfsstat(struct seq_file *m, void *v)
-{
-	struct mount *r = list_entry(v, struct mount, mnt_list);
-	struct vfsmount *mnt = &r->mnt;
-	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
-	int err = 0;
-
-	/* device */
-	if (mnt->mnt_sb->s_op->show_devname) {
-		seq_puts(m, "device ");
-		err = mnt->mnt_sb->s_op->show_devname(m, mnt);
-	} else {
-		if (r->mnt_devname) {
-			seq_puts(m, "device ");
-			mangle(m, r->mnt_devname);
-		} else
-			seq_puts(m, "no device");
-	}
-
-	/* mount point */
-	seq_puts(m, " mounted on ");
-	seq_path(m, &mnt_path, " \t\n\\");
-	seq_putc(m, ' ');
-
-	/* file system type */
-	seq_puts(m, "with fstype ");
-	show_type(m, mnt->mnt_sb);
-
-	/* optional statistics */
-	if (mnt->mnt_sb->s_op->show_stats) {
-		seq_putc(m, ' ');
-		if (!err)
-			err = mnt->mnt_sb->s_op->show_stats(m, mnt);
-	}
-
-	seq_putc(m, '\n');
-	return err;
-}
-
-const struct seq_operations mountstats_op = {
-	.start	= m_start,
-	.next	= m_next,
-	.stop	= m_stop,
-	.show	= show_vfsstat,
+	.show	= m_show,
 };
 #endif  /* CONFIG_PROC_FS */
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 851ba3dcdc29..07446b55b7cc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -631,120 +631,6 @@ static const struct inode_operations proc_def_inode_operations = {
 	.setattr	= proc_setattr,
 };
 
-static int mounts_open_common(struct inode *inode, struct file *file,
-			      const struct seq_operations *op)
-{
-	struct task_struct *task = get_proc_task(inode);
-	struct nsproxy *nsp;
-	struct mnt_namespace *ns = NULL;
-	struct path root;
-	struct proc_mounts *p;
-	int ret = -EINVAL;
-
-	if (task) {
-		rcu_read_lock();
-		nsp = task_nsproxy(task);
-		if (nsp) {
-			ns = nsp->mnt_ns;
-			if (ns)
-				get_mnt_ns(ns);
-		}
-		rcu_read_unlock();
-		if (ns && get_task_root(task, &root) == 0)
-			ret = 0;
-		put_task_struct(task);
-	}
-
-	if (!ns)
-		goto err;
-	if (ret)
-		goto err_put_ns;
-
-	ret = -ENOMEM;
-	p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
-	if (!p)
-		goto err_put_path;
-
-	file->private_data = &p->m;
-	ret = seq_open(file, op);
-	if (ret)
-		goto err_free;
-
-	p->m.private = p;
-	p->ns = ns;
-	p->root = root;
-	p->m.poll_event = ns->event;
-
-	return 0;
-
- err_free:
-	kfree(p);
- err_put_path:
-	path_put(&root);
- err_put_ns:
-	put_mnt_ns(ns);
- err:
-	return ret;
-}
-
-static int mounts_release(struct inode *inode, struct file *file)
-{
-	struct proc_mounts *p = file->private_data;
-	path_put(&p->root);
-	put_mnt_ns(p->ns);
-	return seq_release(inode, file);
-}
-
-static unsigned mounts_poll(struct file *file, poll_table *wait)
-{
-	struct proc_mounts *p = file->private_data;
-	unsigned res = POLLIN | POLLRDNORM;
-
-	poll_wait(file, &p->ns->poll, wait);
-	if (mnt_had_events(p))
-		res |= POLLERR | POLLPRI;
-
-	return res;
-}
-
-static int mounts_open(struct inode *inode, struct file *file)
-{
-	return mounts_open_common(inode, file, &mounts_op);
-}
-
-static const struct file_operations proc_mounts_operations = {
-	.open		= mounts_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= mounts_release,
-	.poll		= mounts_poll,
-};
-
-static int mountinfo_open(struct inode *inode, struct file *file)
-{
-	return mounts_open_common(inode, file, &mountinfo_op);
-}
-
-static const struct file_operations proc_mountinfo_operations = {
-	.open		= mountinfo_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= mounts_release,
-	.poll		= mounts_poll,
-};
-
-static int mountstats_open(struct inode *inode, struct file *file)
-{
-	return mounts_open_common(inode, file, &mountstats_op);
-}
-
-static const struct file_operations proc_mountstats_operations = {
-	.open		= mountstats_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= mounts_release,
-};
-
 #define PROC_BLOCK_SIZE	(3*1024)		/* 4K page size but our output routines use some slack for overruns */
 
 static ssize_t proc_info_read(struct file * file, char __user * buf,
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
new file mode 100644
index 000000000000..9dcd9543ca12
--- /dev/null
+++ b/fs/proc_namespace.c
@@ -0,0 +1,331 @@
+/*
+ * fs/proc_namespace.c - handling of /proc/<pid>/{mounts,mountinfo,mountstats}
+ *
+ * In fact, that's a piece of procfs; it's *almost* isolated from
+ * the rest of fs/proc, but has rather close relationships with
+ * fs/namespace.c, thus here instead of fs/proc
+ *
+ */
+#include <linux/mnt_namespace.h>
+#include <linux/nsproxy.h>
+#include <linux/security.h>
+#include <linux/fs_struct.h>
+#include "proc/internal.h" /* only for get_proc_task() in ->open() */
+
+#include "pnode.h"
+#include "internal.h"
+
+static unsigned mounts_poll(struct file *file, poll_table *wait)
+{
+	struct proc_mounts *p = file->private_data;
+	struct mnt_namespace *ns = p->ns;
+	unsigned res = POLLIN | POLLRDNORM;
+
+	poll_wait(file, &p->ns->poll, wait);
+
+	br_read_lock(vfsmount_lock);
+	if (p->m.poll_event != ns->event) {
+		p->m.poll_event = ns->event;
+		res |= POLLERR | POLLPRI;
+	}
+	br_read_unlock(vfsmount_lock);
+
+	return res;
+}
+
+struct proc_fs_info {
+	int flag;
+	const char *str;
+};
+
+static int show_sb_opts(struct seq_file *m, struct super_block *sb)
+{
+	static const struct proc_fs_info fs_info[] = {
+		{ MS_SYNCHRONOUS, ",sync" },
+		{ MS_DIRSYNC, ",dirsync" },
+		{ MS_MANDLOCK, ",mand" },
+		{ 0, NULL }
+	};
+	const struct proc_fs_info *fs_infop;
+
+	for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
+		if (sb->s_flags & fs_infop->flag)
+			seq_puts(m, fs_infop->str);
+	}
+
+	return security_sb_show_options(m, sb);
+}
+
+static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
+{
+	static const struct proc_fs_info mnt_info[] = {
+		{ MNT_NOSUID, ",nosuid" },
+		{ MNT_NODEV, ",nodev" },
+		{ MNT_NOEXEC, ",noexec" },
+		{ MNT_NOATIME, ",noatime" },
+		{ MNT_NODIRATIME, ",nodiratime" },
+		{ MNT_RELATIME, ",relatime" },
+		{ 0, NULL }
+	};
+	const struct proc_fs_info *fs_infop;
+
+	for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
+		if (mnt->mnt_flags & fs_infop->flag)
+			seq_puts(m, fs_infop->str);
+	}
+}
+
+static inline void mangle(struct seq_file *m, const char *s)
+{
+	seq_escape(m, s, " \t\n\\");
+}
+
+static void show_type(struct seq_file *m, struct super_block *sb)
+{
+	mangle(m, sb->s_type->name);
+	if (sb->s_subtype && sb->s_subtype[0]) {
+		seq_putc(m, '.');
+		mangle(m, sb->s_subtype);
+	}
+}
+
+static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct mount *r = real_mount(mnt);
+	int err = 0;
+	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
+
+	if (mnt->mnt_sb->s_op->show_devname) {
+		err = mnt->mnt_sb->s_op->show_devname(m, mnt);
+		if (err)
+			goto out;
+	} else {
+		mangle(m, r->mnt_devname ? r->mnt_devname : "none");
+	}
+	seq_putc(m, ' ');
+	seq_path(m, &mnt_path, " \t\n\\");
+	seq_putc(m, ' ');
+	show_type(m, mnt->mnt_sb);
+	seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
+	err = show_sb_opts(m, mnt->mnt_sb);
+	if (err)
+		goto out;
+	show_mnt_opts(m, mnt);
+	if (mnt->mnt_sb->s_op->show_options)
+		err = mnt->mnt_sb->s_op->show_options(m, mnt);
+	seq_puts(m, " 0 0\n");
+out:
+	return err;
+}
+
+static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct proc_mounts *p = m->private;
+	struct mount *r = real_mount(mnt);
+	struct super_block *sb = mnt->mnt_sb;
+	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
+	struct path root = p->root;
+	int err = 0;
+
+	seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
+		   MAJOR(sb->s_dev), MINOR(sb->s_dev));
+	if (sb->s_op->show_path)
+		err = sb->s_op->show_path(m, mnt);
+	else
+		seq_dentry(m, mnt->mnt_root, " \t\n\\");
+	if (err)
+		goto out;
+	seq_putc(m, ' ');
+
+	/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
+	err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
+	if (err)
+		goto out;
+
+	seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
+	show_mnt_opts(m, mnt);
+
+	/* Tagged fields ("foo:X" or "bar") */
+	if (IS_MNT_SHARED(r))
+		seq_printf(m, " shared:%i", r->mnt_group_id);
+	if (IS_MNT_SLAVE(r)) {
+		int master = r->mnt_master->mnt_group_id;
+		int dom = get_dominating_id(r, &p->root);
+		seq_printf(m, " master:%i", master);
+		if (dom && dom != master)
+			seq_printf(m, " propagate_from:%i", dom);
+	}
+	if (IS_MNT_UNBINDABLE(r))
+		seq_puts(m, " unbindable");
+
+	/* Filesystem specific data */
+	seq_puts(m, " - ");
+	show_type(m, sb);
+	seq_putc(m, ' ');
+	if (sb->s_op->show_devname)
+		err = sb->s_op->show_devname(m, mnt);
+	else
+		mangle(m, r->mnt_devname ? r->mnt_devname : "none");
+	if (err)
+		goto out;
+	seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
+	err = show_sb_opts(m, sb);
+	if (err)
+		goto out;
+	if (sb->s_op->show_options)
+		err = sb->s_op->show_options(m, mnt);
+	seq_putc(m, '\n');
+out:
+	return err;
+}
+
+static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct mount *r = real_mount(mnt);
+	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
+	int err = 0;
+
+	/* device */
+	if (mnt->mnt_sb->s_op->show_devname) {
+		seq_puts(m, "device ");
+		err = mnt->mnt_sb->s_op->show_devname(m, mnt);
+	} else {
+		if (r->mnt_devname) {
+			seq_puts(m, "device ");
+			mangle(m, r->mnt_devname);
+		} else
+			seq_puts(m, "no device");
+	}
+
+	/* mount point */
+	seq_puts(m, " mounted on ");
+	seq_path(m, &mnt_path, " \t\n\\");
+	seq_putc(m, ' ');
+
+	/* file system type */
+	seq_puts(m, "with fstype ");
+	show_type(m, mnt->mnt_sb);
+
+	/* optional statistics */
+	if (mnt->mnt_sb->s_op->show_stats) {
+		seq_putc(m, ' ');
+		if (!err)
+			err = mnt->mnt_sb->s_op->show_stats(m, mnt);
+	}
+
+	seq_putc(m, '\n');
+	return err;
+}
+
+static int mounts_open_common(struct inode *inode, struct file *file,
+			      int (*show)(struct seq_file *, struct vfsmount *))
+{
+	struct task_struct *task = get_proc_task(inode);
+	struct nsproxy *nsp;
+	struct mnt_namespace *ns = NULL;
+	struct path root;
+	struct proc_mounts *p;
+	int ret = -EINVAL;
+
+	if (!task)
+		goto err;
+
+	rcu_read_lock();
+	nsp = task_nsproxy(task);
+	if (!nsp) {
+		rcu_read_unlock();
+		put_task_struct(task);
+		goto err;
+	}
+	ns = nsp->mnt_ns;
+	if (!ns) {
+		rcu_read_unlock();
+		put_task_struct(task);
+		goto err;
+	}
+	get_mnt_ns(ns);
+	rcu_read_unlock();
+	task_lock(task);
+	if (!task->fs) {
+		task_unlock(task);
+		put_task_struct(task);
+		ret = -ENOENT;
+		goto err_put_ns;
+	}
+	get_fs_root(task->fs, &root);
+	task_unlock(task);
+	put_task_struct(task);
+
+	ret = -ENOMEM;
+	p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
+	if (!p)
+		goto err_put_path;
+
+	file->private_data = &p->m;
+	ret = seq_open(file, &mounts_op);
+	if (ret)
+		goto err_free;
+
+	p->m.private = p;
+	p->ns = ns;
+	p->root = root;
+	p->m.poll_event = ns->event;
+	p->show = show;
+
+	return 0;
+
+ err_free:
+	kfree(p);
+ err_put_path:
+	path_put(&root);
+ err_put_ns:
+	put_mnt_ns(ns);
+ err:
+	return ret;
+}
+
+static int mounts_release(struct inode *inode, struct file *file)
+{
+	struct proc_mounts *p = file->private_data;
+	path_put(&p->root);
+	put_mnt_ns(p->ns);
+	return seq_release(inode, file);
+}
+
+static int mounts_open(struct inode *inode, struct file *file)
+{
+	return mounts_open_common(inode, file, show_vfsmnt);
+}
+
+static int mountinfo_open(struct inode *inode, struct file *file)
+{
+	return mounts_open_common(inode, file, show_mountinfo);
+}
+
+static int mountstats_open(struct inode *inode, struct file *file)
+{
+	return mounts_open_common(inode, file, show_vfsstat);
+}
+
+const struct file_operations proc_mounts_operations = {
+	.open		= mounts_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= mounts_release,
+	.poll		= mounts_poll,
+};
+
+const struct file_operations proc_mountinfo_operations = {
+	.open		= mountinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= mounts_release,
+	.poll		= mounts_poll,
+};
+
+const struct file_operations proc_mountstats_operations = {
+	.open		= mountstats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= mounts_release,
+};
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index e87ec01aac9d..5a8e3903d770 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -2,38 +2,16 @@
 #define _NAMESPACE_H_
 #ifdef __KERNEL__
 
-#include <linux/path.h>
-#include <linux/seq_file.h>
-#include <linux/wait.h>
-
-struct mnt_namespace {
-	atomic_t		count;
-	struct vfsmount *	root;
-	struct list_head	list;
-	wait_queue_head_t poll;
-	int event;
-};
-
-struct proc_mounts {
-	struct seq_file m; /* must be the first element */
-	struct mnt_namespace *ns;
-	struct path root;
-};
-
+struct mnt_namespace;
 struct fs_struct;
 
 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
 		struct fs_struct *);
 extern void put_mnt_ns(struct mnt_namespace *ns);
-static inline void get_mnt_ns(struct mnt_namespace *ns)
-{
-	atomic_inc(&ns->count);
-}
 
-extern const struct seq_operations mounts_op;
-extern const struct seq_operations mountinfo_op;
-extern const struct seq_operations mountstats_op;
-extern int mnt_had_events(struct proc_mounts *);
+extern const struct file_operations proc_mounts_operations;
+extern const struct file_operations proc_mountinfo_operations;
+extern const struct file_operations proc_mountstats_operations;
 
 #endif
 #endif
-- 
cgit v1.2.3


From 3bb3dbbd56ea39e5537db8f8041ea95d28f16a7f Mon Sep 17 00:00:00 2001
From: Donggeun Kim <dg77.kim@samsung.com>
Date: Tue, 27 Dec 2011 18:47:48 +0900
Subject: power_supply: Add initial Charger-Manager driver

Because battery health monitoring should be done even when suspended,
it needs to wake up and suspend periodically. Thus, userspace battery
monitoring may incur too much overhead; every device and task is woken
up periodically. Charger Manager uses suspend-again to provide
in-suspend monitoring.

This patch allows to monitor battery health in-suspend state.

Signed-off-by: Donggeun Kim <dg77.kim@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 Documentation/power/charger-manager.txt | 149 ++++++
 drivers/power/Kconfig                   |  10 +
 drivers/power/Makefile                  |   1 +
 drivers/power/charger-manager.c         | 779 ++++++++++++++++++++++++++++++++
 include/linux/power/charger-manager.h   | 130 ++++++
 5 files changed, 1069 insertions(+)
 create mode 100644 Documentation/power/charger-manager.txt
 create mode 100644 drivers/power/charger-manager.c
 create mode 100644 include/linux/power/charger-manager.h

(limited to 'include/linux')

diff --git a/Documentation/power/charger-manager.txt b/Documentation/power/charger-manager.txt
new file mode 100644
index 000000000000..081489f3db25
--- /dev/null
+++ b/Documentation/power/charger-manager.txt
@@ -0,0 +1,149 @@
+Charger Manager
+	(C) 2011 MyungJoo Ham <myungjoo.ham@samsung.com>, GPL
+
+Charger Manager provides in-kernel battery charger management that
+requires temperature monitoring during suspend-to-RAM state
+and where each battery may have multiple chargers attached and the userland
+wants to look at the aggregated information of the multiple chargers.
+
+Charger Manager is a platform_driver with power-supply-class entries.
+An instance of Charger Manager (a platform-device created with Charger-Manager)
+represents an independent battery with chargers. If there are multiple
+batteries with their own chargers acting independently in a system,
+the system may need multiple instances of Charger Manager.
+
+1. Introduction
+===============
+
+Charger Manager supports the following:
+
+* Support for multiple chargers (e.g., a device with USB, AC, and solar panels)
+	A system may have multiple chargers (or power sources) and some of
+	they may be activated at the same time. Each charger may have its
+	own power-supply-class and each power-supply-class can provide
+	different information about the battery status. This framework
+	aggregates charger-related information from multiple sources and
+	shows combined information as a single power-supply-class.
+
+* Support for in suspend-to-RAM polling (with suspend_again callback)
+	While the battery is being charged and the system is in suspend-to-RAM,
+	we may need to monitor the battery health by looking at the ambient or
+	battery temperature. We can accomplish this by waking up the system
+	periodically. However, such a method wakes up devices unncessary for
+	monitoring the battery health and tasks, and user processes that are
+	supposed to be kept suspended. That, in turn, incurs unnecessary power
+	consumption and slow down charging process. Or even, such peak power
+	consumption can stop chargers in the middle of charging
+	(external power input < device power consumption), which not
+	only affects the charging time, but the lifespan of the battery.
+
+	Charger Manager provides a function "cm_suspend_again" that can be
+	used as suspend_again callback of platform_suspend_ops. If the platform
+	requires tasks other than cm_suspend_again, it may implement its own
+	suspend_again callback that calls cm_suspend_again in the middle.
+	Normally, the platform will need to resume and suspend some devices
+	that are used by Charger Manager.
+
+2. Global Charger-Manager Data related with suspend_again
+========================================================
+In order to setup Charger Manager with suspend-again feature
+(in-suspend monitoring), the user should provide charger_global_desc
+with setup_charger_manager(struct charger_global_desc *).
+This charger_global_desc data for in-suspend monitoring is global
+as the name suggests. Thus, the user needs to provide only once even
+if there are multiple batteries. If there are multiple batteries, the
+multiple instances of Charger Manager share the same charger_global_desc
+and it will manage in-suspend monitoring for all instances of Charger Manager.
+
+The user needs to provide all the two entries properly in order to activate
+in-suspend monitoring:
+
+struct charger_global_desc {
+
+char *rtc_name;
+	: The name of rtc (e.g., "rtc0") used to wakeup the system from
+	suspend for Charger Manager. The alarm interrupt (AIE) of the rtc
+	should be able to wake up the system from suspend. Charger Manager
+	saves and restores the alarm value and use the previously-defined
+	alarm if it is going to go off earlier than Charger Manager so that
+	Charger Manager does not interfere with previously-defined alarms.
+
+bool (*rtc_only_wakeup)(void);
+	: This callback should let CM know whether
+	the wakeup-from-suspend is caused only by the alarm of "rtc" in the
+	same struct. If there is any other wakeup source triggered the
+	wakeup, it should return false. If the "rtc" is the only wakeup
+	reason, it should return true.
+};
+
+3. How to setup suspend_again
+=============================
+Charger Manager provides a function "extern bool cm_suspend_again(void)".
+When cm_suspend_again is called, it monitors every battery. The suspend_ops
+callback of the system's platform_suspend_ops can call cm_suspend_again
+function to know whether Charger Manager wants to suspend again or not.
+If there are no other devices or tasks that want to use suspend_again
+feature, the platform_suspend_ops may directly refer to cm_suspend_again
+for its suspend_again callback.
+
+The cm_suspend_again() returns true (meaning "I want to suspend again")
+if the system was woken up by Charger Manager and the polling
+(in-suspend monitoring) results in "normal".
+
+4. Charger-Manager Data (struct charger_desc)
+=============================================
+For each battery charged independently from other batteries (if a series of
+batteries are charged by a single charger, they are counted as one independent
+battery), an instance of Charger Manager is attached to it.
+
+struct charger_desc {
+
+enum polling_modes polling_mode;
+	: CM_POLL_DISABLE: do not poll this battery.
+	  CM_POLL_ALWAYS: always poll this battery.
+	  CM_POLL_EXTERNAL_POWER_ONLY: poll this battery if and only if
+				       an external power source is attached.
+	  CM_POLL_CHARGING_ONLY: poll this battery if and only if the
+				 battery is being charged.
+
+unsigned int polling_interval_ms;
+	: Required polling interval in ms. Charger Manager will poll
+	this battery every polling_interval_ms or more frequently.
+
+enum data_source battery_present;
+	CM_FUEL_GAUGE: get battery presence information from fuel gauge.
+	CM_CHARGER_STAT: get battery presence from chargers.
+
+char **psy_charger_stat;
+	: An array ending with NULL that has power-supply-class names of
+	chargers. Each power-supply-class should provide "PRESENT" (if
+	battery_present is "CM_CHARGER_STAT"), "ONLINE" (shows whether an
+	external power source is attached or not), and "STATUS" (shows whether
+	the battery is {"FULL" or not FULL} or {"FULL", "Charging",
+	"Discharging", "NotCharging"}).
+
+int num_charger_regulators;
+struct regulator_bulk_data *charger_regulators;
+	: Regulators representing the chargers in the form for
+	regulator framework's bulk functions.
+
+char *psy_fuel_gauge;
+	: Power-supply-class name of the fuel gauge.
+
+int (*temperature_out_of_range)(int *mC);
+	: This callback returns 0 if the temperature is safe for charging,
+	a positive number if it is too hot to charge, and a negative number
+	if it is too cold to charge. With the variable mC, the callback returns
+	the temperature in 1/1000 of centigrade.
+};
+
+5. Other Considerations
+=======================
+
+At the charger/battery-related events such as battery-pulled-out,
+charger-pulled-out, charger-inserted, DCIN-over/under-voltage, charger-stopped,
+and others critical to chargers, the system should be configured to wake up.
+At least the following should wake up the system from a suspend:
+a) charger-on/off b) external-power-in/out c) battery-in/out (while charging)
+
+It is usually accomplished by configuring the PMIC as a wakeup source.
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 57de051a74b3..363f4d1ae067 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -235,6 +235,16 @@ config CHARGER_GPIO
 	  This driver can be build as a module. If so, the module will be
 	  called gpio-charger.
 
+config CHARGER_MANAGER
+	bool "Battery charger manager for multiple chargers"
+	depends on REGULATOR && RTC_CLASS
+	help
+          Say Y to enable charger-manager support, which allows multiple
+          chargers attached to a battery and multiple batteries attached to a
+          system. The charger-manager also can monitor charging status in
+          runtime and in suspend-to-RAM by waking up the system periodically
+          with help of suspend_again support.
+
 config CHARGER_MAX8997
 	tristate "Maxim MAX8997/MAX8966 PMIC battery charger driver"
 	depends on MFD_MAX8997 && REGULATOR_MAX8997
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index b4af13dd8b66..d3b24e99acbe 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -36,5 +36,6 @@ obj-$(CONFIG_CHARGER_ISP1704)	+= isp1704_charger.o
 obj-$(CONFIG_CHARGER_MAX8903)	+= max8903_charger.o
 obj-$(CONFIG_CHARGER_TWL4030)	+= twl4030_charger.o
 obj-$(CONFIG_CHARGER_GPIO)	+= gpio-charger.o
+obj-$(CONFIG_CHARGER_MANAGER)	+= charger-manager.o
 obj-$(CONFIG_CHARGER_MAX8997)	+= max8997_charger.o
 obj-$(CONFIG_CHARGER_MAX8998)	+= max8998_charger.o
diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c
new file mode 100644
index 000000000000..727a259ea46c
--- /dev/null
+++ b/drivers/power/charger-manager.c
@@ -0,0 +1,779 @@
+/*
+ * Copyright (C) 2011 Samsung Electronics Co., Ltd.
+ * MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This driver enables to monitor battery health and control charger
+ * during suspend-to-mem.
+ * Charger manager depends on other devices. register this later than
+ * the depending devices.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+**/
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/rtc.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/platform_device.h>
+#include <linux/power/charger-manager.h>
+#include <linux/regulator/consumer.h>
+
+/*
+ * Regard CM_JIFFIES_SMALL jiffies is small enough to ignore for
+ * delayed works so that we can run delayed works with CM_JIFFIES_SMALL
+ * without any delays.
+ */
+#define	CM_JIFFIES_SMALL	(2)
+
+/* If y is valid (> 0) and smaller than x, do x = y */
+#define CM_MIN_VALID(x, y)	x = (((y > 0) && ((x) > (y))) ? (y) : (x))
+
+/*
+ * Regard CM_RTC_SMALL (sec) is small enough to ignore error in invoking
+ * rtc alarm. It should be 2 or larger
+ */
+#define CM_RTC_SMALL		(2)
+
+#define UEVENT_BUF_SIZE		32
+
+static LIST_HEAD(cm_list);
+static DEFINE_MUTEX(cm_list_mtx);
+
+/* About in-suspend (suspend-again) monitoring */
+static struct rtc_device *rtc_dev;
+/*
+ * Backup RTC alarm
+ * Save the wakeup alarm before entering suspend-to-RAM
+ */
+static struct rtc_wkalrm rtc_wkalarm_save;
+/* Backup RTC alarm time in terms of seconds since 01-01-1970 00:00:00 */
+static unsigned long rtc_wkalarm_save_time;
+static bool cm_suspended;
+static bool cm_rtc_set;
+static unsigned long cm_suspend_duration_ms;
+
+/* Global charger-manager description */
+static struct charger_global_desc *g_desc; /* init with setup_charger_manager */
+
+/**
+ * is_batt_present - See if the battery presents in place.
+ * @cm: the Charger Manager representing the battery.
+ */
+static bool is_batt_present(struct charger_manager *cm)
+{
+	union power_supply_propval val;
+	bool present = false;
+	int i, ret;
+
+	switch (cm->desc->battery_present) {
+	case CM_FUEL_GAUGE:
+		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+				POWER_SUPPLY_PROP_PRESENT, &val);
+		if (ret == 0 && val.intval)
+			present = true;
+		break;
+	case CM_CHARGER_STAT:
+		for (i = 0; cm->charger_stat[i]; i++) {
+			ret = cm->charger_stat[i]->get_property(
+					cm->charger_stat[i],
+					POWER_SUPPLY_PROP_PRESENT, &val);
+			if (ret == 0 && val.intval) {
+				present = true;
+				break;
+			}
+		}
+		break;
+	}
+
+	return present;
+}
+
+/**
+ * is_ext_pwr_online - See if an external power source is attached to charge
+ * @cm: the Charger Manager representing the battery.
+ *
+ * Returns true if at least one of the chargers of the battery has an external
+ * power source attached to charge the battery regardless of whether it is
+ * actually charging or not.
+ */
+static bool is_ext_pwr_online(struct charger_manager *cm)
+{
+	union power_supply_propval val;
+	bool online = false;
+	int i, ret;
+
+	/* If at least one of them has one, it's yes. */
+	for (i = 0; cm->charger_stat[i]; i++) {
+		ret = cm->charger_stat[i]->get_property(
+				cm->charger_stat[i],
+				POWER_SUPPLY_PROP_ONLINE, &val);
+		if (ret == 0 && val.intval) {
+			online = true;
+			break;
+		}
+	}
+
+	return online;
+}
+
+/**
+ * is_charging - Returns true if the battery is being charged.
+ * @cm: the Charger Manager representing the battery.
+ */
+static bool is_charging(struct charger_manager *cm)
+{
+	int i, ret;
+	bool charging = false;
+	union power_supply_propval val;
+
+	/* If there is no battery, it cannot be charged */
+	if (!is_batt_present(cm))
+		return false;
+
+	/* If at least one of the charger is charging, return yes */
+	for (i = 0; cm->charger_stat[i]; i++) {
+		/* 1. The charger sholuld not be DISABLED */
+		if (cm->emergency_stop)
+			continue;
+		if (!cm->charger_enabled)
+			continue;
+
+		/* 2. The charger should be online (ext-power) */
+		ret = cm->charger_stat[i]->get_property(
+				cm->charger_stat[i],
+				POWER_SUPPLY_PROP_ONLINE, &val);
+		if (ret) {
+			dev_warn(cm->dev, "Cannot read ONLINE value from %s.\n",
+					cm->desc->psy_charger_stat[i]);
+			continue;
+		}
+		if (val.intval == 0)
+			continue;
+
+		/*
+		 * 3. The charger should not be FULL, DISCHARGING,
+		 * or NOT_CHARGING.
+		 */
+		ret = cm->charger_stat[i]->get_property(
+				cm->charger_stat[i],
+				POWER_SUPPLY_PROP_STATUS, &val);
+		if (ret) {
+			dev_warn(cm->dev, "Cannot read STATUS value from %s.\n",
+					cm->desc->psy_charger_stat[i]);
+			continue;
+		}
+		if (val.intval == POWER_SUPPLY_STATUS_FULL ||
+				val.intval == POWER_SUPPLY_STATUS_DISCHARGING ||
+				val.intval == POWER_SUPPLY_STATUS_NOT_CHARGING)
+			continue;
+
+		/* Then, this is charging. */
+		charging = true;
+		break;
+	}
+
+	return charging;
+}
+
+/**
+ * is_polling_required - Return true if need to continue polling for this CM.
+ * @cm: the Charger Manager representing the battery.
+ */
+static bool is_polling_required(struct charger_manager *cm)
+{
+	switch (cm->desc->polling_mode) {
+	case CM_POLL_DISABLE:
+		return false;
+	case CM_POLL_ALWAYS:
+		return true;
+	case CM_POLL_EXTERNAL_POWER_ONLY:
+		return is_ext_pwr_online(cm);
+	case CM_POLL_CHARGING_ONLY:
+		return is_charging(cm);
+	default:
+		dev_warn(cm->dev, "Incorrect polling_mode (%d)\n",
+			cm->desc->polling_mode);
+	}
+
+	return false;
+}
+
+/**
+ * try_charger_enable - Enable/Disable chargers altogether
+ * @cm: the Charger Manager representing the battery.
+ * @enable: true: enable / false: disable
+ *
+ * Note that Charger Manager keeps the charger enabled regardless whether
+ * the charger is charging or not (because battery is full or no external
+ * power source exists) except when CM needs to disable chargers forcibly
+ * bacause of emergency causes; when the battery is overheated or too cold.
+ */
+static int try_charger_enable(struct charger_manager *cm, bool enable)
+{
+	int err = 0, i;
+	struct charger_desc *desc = cm->desc;
+
+	/* Ignore if it's redundent command */
+	if (enable && cm->charger_enabled)
+		return 0;
+	if (!enable && !cm->charger_enabled)
+		return 0;
+
+	if (enable) {
+		if (cm->emergency_stop)
+			return -EAGAIN;
+		err = regulator_bulk_enable(desc->num_charger_regulators,
+					desc->charger_regulators);
+	} else {
+		/*
+		 * Abnormal battery state - Stop charging forcibly,
+		 * even if charger was enabled at the other places
+		 */
+		err = regulator_bulk_disable(desc->num_charger_regulators,
+					desc->charger_regulators);
+
+		for (i = 0; i < desc->num_charger_regulators; i++) {
+			if (regulator_is_enabled(
+				    desc->charger_regulators[i].consumer)) {
+				regulator_force_disable(
+					desc->charger_regulators[i].consumer);
+				dev_warn(cm->dev,
+					"Disable regulator(%s) forcibly.\n",
+					desc->charger_regulators[i].supply);
+			}
+		}
+	}
+
+	if (!err)
+		cm->charger_enabled = enable;
+
+	return err;
+}
+
+/**
+ * uevent_notify - Let users know something has changed.
+ * @cm: the Charger Manager representing the battery.
+ * @event: the event string.
+ *
+ * If @event is null, it implies that uevent_notify is called
+ * by resume function. When called in the resume function, cm_suspended
+ * should be already reset to false in order to let uevent_notify
+ * notify the recent event during the suspend to users. While
+ * suspended, uevent_notify does not notify users, but tracks
+ * events so that uevent_notify can notify users later after resumed.
+ */
+static void uevent_notify(struct charger_manager *cm, const char *event)
+{
+	static char env_str[UEVENT_BUF_SIZE + 1] = "";
+	static char env_str_save[UEVENT_BUF_SIZE + 1] = "";
+
+	if (cm_suspended) {
+		/* Nothing in suspended-event buffer */
+		if (env_str_save[0] == 0) {
+			if (!strncmp(env_str, event, UEVENT_BUF_SIZE))
+				return; /* status not changed */
+			strncpy(env_str_save, event, UEVENT_BUF_SIZE);
+			return;
+		}
+
+		if (!strncmp(env_str_save, event, UEVENT_BUF_SIZE))
+			return; /* Duplicated. */
+		else
+			strncpy(env_str_save, event, UEVENT_BUF_SIZE);
+
+		return;
+	}
+
+	if (event == NULL) {
+		/* No messages pending */
+		if (!env_str_save[0])
+			return;
+
+		strncpy(env_str, env_str_save, UEVENT_BUF_SIZE);
+		kobject_uevent(&cm->dev->kobj, KOBJ_CHANGE);
+		env_str_save[0] = 0;
+
+		return;
+	}
+
+	/* status not changed */
+	if (!strncmp(env_str, event, UEVENT_BUF_SIZE))
+		return;
+
+	/* save the status and notify the update */
+	strncpy(env_str, event, UEVENT_BUF_SIZE);
+	kobject_uevent(&cm->dev->kobj, KOBJ_CHANGE);
+
+	dev_info(cm->dev, event);
+}
+
+/**
+ * _cm_monitor - Monitor the temperature and return true for exceptions.
+ * @cm: the Charger Manager representing the battery.
+ *
+ * Returns true if there is an event to notify for the battery.
+ * (True if the status of "emergency_stop" changes)
+ */
+static bool _cm_monitor(struct charger_manager *cm)
+{
+	struct charger_desc *desc = cm->desc;
+	int temp = desc->temperature_out_of_range(&cm->last_temp_mC);
+
+	dev_dbg(cm->dev, "monitoring (%2.2d.%3.3dC)\n",
+		cm->last_temp_mC / 1000, cm->last_temp_mC % 1000);
+
+	/* It has been stopped or charging already */
+	if (!!temp == !!cm->emergency_stop)
+		return false;
+
+	if (temp) {
+		cm->emergency_stop = temp;
+		if (!try_charger_enable(cm, false)) {
+			if (temp > 0)
+				uevent_notify(cm, "OVERHEAT");
+			else
+				uevent_notify(cm, "COLD");
+		}
+	} else {
+		cm->emergency_stop = 0;
+		if (!try_charger_enable(cm, true))
+			uevent_notify(cm, "CHARGING");
+	}
+
+	return true;
+}
+
+/**
+ * cm_monitor - Monitor every battery.
+ *
+ * Returns true if there is an event to notify from any of the batteries.
+ * (True if the status of "emergency_stop" changes)
+ */
+static bool cm_monitor(void)
+{
+	bool stop = false;
+	struct charger_manager *cm;
+
+	mutex_lock(&cm_list_mtx);
+
+	list_for_each_entry(cm, &cm_list, entry)
+		stop = stop || _cm_monitor(cm);
+
+	mutex_unlock(&cm_list_mtx);
+
+	return stop;
+}
+
+/**
+ * cm_setup_timer - For in-suspend monitoring setup wakeup alarm
+ *		    for suspend_again.
+ *
+ * Returns true if the alarm is set for Charger Manager to use.
+ * Returns false if
+ *	cm_setup_timer fails to set an alarm,
+ *	cm_setup_timer does not need to set an alarm for Charger Manager,
+ *	or an alarm previously configured is to be used.
+ */
+static bool cm_setup_timer(void)
+{
+	struct charger_manager *cm;
+	unsigned int wakeup_ms = UINT_MAX;
+	bool ret = false;
+
+	mutex_lock(&cm_list_mtx);
+
+	list_for_each_entry(cm, &cm_list, entry) {
+		/* Skip if polling is not required for this CM */
+		if (!is_polling_required(cm) && !cm->emergency_stop)
+			continue;
+		if (cm->desc->polling_interval_ms == 0)
+			continue;
+		CM_MIN_VALID(wakeup_ms, cm->desc->polling_interval_ms);
+	}
+
+	mutex_unlock(&cm_list_mtx);
+
+	if (wakeup_ms < UINT_MAX && wakeup_ms > 0) {
+		pr_info("Charger Manager wakeup timer: %u ms.\n", wakeup_ms);
+		if (rtc_dev) {
+			struct rtc_wkalrm tmp;
+			unsigned long time, now;
+			unsigned long add = DIV_ROUND_UP(wakeup_ms, 1000);
+
+			/*
+			 * Set alarm with the polling interval (wakeup_ms)
+			 * except when rtc_wkalarm_save comes first.
+			 * However, the alarm time should be NOW +
+			 * CM_RTC_SMALL or later.
+			 */
+			tmp.enabled = 1;
+			rtc_read_time(rtc_dev, &tmp.time);
+			rtc_tm_to_time(&tmp.time, &now);
+			if (add < CM_RTC_SMALL)
+				add = CM_RTC_SMALL;
+			time = now + add;
+
+			ret = true;
+
+			if (rtc_wkalarm_save.enabled &&
+			    rtc_wkalarm_save_time &&
+			    rtc_wkalarm_save_time < time) {
+				if (rtc_wkalarm_save_time < now + CM_RTC_SMALL)
+					time = now + CM_RTC_SMALL;
+				else
+					time = rtc_wkalarm_save_time;
+
+				/* The timer is not appointed by CM */
+				ret = false;
+			}
+
+			pr_info("Waking up after %lu secs.\n",
+					time - now);
+
+			rtc_time_to_tm(time, &tmp.time);
+			rtc_set_alarm(rtc_dev, &tmp);
+			cm_suspend_duration_ms += wakeup_ms;
+			return ret;
+		}
+	}
+
+	if (rtc_dev)
+		rtc_set_alarm(rtc_dev, &rtc_wkalarm_save);
+	return false;
+}
+
+/**
+ * cm_suspend_again - Determine whether suspend again or not
+ *
+ * Returns true if the system should be suspended again
+ * Returns false if the system should be woken up
+ */
+bool cm_suspend_again(void)
+{
+	struct charger_manager *cm;
+	bool ret = false;
+
+	if (!g_desc || !g_desc->rtc_only_wakeup || !g_desc->rtc_only_wakeup() ||
+	    !cm_rtc_set)
+		return false;
+
+	if (cm_monitor())
+		goto out;
+
+	ret = true;
+	mutex_lock(&cm_list_mtx);
+	list_for_each_entry(cm, &cm_list, entry) {
+		if (cm->status_save_ext_pwr_inserted != is_ext_pwr_online(cm) ||
+		    cm->status_save_batt != is_batt_present(cm))
+			ret = false;
+	}
+	mutex_unlock(&cm_list_mtx);
+
+	cm_rtc_set = cm_setup_timer();
+out:
+	/* It's about the time when the non-CM appointed timer goes off */
+	if (rtc_wkalarm_save.enabled) {
+		unsigned long now;
+		struct rtc_time tmp;
+
+		rtc_read_time(rtc_dev, &tmp);
+		rtc_tm_to_time(&tmp, &now);
+
+		if (rtc_wkalarm_save_time &&
+		    now + CM_RTC_SMALL >= rtc_wkalarm_save_time)
+			return false;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cm_suspend_again);
+
+/**
+ * setup_charger_manager - initialize charger_global_desc data
+ * @gd: pointer to instance of charger_global_desc
+ */
+int setup_charger_manager(struct charger_global_desc *gd)
+{
+	if (!gd)
+		return -EINVAL;
+
+	if (rtc_dev)
+		rtc_class_close(rtc_dev);
+	rtc_dev = NULL;
+	g_desc = NULL;
+
+	if (!gd->rtc_only_wakeup) {
+		pr_err("The callback rtc_only_wakeup is not given.\n");
+		return -EINVAL;
+	}
+
+	if (gd->rtc_name) {
+		rtc_dev = rtc_class_open(gd->rtc_name);
+		if (IS_ERR_OR_NULL(rtc_dev)) {
+			rtc_dev = NULL;
+			/* Retry at probe. RTC may be not registered yet */
+		}
+	} else {
+		pr_warn("No wakeup timer is given for charger manager."
+			"In-suspend monitoring won't work.\n");
+	}
+
+	g_desc = gd;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(setup_charger_manager);
+
+static int charger_manager_probe(struct platform_device *pdev)
+{
+	struct charger_desc *desc = dev_get_platdata(&pdev->dev);
+	struct charger_manager *cm;
+	int ret = 0, i = 0;
+
+	if (g_desc && !rtc_dev && g_desc->rtc_name) {
+		rtc_dev = rtc_class_open(g_desc->rtc_name);
+		if (IS_ERR_OR_NULL(rtc_dev)) {
+			rtc_dev = NULL;
+			dev_err(&pdev->dev, "Cannot get RTC %s.\n",
+				g_desc->rtc_name);
+			ret = -ENODEV;
+			goto err_alloc;
+		}
+	}
+
+	if (!desc) {
+		dev_err(&pdev->dev, "No platform data (desc) found.\n");
+		ret = -ENODEV;
+		goto err_alloc;
+	}
+
+	cm = kzalloc(sizeof(struct charger_manager), GFP_KERNEL);
+	if (!cm) {
+		dev_err(&pdev->dev, "Cannot allocate memory.\n");
+		ret = -ENOMEM;
+		goto err_alloc;
+	}
+
+	/* Basic Values. Unspecified are Null or 0 */
+	cm->dev = &pdev->dev;
+	cm->desc = kzalloc(sizeof(struct charger_desc), GFP_KERNEL);
+	if (!cm->desc) {
+		dev_err(&pdev->dev, "Cannot allocate memory.\n");
+		ret = -ENOMEM;
+		goto err_alloc_desc;
+	}
+	memcpy(cm->desc, desc, sizeof(struct charger_desc));
+	cm->last_temp_mC = INT_MIN; /* denotes "unmeasured, yet" */
+
+	if (!desc->charger_regulators || desc->num_charger_regulators < 1) {
+		ret = -EINVAL;
+		dev_err(&pdev->dev, "charger_regulators undefined.\n");
+		goto err_no_charger;
+	}
+
+	if (!desc->psy_charger_stat || !desc->psy_charger_stat[0]) {
+		dev_err(&pdev->dev, "No power supply defined.\n");
+		ret = -EINVAL;
+		goto err_no_charger_stat;
+	}
+
+	/* Counting index only */
+	while (desc->psy_charger_stat[i])
+		i++;
+
+	cm->charger_stat = kzalloc(sizeof(struct power_supply *) * (i + 1),
+				   GFP_KERNEL);
+	if (!cm->charger_stat) {
+		ret = -ENOMEM;
+		goto err_no_charger_stat;
+	}
+
+	for (i = 0; desc->psy_charger_stat[i]; i++) {
+		cm->charger_stat[i] = power_supply_get_by_name(
+					desc->psy_charger_stat[i]);
+		if (!cm->charger_stat[i]) {
+			dev_err(&pdev->dev, "Cannot find power supply "
+					"\"%s\"\n",
+					desc->psy_charger_stat[i]);
+			ret = -ENODEV;
+			goto err_chg_stat;
+		}
+	}
+
+	cm->fuel_gauge = power_supply_get_by_name(desc->psy_fuel_gauge);
+	if (!cm->fuel_gauge) {
+		dev_err(&pdev->dev, "Cannot find power supply \"%s\"\n",
+				desc->psy_fuel_gauge);
+		ret = -ENODEV;
+		goto err_chg_stat;
+	}
+
+	if (desc->polling_interval_ms == 0 ||
+	    msecs_to_jiffies(desc->polling_interval_ms) <= CM_JIFFIES_SMALL) {
+		dev_err(&pdev->dev, "polling_interval_ms is too small\n");
+		ret = -EINVAL;
+		goto err_chg_stat;
+	}
+
+	if (!desc->temperature_out_of_range) {
+		dev_err(&pdev->dev, "there is no temperature_out_of_range\n");
+		ret = -EINVAL;
+		goto err_chg_stat;
+	}
+
+	platform_set_drvdata(pdev, cm);
+
+	ret = regulator_bulk_get(&pdev->dev, desc->num_charger_regulators,
+				 desc->charger_regulators);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot get charger regulators.\n");
+		goto err_chg_stat;
+	}
+
+	ret = try_charger_enable(cm, true);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot enable charger regulators\n");
+		goto err_chg_enable;
+	}
+
+	/* Add to the list */
+	mutex_lock(&cm_list_mtx);
+	list_add(&cm->entry, &cm_list);
+	mutex_unlock(&cm_list_mtx);
+
+	return 0;
+
+err_chg_enable:
+	if (desc->charger_regulators)
+		regulator_bulk_free(desc->num_charger_regulators,
+					desc->charger_regulators);
+err_chg_stat:
+	kfree(cm->charger_stat);
+err_no_charger_stat:
+err_no_charger:
+	kfree(cm->desc);
+err_alloc_desc:
+	kfree(cm);
+err_alloc:
+	return ret;
+}
+
+static int __devexit charger_manager_remove(struct platform_device *pdev)
+{
+	struct charger_manager *cm = platform_get_drvdata(pdev);
+	struct charger_desc *desc = cm->desc;
+
+	/* Remove from the list */
+	mutex_lock(&cm_list_mtx);
+	list_del(&cm->entry);
+	mutex_unlock(&cm_list_mtx);
+
+	if (desc->charger_regulators)
+		regulator_bulk_free(desc->num_charger_regulators,
+					desc->charger_regulators);
+	kfree(cm->charger_stat);
+	kfree(cm->desc);
+	kfree(cm);
+
+	return 0;
+}
+
+const struct platform_device_id charger_manager_id[] = {
+	{ "charger-manager", 0 },
+	{ },
+};
+
+static int cm_suspend_prepare(struct device *dev)
+{
+	struct platform_device *pdev = container_of(dev, struct platform_device,
+						    dev);
+	struct charger_manager *cm = platform_get_drvdata(pdev);
+
+	if (!cm_suspended) {
+		if (rtc_dev) {
+			struct rtc_time tmp;
+			unsigned long now;
+
+			rtc_read_alarm(rtc_dev, &rtc_wkalarm_save);
+			rtc_read_time(rtc_dev, &tmp);
+
+			if (rtc_wkalarm_save.enabled) {
+				rtc_tm_to_time(&rtc_wkalarm_save.time,
+					       &rtc_wkalarm_save_time);
+				rtc_tm_to_time(&tmp, &now);
+				if (now > rtc_wkalarm_save_time)
+					rtc_wkalarm_save_time = 0;
+			} else {
+				rtc_wkalarm_save_time = 0;
+			}
+		}
+		cm_suspended = true;
+	}
+
+	cm->status_save_ext_pwr_inserted = is_ext_pwr_online(cm);
+	cm->status_save_batt = is_batt_present(cm);
+
+	if (!cm_rtc_set) {
+		cm_suspend_duration_ms = 0;
+		cm_rtc_set = cm_setup_timer();
+	}
+
+	return 0;
+}
+
+static void cm_suspend_complete(struct device *dev)
+{
+	struct platform_device *pdev = container_of(dev, struct platform_device,
+						    dev);
+	struct charger_manager *cm = platform_get_drvdata(pdev);
+
+	if (cm_suspended) {
+		if (rtc_dev) {
+			struct rtc_wkalrm tmp;
+
+			rtc_read_alarm(rtc_dev, &tmp);
+			rtc_wkalarm_save.pending = tmp.pending;
+			rtc_set_alarm(rtc_dev, &rtc_wkalarm_save);
+		}
+		cm_suspended = false;
+		cm_rtc_set = false;
+	}
+
+	uevent_notify(cm, NULL);
+}
+
+static const struct dev_pm_ops charger_manager_pm = {
+	.prepare	= cm_suspend_prepare,
+	.complete	= cm_suspend_complete,
+};
+
+static struct platform_driver charger_manager_driver = {
+	.driver = {
+		.name = "charger-manager",
+		.owner = THIS_MODULE,
+		.pm = &charger_manager_pm,
+	},
+	.probe = charger_manager_probe,
+	.remove = __devexit_p(charger_manager_remove),
+	.id_table = charger_manager_id,
+};
+
+static int __init charger_manager_init(void)
+{
+	return platform_driver_register(&charger_manager_driver);
+}
+late_initcall(charger_manager_init);
+
+static void __exit charger_manager_cleanup(void)
+{
+	platform_driver_unregister(&charger_manager_driver);
+}
+module_exit(charger_manager_cleanup);
+
+MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
+MODULE_DESCRIPTION("Charger Manager");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("charger-manager");
diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h
new file mode 100644
index 000000000000..102c5b3f3325
--- /dev/null
+++ b/include/linux/power/charger-manager.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2011 Samsung Electronics Co., Ltd.
+ * MyungJoo.Ham <myungjoo.ham@samsung.com>
+ *
+ * Charger Manager.
+ * This framework enables to control and multiple chargers and to
+ * monitor charging even in the context of suspend-to-RAM with
+ * an interface combining the chargers.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+**/
+
+#ifndef _CHARGER_MANAGER_H
+#define _CHARGER_MANAGER_H
+
+#include <linux/power_supply.h>
+
+enum data_source {
+	CM_FUEL_GAUGE,
+	CM_CHARGER_STAT,
+};
+
+enum polling_modes {
+	CM_POLL_DISABLE = 0,
+	CM_POLL_ALWAYS,
+	CM_POLL_EXTERNAL_POWER_ONLY,
+	CM_POLL_CHARGING_ONLY,
+};
+
+/**
+ * struct charger_global_desc
+ * @rtc_name: the name of RTC used to wake up the system from suspend.
+ * @rtc_only_wakeup:
+ *	If the system is woken up by waekup-sources other than the RTC or
+ *	callbacks, Charger Manager should recognize with
+ *	rtc_only_wakeup() returning false.
+ *	If the RTC given to CM is the only wakeup reason,
+ *	rtc_only_wakeup should return true.
+ */
+struct charger_global_desc {
+	char *rtc_name;
+
+	bool (*rtc_only_wakeup)(void);
+};
+
+/**
+ * struct charger_desc
+ * @polling_mode:
+ *	Determine which polling mode will be used
+ * @polling_interval_ms: interval in millisecond at which
+ *	charger manager will monitor battery health
+ * @battery_present:
+ *	Specify where information for existance of battery can be obtained
+ * @psy_charger_stat: the names of power-supply for chargers
+ * @num_charger_regulator: the number of entries in charger_regulators
+ * @charger_regulators: array of regulator_bulk_data for chargers
+ * @psy_fuel_gauge: the name of power-supply for fuel gauge
+ * @temperature_out_of_range:
+ *	Determine whether the status is overheat or cold or normal.
+ *	return_value > 0: overheat
+ *	return_value == 0: normal
+ *	return_value < 0: cold
+ */
+struct charger_desc {
+	enum polling_modes polling_mode;
+	unsigned int polling_interval_ms;
+
+	enum data_source battery_present;
+
+	char **psy_charger_stat;
+
+	int num_charger_regulators;
+	struct regulator_bulk_data *charger_regulators;
+
+	char *psy_fuel_gauge;
+
+	int (*temperature_out_of_range)(int *mC);
+};
+
+#define PSY_NAME_MAX	30
+
+/**
+ * struct charger_manager
+ * @entry: entry for list
+ * @dev: device pointer
+ * @desc: instance of charger_desc
+ * @fuel_gauge: power_supply for fuel gauge
+ * @charger_stat: array of power_supply for chargers
+ * @charger_enabled: the state of charger
+ * @emergency_stop:
+ *	When setting true, stop charging
+ * @last_temp_mC: the measured temperature in milli-Celsius
+ * @status_save_ext_pwr_inserted:
+ *	saved status of external power before entering suspend-to-RAM
+ * @status_save_batt:
+ *	saved status of battery before entering suspend-to-RAM
+ */
+struct charger_manager {
+	struct list_head entry;
+	struct device *dev;
+	struct charger_desc *desc;
+
+	struct power_supply *fuel_gauge;
+	struct power_supply **charger_stat;
+
+	bool charger_enabled;
+
+	int emergency_stop;
+	int last_temp_mC;
+
+	bool status_save_ext_pwr_inserted;
+	bool status_save_batt;
+};
+
+#ifdef CONFIG_CHARGER_MANAGER
+extern int setup_charger_manager(struct charger_global_desc *gd);
+extern bool cm_suspend_again(void);
+#else
+static void __maybe_unused setup_charger_manager(struct charger_global_desc *gd)
+{ }
+
+static bool __maybe_unused cm_suspend_again(void)
+{
+	return false;
+}
+#endif
+
+#endif /* _CHARGER_MANAGER_H */
-- 
cgit v1.2.3


From ad3d13eee78ec44194bf919a37e2f711e53cbdf0 Mon Sep 17 00:00:00 2001
From: Donggeun Kim <dg77.kim@samsung.com>
Date: Tue, 27 Dec 2011 18:47:49 +0900
Subject: power_supply: Charger-Manager: Add properties for power-supply-class

Charger Manager provides power-supply-class aggregating
information from multiple chargers and a fuel-gauge.

Signed-off-by: Donggeun Kim <dg77.kim@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 Documentation/power/charger-manager.txt |  14 ++
 drivers/power/charger-manager.c         | 295 +++++++++++++++++++++++++++++++-
 include/linux/power/charger-manager.h   |  17 ++
 3 files changed, 325 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/power/charger-manager.txt b/Documentation/power/charger-manager.txt
index 081489f3db25..fdcca991df30 100644
--- a/Documentation/power/charger-manager.txt
+++ b/Documentation/power/charger-manager.txt
@@ -98,6 +98,11 @@ battery), an instance of Charger Manager is attached to it.
 
 struct charger_desc {
 
+char *psy_name;
+	: The power-supply-class name of the battery. Default is
+	"battery" if psy_name is NULL. Users can access the psy entries
+	at "/sys/class/power_supply/[psy_name]/".
+
 enum polling_modes polling_mode;
 	: CM_POLL_DISABLE: do not poll this battery.
 	  CM_POLL_ALWAYS: always poll this battery.
@@ -106,6 +111,12 @@ enum polling_modes polling_mode;
 	  CM_POLL_CHARGING_ONLY: poll this battery if and only if the
 				 battery is being charged.
 
+unsigned int fullbatt_uV;
+	: If specified with a non-zero value, Charger Manager assumes
+	that the battery is full (capacity = 100) if the battery is not being
+	charged and the battery voltage is equal to or greater than
+	fullbatt_uV.
+
 unsigned int polling_interval_ms;
 	: Required polling interval in ms. Charger Manager will poll
 	this battery every polling_interval_ms or more frequently.
@@ -131,10 +142,13 @@ char *psy_fuel_gauge;
 	: Power-supply-class name of the fuel gauge.
 
 int (*temperature_out_of_range)(int *mC);
+bool measure_battery_temp;
 	: This callback returns 0 if the temperature is safe for charging,
 	a positive number if it is too hot to charge, and a negative number
 	if it is too cold to charge. With the variable mC, the callback returns
 	the temperature in 1/1000 of centigrade.
+	The source of temperature can be battery or ambient one according to
+	the value of measure_battery_temp.
 };
 
 5. Other Considerations
diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c
index 727a259ea46c..0378d019efae 100644
--- a/drivers/power/charger-manager.c
+++ b/drivers/power/charger-manager.c
@@ -121,6 +121,32 @@ static bool is_ext_pwr_online(struct charger_manager *cm)
 	return online;
 }
 
+/**
+ * get_batt_uV - Get the voltage level of the battery
+ * @cm: the Charger Manager representing the battery.
+ * @uV: the voltage level returned.
+ *
+ * Returns 0 if there is no error.
+ * Returns a negative value on error.
+ */
+static int get_batt_uV(struct charger_manager *cm, int *uV)
+{
+	union power_supply_propval val;
+	int ret;
+
+	if (cm->fuel_gauge)
+		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+				POWER_SUPPLY_PROP_VOLTAGE_NOW, &val);
+	else
+		return -ENODEV;
+
+	if (ret)
+		return ret;
+
+	*uV = val.intval;
+	return 0;
+}
+
 /**
  * is_charging - Returns true if the battery is being charged.
  * @cm: the Charger Manager representing the battery.
@@ -369,6 +395,208 @@ static bool cm_monitor(void)
 	return stop;
 }
 
+static int charger_get_property(struct power_supply *psy,
+		enum power_supply_property psp,
+		union power_supply_propval *val)
+{
+	struct charger_manager *cm = container_of(psy,
+			struct charger_manager, charger_psy);
+	struct charger_desc *desc = cm->desc;
+	int i, ret = 0, uV;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		if (is_charging(cm))
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+		else if (is_ext_pwr_online(cm))
+			val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		else
+			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		break;
+	case POWER_SUPPLY_PROP_HEALTH:
+		if (cm->emergency_stop > 0)
+			val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
+		else if (cm->emergency_stop < 0)
+			val->intval = POWER_SUPPLY_HEALTH_COLD;
+		else
+			val->intval = POWER_SUPPLY_HEALTH_GOOD;
+		break;
+	case POWER_SUPPLY_PROP_PRESENT:
+		if (is_batt_present(cm))
+			val->intval = 1;
+		else
+			val->intval = 0;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		ret = get_batt_uV(cm, &i);
+		val->intval = i;
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+				POWER_SUPPLY_PROP_CURRENT_NOW, val);
+		break;
+	case POWER_SUPPLY_PROP_TEMP:
+		/* in thenth of centigrade */
+		if (cm->last_temp_mC == INT_MIN)
+			desc->temperature_out_of_range(&cm->last_temp_mC);
+		val->intval = cm->last_temp_mC / 100;
+		if (!desc->measure_battery_temp)
+			ret = -ENODEV;
+		break;
+	case POWER_SUPPLY_PROP_TEMP_AMBIENT:
+		/* in thenth of centigrade */
+		if (cm->last_temp_mC == INT_MIN)
+			desc->temperature_out_of_range(&cm->last_temp_mC);
+		val->intval = cm->last_temp_mC / 100;
+		if (desc->measure_battery_temp)
+			ret = -ENODEV;
+		break;
+	case POWER_SUPPLY_PROP_CAPACITY:
+		if (!cm->fuel_gauge) {
+			ret = -ENODEV;
+			break;
+		}
+
+		if (!is_batt_present(cm)) {
+			/* There is no battery. Assume 100% */
+			val->intval = 100;
+			break;
+		}
+
+		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+					POWER_SUPPLY_PROP_CAPACITY, val);
+		if (ret)
+			break;
+
+		if (val->intval > 100) {
+			val->intval = 100;
+			break;
+		}
+		if (val->intval < 0)
+			val->intval = 0;
+
+		/* Do not adjust SOC when charging: voltage is overrated */
+		if (is_charging(cm))
+			break;
+
+		/*
+		 * If the capacity value is inconsistent, calibrate it base on
+		 * the battery voltage values and the thresholds given as desc
+		 */
+		ret = get_batt_uV(cm, &uV);
+		if (ret) {
+			/* Voltage information not available. No calibration */
+			ret = 0;
+			break;
+		}
+
+		if (desc->fullbatt_uV > 0 && uV >= desc->fullbatt_uV &&
+		    !is_charging(cm)) {
+			val->intval = 100;
+			break;
+		}
+
+		break;
+	case POWER_SUPPLY_PROP_ONLINE:
+		if (is_ext_pwr_online(cm))
+			val->intval = 1;
+		else
+			val->intval = 0;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+		if (cm->fuel_gauge) {
+			if (cm->fuel_gauge->get_property(cm->fuel_gauge,
+			    POWER_SUPPLY_PROP_CHARGE_FULL, val) == 0)
+				break;
+		}
+
+		if (is_ext_pwr_online(cm)) {
+			/* Not full if it's charging. */
+			if (is_charging(cm)) {
+				val->intval = 0;
+				break;
+			}
+			/*
+			 * Full if it's powered but not charging andi
+			 * not forced stop by emergency
+			 */
+			if (!cm->emergency_stop) {
+				val->intval = 1;
+				break;
+			}
+		}
+
+		/* Full if it's over the fullbatt voltage */
+		ret = get_batt_uV(cm, &uV);
+		if (!ret && desc->fullbatt_uV > 0 && uV >= desc->fullbatt_uV &&
+		    !is_charging(cm)) {
+			val->intval = 1;
+			break;
+		}
+
+		/* Full if the cap is 100 */
+		if (cm->fuel_gauge) {
+			ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+					POWER_SUPPLY_PROP_CAPACITY, val);
+			if (!ret && val->intval >= 100 && !is_charging(cm)) {
+				val->intval = 1;
+				break;
+			}
+		}
+
+		val->intval = 0;
+		ret = 0;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+		if (is_charging(cm)) {
+			ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+						POWER_SUPPLY_PROP_CHARGE_NOW,
+						val);
+			if (ret) {
+				val->intval = 1;
+				ret = 0;
+			} else {
+				/* If CHARGE_NOW is supplied, use it */
+				val->intval = (val->intval > 0) ?
+						val->intval : 1;
+			}
+		} else {
+			val->intval = 0;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+	return ret;
+}
+
+#define NUM_CHARGER_PSY_OPTIONAL	(4)
+static enum power_supply_property default_charger_props[] = {
+	/* Guaranteed to provide */
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_CHARGE_FULL,
+	/*
+	 * Optional properties are:
+	 * POWER_SUPPLY_PROP_CHARGE_NOW,
+	 * POWER_SUPPLY_PROP_CURRENT_NOW,
+	 * POWER_SUPPLY_PROP_TEMP, and
+	 * POWER_SUPPLY_PROP_TEMP_AMBIENT,
+	 */
+};
+
+static struct power_supply psy_default = {
+	.name = "battery",
+	.type = POWER_SUPPLY_TYPE_BATTERY,
+	.properties = default_charger_props,
+	.num_properties = ARRAY_SIZE(default_charger_props),
+	.get_property = charger_get_property,
+};
+
 /**
  * cm_setup_timer - For in-suspend monitoring setup wakeup alarm
  *		    for suspend_again.
@@ -532,6 +760,7 @@ static int charger_manager_probe(struct platform_device *pdev)
 	struct charger_desc *desc = dev_get_platdata(&pdev->dev);
 	struct charger_manager *cm;
 	int ret = 0, i = 0;
+	union power_supply_propval val;
 
 	if (g_desc && !rtc_dev && g_desc->rtc_name) {
 		rtc_dev = rtc_class_open(g_desc->rtc_name);
@@ -626,11 +855,68 @@ static int charger_manager_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, cm);
 
+	memcpy(&cm->charger_psy, &psy_default,
+				sizeof(psy_default));
+	if (!desc->psy_name) {
+		strncpy(cm->psy_name_buf, psy_default.name,
+				PSY_NAME_MAX);
+	} else {
+		strncpy(cm->psy_name_buf, desc->psy_name, PSY_NAME_MAX);
+	}
+	cm->charger_psy.name = cm->psy_name_buf;
+
+	/* Allocate for psy properties because they may vary */
+	cm->charger_psy.properties = kzalloc(sizeof(enum power_supply_property)
+				* (ARRAY_SIZE(default_charger_props) +
+				NUM_CHARGER_PSY_OPTIONAL),
+				GFP_KERNEL);
+	if (!cm->charger_psy.properties) {
+		dev_err(&pdev->dev, "Cannot allocate for psy properties.\n");
+		ret = -ENOMEM;
+		goto err_chg_stat;
+	}
+	memcpy(cm->charger_psy.properties, default_charger_props,
+		sizeof(enum power_supply_property) *
+		ARRAY_SIZE(default_charger_props));
+	cm->charger_psy.num_properties = psy_default.num_properties;
+
+	/* Find which optional psy-properties are available */
+	if (!cm->fuel_gauge->get_property(cm->fuel_gauge,
+					  POWER_SUPPLY_PROP_CHARGE_NOW, &val)) {
+		cm->charger_psy.properties[cm->charger_psy.num_properties] =
+				POWER_SUPPLY_PROP_CHARGE_NOW;
+		cm->charger_psy.num_properties++;
+	}
+	if (!cm->fuel_gauge->get_property(cm->fuel_gauge,
+					  POWER_SUPPLY_PROP_CURRENT_NOW,
+					  &val)) {
+		cm->charger_psy.properties[cm->charger_psy.num_properties] =
+				POWER_SUPPLY_PROP_CURRENT_NOW;
+		cm->charger_psy.num_properties++;
+	}
+	if (!desc->measure_battery_temp) {
+		cm->charger_psy.properties[cm->charger_psy.num_properties] =
+				POWER_SUPPLY_PROP_TEMP_AMBIENT;
+		cm->charger_psy.num_properties++;
+	}
+	if (desc->measure_battery_temp) {
+		cm->charger_psy.properties[cm->charger_psy.num_properties] =
+				POWER_SUPPLY_PROP_TEMP;
+		cm->charger_psy.num_properties++;
+	}
+
+	ret = power_supply_register(NULL, &cm->charger_psy);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot register charger-manager with"
+				" name \"%s\".\n", cm->charger_psy.name);
+		goto err_register;
+	}
+
 	ret = regulator_bulk_get(&pdev->dev, desc->num_charger_regulators,
 				 desc->charger_regulators);
 	if (ret) {
 		dev_err(&pdev->dev, "Cannot get charger regulators.\n");
-		goto err_chg_stat;
+		goto err_bulk_get;
 	}
 
 	ret = try_charger_enable(cm, true);
@@ -650,6 +936,10 @@ err_chg_enable:
 	if (desc->charger_regulators)
 		regulator_bulk_free(desc->num_charger_regulators,
 					desc->charger_regulators);
+err_bulk_get:
+	power_supply_unregister(&cm->charger_psy);
+err_register:
+	kfree(cm->charger_psy.properties);
 err_chg_stat:
 	kfree(cm->charger_stat);
 err_no_charger_stat:
@@ -674,6 +964,9 @@ static int __devexit charger_manager_remove(struct platform_device *pdev)
 	if (desc->charger_regulators)
 		regulator_bulk_free(desc->num_charger_regulators,
 					desc->charger_regulators);
+
+	power_supply_unregister(&cm->charger_psy);
+	kfree(cm->charger_psy.properties);
 	kfree(cm->charger_stat);
 	kfree(cm->desc);
 	kfree(cm);
diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h
index 102c5b3f3325..4f75e531c112 100644
--- a/include/linux/power/charger-manager.h
+++ b/include/linux/power/charger-manager.h
@@ -47,8 +47,12 @@ struct charger_global_desc {
 
 /**
  * struct charger_desc
+ * @psy_name: the name of power-supply-class for charger manager
  * @polling_mode:
  *	Determine which polling mode will be used
+ * @fullbatt_uV: voltage in microvolt
+ *	If it is not being charged and VBATT >= fullbatt_uV,
+ *	it is assumed to be full.
  * @polling_interval_ms: interval in millisecond at which
  *	charger manager will monitor battery health
  * @battery_present:
@@ -62,11 +66,18 @@ struct charger_global_desc {
  *	return_value > 0: overheat
  *	return_value == 0: normal
  *	return_value < 0: cold
+ * @measure_battery_temp:
+ *	true: measure battery temperature
+ *	false: measure ambient temperature
  */
 struct charger_desc {
+	char *psy_name;
+
 	enum polling_modes polling_mode;
 	unsigned int polling_interval_ms;
 
+	unsigned int fullbatt_uV;
+
 	enum data_source battery_present;
 
 	char **psy_charger_stat;
@@ -77,6 +88,7 @@ struct charger_desc {
 	char *psy_fuel_gauge;
 
 	int (*temperature_out_of_range)(int *mC);
+	bool measure_battery_temp;
 };
 
 #define PSY_NAME_MAX	30
@@ -92,6 +104,8 @@ struct charger_desc {
  * @emergency_stop:
  *	When setting true, stop charging
  * @last_temp_mC: the measured temperature in milli-Celsius
+ * @psy_name_buf: the name of power-supply-class for charger manager
+ * @charger_psy: power_supply for charger manager
  * @status_save_ext_pwr_inserted:
  *	saved status of external power before entering suspend-to-RAM
  * @status_save_batt:
@@ -110,6 +124,9 @@ struct charger_manager {
 	int emergency_stop;
 	int last_temp_mC;
 
+	char psy_name_buf[PSY_NAME_MAX + 1];
+	struct power_supply charger_psy;
+
 	bool status_save_ext_pwr_inserted;
 	bool status_save_batt;
 };
-- 
cgit v1.2.3


From 9b8872273af6983b246252a6508fa7cf34c69d6e Mon Sep 17 00:00:00 2001
From: "Kim, Milo" <Milo.Kim@ti.com>
Date: Wed, 30 Nov 2011 23:08:33 -0800
Subject: power_supply: Add "unknown" in power supply type

For the default value of power supply type, "unknown" is added.
With default prop value, supply type property can be displayed
as default - "Unknown".

Signed-off-by: Milo(Woogyom) Kim <milo.kim@ti.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/power_supply_sysfs.c | 2 +-
 include/linux/power_supply.h       | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 58cc4906d216..939e2e432553 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -42,7 +42,7 @@ static ssize_t power_supply_show_property(struct device *dev,
 					  struct device_attribute *attr,
 					  char *buf) {
 	static char *type_text[] = {
-		"Battery", "UPS", "Mains", "USB",
+		"Unknown", "Battery", "UPS", "Mains", "USB",
 		"USB_DCP", "USB_CDP", "USB_ACA"
 	};
 	static char *status_text[] = {
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 204c18dfdc9e..9c83e04f6a43 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -123,7 +123,8 @@ enum power_supply_property {
 };
 
 enum power_supply_type {
-	POWER_SUPPLY_TYPE_BATTERY = 0,
+	POWER_SUPPLY_TYPE_UNKNOWN = 0,
+	POWER_SUPPLY_TYPE_BATTERY,
 	POWER_SUPPLY_TYPE_UPS,
 	POWER_SUPPLY_TYPE_MAINS,
 	POWER_SUPPLY_TYPE_USB,		/* Standard Downstream Port */
-- 
cgit v1.2.3


From 620b2736696743fc785b2fc63dbc0fe69cbfe3a7 Mon Sep 17 00:00:00 2001
From: "Kim, Milo" <Milo.Kim@nsc.com>
Date: Wed, 7 Sep 2011 01:56:14 -0700
Subject: lp8727_charger: Add header file

Oops, forgot to 'git add' it. [AV]

Signed-off-by: Woogyom Kim <milo.kim@ti.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 include/linux/lp8727.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100755 include/linux/lp8727.h

(limited to 'include/linux')

diff --git a/include/linux/lp8727.h b/include/linux/lp8727.h
new file mode 100755
index 000000000000..3ec558959a11
--- /dev/null
+++ b/include/linux/lp8727.h
@@ -0,0 +1,54 @@
+/*
+ * lp8727.h - Driver for LP8727 Micro/Mini USB IC with intergrated charger
+ *
+ *			Copyright (C) 2011 National Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _LP8727_H
+#define _LP8727_H
+
+enum lp8727_eoc_level {
+	EOC_5P,
+	EOC_10P,
+	EOC_16P,
+	EOC_20P,
+	EOC_25P,
+	EOC_33P,
+	EOC_50P,
+};
+
+enum lp8727_ichg {
+	ICHG_90mA,
+	ICHG_100mA,
+	ICHG_400mA,
+	ICHG_450mA,
+	ICHG_500mA,
+	ICHG_600mA,
+	ICHG_700mA,
+	ICHG_800mA,
+	ICHG_900mA,
+	ICHG_1000mA,
+};
+
+struct lp8727_chg_param {
+	/* end of charge level setting */
+	enum lp8727_eoc_level eoc_level;
+	/* charging current */
+	enum lp8727_ichg ichg;
+};
+
+struct lp8727_platform_data {
+	u8(*get_batt_present) (void);
+	u16(*get_batt_level) (void);
+	u8(*get_batt_capacity) (void);
+	u8(*get_batt_temp) (void);
+	struct lp8727_chg_param ac;
+	struct lp8727_chg_param usb;
+};
+
+#endif
-- 
cgit v1.2.3


From e57b432d0c91a4c472b2826b21524bdbbf2688d1 Mon Sep 17 00:00:00 2001
From: "Milo(Woogyom) Kim" <milo.kim@ti.com>
Date: Wed, 4 Jan 2012 16:23:11 +0400
Subject: lp8727_charger: Some minor fixes for the header

Pointer coding style changes
: add space between return type and function pointer
  ex) u8(*get_batt_present) (void)
   -> u8 (*get_batt_present) (void)

Signed-off-by: Woogyom Kim <milo.kim@ti.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 include/linux/lp8727.h | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lp8727.h b/include/linux/lp8727.h
index 3ec558959a11..d21fa2865bf4 100755
--- a/include/linux/lp8727.h
+++ b/include/linux/lp8727.h
@@ -1,12 +1,9 @@
 /*
- * lp8727.h - Driver for LP8727 Micro/Mini USB IC with intergrated charger
- *
  *			Copyright (C) 2011 National Semiconductor
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
  */
 
 #ifndef _LP8727_H
@@ -43,10 +40,10 @@ struct lp8727_chg_param {
 };
 
 struct lp8727_platform_data {
-	u8(*get_batt_present) (void);
-	u16(*get_batt_level) (void);
-	u8(*get_batt_capacity) (void);
-	u8(*get_batt_temp) (void);
+	u8 (*get_batt_present)(void);
+	u16 (*get_batt_level)(void);
+	u8 (*get_batt_capacity)(void);
+	u8 (*get_batt_temp)(void);
 	struct lp8727_chg_param ac;
 	struct lp8727_chg_param usb;
 };
-- 
cgit v1.2.3


From 60f98d1839376d30e13f3e452dce2433fad3060e Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Wed, 2 Nov 2011 14:30:58 -0500
Subject: dlm: add recovery callbacks

These new callbacks notify the dlm user about lock recovery.
GFS2, and possibly others, need to be aware of when the dlm
will be doing lock recovery for a failed lockspace member.

In the past, this coordination has been done between dlm and
file system daemons in userspace, which then direct their
kernel counterparts.  These callbacks allow the same
coordination directly, and more simply.

Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/config.c       | 130 ++++++++++++++++++---------------
 fs/dlm/config.h       |  17 ++++-
 fs/dlm/dlm_internal.h |  21 ++----
 fs/dlm/lockspace.c    |  43 +++++++++--
 fs/dlm/member.c       | 197 ++++++++++++++++++++++++++++++++------------------
 fs/dlm/member.h       |   3 +-
 fs/dlm/recoverd.c     |  10 +--
 fs/dlm/user.c         |   5 +-
 fs/gfs2/lock_dlm.c    |   4 +-
 fs/ocfs2/stack_user.c |   4 +-
 include/linux/dlm.h   |  71 ++++++++++++++++--
 11 files changed, 333 insertions(+), 172 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 6cf72fcc0d0c..e7e327d43fa5 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/in.h>
 #include <linux/in6.h>
+#include <linux/dlmconstants.h>
 #include <net/ipv6.h>
 #include <net/sock.h>
 
@@ -36,6 +37,7 @@
 static struct config_group *space_list;
 static struct config_group *comm_list;
 static struct dlm_comm *local_comm;
+static uint32_t dlm_comm_count;
 
 struct dlm_clusters;
 struct dlm_cluster;
@@ -103,6 +105,8 @@ struct dlm_cluster {
 	unsigned int cl_timewarn_cs;
 	unsigned int cl_waitwarn_us;
 	unsigned int cl_new_rsb_count;
+	unsigned int cl_recover_callbacks;
+	char cl_cluster_name[DLM_LOCKSPACE_LEN];
 };
 
 enum {
@@ -118,6 +122,8 @@ enum {
 	CLUSTER_ATTR_TIMEWARN_CS,
 	CLUSTER_ATTR_WAITWARN_US,
 	CLUSTER_ATTR_NEW_RSB_COUNT,
+	CLUSTER_ATTR_RECOVER_CALLBACKS,
+	CLUSTER_ATTR_CLUSTER_NAME,
 };
 
 struct cluster_attribute {
@@ -126,6 +132,27 @@ struct cluster_attribute {
 	ssize_t (*store)(struct dlm_cluster *, const char *, size_t);
 };
 
+static ssize_t cluster_cluster_name_read(struct dlm_cluster *cl, char *buf)
+{
+	return sprintf(buf, "%s\n", cl->cl_cluster_name);
+}
+
+static ssize_t cluster_cluster_name_write(struct dlm_cluster *cl,
+					  const char *buf, size_t len)
+{
+	strncpy(dlm_config.ci_cluster_name, buf, DLM_LOCKSPACE_LEN);
+	strncpy(cl->cl_cluster_name, buf, DLM_LOCKSPACE_LEN);
+	return len;
+}
+
+static struct cluster_attribute cluster_attr_cluster_name = {
+	.attr   = { .ca_owner = THIS_MODULE,
+                    .ca_name = "cluster_name",
+                    .ca_mode = S_IRUGO | S_IWUSR },
+	.show   = cluster_cluster_name_read,
+	.store  = cluster_cluster_name_write,
+};
+
 static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
 			   int *info_field, int check_zero,
 			   const char *buf, size_t len)
@@ -171,6 +198,7 @@ CLUSTER_ATTR(protocol, 0);
 CLUSTER_ATTR(timewarn_cs, 1);
 CLUSTER_ATTR(waitwarn_us, 0);
 CLUSTER_ATTR(new_rsb_count, 0);
+CLUSTER_ATTR(recover_callbacks, 0);
 
 static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -185,6 +213,8 @@ static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
 	[CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
 	[CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr,
+	[CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks.attr,
+	[CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name.attr,
 	NULL,
 };
 
@@ -293,6 +323,7 @@ struct dlm_comms {
 
 struct dlm_comm {
 	struct config_item item;
+	int seq;
 	int nodeid;
 	int local;
 	int addr_count;
@@ -309,6 +340,7 @@ struct dlm_node {
 	int nodeid;
 	int weight;
 	int new;
+	int comm_seq; /* copy of cm->seq when nd->nodeid is set */
 };
 
 static struct configfs_group_operations clusters_ops = {
@@ -455,6 +487,9 @@ static struct config_group *make_cluster(struct config_group *g,
 	cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
 	cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
 	cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count;
+	cl->cl_recover_callbacks = dlm_config.ci_recover_callbacks;
+	memcpy(cl->cl_cluster_name, dlm_config.ci_cluster_name,
+	       DLM_LOCKSPACE_LEN);
 
 	space_list = &sps->ss_group;
 	comm_list = &cms->cs_group;
@@ -558,6 +593,11 @@ static struct config_item *make_comm(struct config_group *g, const char *name)
 		return ERR_PTR(-ENOMEM);
 
 	config_item_init_type_name(&cm->item, name, &comm_type);
+
+	cm->seq = dlm_comm_count++;
+	if (!cm->seq)
+		cm->seq = dlm_comm_count++;
+
 	cm->nodeid = -1;
 	cm->local = 0;
 	cm->addr_count = 0;
@@ -801,7 +841,10 @@ static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf)
 static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
 				 size_t len)
 {
+	uint32_t seq = 0;
 	nd->nodeid = simple_strtol(buf, NULL, 0);
+	dlm_comm_seq(nd->nodeid, &seq);
+	nd->comm_seq = seq;
 	return len;
 }
 
@@ -908,13 +951,13 @@ static void put_comm(struct dlm_comm *cm)
 }
 
 /* caller must free mem */
-int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
-		    int **new_out, int *new_count_out)
+int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
+		     int *count_out)
 {
 	struct dlm_space *sp;
 	struct dlm_node *nd;
-	int i = 0, rv = 0, ids_count = 0, new_count = 0;
-	int *ids, *new;
+	struct dlm_config_node *nodes, *node;
+	int rv, count;
 
 	sp = get_space(lsname);
 	if (!sp)
@@ -927,73 +970,42 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
 		goto out;
 	}
 
-	ids_count = sp->members_count;
+	count = sp->members_count;
 
-	ids = kcalloc(ids_count, sizeof(int), GFP_NOFS);
-	if (!ids) {
+	nodes = kcalloc(count, sizeof(struct dlm_config_node), GFP_NOFS);
+	if (!nodes) {
 		rv = -ENOMEM;
 		goto out;
 	}
 
+	node = nodes;
 	list_for_each_entry(nd, &sp->members, list) {
-		ids[i++] = nd->nodeid;
-		if (nd->new)
-			new_count++;
-	}
-
-	if (ids_count != i)
-		printk(KERN_ERR "dlm: bad nodeid count %d %d\n", ids_count, i);
-
-	if (!new_count)
-		goto out_ids;
+		node->nodeid = nd->nodeid;
+		node->weight = nd->weight;
+		node->new = nd->new;
+		node->comm_seq = nd->comm_seq;
+		node++;
 
-	new = kcalloc(new_count, sizeof(int), GFP_NOFS);
-	if (!new) {
-		kfree(ids);
-		rv = -ENOMEM;
-		goto out;
+		nd->new = 0;
 	}
 
-	i = 0;
-	list_for_each_entry(nd, &sp->members, list) {
-		if (nd->new) {
-			new[i++] = nd->nodeid;
-			nd->new = 0;
-		}
-	}
-	*new_count_out = new_count;
-	*new_out = new;
-
- out_ids:
-	*ids_count_out = ids_count;
-	*ids_out = ids;
+	*count_out = count;
+	*nodes_out = nodes;
+	rv = 0;
  out:
 	mutex_unlock(&sp->members_lock);
 	put_space(sp);
 	return rv;
 }
 
-int dlm_node_weight(char *lsname, int nodeid)
+int dlm_comm_seq(int nodeid, uint32_t *seq)
 {
-	struct dlm_space *sp;
-	struct dlm_node *nd;
-	int w = -EEXIST;
-
-	sp = get_space(lsname);
-	if (!sp)
-		goto out;
-
-	mutex_lock(&sp->members_lock);
-	list_for_each_entry(nd, &sp->members, list) {
-		if (nd->nodeid != nodeid)
-			continue;
-		w = nd->weight;
-		break;
-	}
-	mutex_unlock(&sp->members_lock);
-	put_space(sp);
- out:
-	return w;
+	struct dlm_comm *cm = get_comm(nodeid, NULL);
+	if (!cm)
+		return -EEXIST;
+	*seq = cm->seq;
+	put_comm(cm);
+	return 0;
 }
 
 int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr)
@@ -1047,6 +1059,8 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */
 #define DEFAULT_WAITWARN_US	   0
 #define DEFAULT_NEW_RSB_COUNT    128
+#define DEFAULT_RECOVER_CALLBACKS  0
+#define DEFAULT_CLUSTER_NAME      ""
 
 struct dlm_config_info dlm_config = {
 	.ci_tcp_port = DEFAULT_TCP_PORT,
@@ -1060,6 +1074,8 @@ struct dlm_config_info dlm_config = {
 	.ci_protocol = DEFAULT_PROTOCOL,
 	.ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
 	.ci_waitwarn_us = DEFAULT_WAITWARN_US,
-	.ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT
+	.ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT,
+	.ci_recover_callbacks = DEFAULT_RECOVER_CALLBACKS,
+	.ci_cluster_name = DEFAULT_CLUSTER_NAME
 };
 
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 3099d0dd26c0..9f5e3663bb0c 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -14,6 +14,13 @@
 #ifndef __CONFIG_DOT_H__
 #define __CONFIG_DOT_H__
 
+struct dlm_config_node {
+	int nodeid;
+	int weight;
+	int new;
+	uint32_t comm_seq;
+};
+
 #define DLM_MAX_ADDR_COUNT 3
 
 struct dlm_config_info {
@@ -29,15 +36,17 @@ struct dlm_config_info {
 	int ci_timewarn_cs;
 	int ci_waitwarn_us;
 	int ci_new_rsb_count;
+	int ci_recover_callbacks;
+	char ci_cluster_name[DLM_LOCKSPACE_LEN];
 };
 
 extern struct dlm_config_info dlm_config;
 
 int dlm_config_init(void);
 void dlm_config_exit(void);
-int dlm_node_weight(char *lsname, int nodeid);
-int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
-		    int **new_out, int *new_count_out);
+int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
+		     int *count_out);
+int dlm_comm_seq(int nodeid, uint32_t *seq);
 int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr);
 int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid);
 int dlm_our_nodeid(void);
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index f4d132c76908..3a564d197e99 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2010 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -119,28 +119,18 @@ struct dlm_member {
 	int			weight;
 	int			slot;
 	int			slot_prev;
+	int			comm_seq;
 	uint32_t		generation;
 };
 
-/*
- * low nodeid saves array of these in ls_slots
- */
-
-struct dlm_slot {
-	int			nodeid;
-	int			slot;
-};
-
 /*
  * Save and manage recovery state for a lockspace.
  */
 
 struct dlm_recover {
 	struct list_head	list;
-	int			*nodeids;   /* nodeids of all members */
-	int			node_count;
-	int			*new;       /* nodeids of new members */
-	int			new_count;
+	struct dlm_config_node	*nodes;
+	int			nodes_count;
 	uint64_t		seq;
 };
 
@@ -584,6 +574,9 @@ struct dlm_ls {
 	struct list_head	ls_root_list;	/* root resources */
 	struct rw_semaphore	ls_root_sem;	/* protect root_list */
 
+	const struct dlm_lockspace_ops *ls_ops;
+	void			*ls_ops_arg;
+
 	int			ls_namelen;
 	char			ls_name[1];
 };
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 1441f04bfabe..a1ea25face82 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -386,12 +386,15 @@ static void threads_stop(void)
 	dlm_lowcomms_stop();
 }
 
-static int new_lockspace(const char *name, int namelen, void **lockspace,
-			 uint32_t flags, int lvblen)
+static int new_lockspace(const char *name, const char *cluster,
+			 uint32_t flags, int lvblen,
+			 const struct dlm_lockspace_ops *ops, void *ops_arg,
+			 int *ops_result, dlm_lockspace_t **lockspace)
 {
 	struct dlm_ls *ls;
 	int i, size, error;
 	int do_unreg = 0;
+	int namelen = strlen(name);
 
 	if (namelen > DLM_LOCKSPACE_LEN)
 		return -EINVAL;
@@ -403,8 +406,24 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 		return -EINVAL;
 
 	if (!dlm_user_daemon_available()) {
-		module_put(THIS_MODULE);
-		return -EUNATCH;
+		log_print("dlm user daemon not available");
+		error = -EUNATCH;
+		goto out;
+	}
+
+	if (ops && ops_result) {
+	       	if (!dlm_config.ci_recover_callbacks)
+			*ops_result = -EOPNOTSUPP;
+		else
+			*ops_result = 0;
+	}
+
+	if (dlm_config.ci_recover_callbacks && cluster &&
+	    strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) {
+		log_print("dlm cluster name %s mismatch %s",
+			  dlm_config.ci_cluster_name, cluster);
+		error = -EBADR;
+		goto out;
 	}
 
 	error = 0;
@@ -442,6 +461,11 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 	ls->ls_flags = 0;
 	ls->ls_scan_time = jiffies;
 
+	if (ops && dlm_config.ci_recover_callbacks) {
+		ls->ls_ops = ops;
+		ls->ls_ops_arg = ops_arg;
+	}
+
 	if (flags & DLM_LSFL_TIMEWARN)
 		set_bit(LSFL_TIMEWARN, &ls->ls_flags);
 
@@ -619,8 +643,10 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 	return error;
 }
 
-int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
-		      uint32_t flags, int lvblen)
+int dlm_new_lockspace(const char *name, const char *cluster,
+		      uint32_t flags, int lvblen,
+		      const struct dlm_lockspace_ops *ops, void *ops_arg,
+		      int *ops_result, dlm_lockspace_t **lockspace)
 {
 	int error = 0;
 
@@ -630,7 +656,8 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
 	if (error)
 		goto out;
 
-	error = new_lockspace(name, namelen, lockspace, flags, lvblen);
+	error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg,
+			      ops_result, lockspace);
 	if (!error)
 		ls_count++;
 	if (error > 0)
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index eebc52aae82e..862640a36d5c 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005-2009 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -27,7 +27,7 @@ int dlm_slots_version(struct dlm_header *h)
 }
 
 void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc,
-		  struct dlm_member *memb)
+		   struct dlm_member *memb)
 {
 	struct rcom_config *rf = (struct rcom_config *)rc->rc_buf;
 
@@ -317,59 +317,51 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
 	}
 }
 
-static int dlm_add_member(struct dlm_ls *ls, int nodeid)
+static int dlm_add_member(struct dlm_ls *ls, struct dlm_config_node *node)
 {
 	struct dlm_member *memb;
-	int w, error;
+	int error;
 
 	memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
 	if (!memb)
 		return -ENOMEM;
 
-	w = dlm_node_weight(ls->ls_name, nodeid);
-	if (w < 0) {
-		kfree(memb);
-		return w;
-	}
-
-	error = dlm_lowcomms_connect_node(nodeid);
+	error = dlm_lowcomms_connect_node(node->nodeid);
 	if (error < 0) {
 		kfree(memb);
 		return error;
 	}
 
-	memb->nodeid = nodeid;
-	memb->weight = w;
+	memb->nodeid = node->nodeid;
+	memb->weight = node->weight;
+	memb->comm_seq = node->comm_seq;
 	add_ordered_member(ls, memb);
 	ls->ls_num_nodes++;
 	return 0;
 }
 
-static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb)
-{
-	list_move(&memb->list, &ls->ls_nodes_gone);
-	ls->ls_num_nodes--;
-}
-
-int dlm_is_member(struct dlm_ls *ls, int nodeid)
+static struct dlm_member *find_memb(struct list_head *head, int nodeid)
 {
 	struct dlm_member *memb;
 
-	list_for_each_entry(memb, &ls->ls_nodes, list) {
+	list_for_each_entry(memb, head, list) {
 		if (memb->nodeid == nodeid)
-			return 1;
+			return memb;
 	}
+	return NULL;
+}
+
+int dlm_is_member(struct dlm_ls *ls, int nodeid)
+{
+	if (find_memb(&ls->ls_nodes, nodeid))
+		return 1;
 	return 0;
 }
 
 int dlm_is_removed(struct dlm_ls *ls, int nodeid)
 {
-	struct dlm_member *memb;
-
-	list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
-		if (memb->nodeid == nodeid)
-			return 1;
-	}
+	if (find_memb(&ls->ls_nodes_gone, nodeid))
+		return 1;
 	return 0;
 }
 
@@ -460,10 +452,88 @@ static int ping_members(struct dlm_ls *ls)
 	return error;
 }
 
+static void dlm_lsop_recover_prep(struct dlm_ls *ls)
+{
+	if (!ls->ls_ops || !ls->ls_ops->recover_prep)
+		return;
+	ls->ls_ops->recover_prep(ls->ls_ops_arg);
+}
+
+static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb)
+{
+	struct dlm_slot slot;
+	uint32_t seq;
+	int error;
+
+	if (!ls->ls_ops || !ls->ls_ops->recover_slot)
+		return;
+
+	/* if there is no comms connection with this node
+	   or the present comms connection is newer
+	   than the one when this member was added, then
+	   we consider the node to have failed (versus
+	   being removed due to dlm_release_lockspace) */
+
+	error = dlm_comm_seq(memb->nodeid, &seq);
+
+	if (!error && seq == memb->comm_seq)
+		return;
+
+	slot.nodeid = memb->nodeid;
+	slot.slot = memb->slot;
+
+	ls->ls_ops->recover_slot(ls->ls_ops_arg, &slot);
+}
+
+void dlm_lsop_recover_done(struct dlm_ls *ls)
+{
+	struct dlm_member *memb;
+	struct dlm_slot *slots;
+	int i, num;
+
+	if (!ls->ls_ops || !ls->ls_ops->recover_done)
+		return;
+
+	num = ls->ls_num_nodes;
+
+	slots = kzalloc(num * sizeof(struct dlm_slot), GFP_KERNEL);
+	if (!slots)
+		return;
+
+	i = 0;
+	list_for_each_entry(memb, &ls->ls_nodes, list) {
+		if (i == num) {
+			log_error(ls, "dlm_lsop_recover_done bad num %d", num);
+			goto out;
+		}
+		slots[i].nodeid = memb->nodeid;
+		slots[i].slot = memb->slot;
+		i++;
+	}
+
+	ls->ls_ops->recover_done(ls->ls_ops_arg, slots, num,
+				 ls->ls_slot, ls->ls_generation);
+ out:
+	kfree(slots);
+}
+
+static struct dlm_config_node *find_config_node(struct dlm_recover *rv,
+						int nodeid)
+{
+	int i;
+
+	for (i = 0; i < rv->nodes_count; i++) {
+		if (rv->nodes[i].nodeid == nodeid)
+			return &rv->nodes[i];
+	}
+	return NULL;
+}
+
 int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
 {
 	struct dlm_member *memb, *safe;
-	int i, error, found, pos = 0, neg = 0, low = -1;
+	struct dlm_config_node *node;
+	int i, error, neg = 0, low = -1;
 
 	/* previously removed members that we've not finished removing need to
 	   count as a negative change so the "neg" recovery steps will happen */
@@ -476,46 +546,32 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
 	/* move departed members from ls_nodes to ls_nodes_gone */
 
 	list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
-		found = 0;
-		for (i = 0; i < rv->node_count; i++) {
-			if (memb->nodeid == rv->nodeids[i]) {
-				found = 1;
-				break;
-			}
-		}
+		node = find_config_node(rv, memb->nodeid);
+		if (node && !node->new)
+			continue;
 
-		if (!found) {
-			neg++;
-			dlm_remove_member(ls, memb);
+		if (!node) {
 			log_debug(ls, "remove member %d", memb->nodeid);
+		} else {
+			/* removed and re-added */
+			log_debug(ls, "remove member %d comm_seq %u %u",
+				  memb->nodeid, memb->comm_seq, node->comm_seq);
 		}
-	}
-
-	/* Add an entry to ls_nodes_gone for members that were removed and
-	   then added again, so that previous state for these nodes will be
-	   cleared during recovery. */
 
-	for (i = 0; i < rv->new_count; i++) {
-		if (!dlm_is_member(ls, rv->new[i]))
-			continue;
-		log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
-
-		memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
-		if (!memb)
-			return -ENOMEM;
-		memb->nodeid = rv->new[i];
-		list_add_tail(&memb->list, &ls->ls_nodes_gone);
 		neg++;
+		list_move(&memb->list, &ls->ls_nodes_gone);
+		ls->ls_num_nodes--;
+		dlm_lsop_recover_slot(ls, memb);
 	}
 
 	/* add new members to ls_nodes */
 
-	for (i = 0; i < rv->node_count; i++) {
-		if (dlm_is_member(ls, rv->nodeids[i]))
+	for (i = 0; i < rv->nodes_count; i++) {
+		node = &rv->nodes[i];
+		if (dlm_is_member(ls, node->nodeid))
 			continue;
-		dlm_add_member(ls, rv->nodeids[i]);
-		pos++;
-		log_debug(ls, "add member %d", rv->nodeids[i]);
+		dlm_add_member(ls, node);
+		log_debug(ls, "add member %d", node->nodeid);
 	}
 
 	list_for_each_entry(memb, &ls->ls_nodes, list) {
@@ -609,21 +665,22 @@ int dlm_ls_stop(struct dlm_ls *ls)
 
 	if (!ls->ls_recover_begin)
 		ls->ls_recover_begin = jiffies;
+
+	dlm_lsop_recover_prep(ls);
 	return 0;
 }
 
 int dlm_ls_start(struct dlm_ls *ls)
 {
 	struct dlm_recover *rv = NULL, *rv_old;
-	int *ids = NULL, *new = NULL;
-	int error, ids_count = 0, new_count = 0;
+	struct dlm_config_node *nodes;
+	int error, count;
 
 	rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS);
 	if (!rv)
 		return -ENOMEM;
 
-	error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count,
-				&new, &new_count);
+	error = dlm_config_nodes(ls->ls_name, &nodes, &count);
 	if (error < 0)
 		goto fail;
 
@@ -638,10 +695,8 @@ int dlm_ls_start(struct dlm_ls *ls)
 		goto fail;
 	}
 
-	rv->nodeids = ids;
-	rv->node_count = ids_count;
-	rv->new = new;
-	rv->new_count = new_count;
+	rv->nodes = nodes;
+	rv->nodes_count = count;
 	rv->seq = ++ls->ls_recover_seq;
 	rv_old = ls->ls_recover_args;
 	ls->ls_recover_args = rv;
@@ -649,9 +704,8 @@ int dlm_ls_start(struct dlm_ls *ls)
 
 	if (rv_old) {
 		log_error(ls, "unused recovery %llx %d",
-			  (unsigned long long)rv_old->seq, rv_old->node_count);
-		kfree(rv_old->nodeids);
-		kfree(rv_old->new);
+			  (unsigned long long)rv_old->seq, rv_old->nodes_count);
+		kfree(rv_old->nodes);
 		kfree(rv_old);
 	}
 
@@ -660,8 +714,7 @@ int dlm_ls_start(struct dlm_ls *ls)
 
  fail:
 	kfree(rv);
-	kfree(ids);
-	kfree(new);
+	kfree(nodes);
 	return error;
 }
 
diff --git a/fs/dlm/member.h b/fs/dlm/member.h
index 7e87e8a79dfd..3deb70661c69 100644
--- a/fs/dlm/member.h
+++ b/fs/dlm/member.h
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -27,6 +27,7 @@ void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc);
 int dlm_slots_copy_in(struct dlm_ls *ls);
 int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size,
 		     struct dlm_slot **slots_out, uint32_t *gen_out);
+void dlm_lsop_recover_done(struct dlm_ls *ls);
 
 #endif                          /* __MEMBER_DOT_H__ */
 
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 5a9e1a49a860..3780caf7ae0c 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -227,11 +227,12 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
 	dlm_grant_after_purge(ls);
 
-	log_debug(ls, "dlm_recover %llx done: %u ms",
-		  (unsigned long long)rv->seq,
+	log_debug(ls, "dlm_recover %llx generation %u done: %u ms",
+		  (unsigned long long)rv->seq, ls->ls_generation,
 		  jiffies_to_msecs(jiffies - start));
 	mutex_unlock(&ls->ls_recoverd_active);
 
+	dlm_lsop_recover_done(ls);
 	return 0;
 
  fail:
@@ -259,8 +260,7 @@ static void do_ls_recovery(struct dlm_ls *ls)
 
 	if (rv) {
 		ls_recover(ls, rv);
-		kfree(rv->nodeids);
-		kfree(rv->new);
+		kfree(rv->nodes);
 		kfree(rv);
 	}
 }
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index d8ea60756403..eb4ed9ba3098 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -392,8 +392,9 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	error = dlm_new_lockspace(params->name, strlen(params->name),
-				  &lockspace, params->flags, DLM_USER_LVB_LEN);
+	error = dlm_new_lockspace(params->name, NULL, params->flags,
+				  DLM_USER_LVB_LEN, NULL, NULL, NULL,
+				  &lockspace);
 	if (error)
 		return error;
 
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 98c80d8c2a62..ce85b62bc0a2 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -195,10 +195,10 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname)
 		return -EINVAL;
 	}
 
-	error = dlm_new_lockspace(fsname, strlen(fsname), &ls->ls_dlm,
+	error = dlm_new_lockspace(fsname, NULL, 
 				  DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
 				  (ls->ls_nodir ? DLM_LSFL_NODIR : 0),
-				  GDLM_LVB_SIZE);
+				  GDLM_LVB_SIZE, NULL, NULL, NULL, &ls->ls_dlm);
 	if (error)
 		printk(KERN_ERR "dlm_new_lockspace error %d", error);
 
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index a5ebe421195f..286edf1e231f 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -827,8 +827,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
 		goto out;
 	}
 
-	rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name),
-			       &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN);
+	rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN,
+			       NULL, NULL, NULL, &fsdlm);
 	if (rc) {
 		ocfs2_live_connection_drop(control);
 		goto out;
diff --git a/include/linux/dlm.h b/include/linux/dlm.h
index d4e02f5353a0..6c7f6e9546c7 100644
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -74,15 +74,76 @@ struct dlm_lksb {
 
 #ifdef __KERNEL__
 
+struct dlm_slot {
+	int nodeid; /* 1 to MAX_INT */
+	int slot;   /* 1 to MAX_INT */
+};
+
+/*
+ * recover_prep: called before the dlm begins lock recovery.
+ *   Notfies lockspace user that locks from failed members will be granted.
+ * recover_slot: called after recover_prep and before recover_done.
+ *   Identifies a failed lockspace member.
+ * recover_done: called after the dlm completes lock recovery.
+ *   Identifies lockspace members and lockspace generation number.
+ */
+
+struct dlm_lockspace_ops {
+	void (*recover_prep) (void *ops_arg);
+	void (*recover_slot) (void *ops_arg, struct dlm_slot *slot);
+	void (*recover_done) (void *ops_arg, struct dlm_slot *slots,
+			      int num_slots, int our_slot, uint32_t generation);
+};
+
 /*
  * dlm_new_lockspace
  *
- * Starts a lockspace with the given name.  If the named lockspace exists in
- * the cluster, the calling node joins it.
+ * Create/join a lockspace.
+ *
+ * name: lockspace name, null terminated, up to DLM_LOCKSPACE_LEN (not
+ *   including terminating null).
+ *
+ * cluster: cluster name, null terminated, up to DLM_LOCKSPACE_LEN (not
+ *   including terminating null).  Optional.  When cluster is null, it
+ *   is not used.  When set, dlm_new_lockspace() returns -EBADR if cluster
+ *   is not equal to the dlm cluster name.
+ *
+ * flags:
+ * DLM_LSFL_NODIR
+ *   The dlm should not use a resource directory, but statically assign
+ *   resource mastery to nodes based on the name hash that is otherwise
+ *   used to select the directory node.  Must be the same on all nodes.
+ * DLM_LSFL_TIMEWARN
+ *   The dlm should emit netlink messages if locks have been waiting
+ *   for a configurable amount of time.  (Unused.)
+ * DLM_LSFL_FS
+ *   The lockspace user is in the kernel (i.e. filesystem).  Enables
+ *   direct bast/cast callbacks.
+ * DLM_LSFL_NEWEXCL
+ *   dlm_new_lockspace() should return -EEXIST if the lockspace exists.
+ *
+ * lvblen: length of lvb in bytes.  Must be multiple of 8.
+ *   dlm_new_lockspace() returns an error if this does not match
+ *   what other nodes are using.
+ *
+ * ops: callbacks that indicate lockspace recovery points so the
+ *   caller can coordinate its recovery and know lockspace members.
+ *   This is only used by the initial dlm_new_lockspace() call.
+ *   Optional.
+ *
+ * ops_arg: arg for ops callbacks.
+ *
+ * ops_result: tells caller if the ops callbacks (if provided) will
+ *   be used or not.  0: will be used, -EXXX will not be used.
+ *   -EOPNOTSUPP: the dlm does not have recovery_callbacks enabled.
+ *
+ * lockspace: handle for dlm functions
  */
 
-int dlm_new_lockspace(const char *name, int namelen,
-		      dlm_lockspace_t **lockspace, uint32_t flags, int lvblen);
+int dlm_new_lockspace(const char *name, const char *cluster,
+		      uint32_t flags, int lvblen,
+		      const struct dlm_lockspace_ops *ops, void *ops_arg,
+		      int *ops_result, dlm_lockspace_t **lockspace);
 
 /*
  * dlm_release_lockspace
-- 
cgit v1.2.3


From f2ab2ba09e081fbce068c0adc205ad3f25a3b626 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Fri, 9 Dec 2011 14:11:41 -0600
Subject: gpio: pl061: convert to use 0 for no irq

We don't want drivers using NO_IRQ, so remove its use. For now, 0 or
-1 means no irq until platforms are converted to use 0.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Linus Walleij <linus.ml.walleij@gmail.com>
---
 drivers/gpio/gpio-pl061.c  | 8 ++++----
 include/linux/amba/pl061.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c
index 0f718f9bbd8c..fe19dec4b117 100644
--- a/drivers/gpio/gpio-pl061.c
+++ b/drivers/gpio/gpio-pl061.c
@@ -53,7 +53,7 @@ struct pl061_gpio {
 	spinlock_t		irq_lock;	/* IRQ registers */
 
 	void __iomem		*base;
-	unsigned		irq_base;
+	int			irq_base;
 	struct gpio_chip	gc;
 };
 
@@ -119,7 +119,7 @@ static int pl061_to_irq(struct gpio_chip *gc, unsigned offset)
 {
 	struct pl061_gpio *chip = container_of(gc, struct pl061_gpio, gc);
 
-	if (chip->irq_base == NO_IRQ)
+	if (chip->irq_base <= 0)
 		return -EINVAL;
 
 	return chip->irq_base + offset;
@@ -250,7 +250,7 @@ static int pl061_probe(struct amba_device *dev, const struct amba_id *id)
 		chip->irq_base = pdata->irq_base;
 	} else if (dev->dev.of_node) {
 		chip->gc.base = -1;
-		chip->irq_base = NO_IRQ;
+		chip->irq_base = 0;
 	} else {
 		ret = -ENODEV;
 		goto free_mem;
@@ -290,7 +290,7 @@ static int pl061_probe(struct amba_device *dev, const struct amba_id *id)
 	 * irq_chip support
 	 */
 
-	if (chip->irq_base == NO_IRQ)
+	if (chip->irq_base <= 0)
 		return 0;
 
 	writeb(0, chip->base + GPIOIE); /* disable irqs */
diff --git a/include/linux/amba/pl061.h b/include/linux/amba/pl061.h
index 2412af944f1f..fb83c0453489 100644
--- a/include/linux/amba/pl061.h
+++ b/include/linux/amba/pl061.h
@@ -7,7 +7,7 @@ struct pl061_platform_data {
 	unsigned	gpio_base;
 
 	/* number of the first IRQ.
-	 * If the IRQ functionality in not desired this must be set to NO_IRQ.
+	 * If the IRQ functionality in not desired this must be set to 0.
 	 */
 	unsigned	irq_base;
 
-- 
cgit v1.2.3


From 55664f324c2a1a6386dc88492c5c94aa3d336b93 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 3 Jan 2012 12:04:51 +0000
Subject: ethtool: Allow drivers to select RX NFC rule locations

Define special location values for RX NFC that request the driver to
select the actual rule location.  This allows for implementation on
devices that use hash-based filter lookup, whereas currently the API is
more suited to devices with TCAM lookup or linear search.

In ethtool_set_rxnfc() and the compat wrapper ethtool_ioctl(), copy
the structure back to user-space after insertion so that the actual
location is returned.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 26 ++++++++++++++++++++++++--
 net/core/ethtool.c      | 11 ++++++++++-
 net/socket.c            |  2 +-
 3 files changed, 35 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index b38bf69310ee..d901714120a3 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -489,7 +489,10 @@ struct ethtool_rx_flow_spec {
  * on return.
  *
  * For %ETHTOOL_GRXCLSRLCNT, @rule_cnt is set to the number of defined
- * rules on return.
+ * rules on return.  If @data is non-zero on return then it is the
+ * size of the rule table, plus the flag %RX_CLS_LOC_SPECIAL if the
+ * driver supports any special location values.  If that flag is not
+ * set in @data then special location values should not be used.
  *
  * For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the location of an
  * existing rule on entry and @fs contains the rule on return.
@@ -501,10 +504,23 @@ struct ethtool_rx_flow_spec {
  * must use the second parameter to get_rxnfc() instead of @rule_locs.
  *
  * For %ETHTOOL_SRXCLSRLINS, @fs specifies the rule to add or update.
- * @fs.@location specifies the location to use and must not be ignored.
+ * @fs.@location either specifies the location to use or is a special
+ * location value with %RX_CLS_LOC_SPECIAL flag set.  On return,
+ * @fs.@location is the actual rule location.
  *
  * For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the location of an
  * existing rule on entry.
+ *
+ * A driver supporting the special location values for
+ * %ETHTOOL_SRXCLSRLINS may add the rule at any suitable unused
+ * location, and may remove a rule at a later location (lower
+ * priority) that matches exactly the same set of flows.  The special
+ * values are: %RX_CLS_LOC_ANY, selecting any location;
+ * %RX_CLS_LOC_FIRST, selecting the first suitable location (maximum
+ * priority); and %RX_CLS_LOC_LAST, selecting the last suitable
+ * location (minimum priority).  Additional special values may be
+ * defined in future and drivers must return -%EINVAL for any
+ * unrecognised value.
  */
 struct ethtool_rxnfc {
 	__u32				cmd;
@@ -1141,6 +1157,12 @@ struct ethtool_ops {
 
 #define	RX_CLS_FLOW_DISC	0xffffffffffffffffULL
 
+/* Special RX classification rule insert location values */
+#define RX_CLS_LOC_SPECIAL	0x80000000	/* flag */
+#define RX_CLS_LOC_ANY		0xffffffff
+#define RX_CLS_LOC_FIRST	0xfffffffe
+#define RX_CLS_LOC_LAST		0xfffffffd
+
 /* Reset flags */
 /* The reset() operation must clear the flags for the components which
  * were actually reset.  On successful return, the flags indicate the
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 597732c989ca..e88b80d41f73 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -439,6 +439,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
 {
 	struct ethtool_rxnfc info;
 	size_t info_size = sizeof(info);
+	int rc;
 
 	if (!dev->ethtool_ops->set_rxnfc)
 		return -EOPNOTSUPP;
@@ -454,7 +455,15 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
 	if (copy_from_user(&info, useraddr, info_size))
 		return -EFAULT;
 
-	return dev->ethtool_ops->set_rxnfc(dev, &info);
+	rc = dev->ethtool_ops->set_rxnfc(dev, &info);
+	if (rc)
+		return rc;
+
+	if (cmd == ETHTOOL_SRXCLSRLINS &&
+	    copy_to_user(useraddr, &info, info_size))
+		return -EFAULT;
+
+	return 0;
 }
 
 static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
diff --git a/net/socket.c b/net/socket.c
index e62b4f055071..2cad581318fe 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2758,10 +2758,10 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
 	case ETHTOOL_GRXRINGS:
 	case ETHTOOL_GRXCLSRLCNT:
 	case ETHTOOL_GRXCLSRULE:
+	case ETHTOOL_SRXCLSRLINS:
 		convert_out = true;
 		/* fall through */
 	case ETHTOOL_SRXCLSRLDEL:
-	case ETHTOOL_SRXCLSRLINS:
 		buf_size += sizeof(struct ethtool_rxnfc);
 		convert_in = true;
 		break;
-- 
cgit v1.2.3


From 6cfb5e759d47f037cbd0953ec2c3ceb220ed9e96 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 3 Jan 2012 12:07:59 +0000
Subject: ethtool: Remove ethtool_ops::set_rx_ntuple operation

All implementations have been converted to implement set_rxnfc
instead.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h |  4 ----
 net/core/ethtool.c      | 55 -------------------------------------------------
 2 files changed, 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index d901714120a3..da5b2de99ae4 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -859,8 +859,6 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
  * @reset: Reset (part of) the device, as specified by a bitmask of
  *	flags from &enum ethtool_reset_flags.  Returns a negative
  *	error code or zero.
- * @set_rx_ntuple: Set an RX n-tuple rule.  Returns a negative error code
- *	or zero.
  * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table.
  *	Returns zero if not supported for this specific device.
  * @get_rxfh_indir: Get the contents of the RX flow hash indirection table.
@@ -929,8 +927,6 @@ struct ethtool_ops {
 	int	(*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *);
 	int	(*flash_device)(struct net_device *, struct ethtool_flash *);
 	int	(*reset)(struct net_device *, u32 *);
-	int	(*set_rx_ntuple)(struct net_device *,
-				 struct ethtool_rx_ntuple *);
 	u32	(*get_rxfh_indir_size)(struct net_device *);
 	int	(*get_rxfh_indir)(struct net_device *, u32 *);
 	int	(*set_rxfh_indir)(struct net_device *, const u32 *);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index e88b80d41f73..921aa2b4b415 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -631,58 +631,6 @@ out:
 	return ret;
 }
 
-/*
- * ethtool does not (or did not) set masks for flow parameters that are
- * not specified, so if both value and mask are 0 then this must be
- * treated as equivalent to a mask with all bits set.  Implement that
- * here rather than in drivers.
- */
-static void rx_ntuple_fix_masks(struct ethtool_rx_ntuple_flow_spec *fs)
-{
-	struct ethtool_tcpip4_spec *entry = &fs->h_u.tcp_ip4_spec;
-	struct ethtool_tcpip4_spec *mask = &fs->m_u.tcp_ip4_spec;
-
-	if (fs->flow_type != TCP_V4_FLOW &&
-	    fs->flow_type != UDP_V4_FLOW &&
-	    fs->flow_type != SCTP_V4_FLOW)
-		return;
-
-	if (!(entry->ip4src | mask->ip4src))
-		mask->ip4src = htonl(0xffffffff);
-	if (!(entry->ip4dst | mask->ip4dst))
-		mask->ip4dst = htonl(0xffffffff);
-	if (!(entry->psrc | mask->psrc))
-		mask->psrc = htons(0xffff);
-	if (!(entry->pdst | mask->pdst))
-		mask->pdst = htons(0xffff);
-	if (!(entry->tos | mask->tos))
-		mask->tos = 0xff;
-	if (!(fs->vlan_tag | fs->vlan_tag_mask))
-		fs->vlan_tag_mask = 0xffff;
-	if (!(fs->data | fs->data_mask))
-		fs->data_mask = 0xffffffffffffffffULL;
-}
-
-static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
-						    void __user *useraddr)
-{
-	struct ethtool_rx_ntuple cmd;
-	const struct ethtool_ops *ops = dev->ethtool_ops;
-
-	if (!ops->set_rx_ntuple)
-		return -EOPNOTSUPP;
-
-	if (!(dev->features & NETIF_F_NTUPLE))
-		return -EINVAL;
-
-	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
-		return -EFAULT;
-
-	rx_ntuple_fix_masks(&cmd.fs);
-
-	return ops->set_rx_ntuple(dev, &cmd);
-}
-
 static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_regs regs;
@@ -1495,9 +1443,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_RESET:
 		rc = ethtool_reset(dev, useraddr);
 		break;
-	case ETHTOOL_SRXNTUPLE:
-		rc = ethtool_set_rx_ntuple(dev, useraddr);
-		break;
 	case ETHTOOL_GSSET_INFO:
 		rc = ethtool_get_sset_info(dev, useraddr);
 		break;
-- 
cgit v1.2.3


From cd7d494d0b23673215330963c28138dd0c3fd405 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Sun, 1 Jan 2012 16:11:17 -0300
Subject: [media] dvb: deprecate the usage of ops->info.type

Mark info.type as deprecated inside the header, recommending
the usage of DTV_ENUM_DELSYS DVBv5 command instead.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/dvb/frontend.xml | 4 ++++
 include/linux/dvb/frontend.h                 | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml
index 28d7ea5d5e73..aeaed59d0f1f 100644
--- a/Documentation/DocBook/media/dvb/frontend.xml
+++ b/Documentation/DocBook/media/dvb/frontend.xml
@@ -63,6 +63,10 @@ transmission. The fontend types are given by fe_type_t type, defined as:</para>
 <para>Newer formats like DVB-S2, ISDB-T, ISDB-S and DVB-T2 are not described at the above, as they're
 supported via the new <link linkend="FE_GET_SET_PROPERTY">FE_GET_PROPERTY/FE_GET_SET_PROPERTY</link> ioctl's, using the <link linkend="DTV-DELIVERY-SYSTEM">DTV_DELIVERY_SYSTEM</link> parameter.
 </para>
+
+<para>The usage of this field is deprecated, as it doesn't report all supported standards, and
+will provide an incomplete information for frontends that support multiple delivery systems.
+Please use <link linkend="DTV_ENUM_DELSYS">DTV_ENUM_DELSYS</link> instead.</para>
 </section>
 
 <section id="fe-caps-t">
diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index 7e7cb64f56d8..cb4428ab81ed 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -72,7 +72,7 @@ typedef enum fe_caps {
 
 struct dvb_frontend_info {
 	char       name[128];
-	fe_type_t  type;
+	fe_type_t  type;			/* DEPRECATED. Use DTV_ENUM_DELSYS instead */
 	__u32      frequency_min;
 	__u32      frequency_max;
 	__u32      frequency_stepsize;
-- 
cgit v1.2.3


From 288e0713f469c03dbc412153b5341d6dfc2c9907 Mon Sep 17 00:00:00 2001
From: Ilan Elias <ilane@ti.com>
Date: Thu, 22 Dec 2011 11:51:54 +0200
Subject: NFC: Export a new attribute nfcid1 in target info

The nfcid1 is the NFC-A identifier.
It is exported as an attribute of the target info
(returned as a response to NFC_CMD_GET_TARGET).

Signed-off-by: Ilan Elias <ilane@ti.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nfc.h   | 2 ++
 include/net/nfc/nfc.h | 3 +++
 net/nfc/nci/ntf.c     | 6 ++++++
 net/nfc/netlink.c     | 3 +++
 4 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfc.h b/include/linux/nfc.h
index 89fee4ab1904..01d4e5d60325 100644
--- a/include/linux/nfc.h
+++ b/include/linux/nfc.h
@@ -88,6 +88,7 @@ enum nfc_commands {
  * @NFC_ATTR_TARGET_SENS_RES: NFC-A targets extra information such as NFCID
  * @NFC_ATTR_TARGET_SEL_RES: NFC-A targets extra information (useful if the
  *	target is not NFC-Forum compliant)
+ * @NFC_ATTR_TARGET_NFCID1: NFC-A targets identifier, max 10 bytes
  * @NFC_ATTR_COMM_MODE: Passive or active mode
  * @NFC_ATTR_RF_MODE: Initiator or target
  */
@@ -99,6 +100,7 @@ enum nfc_attrs {
 	NFC_ATTR_TARGET_INDEX,
 	NFC_ATTR_TARGET_SENS_RES,
 	NFC_ATTR_TARGET_SEL_RES,
+	NFC_ATTR_TARGET_NFCID1,
 	NFC_ATTR_COMM_MODE,
 	NFC_ATTR_RF_MODE,
 /* private: internal use only */
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index ccfe757a94ec..8696b773a695 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -65,12 +65,15 @@ struct nfc_ops {
 
 #define NFC_TARGET_IDX_ANY -1
 #define NFC_MAX_GT_LEN 48
+#define NFC_MAX_NFCID1_LEN 10
 
 struct nfc_target {
 	u32 idx;
 	u32 supported_protocols;
 	u16 sens_res;
 	u8 sel_res;
+	u8 nfcid1_len;
+	u8 nfcid1[NFC_MAX_NFCID1_LEN];
 };
 
 struct nfc_genl_data {
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 352f7a2321d9..b16a8dc2afbe 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -154,6 +154,12 @@ static void nci_target_found(struct nci_dev *ndev,
 
 	nfc_tgt.sens_res = ntf->rf_tech_specific_params.nfca_poll.sens_res;
 	nfc_tgt.sel_res = ntf->rf_tech_specific_params.nfca_poll.sel_res;
+	nfc_tgt.nfcid1_len = ntf->rf_tech_specific_params.nfca_poll.nfcid1_len;
+	if (nfc_tgt.nfcid1_len > 0) {
+		memcpy(nfc_tgt.nfcid1,
+			ntf->rf_tech_specific_params.nfca_poll.nfcid1,
+			nfc_tgt.nfcid1_len);
+	}
 
 	if (!(nfc_tgt.supported_protocols & ndev->poll_prots)) {
 		pr_debug("the target found does not have the desired protocol\n");
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 43a1c47756a7..6989dfa28ee2 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -67,6 +67,9 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
 				target->supported_protocols);
 	NLA_PUT_U16(msg, NFC_ATTR_TARGET_SENS_RES, target->sens_res);
 	NLA_PUT_U8(msg, NFC_ATTR_TARGET_SEL_RES, target->sel_res);
+	if (target->nfcid1_len > 0)
+		NLA_PUT(msg, NFC_ATTR_TARGET_NFCID1, target->nfcid1_len,
+				target->nfcid1);
 
 	return genlmsg_end(msg, hdr);
 
-- 
cgit v1.2.3


From b9d4e714a86a4e88c2f530c76597f7025e5851d6 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed, 4 Jan 2012 15:05:10 -0800
Subject: driver core: remove __must_check from device_create_file

With the conversion of the sysdev to a real struct device, more drivers
are calling device_create_file, and some of them don't check the return
value, which isn't wise.

But as they happen to be in parts of the kernel where a warning is
considered an error (i.e. powerpc), this breaks the build.  So for now,
remove the marking on the function, which fixes the build problems.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 7f9fc1505e94..acf505e4fe94 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -510,8 +510,8 @@ ssize_t device_store_int(struct device *dev, struct device_attribute *attr,
 	struct dev_ext_attribute dev_attr_##_name = \
 		{ __ATTR(_name, _mode, device_show_ulong, device_store_ulong), &(_var) }
 
-extern int __must_check device_create_file(struct device *device,
-					const struct device_attribute *entry);
+extern int device_create_file(struct device *device,
+			      const struct device_attribute *entry);
 extern void device_remove_file(struct device *dev,
 			       const struct device_attribute *attr);
 extern int __must_check device_create_bin_file(struct device *dev,
-- 
cgit v1.2.3


From 18b7ede5f7ee2092aedcb578d3ac30bd5d4fc23c Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Mon, 2 Jan 2012 13:35:41 +0200
Subject: usb: ch9: fix up MaxStreams helper

According to USB 3.0 Specification Table 9-22, if
bmAttributes [4:0] are set to zero, it means "no
streams supported", but the way this helper was
defined on Linux, we will *always* have one stream
which might cause several problems.

For example on DWC3, we would tell the controller
endpoint has streams enabled and yet start transfers
with Stream ID set to 0, which would goof up the host
side.

While doing that, convert the macro to an inline
function due to the different checks we now need.

Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/dwc3/gadget.c |  3 +--
 drivers/usb/host/xhci.c   |  3 +--
 include/linux/usb/ch9.h   | 20 +++++++++++++++++++-
 3 files changed, 21 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 4c6bedad51fd..a696bde53222 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -297,8 +297,7 @@ static int dwc3_gadget_set_ep_config(struct dwc3 *dwc, struct dwc3_ep *dep,
 	params.param1 = DWC3_DEPCFG_XFER_COMPLETE_EN
 		| DWC3_DEPCFG_XFER_NOT_READY_EN;
 
-	if (comp_desc && USB_SS_MAX_STREAMS(comp_desc->bmAttributes)
-			&& usb_endpoint_xfer_bulk(desc)) {
+	if (usb_ss_max_streams(comp_desc) && usb_endpoint_xfer_bulk(desc)) {
 		params.param1 |= DWC3_DEPCFG_STREAM_CAPABLE
 			| DWC3_DEPCFG_STREAM_EVENT_EN;
 		dep->stream_capable = true;
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index f3d0b8d96440..dda84756c465 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -2799,8 +2799,7 @@ static int xhci_calculate_streams_and_bitmask(struct xhci_hcd *xhci,
 		if (ret < 0)
 			return ret;
 
-		max_streams = USB_SS_MAX_STREAMS(
-				eps[i]->ss_ep_comp.bmAttributes);
+		max_streams = usb_ss_max_streams(&eps[i]->ss_ep_comp);
 		if (max_streams < (*num_streams - 1)) {
 			xhci_dbg(xhci, "Ep 0x%x only supports %u stream IDs.\n",
 					eps[i]->desc.bEndpointAddress,
diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index d5da6c68c250..61b29057b054 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -605,8 +605,26 @@ struct usb_ss_ep_comp_descriptor {
 } __attribute__ ((packed));
 
 #define USB_DT_SS_EP_COMP_SIZE		6
+
 /* Bits 4:0 of bmAttributes if this is a bulk endpoint */
-#define USB_SS_MAX_STREAMS(p)		(1 << ((p) & 0x1f))
+static inline int
+usb_ss_max_streams(const struct usb_ss_ep_comp_descriptor *comp)
+{
+	int		max_streams;
+
+	if (!comp)
+		return 0;
+
+	max_streams = comp->bmAttributes & 0x1f;
+
+	if (!max_streams)
+		return 0;
+
+	max_streams = 1 << max_streams;
+
+	return max_streams;
+}
+
 /* Bits 1:0 of bmAttributes if this is an isoc endpoint */
 #define USB_SS_MULT(p)			(1 + ((p) & 0x3))
 
-- 
cgit v1.2.3


From 4d447c9a6ebc0142d320f075c5bac6d202a79fd4 Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Thu, 15 Dec 2011 13:45:17 -0800
Subject: net/hyperv: Add support for jumbo frame up to 64KB

Allow the user set the MTU up to 65536 for Linux guests running on
Hyper-V 2008 R2 or later.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/net/hyperv/hyperv_net.h |  8 ++---
 drivers/net/hyperv/netvsc.c     |  6 ++--
 drivers/net/hyperv/netvsc_drv.c | 70 +++++++++++++++++++++++++++++++++++------
 include/linux/hyperv.h          |  2 +-
 4 files changed, 68 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 287767055125..dec5836ae075 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -456,12 +456,9 @@ struct nvsp_message {
 } __packed;
 
 
+#define NETVSC_MTU 65536
 
-
-/* #define NVSC_MIN_PROTOCOL_VERSION		1 */
-/* #define NVSC_MAX_PROTOCOL_VERSION		1 */
-
-#define NETVSC_RECEIVE_BUFFER_SIZE		(1024*1024)	/* 1MB */
+#define NETVSC_RECEIVE_BUFFER_SIZE		(1024*1024*2)	/* 2MB */
 
 #define NETVSC_RECEIVE_BUFFER_ID		0xcafe
 
@@ -479,6 +476,7 @@ struct netvsc_device {
 	u32 nvsp_version;
 
 	atomic_t num_outstanding_sends;
+	bool start_remove;
 	bool destroy;
 	/*
 	 * List of free preallocated hv_netvsc_packet to represent receive
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 46828b4dd8ab..8965b45ce5a5 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -42,7 +42,7 @@ static struct netvsc_device *alloc_net_device(struct hv_device *device)
 	if (!net_device)
 		return NULL;
 
-
+	net_device->start_remove = false;
 	net_device->destroy = false;
 	net_device->dev = device;
 	net_device->ndev = ndev;
@@ -299,7 +299,7 @@ static int negotiate_nvsp_ver(struct hv_device *device,
 	/* NVSPv2 only: Send NDIS config */
 	memset(init_packet, 0, sizeof(struct nvsp_message));
 	init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
-	init_packet->msg.v2_msg.send_ndis_config.mtu = ETH_DATA_LEN;
+	init_packet->msg.v2_msg.send_ndis_config.mtu = net_device->ndev->mtu;
 
 	ret = vmbus_sendpacket(device->channel, init_packet,
 				sizeof(struct nvsp_message),
@@ -464,7 +464,7 @@ static void netvsc_send_completion(struct hv_device *device,
 
 		atomic_dec(&net_device->num_outstanding_sends);
 
-		if (netif_queue_stopped(ndev))
+		if (netif_queue_stopped(ndev) && !net_device->start_remove)
 			netif_wake_queue(ndev);
 	} else {
 		netdev_err(ndev, "Unknown send completion packet type- "
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index b7cbd126f20a..462d05f05e84 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -148,10 +148,12 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 	struct net_device_context *net_device_ctx = netdev_priv(net);
 	struct hv_netvsc_packet *packet;
 	int ret;
-	unsigned int i, num_pages;
+	unsigned int i, num_pages, npg_data;
 
-	/* Add 1 for skb->data and additional one for RNDIS */
-	num_pages = skb_shinfo(skb)->nr_frags + 1 + 1;
+	/* Add multipage for skb->data and additional one for RNDIS */
+	npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1)
+		>> PAGE_SHIFT) - ((unsigned long)skb->data >> PAGE_SHIFT) + 1;
+	num_pages = skb_shinfo(skb)->nr_frags + npg_data + 1;
 
 	/* Allocate a netvsc packet based on # of frags. */
 	packet = kzalloc(sizeof(struct hv_netvsc_packet) +
@@ -174,21 +176,36 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 	packet->page_buf_cnt = num_pages;
 
 	/* Initialize it from the skb */
-	packet->total_data_buflen	= skb->len;
+	packet->total_data_buflen = skb->len;
 
 	/* Start filling in the page buffers starting after RNDIS buffer. */
 	packet->page_buf[1].pfn = virt_to_phys(skb->data) >> PAGE_SHIFT;
 	packet->page_buf[1].offset
 		= (unsigned long)skb->data & (PAGE_SIZE - 1);
-	packet->page_buf[1].len = skb_headlen(skb);
+	if (npg_data == 1)
+		packet->page_buf[1].len = skb_headlen(skb);
+	else
+		packet->page_buf[1].len = PAGE_SIZE
+			- packet->page_buf[1].offset;
+
+	for (i = 2; i <= npg_data; i++) {
+		packet->page_buf[i].pfn = virt_to_phys(skb->data
+			+ PAGE_SIZE * (i-1)) >> PAGE_SHIFT;
+		packet->page_buf[i].offset = 0;
+		packet->page_buf[i].len = PAGE_SIZE;
+	}
+	if (npg_data > 1)
+		packet->page_buf[npg_data].len = (((unsigned long)skb->data
+			+ skb_headlen(skb) - 1) & (PAGE_SIZE - 1)) + 1;
 
 	/* Additional fragments are after SKB data */
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[i];
 
-		packet->page_buf[i+2].pfn = page_to_pfn(skb_frag_page(f));
-		packet->page_buf[i+2].offset = f->page_offset;
-		packet->page_buf[i+2].len = skb_frag_size(f);
+		packet->page_buf[i+npg_data+1].pfn =
+			page_to_pfn(skb_frag_page(f));
+		packet->page_buf[i+npg_data+1].offset = f->page_offset;
+		packet->page_buf[i+npg_data+1].len = skb_frag_size(f);
 	}
 
 	/* Set the completion routine */
@@ -300,6 +317,39 @@ static void netvsc_get_drvinfo(struct net_device *net,
 	strcpy(info->fw_version, "N/A");
 }
 
+static int netvsc_change_mtu(struct net_device *ndev, int mtu)
+{
+	struct net_device_context *ndevctx = netdev_priv(ndev);
+	struct hv_device *hdev =  ndevctx->device_ctx;
+	struct netvsc_device *nvdev = hv_get_drvdata(hdev);
+	struct netvsc_device_info device_info;
+	int limit = ETH_DATA_LEN;
+
+	if (nvdev == NULL || nvdev->destroy)
+		return -ENODEV;
+
+	if (nvdev->nvsp_version == NVSP_PROTOCOL_VERSION_2)
+		limit = NETVSC_MTU;
+
+	if (mtu < 68 || mtu > limit)
+		return -EINVAL;
+
+	nvdev->start_remove = true;
+	cancel_delayed_work_sync(&ndevctx->dwork);
+	netif_stop_queue(ndev);
+	rndis_filter_device_remove(hdev);
+
+	ndev->mtu = mtu;
+
+	ndevctx->device_ctx = hdev;
+	hv_set_drvdata(hdev, ndev);
+	device_info.ring_size = ring_size;
+	rndis_filter_device_add(hdev, &device_info);
+	netif_wake_queue(ndev);
+
+	return 0;
+}
+
 static const struct ethtool_ops ethtool_ops = {
 	.get_drvinfo	= netvsc_get_drvinfo,
 	.get_link	= ethtool_op_get_link,
@@ -310,7 +360,7 @@ static const struct net_device_ops device_ops = {
 	.ndo_stop =			netvsc_close,
 	.ndo_start_xmit =		netvsc_start_xmit,
 	.ndo_set_rx_mode =		netvsc_set_multicast_list,
-	.ndo_change_mtu =		eth_change_mtu,
+	.ndo_change_mtu =		netvsc_change_mtu,
 	.ndo_validate_addr =		eth_validate_addr,
 	.ndo_set_mac_address =		eth_mac_addr,
 };
@@ -403,6 +453,8 @@ static int netvsc_remove(struct hv_device *dev)
 		return 0;
 	}
 
+	net_device->start_remove = true;
+
 	ndev_ctx = netdev_priv(net);
 	cancel_delayed_work_sync(&ndev_ctx->dwork);
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 12ec328481de..62b908e0e591 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -35,7 +35,7 @@
 #include <linux/mod_devicetable.h>
 
 
-#define MAX_PAGE_BUFFER_COUNT				16
+#define MAX_PAGE_BUFFER_COUNT				18
 #define MAX_MULTIPAGE_BUFFER_COUNT			32 /* 128K */
 
 #pragma pack(push, 1)
-- 
cgit v1.2.3


From 68c97153fb7f2877f98aa6c29546381d9cad2fed Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 3 Jan 2012 13:22:46 -0500
Subject: SUNRPC: Clean up the RPCSEC_GSS service ticket requests

Instead of hacking specific service names into gss_encode_v1_msg, we should
just allow the caller to specify the service name explicitly.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfs/client.c                 |  2 +-
 fs/nfsd/nfs4callback.c          |  2 +-
 include/linux/sunrpc/auth.h     |  3 ++-
 include/linux/sunrpc/auth_gss.h |  2 +-
 net/sunrpc/auth_generic.c       |  6 ++++--
 net/sunrpc/auth_gss/auth_gss.c  | 40 +++++++++++++++++++++++-----------------
 6 files changed, 32 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 873bf00d51a2..32ea37198e93 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -185,7 +185,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	clp->cl_minorversion = cl_init->minorversion;
 	clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
 #endif
-	cred = rpc_lookup_machine_cred();
+	cred = rpc_lookup_machine_cred("*");
 	if (!IS_ERR(cred))
 		clp->cl_machine_cred = cred;
 	nfs_fscache_get_client_cookie(clp);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7748d6a18d97..6f3ebb48b12f 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -718,7 +718,7 @@ int set_callback_cred(void)
 {
 	if (callback_cred)
 		return 0;
-	callback_cred = rpc_lookup_machine_cred();
+	callback_cred = rpc_lookup_machine_cred("nfs");
 	if (!callback_cred)
 		return -ENOMEM;
 	return 0;
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index febc4dbec2ca..7874a8a56638 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -26,6 +26,7 @@ struct auth_cred {
 	uid_t	uid;
 	gid_t	gid;
 	struct group_info *group_info;
+	const char *principal;
 	unsigned char machine_cred : 1;
 };
 
@@ -127,7 +128,7 @@ void			rpc_destroy_generic_auth(void);
 void 			rpc_destroy_authunix(void);
 
 struct rpc_cred *	rpc_lookup_cred(void);
-struct rpc_cred *	rpc_lookup_machine_cred(void);
+struct rpc_cred *	rpc_lookup_machine_cred(const char *service_name);
 int			rpcauth_register(const struct rpc_authops *);
 int			rpcauth_unregister(const struct rpc_authops *);
 struct rpc_auth *	rpcauth_create(rpc_authflavor_t, struct rpc_clnt *);
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index 8eee9dbbfe7a..f1cfd4c85cd0 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -82,8 +82,8 @@ struct gss_cred {
 	enum rpc_gss_svc	gc_service;
 	struct gss_cl_ctx __rcu	*gc_ctx;
 	struct gss_upcall_msg	*gc_upcall;
+	const char		*gc_principal;
 	unsigned long		gc_upcall_timestamp;
-	unsigned char		gc_machine_cred : 1;
 };
 
 #endif /* __KERNEL__ */
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index e010a015d996..1426ec3d0a53 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -41,15 +41,17 @@ EXPORT_SYMBOL_GPL(rpc_lookup_cred);
 /*
  * Public call interface for looking up machine creds.
  */
-struct rpc_cred *rpc_lookup_machine_cred(void)
+struct rpc_cred *rpc_lookup_machine_cred(const char *service_name)
 {
 	struct auth_cred acred = {
 		.uid = RPC_MACHINE_CRED_USERID,
 		.gid = RPC_MACHINE_CRED_GROUPID,
+		.principal = service_name,
 		.machine_cred = 1,
 	};
 
-	dprintk("RPC:       looking up machine cred\n");
+	dprintk("RPC:       looking up machine cred for service %s\n",
+			service_name);
 	return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0);
 }
 EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index afb56553dfe7..28d72d298735 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -392,7 +392,8 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
 }
 
 static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
-				struct rpc_clnt *clnt, int machine_cred)
+				struct rpc_clnt *clnt,
+				const char *service_name)
 {
 	struct gss_api_mech *mech = gss_msg->auth->mech;
 	char *p = gss_msg->databuf;
@@ -407,12 +408,8 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 		p += len;
 		gss_msg->msg.len += len;
 	}
-	if (machine_cred) {
-		len = sprintf(p, "service=* ");
-		p += len;
-		gss_msg->msg.len += len;
-	} else if (!strcmp(clnt->cl_program->name, "nfs4_cb")) {
-		len = sprintf(p, "service=nfs ");
+	if (service_name != NULL) {
+		len = sprintf(p, "service=%s ", service_name);
 		p += len;
 		gss_msg->msg.len += len;
 	}
@@ -429,17 +426,18 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 }
 
 static void gss_encode_msg(struct gss_upcall_msg *gss_msg,
-				struct rpc_clnt *clnt, int machine_cred)
+				struct rpc_clnt *clnt,
+				const char *service_name)
 {
 	if (pipe_version == 0)
 		gss_encode_v0_msg(gss_msg);
 	else /* pipe_version == 1 */
-		gss_encode_v1_msg(gss_msg, clnt, machine_cred);
+		gss_encode_v1_msg(gss_msg, clnt, service_name);
 }
 
-static inline struct gss_upcall_msg *
-gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid, struct rpc_clnt *clnt,
-		int machine_cred)
+static struct gss_upcall_msg *
+gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt,
+		uid_t uid, const char *service_name)
 {
 	struct gss_upcall_msg *gss_msg;
 	int vers;
@@ -459,7 +457,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid, struct rpc_clnt *clnt,
 	atomic_set(&gss_msg->count, 1);
 	gss_msg->uid = uid;
 	gss_msg->auth = gss_auth;
-	gss_encode_msg(gss_msg, clnt, machine_cred);
+	gss_encode_msg(gss_msg, clnt, service_name);
 	return gss_msg;
 }
 
@@ -471,7 +469,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr
 	struct gss_upcall_msg *gss_new, *gss_msg;
 	uid_t uid = cred->cr_uid;
 
-	gss_new = gss_alloc_msg(gss_auth, uid, clnt, gss_cred->gc_machine_cred);
+	gss_new = gss_alloc_msg(gss_auth, clnt, uid, gss_cred->gc_principal);
 	if (IS_ERR(gss_new))
 		return gss_new;
 	gss_msg = gss_add_msg(gss_new);
@@ -995,7 +993,9 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 	 */
 	cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW;
 	cred->gc_service = gss_auth->service;
-	cred->gc_machine_cred = acred->machine_cred;
+	cred->gc_principal = NULL;
+	if (acred->machine_cred)
+		cred->gc_principal = acred->principal;
 	kref_get(&gss_auth->kref);
 	return &cred->gc_base;
 
@@ -1030,7 +1030,12 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
 	if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags))
 		return 0;
 out:
-	if (acred->machine_cred != gss_cred->gc_machine_cred)
+	if (acred->principal != NULL) {
+		if (gss_cred->gc_principal == NULL)
+			return 0;
+		return strcmp(acred->principal, gss_cred->gc_principal) == 0;
+	}
+	if (gss_cred->gc_principal != NULL)
 		return 0;
 	return rc->cr_uid == acred->uid;
 }
@@ -1104,7 +1109,8 @@ static int gss_renew_cred(struct rpc_task *task)
 	struct rpc_auth *auth = oldcred->cr_auth;
 	struct auth_cred acred = {
 		.uid = oldcred->cr_uid,
-		.machine_cred = gss_cred->gc_machine_cred,
+		.principal = gss_cred->gc_principal,
+		.machine_cred = (gss_cred->gc_principal != NULL ? 1 : 0),
 	};
 	struct rpc_cred *new;
 
-- 
cgit v1.2.3


From bf118a342f10dafe44b14451a1392c3254629a1f Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 7 Dec 2011 11:55:27 -0500
Subject: NFSv4: include bitmap in nfsv4 get acl data

The NFSv4 bitmap size is unbounded: a server can return an arbitrary
sized bitmap in an FATTR4_WORD0_ACL request.  Replace using the
nfs4_fattr_bitmap_maxsz as a guess to the maximum bitmask returned by a server
with the inclusion of the bitmap (xdr length plus bitmasks) and the acl data
xdr length to the (cached) acl page data.

This is a general solution to commit e5012d1f "NFSv4.1: update
nfs4_fattr_bitmap_maxsz" and fixes hitting a BUG_ON in xdr_shrink_bufhead
when getting ACLs.

Fix a bug in decode_getacl that returned -EINVAL on ACLs > page when getxattr
was called with a NULL buffer, preventing ACL > PAGE_SIZE from being retrieved.

Cc: stable@kernel.org
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c          | 96 +++++++++++++++++++++++++++-------------------
 fs/nfs/nfs4xdr.c           | 31 +++++++++++----
 include/linux/nfs_xdr.h    |  5 +++
 include/linux/sunrpc/xdr.h |  2 +
 net/sunrpc/xdr.c           |  3 +-
 5 files changed, 89 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index fcc2408d7ab0..3b1080118452 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3426,19 +3426,6 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server)
  */
 #define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT)
 
-static void buf_to_pages(const void *buf, size_t buflen,
-		struct page **pages, unsigned int *pgbase)
-{
-	const void *p = buf;
-
-	*pgbase = offset_in_page(buf);
-	p -= *pgbase;
-	while (p < buf + buflen) {
-		*(pages++) = virt_to_page(p);
-		p += PAGE_CACHE_SIZE;
-	}
-}
-
 static int buf_to_pages_noslab(const void *buf, size_t buflen,
 		struct page **pages, unsigned int *pgbase)
 {
@@ -3535,9 +3522,19 @@ out:
 	nfs4_set_cached_acl(inode, acl);
 }
 
+/*
+ * The getxattr API returns the required buffer length when called with a
+ * NULL buf. The NFSv4 acl tool then calls getxattr again after allocating
+ * the required buf.  On a NULL buf, we send a page of data to the server
+ * guessing that the ACL request can be serviced by a page. If so, we cache
+ * up to the page of ACL data, and the 2nd call to getxattr is serviced by
+ * the cache. If not so, we throw away the page, and cache the required
+ * length. The next getxattr call will then produce another round trip to
+ * the server, this time with the input buf of the required size.
+ */
 static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
 {
-	struct page *pages[NFS4ACL_MAXPAGES];
+	struct page *pages[NFS4ACL_MAXPAGES] = {NULL, };
 	struct nfs_getaclargs args = {
 		.fh = NFS_FH(inode),
 		.acl_pages = pages,
@@ -3552,41 +3549,60 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
-	struct page *localpage = NULL;
-	int ret;
+	int ret = -ENOMEM, npages, i, acl_len = 0;
 
-	if (buflen < PAGE_SIZE) {
-		/* As long as we're doing a round trip to the server anyway,
-		 * let's be prepared for a page of acl data. */
-		localpage = alloc_page(GFP_KERNEL);
-		resp_buf = page_address(localpage);
-		if (localpage == NULL)
-			return -ENOMEM;
-		args.acl_pages[0] = localpage;
-		args.acl_pgbase = 0;
-		args.acl_len = PAGE_SIZE;
-	} else {
-		resp_buf = buf;
-		buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
+	npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	/* As long as we're doing a round trip to the server anyway,
+	 * let's be prepared for a page of acl data. */
+	if (npages == 0)
+		npages = 1;
+
+	for (i = 0; i < npages; i++) {
+		pages[i] = alloc_page(GFP_KERNEL);
+		if (!pages[i])
+			goto out_free;
+	}
+	if (npages > 1) {
+		/* for decoding across pages */
+		args.acl_scratch = alloc_page(GFP_KERNEL);
+		if (!args.acl_scratch)
+			goto out_free;
 	}
-	ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
+	args.acl_len = npages * PAGE_SIZE;
+	args.acl_pgbase = 0;
+	/* Let decode_getfacl know not to fail if the ACL data is larger than
+	 * the page we send as a guess */
+	if (buf == NULL)
+		res.acl_flags |= NFS4_ACL_LEN_REQUEST;
+	resp_buf = page_address(pages[0]);
+
+	dprintk("%s  buf %p buflen %ld npages %d args.acl_len %ld\n",
+		__func__, buf, buflen, npages, args.acl_len);
+	ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
+			     &msg, &args.seq_args, &res.seq_res, 0);
 	if (ret)
 		goto out_free;
-	if (res.acl_len > args.acl_len)
-		nfs4_write_cached_acl(inode, NULL, res.acl_len);
+
+	acl_len = res.acl_len - res.acl_data_offset;
+	if (acl_len > args.acl_len)
+		nfs4_write_cached_acl(inode, NULL, acl_len);
 	else
-		nfs4_write_cached_acl(inode, resp_buf, res.acl_len);
+		nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset,
+				      acl_len);
 	if (buf) {
 		ret = -ERANGE;
-		if (res.acl_len > buflen)
+		if (acl_len > buflen)
 			goto out_free;
-		if (localpage)
-			memcpy(buf, resp_buf, res.acl_len);
+		_copy_from_pages(buf, pages, res.acl_data_offset,
+				res.acl_len);
 	}
-	ret = res.acl_len;
+	ret = acl_len;
 out_free:
-	if (localpage)
-		__free_page(localpage);
+	for (i = 0; i < npages; i++)
+		if (pages[i])
+			__free_page(pages[i]);
+	if (args.acl_scratch)
+		__free_page(args.acl_scratch);
 	return ret;
 }
 
@@ -3617,6 +3633,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
 		nfs_zap_acl_cache(inode);
 	ret = nfs4_read_cached_acl(inode, buf, buflen);
 	if (ret != -ENOENT)
+		/* -ENOENT is returned if there is no ACL or if there is an ACL
+		 * but no cached acl data, just the acl length */
 		return ret;
 	return nfs4_get_acl_uncached(inode, buf, buflen);
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e6161b213ed1..dcaf69309d8e 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2517,11 +2517,13 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
-	replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1;
+	replen = hdr.replen + op_decode_hdr_maxsz + 1;
 	encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
 		args->acl_pages, args->acl_pgbase, args->acl_len);
+	xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE);
+
 	encode_nops(&hdr);
 }
 
@@ -4957,17 +4959,18 @@ decode_restorefh(struct xdr_stream *xdr)
 }
 
 static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
-		size_t *acl_len)
+			 struct nfs_getaclres *res)
 {
-	__be32 *savep;
+	__be32 *savep, *bm_p;
 	uint32_t attrlen,
 		 bitmap[3] = {0};
 	struct kvec *iov = req->rq_rcv_buf.head;
 	int status;
 
-	*acl_len = 0;
+	res->acl_len = 0;
 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
 		goto out;
+	bm_p = xdr->p;
 	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
 		goto out;
 	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
@@ -4979,18 +4982,30 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
 		size_t hdrlen;
 		u32 recvd;
 
+		/* The bitmap (xdr len + bitmaps) and the attr xdr len words
+		 * are stored with the acl data to handle the problem of
+		 * variable length bitmaps.*/
+		xdr->p = bm_p;
+		res->acl_data_offset = be32_to_cpup(bm_p) + 2;
+		res->acl_data_offset <<= 2;
+
 		/* We ignore &savep and don't do consistency checks on
 		 * the attr length.  Let userspace figure it out.... */
 		hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
+		attrlen += res->acl_data_offset;
 		recvd = req->rq_rcv_buf.len - hdrlen;
 		if (attrlen > recvd) {
-			dprintk("NFS: server cheating in getattr"
-					" acl reply: attrlen %u > recvd %u\n",
+			if (res->acl_flags & NFS4_ACL_LEN_REQUEST) {
+				/* getxattr interface called with a NULL buf */
+				res->acl_len = attrlen;
+				goto out;
+			}
+			dprintk("NFS: acl reply: attrlen %u > recvd %u\n",
 					attrlen, recvd);
 			return -EINVAL;
 		}
 		xdr_read_pages(xdr, attrlen);
-		*acl_len = attrlen;
+		res->acl_len = attrlen;
 	} else
 		status = -EOPNOTSUPP;
 
@@ -6028,7 +6043,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_getacl(xdr, rqstp, &res->acl_len);
+	status = decode_getacl(xdr, rqstp, res);
 
 out:
 	return status;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2a7c533be5dd..6c898afe6095 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -602,11 +602,16 @@ struct nfs_getaclargs {
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
+	struct page *			acl_scratch;
 	struct nfs4_sequence_args 	seq_args;
 };
 
+/* getxattr ACL interface flags */
+#define NFS4_ACL_LEN_REQUEST	0x0001	/* zero length getxattr buffer */
 struct nfs_getaclres {
 	size_t				acl_len;
+	size_t				acl_data_offset;
+	int				acl_flags;
 	struct nfs4_sequence_res	seq_res;
 };
 
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index a20970ef9e4e..af70af333546 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -191,6 +191,8 @@ extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
 			     struct xdr_array2_desc *desc);
 extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
 			     struct xdr_array2_desc *desc);
+extern void _copy_from_pages(char *p, struct page **pages, size_t pgbase,
+			     size_t len);
 
 /*
  * Provide some simple tools for XDR buffer overflow-checking etc.
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 277ebd4bf095..593f4c605305 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -296,7 +296,7 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len)
  * Copies data into an arbitrary memory location from an array of pages
  * The copy is assumed to be non-overlapping.
  */
-static void
+void
 _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
 {
 	struct page **pgfrom;
@@ -324,6 +324,7 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
 
 	} while ((len -= copy) != 0);
 }
+EXPORT_SYMBOL_GPL(_copy_from_pages);
 
 /*
  * xdr_shrink_bufhead
-- 
cgit v1.2.3


From 0aaaf5c424c7ffd6b0c4253251356558b16ef3a2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 6 Dec 2011 16:13:48 -0500
Subject: NFS: Cache state owners after files are closed

Servers have a finite amount of memory to store NFSv4 open and lock
owners.  Moreover, servers may have a difficult time determining when
they can reap their state owner table, thanks to gray areas in the
NFSv4 protocol specification.  Thus clients should be careful to reuse
state owners when possible.

Currently Linux is not too careful.  When a user has closed all her
files on one mount point, the state owner's reference count goes to
zero, and it is released.  The next OPEN allocates a new one.  A
workload that serially opens and closes files can run through a large
number of open owners this way.

When a state owner's reference count goes to zero, slap it onto a free
list for that nfs_server, with an expiry time.  Garbage collect before
looking for a state owner.  This makes state owners for active users
available for re-use.

Now that there can be unused state owners remaining at umount time,
purge the state owner free list when a server is destroyed.  Also be
sure not to reclaim unused state owners during state recovery.

This change has benefits for the client as well.  For some workloads,
this approach drops the number of OPEN_CONFIRM calls from the same as
the number of OPEN calls, down to just one.  This reduces wire traffic
and thus open(2) latency.  Before this patch, untarring a kernel
source tarball shows the OPEN_CONFIRM call counter steadily increasing
through the test.  With the patch, the OPEN_CONFIRM count remains at 1
throughout the entire untar.

As long as the expiry time is kept short, I don't think garbage
collection should be terribly expensive, although it does bounce the
clp->cl_lock around a bit.

[ At some point we should rationalize the use of the nfs_server
->destroy method. ]

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
[Trond: Fixed a garbage collection race and a few efficiency issues]
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           |  8 +++++
 fs/nfs/nfs4_fs.h          |  3 ++
 fs/nfs/nfs4state.c        | 89 ++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/nfs_fs_sb.h |  1 +
 4 files changed, 92 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 32ea37198e93..41bd67f80d31 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -250,6 +250,11 @@ static void pnfs_init_server(struct nfs_server *server)
 	rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
 }
 
+static void nfs4_destroy_server(struct nfs_server *server)
+{
+	nfs4_purge_state_owners(server);
+}
+
 #else
 static void nfs4_shutdown_client(struct nfs_client *clp)
 {
@@ -1065,6 +1070,7 @@ static struct nfs_server *nfs_alloc_server(void)
 	INIT_LIST_HEAD(&server->master_link);
 	INIT_LIST_HEAD(&server->delegations);
 	INIT_LIST_HEAD(&server->layouts);
+	INIT_LIST_HEAD(&server->state_owners_lru);
 
 	atomic_set(&server->active, 0);
 
@@ -1538,6 +1544,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
 
 	nfs_server_insert_lists(server);
 	server->mount_time = jiffies;
+	server->destroy = nfs4_destroy_server;
 out:
 	nfs_free_fattr(fattr);
 	return error;
@@ -1719,6 +1726,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 
 	/* Copy data from the source */
 	server->nfs_client = source->nfs_client;
+	server->destroy = source->destroy;
 	atomic_inc(&server->nfs_client->cl_count);
 	nfs_server_copy_userdata(server, source);
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 693ae22f8731..4d7d0aedc101 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -94,6 +94,8 @@ struct nfs_unique_id {
 struct nfs4_state_owner {
 	struct nfs_unique_id so_owner_id;
 	struct nfs_server    *so_server;
+	struct list_head     so_lru;
+	unsigned long        so_expires;
 	struct rb_node	     so_server_node;
 
 	struct rpc_cred	     *so_cred;	 /* Associated cred */
@@ -319,6 +321,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
 
 extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
+extern void nfs4_purge_state_owners(struct nfs_server *);
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, fmode_t);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6354e4fcc829..a53f33b4ac3a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -49,6 +49,7 @@
 #include <linux/ratelimit.h>
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
+#include <linux/jiffies.h>
 
 #include "nfs4_fs.h"
 #include "callback.h"
@@ -388,6 +389,8 @@ nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
 		else if (cred > sp->so_cred)
 			p = &parent->rb_right;
 		else {
+			if (!list_empty(&sp->so_lru))
+				list_del_init(&sp->so_lru);
 			atomic_inc(&sp->so_count);
 			return sp;
 		}
@@ -412,6 +415,8 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
 		else if (new->so_cred > sp->so_cred)
 			p = &parent->rb_right;
 		else {
+			if (!list_empty(&sp->so_lru))
+				list_del_init(&sp->so_lru);
 			atomic_inc(&sp->so_count);
 			return sp;
 		}
@@ -453,6 +458,7 @@ nfs4_alloc_state_owner(void)
 	spin_lock_init(&sp->so_sequence.lock);
 	INIT_LIST_HEAD(&sp->so_sequence.list);
 	atomic_set(&sp->so_count, 1);
+	INIT_LIST_HEAD(&sp->so_lru);
 	return sp;
 }
 
@@ -470,6 +476,38 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 	}
 }
 
+static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
+{
+	rpc_destroy_wait_queue(&sp->so_sequence.wait);
+	put_rpccred(sp->so_cred);
+	kfree(sp);
+}
+
+static void nfs4_gc_state_owners(struct nfs_server *server)
+{
+	struct nfs_client *clp = server->nfs_client;
+	struct nfs4_state_owner *sp, *tmp;
+	unsigned long time_min, time_max;
+	LIST_HEAD(doomed);
+
+	spin_lock(&clp->cl_lock);
+	time_max = jiffies;
+	time_min = (long)time_max - (long)clp->cl_lease_time;
+	list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
+		/* NB: LRU is sorted so that oldest is at the head */
+		if (time_in_range(sp->so_expires, time_min, time_max))
+			break;
+		list_move(&sp->so_lru, &doomed);
+		nfs4_remove_state_owner_locked(sp);
+	}
+	spin_unlock(&clp->cl_lock);
+
+	list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
+		list_del(&sp->so_lru);
+		nfs4_free_state_owner(sp);
+	}
+}
+
 /**
  * nfs4_get_state_owner - Look up a state owner given a credential
  * @server: nfs_server to search
@@ -487,10 +525,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
 	sp = nfs4_find_state_owner_locked(server, cred);
 	spin_unlock(&clp->cl_lock);
 	if (sp != NULL)
-		return sp;
+		goto out;
 	new = nfs4_alloc_state_owner();
 	if (new == NULL)
-		return NULL;
+		goto out;
 	new->so_server = server;
 	new->so_cred = cred;
 	spin_lock(&clp->cl_lock);
@@ -502,26 +540,58 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
 		rpc_destroy_wait_queue(&new->so_sequence.wait);
 		kfree(new);
 	}
+out:
+	nfs4_gc_state_owners(server);
 	return sp;
 }
 
 /**
  * nfs4_put_state_owner - Release a nfs4_state_owner
  * @sp: state owner data to release
- *
  */
 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 {
-	struct nfs_client *clp = sp->so_server->nfs_client;
-	struct rpc_cred *cred = sp->so_cred;
+	struct nfs_server *server = sp->so_server;
+	struct nfs_client *clp = server->nfs_client;
 
 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
 		return;
-	nfs4_remove_state_owner_locked(sp);
+
+	if (!RB_EMPTY_NODE(&sp->so_server_node)) {
+		sp->so_expires = jiffies;
+		list_add_tail(&sp->so_lru, &server->state_owners_lru);
+		spin_unlock(&clp->cl_lock);
+	} else {
+		nfs4_remove_state_owner_locked(sp);
+		spin_unlock(&clp->cl_lock);
+		nfs4_free_state_owner(sp);
+	}
+}
+
+/**
+ * nfs4_purge_state_owners - Release all cached state owners
+ * @server: nfs_server with cached state owners to release
+ *
+ * Called at umount time.  Remaining state owners will be on
+ * the LRU with ref count of zero.
+ */
+void nfs4_purge_state_owners(struct nfs_server *server)
+{
+	struct nfs_client *clp = server->nfs_client;
+	struct nfs4_state_owner *sp, *tmp;
+	LIST_HEAD(doomed);
+
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
+		list_move(&sp->so_lru, &doomed);
+		nfs4_remove_state_owner_locked(sp);
+	}
 	spin_unlock(&clp->cl_lock);
-	rpc_destroy_wait_queue(&sp->so_sequence.wait);
-	put_rpccred(cred);
-	kfree(sp);
+
+	list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
+		list_del(&sp->so_lru);
+		nfs4_free_state_owner(sp);
+	}
 }
 
 static struct nfs4_state *
@@ -1393,6 +1463,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
 restart:
 	rcu_read_lock();
 	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		nfs4_purge_state_owners(server);
 		spin_lock(&clp->cl_lock);
 		for (pos = rb_first(&server->state_owners);
 		     pos != NULL;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b5479df8378d..ba4d7656ecfd 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -153,6 +153,7 @@ struct nfs_server {
 	struct rb_root		openowner_id;
 	struct rb_root		lockowner_id;
 #endif
+	struct list_head	state_owners_lru;
 	struct list_head	layouts;
 	struct list_head	delegations;
 	void (*destroy)(struct nfs_server *);
-- 
cgit v1.2.3


From 68bad94ed801d955535cb50dde3412944a24530c Mon Sep 17 00:00:00 2001
From: Neerav Parikh <Neerav.Parikh@intel.com>
Date: Wed, 4 Jan 2012 20:23:39 +0000
Subject: netdev: FCoE: Add new ndo_get_fcoe_hbainfo() call

This adds a new ndo_get_fcoe_hbainfo() call in
net_device_ops for FCoE protocol stack.

If supported by the underlying device, the FCoE protocol
stack will call this to get device specific information
from the underlying device.
This information will then be utilized by the FCoE protocol
stack to register Fiber Channel HBA attributes with the
Fiber Channel Management Service via Fabric Device
Management Interface (FDMI) as per the T11 FC-GS
specification.

Changes in v2:
- As per comments from David Miller aligning the parameters
of the ndo_get_fcoe_hbainfo()

Signed-off-by: Neerav Parikh <Neerav.Parikh@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a776a675c0e5..a1d109590da4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -707,6 +707,23 @@ struct netdev_tc_txq {
 	u16 offset;
 };
 
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+/*
+ * This structure is to hold information about the device
+ * configured to run FCoE protocol stack.
+ */
+struct netdev_fcoe_hbainfo {
+	char	manufacturer[64];
+	char	serial_number[64];
+	char	hardware_version[64];
+	char	driver_version[64];
+	char	optionrom_version[64];
+	char	firmware_version[64];
+	char	model[256];
+	char	model_description[256];
+};
+#endif
+
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
@@ -847,6 +864,13 @@ struct netdev_tc_txq {
  *	perform necessary setup and returns 1 to indicate the device is set up
  *	successfully to perform DDP on this I/O, otherwise this returns 0.
  *
+ * int (*ndo_fcoe_get_hbainfo)(struct net_device *dev,
+ *			       struct netdev_fcoe_hbainfo *hbainfo);
+ *	Called when the FCoE Protocol stack wants information on the underlying
+ *	device. This information is utilized by the FCoE protocol stack to
+ *	register attributes with Fiber Channel management service as per the
+ *	FC-GS Fabric Device Management Information(FDMI) specification.
+ *
  * int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type);
  *	Called when the underlying device wants to override default World Wide
  *	Name (WWN) generation mechanism in FCoE protocol stack to pass its own
@@ -950,6 +974,8 @@ struct net_device_ops {
 						       u16 xid,
 						       struct scatterlist *sgl,
 						       unsigned int sgc);
+	int			(*ndo_fcoe_get_hbainfo)(struct net_device *dev,
+							struct netdev_fcoe_hbainfo *hbainfo);
 #endif
 
 #if IS_ENABLED(CONFIG_LIBFCOE)
-- 
cgit v1.2.3


From 18cb809850fb499ad9bf288696a95f4071f73931 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 4 Jan 2012 14:18:38 +0000
Subject: net_sched: sfq: extend limits

SFQ as implemented in Linux is very limited, with at most 127 flows
and limit of 127 packets. [ So if 127 flows are active, we have one
packet per flow ]

This patch brings to SFQ following features to cope with modern needs.

- Ability to specify a smaller per flow limit of inflight packets.
    (default value being at 127 packets)

- Ability to have up to 65408 active flows (instead of 127)

- Ability to have head drops instead of tail drops
  (to drop old packets from a flow)

Example of use : No more than 20 packets per flow, max 8000 flows, max
20000 packets in SFQ qdisc, hash table of 65536 slots.

tc qdisc add ... sfq \
        flows 8000 \
        depth 20 \
        headdrop \
        limit 20000 \
	divisor 65536

Ram usage :

2 bytes per hash table entry (instead of previous 1 byte/entry)
32 bytes per flow on 64bit arches, instead of 384 for QFQ, so much
better cache hit ratio.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Dave Taht <dave.taht@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h |  16 ++---
 net/sched/sch_sfq.c       | 175 +++++++++++++++++++++++++++++++---------------
 2 files changed, 124 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 8daced32a014..8f1b928f777c 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -162,19 +162,17 @@ struct tc_sfq_qopt {
 	unsigned	flows;		/* Maximal number of flows  */
 };
 
+struct tc_sfq_qopt_v1 {
+	struct tc_sfq_qopt v0;
+	unsigned int	depth;		/* max number of packets per flow */
+	unsigned int	headdrop;
+};
+
+
 struct tc_sfq_xstats {
 	__s32		allot;
 };
 
-/*
- *  NOTE: limit, divisor and flows are hardwired to code at the moment.
- *
- *	limit=flows=128, divisor=1024;
- *
- *	The only reason for this is efficiency, it is possible
- *	to change these parameters in compile time.
- */
-
 /* RED section */
 
 enum {
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 843018154a5c..0a7964009e8c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -66,16 +66,18 @@
 	SFQ is superior for this purpose.
 
 	IMPLEMENTATION:
-	This implementation limits maximal queue length to 128;
-	max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024.
-	The only goal of this restrictions was that all data
-	fit into one 4K page on 32bit arches.
+	This implementation limits :
+	- maximal queue length per flow to 127 packets.
+	- max mtu to 2^18-1;
+	- max 65408 flows,
+	- number of hash buckets to 65536.
 
 	It is easy to increase these values, but not in flight.  */
 
-#define SFQ_DEPTH		128 /* max number of packets per flow */
-#define SFQ_SLOTS		128 /* max number of flows */
-#define SFQ_EMPTY_SLOT		255
+#define SFQ_MAX_DEPTH		127 /* max number of packets per flow */
+#define SFQ_DEFAULT_FLOWS	128
+#define SFQ_MAX_FLOWS		(0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
+#define SFQ_EMPTY_SLOT		0xffff
 #define SFQ_DEFAULT_HASH_DIVISOR 1024
 
 /* We use 16 bits to store allot, and want to handle packets up to 64K
@@ -84,13 +86,13 @@
 #define SFQ_ALLOT_SHIFT		3
 #define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
 
-/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
-typedef unsigned char sfq_index;
+/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
+typedef u16 sfq_index;
 
 /*
  * We dont use pointers to save space.
- * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
- * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
+ * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
+ * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
  * are 'pointers' to dep[] array
  */
 struct sfq_head {
@@ -102,28 +104,38 @@ struct sfq_slot {
 	struct sk_buff	*skblist_next;
 	struct sk_buff	*skblist_prev;
 	sfq_index	qlen; /* number of skbs in skblist */
-	sfq_index	next; /* next slot in sfq chain */
+	sfq_index	next; /* next slot in sfq RR chain */
 	struct sfq_head dep; /* anchor in dep[] chains */
 	unsigned short	hash; /* hash value (index in ht[]) */
 	short		allot; /* credit for this slot */
 };
 
 struct sfq_sched_data {
-/* Parameters */
-	int		perturb_period;
-	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
-	int		limit;
+/* frequently used fields */
+	int		limit;		/* limit of total number of packets in this qdisc */
 	unsigned int	divisor;	/* number of slots in hash table */
-/* Variables */
-	struct tcf_proto *filter_list;
-	struct timer_list perturb_timer;
+	unsigned int	maxflows;	/* number of flows in flows array */
+	int		headdrop;
+	int		maxdepth;	/* limit of packets per flow */
+
 	u32		perturbation;
+	struct tcf_proto *filter_list;
 	sfq_index	cur_depth;	/* depth of longest slot */
 	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
 	struct sfq_slot *tail;		/* current slot in round */
-	sfq_index	*ht;		/* Hash table (divisor slots) */
-	struct sfq_slot	slots[SFQ_SLOTS];
-	struct sfq_head	dep[SFQ_DEPTH];	/* Linked list of slots, indexed by depth */
+	sfq_index	*ht;		/* Hash table ('divisor' slots) */
+	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */
+
+	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
+					/* Linked lists of slots, indexed by depth
+					 * dep[0] : list of unused flows
+					 * dep[1] : list of flows with 1 packet
+					 * dep[X] : list of flows with X packets
+					 */
+
+	int		perturb_period;
+	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
+	struct timer_list perturb_timer;
 };
 
 /*
@@ -131,9 +143,9 @@ struct sfq_sched_data {
  */
 static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
 {
-	if (val < SFQ_SLOTS)
+	if (val < SFQ_MAX_FLOWS)
 		return &q->slots[val].dep;
-	return &q->dep[val - SFQ_SLOTS];
+	return &q->dep[val - SFQ_MAX_FLOWS];
 }
 
 /*
@@ -199,18 +211,19 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 }
 
 /*
- * x : slot number [0 .. SFQ_SLOTS - 1]
+ * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
  */
 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
-	int qlen = q->slots[x].qlen;
+	struct sfq_slot *slot = &q->slots[x];
+	int qlen = slot->qlen;
 
-	p = qlen + SFQ_SLOTS;
+	p = qlen + SFQ_MAX_FLOWS;
 	n = q->dep[qlen].next;
 
-	q->slots[x].dep.next = n;
-	q->slots[x].dep.prev = p;
+	slot->dep.next = n;
+	slot->dep.prev = p;
 
 	q->dep[qlen].next = x;		/* sfq_dep_head(q, p)->next = x */
 	sfq_dep_head(q, n)->prev = x;
@@ -275,6 +288,7 @@ static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
 
 static inline void slot_queue_init(struct sfq_slot *slot)
 {
+	memset(slot, 0, sizeof(*slot));
 	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
 }
 
@@ -305,7 +319,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 		x = q->dep[d].next;
 		slot = &q->slots[x];
 drop:
-		skb = slot_dequeue_tail(slot);
+		skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
 		len = qdisc_pkt_len(skb);
 		sfq_dec(q, x);
 		kfree_skb(skb);
@@ -349,16 +363,27 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	slot = &q->slots[x];
 	if (x == SFQ_EMPTY_SLOT) {
 		x = q->dep[0].next; /* get a free slot */
+		if (x >= SFQ_MAX_FLOWS)
+			return qdisc_drop(skb, sch);
 		q->ht[hash] = x;
 		slot = &q->slots[x];
 		slot->hash = hash;
 	}
 
-	/* If selected queue has length q->limit, do simple tail drop,
-	 * i.e. drop _this_ packet.
-	 */
-	if (slot->qlen >= q->limit)
-		return qdisc_drop(skb, sch);
+	if (slot->qlen >= q->maxdepth) {
+		struct sk_buff *head;
+
+		if (!q->headdrop)
+			return qdisc_drop(skb, sch);
+
+		head = slot_dequeue_head(slot);
+		sch->qstats.backlog -= qdisc_pkt_len(head);
+		qdisc_drop(head, sch);
+
+		sch->qstats.backlog += qdisc_pkt_len(skb);
+		slot_queue_add(slot, skb);
+		return NET_XMIT_CN;
+	}
 
 	sch->qstats.backlog += qdisc_pkt_len(skb);
 	slot_queue_add(slot, skb);
@@ -445,16 +470,18 @@ sfq_reset(struct Qdisc *sch)
  * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
  * counters.
  */
-static void sfq_rehash(struct sfq_sched_data *q)
+static void sfq_rehash(struct Qdisc *sch)
 {
+	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 	int i;
 	struct sfq_slot *slot;
 	struct sk_buff_head list;
+	int dropped = 0;
 
 	__skb_queue_head_init(&list);
 
-	for (i = 0; i < SFQ_SLOTS; i++) {
+	for (i = 0; i < q->maxflows; i++) {
 		slot = &q->slots[i];
 		if (!slot->qlen)
 			continue;
@@ -474,10 +501,18 @@ static void sfq_rehash(struct sfq_sched_data *q)
 		slot = &q->slots[x];
 		if (x == SFQ_EMPTY_SLOT) {
 			x = q->dep[0].next; /* get a free slot */
+			if (x >= SFQ_MAX_FLOWS) {
+drop:				sch->qstats.backlog -= qdisc_pkt_len(skb);
+				kfree_skb(skb);
+				dropped++;
+				continue;
+			}
 			q->ht[hash] = x;
 			slot = &q->slots[x];
 			slot->hash = hash;
 		}
+		if (slot->qlen >= q->maxdepth)
+			goto drop;
 		slot_queue_add(slot, skb);
 		sfq_inc(q, x);
 		if (slot->qlen == 1) {		/* The flow is new */
@@ -491,6 +526,8 @@ static void sfq_rehash(struct sfq_sched_data *q)
 			slot->allot = q->scaled_quantum;
 		}
 	}
+	sch->q.qlen -= dropped;
+	qdisc_tree_decrease_qlen(sch, dropped);
 }
 
 static void sfq_perturbation(unsigned long arg)
@@ -502,7 +539,7 @@ static void sfq_perturbation(unsigned long arg)
 	spin_lock(root_lock);
 	q->perturbation = net_random();
 	if (!q->filter_list && q->tail)
-		sfq_rehash(q);
+		sfq_rehash(sch);
 	spin_unlock(root_lock);
 
 	if (q->perturb_period)
@@ -513,23 +550,39 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct tc_sfq_qopt *ctl = nla_data(opt);
+	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
 	unsigned int qlen;
 
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
-
+	if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
+		ctl_v1 = nla_data(opt);
 	if (ctl->divisor &&
 	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
 		return -EINVAL;
 
 	sch_tree_lock(sch);
-	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
-	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+	if (ctl->quantum) {
+		q->quantum = ctl->quantum;
+		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+	}
 	q->perturb_period = ctl->perturb_period * HZ;
-	if (ctl->limit)
-		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
-	if (ctl->divisor)
+	if (ctl->flows)
+		q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+	if (ctl->divisor) {
 		q->divisor = ctl->divisor;
+		q->maxflows = min_t(u32, q->maxflows, q->divisor);
+	}
+	if (ctl_v1) {
+		if (ctl_v1->depth)
+			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
+		q->headdrop = ctl_v1->headdrop;
+	}
+	if (ctl->limit) {
+		q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
+		q->maxflows = min_t(u32, q->maxflows, q->limit);
+	}
+
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > q->limit)
 		sfq_drop(sch);
@@ -571,6 +624,7 @@ static void sfq_destroy(struct Qdisc *sch)
 	q->perturb_period = 0;
 	del_timer_sync(&q->perturb_timer);
 	sfq_free(q->ht);
+	sfq_free(q->slots);
 }
 
 static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
@@ -582,15 +636,17 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->perturb_timer.data = (unsigned long)sch;
 	init_timer_deferrable(&q->perturb_timer);
 
-	for (i = 0; i < SFQ_DEPTH; i++) {
-		q->dep[i].next = i + SFQ_SLOTS;
-		q->dep[i].prev = i + SFQ_SLOTS;
+	for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
+		q->dep[i].next = i + SFQ_MAX_FLOWS;
+		q->dep[i].prev = i + SFQ_MAX_FLOWS;
 	}
 
-	q->limit = SFQ_DEPTH - 1;
+	q->limit = SFQ_MAX_DEPTH;
+	q->maxdepth = SFQ_MAX_DEPTH;
 	q->cur_depth = 0;
 	q->tail = NULL;
 	q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
+	q->maxflows = SFQ_DEFAULT_FLOWS;
 	q->quantum = psched_mtu(qdisc_dev(sch));
 	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 	q->perturb_period = 0;
@@ -603,14 +659,15 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	}
 
 	q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
-	if (!q->ht) {
+	q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
+	if (!q->ht || !q->slots) {
 		sfq_destroy(sch);
 		return -ENOMEM;
 	}
 	for (i = 0; i < q->divisor; i++)
 		q->ht[i] = SFQ_EMPTY_SLOT;
 
-	for (i = 0; i < SFQ_SLOTS; i++) {
+	for (i = 0; i < q->maxflows; i++) {
 		slot_queue_init(&q->slots[i]);
 		sfq_link(q, i);
 	}
@@ -625,14 +682,16 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
-	struct tc_sfq_qopt opt;
-
-	opt.quantum = q->quantum;
-	opt.perturb_period = q->perturb_period / HZ;
-
-	opt.limit = q->limit;
-	opt.divisor = q->divisor;
-	opt.flows = q->limit;
+	struct tc_sfq_qopt_v1 opt;
+
+	memset(&opt, 0, sizeof(opt));
+	opt.v0.quantum	= q->quantum;
+	opt.v0.perturb_period = q->perturb_period / HZ;
+	opt.v0.limit	= q->limit;
+	opt.v0.divisor	= q->divisor;
+	opt.v0.flows	= q->maxflows;
+	opt.depth	= q->maxdepth;
+	opt.headdrop	= q->headdrop;
 
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
-- 
cgit v1.2.3


From 9f42f126154786e6e76df513004800c8c633f020 Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Thu, 5 Jan 2012 07:13:39 +0000
Subject: net: pack skb_shared_info more efficiently

nr_frags can be 8 bits since 256 is plenty of fragments. This allows it to be
packed with tx_flags.

Also by moving ip6_frag_id and dataref (both 4 bytes) next to each other we can
avoid a hole between ip6_frag_id and frag_list on 64 bit systems.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f47f0c3939f2..50db9b04a552 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -242,15 +242,15 @@ struct ubuf_info {
  * the end of the header data, ie. at skb->end.
  */
 struct skb_shared_info {
-	unsigned short	nr_frags;
+	unsigned char	nr_frags;
+	__u8		tx_flags;
 	unsigned short	gso_size;
 	/* Warning: this field is not always filled in (UFO)! */
 	unsigned short	gso_segs;
 	unsigned short  gso_type;
-	__be32          ip6_frag_id;
-	__u8		tx_flags;
 	struct sk_buff	*frag_list;
 	struct skb_shared_hwtstamps hwtstamps;
+	__be32          ip6_frag_id;
 
 	/*
 	 * Warning : all fields before dataref are cleared in __alloc_skb()
-- 
cgit v1.2.3


From 6a9de49115d5ff9871d953af1a5c8249e1585731 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:14 -0500
Subject: capabilities: remove the task from capable LSM hook entirely

The capabilities framework is based around credentials, not necessarily the
current task.  Yet we still passed the current task down into LSMs from the
security_capable() LSM hook as if it was a meaningful portion of the security
decision.  This patch removes the 'generic' passing of current and instead
forces individual LSMs to use current explicitly if they think it is
appropriate.  In our case those LSMs are SELinux and AppArmor.

I believe the AppArmor use of current is incorrect, but that is wholely
unrelated to this patch.  This patch does not change what AppArmor does, it
just makes it clear in the AppArmor code that it is doing it.

The SELinux code still uses current in it's audit message, which may also be
wrong and needs further investigation.  Again this is NOT a change, it may
have always been wrong, this patch just makes it clear what is happening.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/security.h | 16 +++++++---------
 security/apparmor/lsm.c  |  8 ++++----
 security/commoncap.c     | 16 +++++++---------
 security/security.c      |  7 +++----
 security/selinux/hooks.c | 23 ++++++++++-------------
 5 files changed, 31 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index ebd2a53a3d07..4921163b2752 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -53,8 +53,8 @@ struct user_namespace;
  * These functions are in security/capability.c and are used
  * as the default capabilities functions
  */
-extern int cap_capable(struct task_struct *tsk, const struct cred *cred,
-		       struct user_namespace *ns, int cap, int audit);
+extern int cap_capable(const struct cred *cred, struct user_namespace *ns,
+		       int cap, int audit);
 extern int cap_settime(const struct timespec *ts, const struct timezone *tz);
 extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode);
 extern int cap_ptrace_traceme(struct task_struct *parent);
@@ -1261,7 +1261,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * @capable:
  *	Check whether the @tsk process has the @cap capability in the indicated
  *	credentials.
- *	@tsk contains the task_struct for the process.
  *	@cred contains the credentials to use.
  *      @ns contains the user namespace we want the capability in
  *	@cap contains the capability <include/linux/capability.h>.
@@ -1385,8 +1384,8 @@ struct security_operations {
 		       const kernel_cap_t *effective,
 		       const kernel_cap_t *inheritable,
 		       const kernel_cap_t *permitted);
-	int (*capable) (struct task_struct *tsk, const struct cred *cred,
-			struct user_namespace *ns, int cap, int audit);
+	int (*capable) (const struct cred *cred, struct user_namespace *ns,
+			int cap, int audit);
 	int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
 	int (*quota_on) (struct dentry *dentry);
 	int (*syslog) (int type);
@@ -1867,7 +1866,7 @@ static inline int security_capset(struct cred *new,
 static inline int security_capable(struct user_namespace *ns,
 				   const struct cred *cred, int cap)
 {
-	return cap_capable(current, cred, ns, cap, SECURITY_CAP_AUDIT);
+	return cap_capable(cred, ns, cap, SECURITY_CAP_AUDIT);
 }
 
 static inline int security_real_capable(struct task_struct *tsk, struct user_namespace *ns, int cap)
@@ -1875,7 +1874,7 @@ static inline int security_real_capable(struct task_struct *tsk, struct user_nam
 	int ret;
 
 	rcu_read_lock();
-	ret = cap_capable(tsk, __task_cred(tsk), ns, cap, SECURITY_CAP_AUDIT);
+	ret = cap_capable(__task_cred(tsk), ns, cap, SECURITY_CAP_AUDIT);
 	rcu_read_unlock();
 	return ret;
 }
@@ -1886,8 +1885,7 @@ int security_real_capable_noaudit(struct task_struct *tsk, struct user_namespace
 	int ret;
 
 	rcu_read_lock();
-	ret = cap_capable(tsk, __task_cred(tsk), ns, cap,
-			       SECURITY_CAP_NOAUDIT);
+	ret = cap_capable(__task_cred(tsk), ns, cap, SECURITY_CAP_NOAUDIT);
 	rcu_read_unlock();
 	return ret;
 }
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 37832026e58a..ef4e2a8a33a5 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -136,16 +136,16 @@ static int apparmor_capget(struct task_struct *target, kernel_cap_t *effective,
 	return 0;
 }
 
-static int apparmor_capable(struct task_struct *task, const struct cred *cred,
-			    struct user_namespace *ns, int cap, int audit)
+static int apparmor_capable(const struct cred *cred, struct user_namespace *ns,
+			    int cap, int audit)
 {
 	struct aa_profile *profile;
 	/* cap_capable returns 0 on success, else -EPERM */
-	int error = cap_capable(task, cred, ns, cap, audit);
+	int error = cap_capable(cred, ns, cap, audit);
 	if (!error) {
 		profile = aa_cred_profile(cred);
 		if (!unconfined(profile))
-			error = aa_capable(task, profile, cap, audit);
+			error = aa_capable(current, profile, cap, audit);
 	}
 	return error;
 }
diff --git a/security/commoncap.c b/security/commoncap.c
index a93b3b733079..89f02ff66af9 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -66,7 +66,6 @@ EXPORT_SYMBOL(cap_netlink_recv);
 
 /**
  * cap_capable - Determine whether a task has a particular effective capability
- * @tsk: The task to query
  * @cred: The credentials to use
  * @ns:  The user namespace in which we need the capability
  * @cap: The capability to check for
@@ -80,8 +79,8 @@ EXPORT_SYMBOL(cap_netlink_recv);
  * cap_has_capability() returns 0 when a task has a capability, but the
  * kernel's capable() and has_capability() returns 1 for this case.
  */
-int cap_capable(struct task_struct *tsk, const struct cred *cred,
-		struct user_namespace *targ_ns, int cap, int audit)
+int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
+		int cap, int audit)
 {
 	for (;;) {
 		/* The creator of the user namespace has all caps. */
@@ -222,9 +221,8 @@ static inline int cap_inh_is_capped(void)
 	/* they are so limited unless the current task has the CAP_SETPCAP
 	 * capability
 	 */
-	if (cap_capable(current, current_cred(),
-			current_cred()->user->user_ns, CAP_SETPCAP,
-			SECURITY_CAP_AUDIT) == 0)
+	if (cap_capable(current_cred(), current_cred()->user->user_ns,
+			CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0)
 		return 0;
 	return 1;
 }
@@ -870,7 +868,7 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		     & (new->securebits ^ arg2))			/*[1]*/
 		    || ((new->securebits & SECURE_ALL_LOCKS & ~arg2))	/*[2]*/
 		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
-		    || (cap_capable(current, current_cred(),
+		    || (cap_capable(current_cred(),
 				    current_cred()->user->user_ns, CAP_SETPCAP,
 				    SECURITY_CAP_AUDIT) != 0)		/*[4]*/
 			/*
@@ -936,7 +934,7 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages)
 {
 	int cap_sys_admin = 0;
 
-	if (cap_capable(current, current_cred(), &init_user_ns, CAP_SYS_ADMIN,
+	if (cap_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN,
 			SECURITY_CAP_NOAUDIT) == 0)
 		cap_sys_admin = 1;
 	return __vm_enough_memory(mm, pages, cap_sys_admin);
@@ -963,7 +961,7 @@ int cap_file_mmap(struct file *file, unsigned long reqprot,
 	int ret = 0;
 
 	if (addr < dac_mmap_min_addr) {
-		ret = cap_capable(current, current_cred(), &init_user_ns, CAP_SYS_RAWIO,
+		ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
 				  SECURITY_CAP_AUDIT);
 		/* set PF_SUPERPRIV if it turns out we allow the low mmap */
 		if (ret == 0)
diff --git a/security/security.c b/security/security.c
index d9e153390926..9ae68c64455e 100644
--- a/security/security.c
+++ b/security/security.c
@@ -157,8 +157,7 @@ int security_capset(struct cred *new, const struct cred *old,
 int security_capable(struct user_namespace *ns, const struct cred *cred,
 		     int cap)
 {
-	return security_ops->capable(current, cred, ns, cap,
-				     SECURITY_CAP_AUDIT);
+	return security_ops->capable(cred, ns, cap, SECURITY_CAP_AUDIT);
 }
 
 int security_real_capable(struct task_struct *tsk, struct user_namespace *ns,
@@ -168,7 +167,7 @@ int security_real_capable(struct task_struct *tsk, struct user_namespace *ns,
 	int ret;
 
 	cred = get_task_cred(tsk);
-	ret = security_ops->capable(tsk, cred, ns, cap, SECURITY_CAP_AUDIT);
+	ret = security_ops->capable(cred, ns, cap, SECURITY_CAP_AUDIT);
 	put_cred(cred);
 	return ret;
 }
@@ -180,7 +179,7 @@ int security_real_capable_noaudit(struct task_struct *tsk,
 	int ret;
 
 	cred = get_task_cred(tsk);
-	ret = security_ops->capable(tsk, cred, ns, cap, SECURITY_CAP_NOAUDIT);
+	ret = security_ops->capable(cred, ns, cap, SECURITY_CAP_NOAUDIT);
 	put_cred(cred);
 	return ret;
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index e545b9f67072..c9605c4a2e08 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1414,8 +1414,7 @@ static int current_has_perm(const struct task_struct *tsk,
 #endif
 
 /* Check whether a task is allowed to use a capability. */
-static int task_has_capability(struct task_struct *tsk,
-			       const struct cred *cred,
+static int cred_has_capability(const struct cred *cred,
 			       int cap, int audit)
 {
 	struct common_audit_data ad;
@@ -1426,7 +1425,7 @@ static int task_has_capability(struct task_struct *tsk,
 	int rc;
 
 	COMMON_AUDIT_DATA_INIT(&ad, CAP);
-	ad.tsk = tsk;
+	ad.tsk = current;
 	ad.u.cap = cap;
 
 	switch (CAP_TO_INDEX(cap)) {
@@ -1867,16 +1866,16 @@ static int selinux_capset(struct cred *new, const struct cred *old,
  * the CAP_SETUID and CAP_SETGID capabilities using the capable hook.
  */
 
-static int selinux_capable(struct task_struct *tsk, const struct cred *cred,
-			   struct user_namespace *ns, int cap, int audit)
+static int selinux_capable(const struct cred *cred, struct user_namespace *ns,
+			   int cap, int audit)
 {
 	int rc;
 
-	rc = cap_capable(tsk, cred, ns, cap, audit);
+	rc = cap_capable(cred, ns, cap, audit);
 	if (rc)
 		return rc;
 
-	return task_has_capability(tsk, cred, cap, audit);
+	return cred_has_capability(cred, cap, audit);
 }
 
 static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb)
@@ -1953,8 +1952,7 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages)
 {
 	int rc, cap_sys_admin = 0;
 
-	rc = selinux_capable(current, current_cred(),
-			     &init_user_ns, CAP_SYS_ADMIN,
+	rc = selinux_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN,
 			     SECURITY_CAP_NOAUDIT);
 	if (rc == 0)
 		cap_sys_admin = 1;
@@ -2858,8 +2856,7 @@ static int selinux_inode_getsecurity(const struct inode *inode, const char *name
 	 * and lack of permission just means that we fall back to the
 	 * in-core context value, not a denial.
 	 */
-	error = selinux_capable(current, current_cred(),
-				&init_user_ns, CAP_MAC_ADMIN,
+	error = selinux_capable(current_cred(), &init_user_ns, CAP_MAC_ADMIN,
 				SECURITY_CAP_NOAUDIT);
 	if (!error)
 		error = security_sid_to_context_force(isec->sid, &context,
@@ -2992,8 +2989,8 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd,
 
 	case KDSKBENT:
 	case KDSKBSENT:
-		error = task_has_capability(current, cred, CAP_SYS_TTY_CONFIG,
-					SECURITY_CAP_AUDIT);
+		error = cred_has_capability(cred, CAP_SYS_TTY_CONFIG,
+					    SECURITY_CAP_AUDIT);
 		break;
 
 	/* default case assumes that the command will go
-- 
cgit v1.2.3


From b7e724d303b684655e4ca3dabd5a6840ad19012d Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:15 -0500
Subject: capabilities: reverse arguments to security_capable

security_capable takes ns, cred, cap.  But the LSM capable() hook takes
cred, ns, cap.  The capability helper functions also take cred, ns, cap.
Rather than flip argument order just to flip it back, leave them alone.
Heck, this should be a little faster since argument will be in the right
place!

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 drivers/pci/pci-sysfs.c  | 2 +-
 include/linux/security.h | 6 +++---
 kernel/capability.c      | 2 +-
 security/security.c      | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 7bcf12adced7..a4457ab61342 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -431,7 +431,7 @@ pci_read_config(struct file *filp, struct kobject *kobj,
 	u8 *data = (u8*) buf;
 
 	/* Several chips lock up trying to read undefined config space */
-	if (security_capable(&init_user_ns, filp->f_cred, CAP_SYS_ADMIN) == 0) {
+	if (security_capable(filp->f_cred, &init_user_ns, CAP_SYS_ADMIN) == 0) {
 		size = dev->cfg_size;
 	} else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
 		size = 128;
diff --git a/include/linux/security.h b/include/linux/security.h
index 4921163b2752..ee969ff40a26 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1666,7 +1666,7 @@ int security_capset(struct cred *new, const struct cred *old,
 		    const kernel_cap_t *effective,
 		    const kernel_cap_t *inheritable,
 		    const kernel_cap_t *permitted);
-int security_capable(struct user_namespace *ns, const struct cred *cred,
+int security_capable(const struct cred *cred, struct user_namespace *ns,
 			int cap);
 int security_real_capable(struct task_struct *tsk, struct user_namespace *ns,
 			int cap);
@@ -1863,8 +1863,8 @@ static inline int security_capset(struct cred *new,
 	return cap_capset(new, old, effective, inheritable, permitted);
 }
 
-static inline int security_capable(struct user_namespace *ns,
-				   const struct cred *cred, int cap)
+static inline int security_capable(const struct cred *cred,
+				   struct user_namespace *ns, int cap)
 {
 	return cap_capable(cred, ns, cap, SECURITY_CAP_AUDIT);
 }
diff --git a/kernel/capability.c b/kernel/capability.c
index 283c529f8b1c..d98392719adb 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -374,7 +374,7 @@ bool ns_capable(struct user_namespace *ns, int cap)
 		BUG();
 	}
 
-	if (security_capable(ns, current_cred(), cap) == 0) {
+	if (security_capable(current_cred(), ns, cap) == 0) {
 		current->flags |= PF_SUPERPRIV;
 		return true;
 	}
diff --git a/security/security.c b/security/security.c
index 9ae68c64455e..b9e57f4fc44a 100644
--- a/security/security.c
+++ b/security/security.c
@@ -154,7 +154,7 @@ int security_capset(struct cred *new, const struct cred *old,
 				    effective, inheritable, permitted);
 }
 
-int security_capable(struct user_namespace *ns, const struct cred *cred,
+int security_capable(const struct cred *cred, struct user_namespace *ns,
 		     int cap)
 {
 	return security_ops->capable(cred, ns, cap, SECURITY_CAP_AUDIT);
-- 
cgit v1.2.3


From c7eba4a97563fd8b431787f7ad623444f2da80c6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:15 -0500
Subject: capabilities: introduce security_capable_noaudit

Exactly like security_capable except don't audit any denials.  This is for
places where the kernel may make decisions about what to do if a task has a
given capability, but which failing that capability is not a sign of a
security policy violation.  An example is checking if a task has
CAP_SYS_ADMIN to lower it's likelyhood of being killed by the oom killer.
This check is not a security violation if it is denied.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
---
 include/linux/security.h | 7 +++++++
 security/security.c      | 6 ++++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index ee969ff40a26..caff54eee686 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1668,6 +1668,8 @@ int security_capset(struct cred *new, const struct cred *old,
 		    const kernel_cap_t *permitted);
 int security_capable(const struct cred *cred, struct user_namespace *ns,
 			int cap);
+int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns,
+			     int cap);
 int security_real_capable(struct task_struct *tsk, struct user_namespace *ns,
 			int cap);
 int security_real_capable_noaudit(struct task_struct *tsk,
@@ -1869,6 +1871,11 @@ static inline int security_capable(const struct cred *cred,
 	return cap_capable(cred, ns, cap, SECURITY_CAP_AUDIT);
 }
 
+static inline int security_capable_noaudit(const struct cred *cred,
+					   struct user_namespace *ns, int cap) {
+	return cap_capable(cred, ns, cap, SECURITY_CAP_NOAUDIT);
+}
+
 static inline int security_real_capable(struct task_struct *tsk, struct user_namespace *ns, int cap)
 {
 	int ret;
diff --git a/security/security.c b/security/security.c
index b9e57f4fc44a..b7edaae77d1d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -160,6 +160,12 @@ int security_capable(const struct cred *cred, struct user_namespace *ns,
 	return security_ops->capable(cred, ns, cap, SECURITY_CAP_AUDIT);
 }
 
+int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns,
+			     int cap)
+{
+	return security_ops->capable(cred, ns, cap, SECURITY_CAP_NOAUDIT);
+}
+
 int security_real_capable(struct task_struct *tsk, struct user_namespace *ns,
 			  int cap)
 {
-- 
cgit v1.2.3


From 2920a8409de5a51575d03deca07e5bb2be6fc98d Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:15 -0500
Subject: capabilities: remove all _real_ interfaces

The name security_real_capable and security_real_capable_noaudit just don't
make much sense to me.  Convert them to use security_capable and
security_capable_noaudit.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
---
 include/linux/security.h | 25 -------------------------
 kernel/capability.c      | 18 +++++++++++++++---
 security/security.c      | 24 ------------------------
 3 files changed, 15 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index caff54eee686..e345a9313a60 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1670,10 +1670,6 @@ int security_capable(const struct cred *cred, struct user_namespace *ns,
 			int cap);
 int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns,
 			     int cap);
-int security_real_capable(struct task_struct *tsk, struct user_namespace *ns,
-			int cap);
-int security_real_capable_noaudit(struct task_struct *tsk,
-			struct user_namespace *ns, int cap);
 int security_quotactl(int cmds, int type, int id, struct super_block *sb);
 int security_quota_on(struct dentry *dentry);
 int security_syslog(int type);
@@ -1876,27 +1872,6 @@ static inline int security_capable_noaudit(const struct cred *cred,
 	return cap_capable(cred, ns, cap, SECURITY_CAP_NOAUDIT);
 }
 
-static inline int security_real_capable(struct task_struct *tsk, struct user_namespace *ns, int cap)
-{
-	int ret;
-
-	rcu_read_lock();
-	ret = cap_capable(__task_cred(tsk), ns, cap, SECURITY_CAP_AUDIT);
-	rcu_read_unlock();
-	return ret;
-}
-
-static inline
-int security_real_capable_noaudit(struct task_struct *tsk, struct user_namespace *ns, int cap)
-{
-	int ret;
-
-	rcu_read_lock();
-	ret = cap_capable(__task_cred(tsk), ns, cap, SECURITY_CAP_NOAUDIT);
-	rcu_read_unlock();
-	return ret;
-}
-
 static inline int security_quotactl(int cmds, int type, int id,
 				     struct super_block *sb)
 {
diff --git a/kernel/capability.c b/kernel/capability.c
index d98392719adb..ff50ab62cfca 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -298,7 +298,11 @@ error:
  */
 bool has_capability(struct task_struct *t, int cap)
 {
-	int ret = security_real_capable(t, &init_user_ns, cap);
+	int ret;
+
+	rcu_read_lock();
+	ret = security_capable(__task_cred(t), &init_user_ns, cap);
+	rcu_read_unlock();
 
 	return (ret == 0);
 }
@@ -317,7 +321,11 @@ bool has_capability(struct task_struct *t, int cap)
 bool has_ns_capability(struct task_struct *t,
 		       struct user_namespace *ns, int cap)
 {
-	int ret = security_real_capable(t, ns, cap);
+	int ret;
+
+	rcu_read_lock();
+	ret = security_capable(__task_cred(t), ns, cap);
+	rcu_read_unlock();
 
 	return (ret == 0);
 }
@@ -335,7 +343,11 @@ bool has_ns_capability(struct task_struct *t,
  */
 bool has_capability_noaudit(struct task_struct *t, int cap)
 {
-	int ret = security_real_capable_noaudit(t, &init_user_ns, cap);
+	int ret;
+
+	rcu_read_lock();
+	ret = security_capable_noaudit(__task_cred(t), &init_user_ns, cap);
+	rcu_read_unlock();
 
 	return (ret == 0);
 }
diff --git a/security/security.c b/security/security.c
index b7edaae77d1d..8900c5c4db5c 100644
--- a/security/security.c
+++ b/security/security.c
@@ -166,30 +166,6 @@ int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns,
 	return security_ops->capable(cred, ns, cap, SECURITY_CAP_NOAUDIT);
 }
 
-int security_real_capable(struct task_struct *tsk, struct user_namespace *ns,
-			  int cap)
-{
-	const struct cred *cred;
-	int ret;
-
-	cred = get_task_cred(tsk);
-	ret = security_ops->capable(cred, ns, cap, SECURITY_CAP_AUDIT);
-	put_cred(cred);
-	return ret;
-}
-
-int security_real_capable_noaudit(struct task_struct *tsk,
-				  struct user_namespace *ns, int cap)
-{
-	const struct cred *cred;
-	int ret;
-
-	cred = get_task_cred(tsk);
-	ret = security_ops->capable(cred, ns, cap, SECURITY_CAP_NOAUDIT);
-	put_cred(cred);
-	return ret;
-}
-
 int security_quotactl(int cmds, int type, int id, struct super_block *sb)
 {
 	return security_ops->quotactl(cmds, type, id, sb);
-- 
cgit v1.2.3


From 7b61d648499e74dbec3d4ce645675e0ae040ae78 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:15 -0500
Subject: capabilites: introduce new has_ns_capabilities_noaudit

For consistency in interfaces, introduce a new interface called
has_ns_capabilities_noaudit.  It checks if the given task has the given
capability in the given namespace.  Use this new function by
has_capabilities_noaudit.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
---
 include/linux/capability.h |  2 ++
 kernel/capability.c        | 30 +++++++++++++++++++++++++-----
 2 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index c42112350003..63f59fa8769d 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -543,6 +543,8 @@ extern bool has_capability(struct task_struct *t, int cap);
 extern bool has_ns_capability(struct task_struct *t,
 			      struct user_namespace *ns, int cap);
 extern bool has_capability_noaudit(struct task_struct *t, int cap);
+extern bool has_ns_capability_noaudit(struct task_struct *t,
+				      struct user_namespace *ns, int cap);
 extern bool capable(int cap);
 extern bool ns_capable(struct user_namespace *ns, int cap);
 extern bool task_ns_capable(struct task_struct *t, int cap);
diff --git a/kernel/capability.c b/kernel/capability.c
index fb815d1b9ea2..d8398e962470 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -325,27 +325,47 @@ bool has_capability(struct task_struct *t, int cap)
 }
 
 /**
- * has_capability_noaudit - Does a task have a capability (unaudited)
+ * has_ns_capability_noaudit - Does a task have a capability (unaudited)
+ * in a specific user ns.
  * @t: The task in question
+ * @ns: target user namespace
  * @cap: The capability to be tested for
  *
  * Return true if the specified task has the given superior capability
- * currently in effect to init_user_ns, false if not.  Don't write an
- * audit message for the check.
+ * currently in effect to the specified user namespace, false if not.
+ * Do not write an audit message for the check.
  *
  * Note that this does not set PF_SUPERPRIV on the task.
  */
-bool has_capability_noaudit(struct task_struct *t, int cap)
+bool has_ns_capability_noaudit(struct task_struct *t,
+			       struct user_namespace *ns, int cap)
 {
 	int ret;
 
 	rcu_read_lock();
-	ret = security_capable_noaudit(__task_cred(t), &init_user_ns, cap);
+	ret = security_capable_noaudit(__task_cred(t), ns, cap);
 	rcu_read_unlock();
 
 	return (ret == 0);
 }
 
+/**
+ * has_capability_noaudit - Does a task have a capability (unaudited) in the
+ * initial user ns
+ * @t: The task in question
+ * @cap: The capability to be tested for
+ *
+ * Return true if the specified task has the given superior capability
+ * currently in effect to init_user_ns, false if not.  Don't write an
+ * audit message for the check.
+ *
+ * Note that this does not set PF_SUPERPRIV on the task.
+ */
+bool has_capability_noaudit(struct task_struct *t, int cap)
+{
+	return has_ns_capability_noaudit(t, &init_user_ns, cap);
+}
+
 /**
  * capable - Determine if the current task has a superior capability in effect
  * @cap: The capability to be tested for
-- 
cgit v1.2.3


From f1c84dae0ecc51aa35c81f19a0ebcd6c0921ddcb Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:15 -0500
Subject: capabilities: remove task_ns_* functions

task_ in the front of a function, in the security subsystem anyway, means
to me at least, that we are operating with that task as the subject of the
security decision.  In this case what it means is that we are using current as
the subject but we use the task to get the right namespace.  Who in the world
would ever realize that's what task_ns_capability means just by the name?  This
patch eliminates the task_ns functions entirely and uses the has_ns_capability
function instead.  This means we explicitly open code the ns in question in
the caller.  I think it makes the caller a LOT more clear what is going on.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
---
 include/linux/capability.h |  1 -
 include/linux/cred.h       |  6 ++++--
 kernel/capability.c        | 14 --------------
 kernel/ptrace.c            |  4 ++--
 kernel/sched.c             |  2 +-
 5 files changed, 7 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index 63f59fa8769d..e3e8d9cb9b08 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -547,7 +547,6 @@ extern bool has_ns_capability_noaudit(struct task_struct *t,
 				      struct user_namespace *ns, int cap);
 extern bool capable(int cap);
 extern bool ns_capable(struct user_namespace *ns, int cap);
-extern bool task_ns_capable(struct task_struct *t, int cap);
 extern bool nsown_capable(int cap);
 
 /* audit system wants to get cap info from files as well */
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 40308969ed00..adadf71a7327 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -358,10 +358,12 @@ static inline void put_cred(const struct cred *_cred)
 #define current_security()	(current_cred_xxx(security))
 
 #ifdef CONFIG_USER_NS
-#define current_user_ns() (current_cred_xxx(user_ns))
+#define current_user_ns()	(current_cred_xxx(user_ns))
+#define task_user_ns(task)	(task_cred_xxx((task), user_ns))
 #else
 extern struct user_namespace init_user_ns;
-#define current_user_ns() (&init_user_ns)
+#define current_user_ns()	(&init_user_ns)
+#define task_user_ns(task)	(&init_user_ns)
 #endif
 
 
diff --git a/kernel/capability.c b/kernel/capability.c
index 47626446c39a..74fb3b603045 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -408,20 +408,6 @@ bool capable(int cap)
 }
 EXPORT_SYMBOL(capable);
 
-/**
- * task_ns_capable - Determine whether current task has a superior
- * capability targeted at a specific task's user namespace.
- * @t: The task whose user namespace is targeted.
- * @cap: The capability in question.
- *
- *  Return true if it does, false otherwise.
- */
-bool task_ns_capable(struct task_struct *t, int cap)
-{
-	return ns_capable(task_cred_xxx(t, user)->user_ns, cap);
-}
-EXPORT_SYMBOL(task_ns_capable);
-
 /**
  * nsown_capable - Check superior capability to one's own user_ns
  * @cap: The capability in question
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index a70d2a5d8c7b..210bbf045ee9 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -196,7 +196,7 @@ ok:
 	smp_rmb();
 	if (task->mm)
 		dumpable = get_dumpable(task->mm);
-	if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE))
+	if (!dumpable && !ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
 		return -EPERM;
 
 	return security_ptrace_access_check(task, mode);
@@ -266,7 +266,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 	task->ptrace = PT_PTRACED;
 	if (seize)
 		task->ptrace |= PT_SEIZED;
-	if (task_ns_capable(task, CAP_SYS_PTRACE))
+	if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
 		task->ptrace |= PT_PTRACE_CAP;
 
 	__ptrace_link(task, current);
diff --git a/kernel/sched.c b/kernel/sched.c
index b50b0f0c9aa9..5670028a9c16 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5409,7 +5409,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 		goto out_free_cpus_allowed;
 	}
 	retval = -EPERM;
-	if (!check_same_owner(p) && !task_ns_capable(p, CAP_SYS_NICE))
+	if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE))
 		goto out_unlock;
 
 	retval = security_task_setscheduler(p);
-- 
cgit v1.2.3


From 69f594a38967f4540ce7a29b3fd214e68a8330bd Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:15 -0500
Subject: ptrace: do not audit capability check when outputing /proc/pid/stat

Reading /proc/pid/stat of another process checks if one has ptrace permissions
on that process.  If one does have permissions it outputs some data about the
process which might have security and attack implications.  If the current
task does not have ptrace permissions the read still works, but those fields
are filled with inocuous (0) values.  Since this check and a subsequent denial
is not a violation of the security policy we should not audit such denials.

This can be quite useful to removing ptrace broadly across a system without
flooding the logs when ps is run or something which harmlessly walks proc.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
---
 fs/proc/array.c          |  2 +-
 include/linux/ptrace.h   |  5 +++--
 kernel/ptrace.c          | 12 ++++++++++--
 security/selinux/hooks.c |  2 +-
 4 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3a1dafd228d1..ddffd7a88b97 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -380,7 +380,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 	state = *get_task_state(task);
 	vsize = eip = esp = 0;
-	permitted = ptrace_may_access(task, PTRACE_MODE_READ);
+	permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT);
 	mm = get_task_mm(task);
 	if (mm) {
 		vsize = task_vsize(mm);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 800f113bea66..a27e56ca41a4 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -127,8 +127,9 @@ extern void __ptrace_link(struct task_struct *child,
 			  struct task_struct *new_parent);
 extern void __ptrace_unlink(struct task_struct *child);
 extern void exit_ptrace(struct task_struct *tracer);
-#define PTRACE_MODE_READ   1
-#define PTRACE_MODE_ATTACH 2
+#define PTRACE_MODE_READ	0x01
+#define PTRACE_MODE_ATTACH	0x02
+#define PTRACE_MODE_NOAUDIT	0x04
 /* Returns 0 on success, -errno on denial. */
 extern int __ptrace_may_access(struct task_struct *task, unsigned int mode);
 /* Returns true on success, false on denial. */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 210bbf045ee9..c890ac9a7962 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -161,6 +161,14 @@ int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 	return ret;
 }
 
+static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
+{
+	if (mode & PTRACE_MODE_NOAUDIT)
+		return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE);
+	else
+		return has_ns_capability(current, ns, CAP_SYS_PTRACE);
+}
+
 int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 {
 	const struct cred *cred = current_cred(), *tcred;
@@ -187,7 +195,7 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 	     cred->gid == tcred->sgid &&
 	     cred->gid == tcred->gid))
 		goto ok;
-	if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE))
+	if (ptrace_has_cap(tcred->user->user_ns, mode))
 		goto ok;
 	rcu_read_unlock();
 	return -EPERM;
@@ -196,7 +204,7 @@ ok:
 	smp_rmb();
 	if (task->mm)
 		dumpable = get_dumpable(task->mm);
-	if (!dumpable && !ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
+	if (!dumpable  && !ptrace_has_cap(task_user_ns(task), mode))
 		return -EPERM;
 
 	return security_ptrace_access_check(task, mode);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index c9605c4a2e08..14f94cd29c80 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1809,7 +1809,7 @@ static int selinux_ptrace_access_check(struct task_struct *child,
 	if (rc)
 		return rc;
 
-	if (mode == PTRACE_MODE_READ) {
+	if (mode & PTRACE_MODE_READ) {
 		u32 sid = current_sid();
 		u32 csid = task_sid(child);
 		return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL);
-- 
cgit v1.2.3


From fd778461524849afd035679030ae8e8873c72b81 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:16 -0500
Subject: security: remove the security_netlink_recv hook as it is equivalent
 to capable()

Once upon a time netlink was not sync and we had to get the effective
capabilities from the skb that was being received.  Today we instead get
the capabilities from the current task.  This has rendered the entire
purpose of the hook moot as it is now functionally equivalent to the
capable() call.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 drivers/scsi/scsi_netlink.c     |  2 +-
 include/linux/security.h        | 14 --------------
 kernel/audit.c                  |  4 ++--
 net/core/rtnetlink.c            |  2 +-
 net/decnet/netfilter/dn_rtmsg.c |  2 +-
 net/ipv4/netfilter/ip_queue.c   |  2 +-
 net/ipv6/netfilter/ip6_queue.c  |  2 +-
 net/netfilter/nfnetlink.c       |  2 +-
 net/netlink/genetlink.c         |  2 +-
 net/xfrm/xfrm_user.c            |  2 +-
 security/capability.c           |  1 -
 security/commoncap.c            |  8 --------
 security/security.c             |  6 ------
 security/selinux/hooks.c        | 19 -------------------
 14 files changed, 10 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index 26a8a45584ef..feee1cc39ea0 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -111,7 +111,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb)
 			goto next_msg;
 		}
 
-		if (security_netlink_recv(skb, CAP_SYS_ADMIN)) {
+		if (!capable(CAP_SYS_ADMIN)) {
 			err = -EPERM;
 			goto next_msg;
 		}
diff --git a/include/linux/security.h b/include/linux/security.h
index e345a9313a60..ba2d531c123f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -95,7 +95,6 @@ struct xfrm_user_sec_ctx;
 struct seq_file;
 
 extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb);
-extern int cap_netlink_recv(struct sk_buff *skb, int cap);
 
 void reset_security_ops(void);
 
@@ -792,12 +791,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@skb contains the sk_buff structure for the netlink message.
  *	Return 0 if the information was successfully saved and message
  *	is allowed to be transmitted.
- * @netlink_recv:
- *	Check permission before processing the received netlink message in
- *	@skb.
- *	@skb contains the sk_buff structure for the netlink message.
- *	@cap indicates the capability required
- *	Return 0 if permission is granted.
  *
  * Security hooks for Unix domain networking.
  *
@@ -1556,7 +1549,6 @@ struct security_operations {
 			  struct sembuf *sops, unsigned nsops, int alter);
 
 	int (*netlink_send) (struct sock *sk, struct sk_buff *skb);
-	int (*netlink_recv) (struct sk_buff *skb, int cap);
 
 	void (*d_instantiate) (struct dentry *dentry, struct inode *inode);
 
@@ -1803,7 +1795,6 @@ void security_d_instantiate(struct dentry *dentry, struct inode *inode);
 int security_getprocattr(struct task_struct *p, char *name, char **value);
 int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size);
 int security_netlink_send(struct sock *sk, struct sk_buff *skb);
-int security_netlink_recv(struct sk_buff *skb, int cap);
 int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
 int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
 void security_release_secctx(char *secdata, u32 seclen);
@@ -2478,11 +2469,6 @@ static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb)
 	return cap_netlink_send(sk, skb);
 }
 
-static inline int security_netlink_recv(struct sk_buff *skb, int cap)
-{
-	return cap_netlink_recv(skb, cap);
-}
-
 static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
 {
 	return -EOPNOTSUPP;
diff --git a/kernel/audit.c b/kernel/audit.c
index 0a1355ca3d79..f3ba55fa0b70 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -601,13 +601,13 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 	case AUDIT_TTY_SET:
 	case AUDIT_TRIM:
 	case AUDIT_MAKE_EQUIV:
-		if (security_netlink_recv(skb, CAP_AUDIT_CONTROL))
+		if (!capable(CAP_AUDIT_CONTROL))
 			err = -EPERM;
 		break;
 	case AUDIT_USER:
 	case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
 	case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2:
-		if (security_netlink_recv(skb, CAP_AUDIT_WRITE))
+		if (!capable(CAP_AUDIT_WRITE))
 			err = -EPERM;
 		break;
 	default:  /* bad msg */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 99d9e953fe39..d3a628196716 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1931,7 +1931,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	sz_idx = type>>2;
 	kind = type&3;
 
-	if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (kind != 2 && !capable(CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 69975e0bcdea..1531135130db 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -108,7 +108,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
 	if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
 		return;
 
-	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN))
 		RCV_SKB_FAIL(-EPERM);
 
 	/* Eventually we might send routing messages too */
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index e59aabd0eae4..ffabb2674718 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -430,7 +430,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
 	if (type <= IPQM_BASE)
 		return;
 
-	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN))
 		RCV_SKB_FAIL(-EPERM);
 
 	spin_lock_bh(&queue_lock);
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index e63c3972a739..5e5ce778be7f 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -431,7 +431,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
 	if (type <= IPQM_BASE)
 		return;
 
-	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN))
 		RCV_SKB_FAIL(-EPERM);
 
 	spin_lock_bh(&queue_lock);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 1905976b5135..e6c2b8f32180 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -130,7 +130,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	const struct nfnetlink_subsystem *ss;
 	int type, err;
 
-	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
 	/* All the messages must at least contain nfgenmsg */
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 482fa571b4ee..05fedbf489a5 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -516,7 +516,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		return -EOPNOTSUPP;
 
 	if ((ops->flags & GENL_ADMIN_PERM) &&
-	    security_netlink_recv(skb, CAP_NET_ADMIN))
+	    !capable(CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 0256b8a0a7cf..71de86698efa 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2290,7 +2290,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	link = &xfrm_dispatch[type];
 
 	/* All operations require privileges, even GET */
-	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
 	if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
diff --git a/security/capability.c b/security/capability.c
index 2984ea4f776f..a2c064d10448 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -999,7 +999,6 @@ void __init security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, sem_semctl);
 	set_to_cap_if_null(ops, sem_semop);
 	set_to_cap_if_null(ops, netlink_send);
-	set_to_cap_if_null(ops, netlink_recv);
 	set_to_cap_if_null(ops, d_instantiate);
 	set_to_cap_if_null(ops, getprocattr);
 	set_to_cap_if_null(ops, setprocattr);
diff --git a/security/commoncap.c b/security/commoncap.c
index 89f02ff66af9..7817a763444d 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -56,14 +56,6 @@ int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
-int cap_netlink_recv(struct sk_buff *skb, int cap)
-{
-	if (!cap_raised(current_cap(), cap))
-		return -EPERM;
-	return 0;
-}
-EXPORT_SYMBOL(cap_netlink_recv);
-
 /**
  * cap_capable - Determine whether a task has a particular effective capability
  * @cred: The credentials to use
diff --git a/security/security.c b/security/security.c
index 8900c5c4db5c..85481a9c5632 100644
--- a/security/security.c
+++ b/security/security.c
@@ -922,12 +922,6 @@ int security_netlink_send(struct sock *sk, struct sk_buff *skb)
 	return security_ops->netlink_send(sk, skb);
 }
 
-int security_netlink_recv(struct sk_buff *skb, int cap)
-{
-	return security_ops->netlink_recv(skb, cap);
-}
-EXPORT_SYMBOL(security_netlink_recv);
-
 int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
 {
 	return security_ops->secid_to_secctx(secid, secdata, seclen);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 14f94cd29c80..3e37d25a9bbe 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4713,24 +4713,6 @@ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb)
 	return selinux_nlmsg_perm(sk, skb);
 }
 
-static int selinux_netlink_recv(struct sk_buff *skb, int capability)
-{
-	int err;
-	struct common_audit_data ad;
-	u32 sid;
-
-	err = cap_netlink_recv(skb, capability);
-	if (err)
-		return err;
-
-	COMMON_AUDIT_DATA_INIT(&ad, CAP);
-	ad.u.cap = capability;
-
-	security_task_getsecid(current, &sid);
-	return avc_has_perm(sid, sid, SECCLASS_CAPABILITY,
-			    CAP_TO_MASK(capability), &ad);
-}
-
 static int ipc_alloc_security(struct task_struct *task,
 			      struct kern_ipc_perm *perm,
 			      u16 sclass)
@@ -5459,7 +5441,6 @@ static struct security_operations selinux_ops = {
 	.vm_enough_memory =		selinux_vm_enough_memory,
 
 	.netlink_send =			selinux_netlink_send,
-	.netlink_recv =			selinux_netlink_recv,
 
 	.bprm_set_creds =		selinux_bprm_set_creds,
 	.bprm_committing_creds =	selinux_bprm_committing_creds,
-- 
cgit v1.2.3


From f423e5ba76e7e4a6fcb4836b4f072d1fdebba8b5 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:16 -0500
Subject: capabilities: remove __cap_full_set definition

In 5163b583a036b103c3cec7171d6731c125773ed6 I removed __cap_full_set but
forgot to remove it from a header.  Do that.

Reported-by: Kornilios Kourtis <kkourt@cslab.ece.ntua.gr>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/capability.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index e3e8d9cb9b08..d527b2880331 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -379,7 +379,6 @@ struct user_namespace;
 struct user_namespace *current_user_ns(void);
 
 extern const kernel_cap_t __cap_empty_set;
-extern const kernel_cap_t __cap_full_set;
 extern const kernel_cap_t __cap_init_eff_set;
 
 /*
-- 
cgit v1.2.3


From c78f2b64963654419a8cd3b7e264251860e9f9eb Mon Sep 17 00:00:00 2001
From: Rhyland Klein <rklein@nvidia.com>
Date: Mon, 5 Dec 2011 17:50:45 -0800
Subject: bq20z75: Rename to sbs-battery

This driver for the bq20z75 implemented the register spec defined
by the SBS standard. As this is not unique to this the TI part this
was originally written for, we can generalize this driver to
show its support for any SBS compliant battery.

Signed-off-by: Rhyland Klein <rklein@nvidia.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/Kconfig             |   8 +-
 drivers/power/Makefile            |   2 +-
 drivers/power/bq20z75.c           | 871 --------------------------------------
 drivers/power/sbs-battery.c       | 871 ++++++++++++++++++++++++++++++++++++++
 include/linux/power/bq20z75.h     |  42 --
 include/linux/power/sbs-battery.h |  42 ++
 6 files changed, 918 insertions(+), 918 deletions(-)
 delete mode 100644 drivers/power/bq20z75.c
 create mode 100644 drivers/power/sbs-battery.c
 delete mode 100644 include/linux/power/bq20z75.h
 create mode 100644 include/linux/power/sbs-battery.h

(limited to 'include/linux')

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 3bd2ed86fea2..e24485f35384 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -116,12 +116,12 @@ config BATTERY_WM97XX
 	help
 	  Say Y to enable support for battery measured by WM97xx aux port.
 
-config BATTERY_BQ20Z75
-        tristate "TI BQ20z75 gas gauge"
+config BATTERY_SBS
+        tristate "SBS Compliant gas gauge"
         depends on I2C
         help
-         Say Y to include support for TI BQ20z75 SBS-compliant
-         gas gauge and protection IC.
+	  Say Y to include support for SBS battery driver for SBS-compliant
+	  gas gauges.
 
 config BATTERY_BQ27x00
 	tristate "BQ27x00 battery driver"
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index 9a78b1dd570b..9c3bbf76a2bf 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -22,7 +22,7 @@ obj-$(CONFIG_BATTERY_OLPC)	+= olpc_battery.o
 obj-$(CONFIG_BATTERY_TOSA)	+= tosa_battery.o
 obj-$(CONFIG_BATTERY_COLLIE)	+= collie_battery.o
 obj-$(CONFIG_BATTERY_WM97XX)	+= wm97xx_battery.o
-obj-$(CONFIG_BATTERY_BQ20Z75)	+= bq20z75.o
+obj-$(CONFIG_BATTERY_SBS)	+= sbs-battery.o
 obj-$(CONFIG_BATTERY_BQ27x00)	+= bq27x00_battery.o
 obj-$(CONFIG_BATTERY_DA9030)	+= da9030_battery.o
 obj-$(CONFIG_BATTERY_MAX17040)	+= max17040_battery.o
diff --git a/drivers/power/bq20z75.c b/drivers/power/bq20z75.c
deleted file mode 100644
index ce95ff791016..000000000000
--- a/drivers/power/bq20z75.c
+++ /dev/null
@@ -1,871 +0,0 @@
-/*
- * Gas Gauge driver for TI's BQ20Z75
- *
- * Copyright (c) 2010, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/power_supply.h>
-#include <linux/i2c.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/gpio.h>
-
-#include <linux/power/bq20z75.h>
-
-enum {
-	REG_MANUFACTURER_DATA,
-	REG_TEMPERATURE,
-	REG_VOLTAGE,
-	REG_CURRENT,
-	REG_CAPACITY,
-	REG_TIME_TO_EMPTY,
-	REG_TIME_TO_FULL,
-	REG_STATUS,
-	REG_CYCLE_COUNT,
-	REG_SERIAL_NUMBER,
-	REG_REMAINING_CAPACITY,
-	REG_REMAINING_CAPACITY_CHARGE,
-	REG_FULL_CHARGE_CAPACITY,
-	REG_FULL_CHARGE_CAPACITY_CHARGE,
-	REG_DESIGN_CAPACITY,
-	REG_DESIGN_CAPACITY_CHARGE,
-	REG_DESIGN_VOLTAGE,
-};
-
-/* Battery Mode defines */
-#define BATTERY_MODE_OFFSET		0x03
-#define BATTERY_MODE_MASK		0x8000
-enum bq20z75_battery_mode {
-	BATTERY_MODE_AMPS,
-	BATTERY_MODE_WATTS
-};
-
-/* manufacturer access defines */
-#define MANUFACTURER_ACCESS_STATUS	0x0006
-#define MANUFACTURER_ACCESS_SLEEP	0x0011
-
-/* battery status value bits */
-#define BATTERY_DISCHARGING		0x40
-#define BATTERY_FULL_CHARGED		0x20
-#define BATTERY_FULL_DISCHARGED		0x10
-
-#define BQ20Z75_DATA(_psp, _addr, _min_value, _max_value) { \
-	.psp = _psp, \
-	.addr = _addr, \
-	.min_value = _min_value, \
-	.max_value = _max_value, \
-}
-
-static const struct bq20z75_device_data {
-	enum power_supply_property psp;
-	u8 addr;
-	int min_value;
-	int max_value;
-} bq20z75_data[] = {
-	[REG_MANUFACTURER_DATA] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_PRESENT, 0x00, 0, 65535),
-	[REG_TEMPERATURE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_TEMP, 0x08, 0, 65535),
-	[REG_VOLTAGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_VOLTAGE_NOW, 0x09, 0, 20000),
-	[REG_CURRENT] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CURRENT_NOW, 0x0A, -32768,
-			32767),
-	[REG_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CAPACITY, 0x0E, 0, 100),
-	[REG_REMAINING_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_NOW, 0x0F, 0, 65535),
-	[REG_REMAINING_CAPACITY_CHARGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_NOW, 0x0F, 0, 65535),
-	[REG_FULL_CHARGE_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_FULL, 0x10, 0, 65535),
-	[REG_FULL_CHARGE_CAPACITY_CHARGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_FULL, 0x10, 0, 65535),
-	[REG_TIME_TO_EMPTY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG, 0x12, 0,
-			65535),
-	[REG_TIME_TO_FULL] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_TIME_TO_FULL_AVG, 0x13, 0,
-			65535),
-	[REG_STATUS] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_STATUS, 0x16, 0, 65535),
-	[REG_CYCLE_COUNT] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CYCLE_COUNT, 0x17, 0, 65535),
-	[REG_DESIGN_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, 0x18, 0,
-			65535),
-	[REG_DESIGN_CAPACITY_CHARGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, 0x18, 0,
-			65535),
-	[REG_DESIGN_VOLTAGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, 0x19, 0,
-			65535),
-	[REG_SERIAL_NUMBER] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_SERIAL_NUMBER, 0x1C, 0, 65535),
-};
-
-static enum power_supply_property bq20z75_properties[] = {
-	POWER_SUPPLY_PROP_STATUS,
-	POWER_SUPPLY_PROP_HEALTH,
-	POWER_SUPPLY_PROP_PRESENT,
-	POWER_SUPPLY_PROP_TECHNOLOGY,
-	POWER_SUPPLY_PROP_CYCLE_COUNT,
-	POWER_SUPPLY_PROP_VOLTAGE_NOW,
-	POWER_SUPPLY_PROP_CURRENT_NOW,
-	POWER_SUPPLY_PROP_CAPACITY,
-	POWER_SUPPLY_PROP_TEMP,
-	POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG,
-	POWER_SUPPLY_PROP_TIME_TO_FULL_AVG,
-	POWER_SUPPLY_PROP_SERIAL_NUMBER,
-	POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
-	POWER_SUPPLY_PROP_ENERGY_NOW,
-	POWER_SUPPLY_PROP_ENERGY_FULL,
-	POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN,
-	POWER_SUPPLY_PROP_CHARGE_NOW,
-	POWER_SUPPLY_PROP_CHARGE_FULL,
-	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
-};
-
-struct bq20z75_info {
-	struct i2c_client		*client;
-	struct power_supply		power_supply;
-	struct bq20z75_platform_data	*pdata;
-	bool				is_present;
-	bool				gpio_detect;
-	bool				enable_detection;
-	int				irq;
-	int				last_state;
-	int				poll_time;
-	struct delayed_work		work;
-	int				ignore_changes;
-};
-
-static int bq20z75_read_word_data(struct i2c_client *client, u8 address)
-{
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
-	s32 ret = 0;
-	int retries = 1;
-
-	if (bq20z75_device->pdata)
-		retries = max(bq20z75_device->pdata->i2c_retry_count + 1, 1);
-
-	while (retries > 0) {
-		ret = i2c_smbus_read_word_data(client, address);
-		if (ret >= 0)
-			break;
-		retries--;
-	}
-
-	if (ret < 0) {
-		dev_dbg(&client->dev,
-			"%s: i2c read at address 0x%x failed\n",
-			__func__, address);
-		return ret;
-	}
-
-	return le16_to_cpu(ret);
-}
-
-static int bq20z75_write_word_data(struct i2c_client *client, u8 address,
-	u16 value)
-{
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
-	s32 ret = 0;
-	int retries = 1;
-
-	if (bq20z75_device->pdata)
-		retries = max(bq20z75_device->pdata->i2c_retry_count + 1, 1);
-
-	while (retries > 0) {
-		ret = i2c_smbus_write_word_data(client, address,
-			le16_to_cpu(value));
-		if (ret >= 0)
-			break;
-		retries--;
-	}
-
-	if (ret < 0) {
-		dev_dbg(&client->dev,
-			"%s: i2c write to address 0x%x failed\n",
-			__func__, address);
-		return ret;
-	}
-
-	return 0;
-}
-
-static int bq20z75_get_battery_presence_and_health(
-	struct i2c_client *client, enum power_supply_property psp,
-	union power_supply_propval *val)
-{
-	s32 ret;
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
-
-	if (psp == POWER_SUPPLY_PROP_PRESENT &&
-		bq20z75_device->gpio_detect) {
-		ret = gpio_get_value(
-			bq20z75_device->pdata->battery_detect);
-		if (ret == bq20z75_device->pdata->battery_detect_present)
-			val->intval = 1;
-		else
-			val->intval = 0;
-		bq20z75_device->is_present = val->intval;
-		return ret;
-	}
-
-	/* Write to ManufacturerAccess with
-	 * ManufacturerAccess command and then
-	 * read the status */
-	ret = bq20z75_write_word_data(client,
-		bq20z75_data[REG_MANUFACTURER_DATA].addr,
-		MANUFACTURER_ACCESS_STATUS);
-	if (ret < 0) {
-		if (psp == POWER_SUPPLY_PROP_PRESENT)
-			val->intval = 0; /* battery removed */
-		return ret;
-	}
-
-	ret = bq20z75_read_word_data(client,
-		bq20z75_data[REG_MANUFACTURER_DATA].addr);
-	if (ret < 0)
-		return ret;
-
-	if (ret < bq20z75_data[REG_MANUFACTURER_DATA].min_value ||
-	    ret > bq20z75_data[REG_MANUFACTURER_DATA].max_value) {
-		val->intval = 0;
-		return 0;
-	}
-
-	/* Mask the upper nibble of 2nd byte and
-	 * lower byte of response then
-	 * shift the result by 8 to get status*/
-	ret &= 0x0F00;
-	ret >>= 8;
-	if (psp == POWER_SUPPLY_PROP_PRESENT) {
-		if (ret == 0x0F)
-			/* battery removed */
-			val->intval = 0;
-		else
-			val->intval = 1;
-	} else if (psp == POWER_SUPPLY_PROP_HEALTH) {
-		if (ret == 0x09)
-			val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
-		else if (ret == 0x0B)
-			val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
-		else if (ret == 0x0C)
-			val->intval = POWER_SUPPLY_HEALTH_DEAD;
-		else
-			val->intval = POWER_SUPPLY_HEALTH_GOOD;
-	}
-
-	return 0;
-}
-
-static int bq20z75_get_battery_property(struct i2c_client *client,
-	int reg_offset, enum power_supply_property psp,
-	union power_supply_propval *val)
-{
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
-	s32 ret;
-
-	ret = bq20z75_read_word_data(client,
-		bq20z75_data[reg_offset].addr);
-	if (ret < 0)
-		return ret;
-
-	/* returned values are 16 bit */
-	if (bq20z75_data[reg_offset].min_value < 0)
-		ret = (s16)ret;
-
-	if (ret >= bq20z75_data[reg_offset].min_value &&
-	    ret <= bq20z75_data[reg_offset].max_value) {
-		val->intval = ret;
-		if (psp != POWER_SUPPLY_PROP_STATUS)
-			return 0;
-
-		if (ret & BATTERY_FULL_CHARGED)
-			val->intval = POWER_SUPPLY_STATUS_FULL;
-		else if (ret & BATTERY_FULL_DISCHARGED)
-			val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
-		else if (ret & BATTERY_DISCHARGING)
-			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
-		else
-			val->intval = POWER_SUPPLY_STATUS_CHARGING;
-
-		if (bq20z75_device->poll_time == 0)
-			bq20z75_device->last_state = val->intval;
-		else if (bq20z75_device->last_state != val->intval) {
-			cancel_delayed_work_sync(&bq20z75_device->work);
-			power_supply_changed(&bq20z75_device->power_supply);
-			bq20z75_device->poll_time = 0;
-		}
-	} else {
-		if (psp == POWER_SUPPLY_PROP_STATUS)
-			val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
-		else
-			val->intval = 0;
-	}
-
-	return 0;
-}
-
-static void  bq20z75_unit_adjustment(struct i2c_client *client,
-	enum power_supply_property psp, union power_supply_propval *val)
-{
-#define BASE_UNIT_CONVERSION		1000
-#define BATTERY_MODE_CAP_MULT_WATT	(10 * BASE_UNIT_CONVERSION)
-#define TIME_UNIT_CONVERSION		60
-#define TEMP_KELVIN_TO_CELSIUS		2731
-	switch (psp) {
-	case POWER_SUPPLY_PROP_ENERGY_NOW:
-	case POWER_SUPPLY_PROP_ENERGY_FULL:
-	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
-		/* bq20z75 provides energy in units of 10mWh.
-		 * Convert to µWh
-		 */
-		val->intval *= BATTERY_MODE_CAP_MULT_WATT;
-		break;
-
-	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
-	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
-	case POWER_SUPPLY_PROP_CURRENT_NOW:
-	case POWER_SUPPLY_PROP_CHARGE_NOW:
-	case POWER_SUPPLY_PROP_CHARGE_FULL:
-	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
-		val->intval *= BASE_UNIT_CONVERSION;
-		break;
-
-	case POWER_SUPPLY_PROP_TEMP:
-		/* bq20z75 provides battery temperature in 0.1K
-		 * so convert it to 0.1°C
-		 */
-		val->intval -= TEMP_KELVIN_TO_CELSIUS;
-		break;
-
-	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
-	case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
-		/* bq20z75 provides time to empty and time to full in minutes.
-		 * Convert to seconds
-		 */
-		val->intval *= TIME_UNIT_CONVERSION;
-		break;
-
-	default:
-		dev_dbg(&client->dev,
-			"%s: no need for unit conversion %d\n", __func__, psp);
-	}
-}
-
-static enum bq20z75_battery_mode
-bq20z75_set_battery_mode(struct i2c_client *client,
-	enum bq20z75_battery_mode mode)
-{
-	int ret, original_val;
-
-	original_val = bq20z75_read_word_data(client, BATTERY_MODE_OFFSET);
-	if (original_val < 0)
-		return original_val;
-
-	if ((original_val & BATTERY_MODE_MASK) == mode)
-		return mode;
-
-	if (mode == BATTERY_MODE_AMPS)
-		ret = original_val & ~BATTERY_MODE_MASK;
-	else
-		ret = original_val | BATTERY_MODE_MASK;
-
-	ret = bq20z75_write_word_data(client, BATTERY_MODE_OFFSET, ret);
-	if (ret < 0)
-		return ret;
-
-	return original_val & BATTERY_MODE_MASK;
-}
-
-static int bq20z75_get_battery_capacity(struct i2c_client *client,
-	int reg_offset, enum power_supply_property psp,
-	union power_supply_propval *val)
-{
-	s32 ret;
-	enum bq20z75_battery_mode mode = BATTERY_MODE_WATTS;
-
-	if (power_supply_is_amp_property(psp))
-		mode = BATTERY_MODE_AMPS;
-
-	mode = bq20z75_set_battery_mode(client, mode);
-	if (mode < 0)
-		return mode;
-
-	ret = bq20z75_read_word_data(client, bq20z75_data[reg_offset].addr);
-	if (ret < 0)
-		return ret;
-
-	if (psp == POWER_SUPPLY_PROP_CAPACITY) {
-		/* bq20z75 spec says that this can be >100 %
-		* even if max value is 100 % */
-		val->intval = min(ret, 100);
-	} else
-		val->intval = ret;
-
-	ret = bq20z75_set_battery_mode(client, mode);
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-
-static char bq20z75_serial[5];
-static int bq20z75_get_battery_serial_number(struct i2c_client *client,
-	union power_supply_propval *val)
-{
-	int ret;
-
-	ret = bq20z75_read_word_data(client,
-		bq20z75_data[REG_SERIAL_NUMBER].addr);
-	if (ret < 0)
-		return ret;
-
-	ret = sprintf(bq20z75_serial, "%04x", ret);
-	val->strval = bq20z75_serial;
-
-	return 0;
-}
-
-static int bq20z75_get_property_index(struct i2c_client *client,
-	enum power_supply_property psp)
-{
-	int count;
-	for (count = 0; count < ARRAY_SIZE(bq20z75_data); count++)
-		if (psp == bq20z75_data[count].psp)
-			return count;
-
-	dev_warn(&client->dev,
-		"%s: Invalid Property - %d\n", __func__, psp);
-
-	return -EINVAL;
-}
-
-static int bq20z75_get_property(struct power_supply *psy,
-	enum power_supply_property psp,
-	union power_supply_propval *val)
-{
-	int ret = 0;
-	struct bq20z75_info *bq20z75_device = container_of(psy,
-				struct bq20z75_info, power_supply);
-	struct i2c_client *client = bq20z75_device->client;
-
-	switch (psp) {
-	case POWER_SUPPLY_PROP_PRESENT:
-	case POWER_SUPPLY_PROP_HEALTH:
-		ret = bq20z75_get_battery_presence_and_health(client, psp, val);
-		if (psp == POWER_SUPPLY_PROP_PRESENT)
-			return 0;
-		break;
-
-	case POWER_SUPPLY_PROP_TECHNOLOGY:
-		val->intval = POWER_SUPPLY_TECHNOLOGY_LION;
-		break;
-
-	case POWER_SUPPLY_PROP_ENERGY_NOW:
-	case POWER_SUPPLY_PROP_ENERGY_FULL:
-	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
-	case POWER_SUPPLY_PROP_CHARGE_NOW:
-	case POWER_SUPPLY_PROP_CHARGE_FULL:
-	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
-	case POWER_SUPPLY_PROP_CAPACITY:
-		ret = bq20z75_get_property_index(client, psp);
-		if (ret < 0)
-			break;
-
-		ret = bq20z75_get_battery_capacity(client, ret, psp, val);
-		break;
-
-	case POWER_SUPPLY_PROP_SERIAL_NUMBER:
-		ret = bq20z75_get_battery_serial_number(client, val);
-		break;
-
-	case POWER_SUPPLY_PROP_STATUS:
-	case POWER_SUPPLY_PROP_CYCLE_COUNT:
-	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
-	case POWER_SUPPLY_PROP_CURRENT_NOW:
-	case POWER_SUPPLY_PROP_TEMP:
-	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
-	case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
-	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
-		ret = bq20z75_get_property_index(client, psp);
-		if (ret < 0)
-			break;
-
-		ret = bq20z75_get_battery_property(client, ret, psp, val);
-		break;
-
-	default:
-		dev_err(&client->dev,
-			"%s: INVALID property\n", __func__);
-		return -EINVAL;
-	}
-
-	if (!bq20z75_device->enable_detection)
-		goto done;
-
-	if (!bq20z75_device->gpio_detect &&
-		bq20z75_device->is_present != (ret >= 0)) {
-		bq20z75_device->is_present = (ret >= 0);
-		power_supply_changed(&bq20z75_device->power_supply);
-	}
-
-done:
-	if (!ret) {
-		/* Convert units to match requirements for power supply class */
-		bq20z75_unit_adjustment(client, psp, val);
-	}
-
-	dev_dbg(&client->dev,
-		"%s: property = %d, value = %x\n", __func__, psp, val->intval);
-
-	if (ret && bq20z75_device->is_present)
-		return ret;
-
-	/* battery not present, so return NODATA for properties */
-	if (ret)
-		return -ENODATA;
-
-	return 0;
-}
-
-static irqreturn_t bq20z75_irq(int irq, void *devid)
-{
-	struct power_supply *battery = devid;
-
-	power_supply_changed(battery);
-
-	return IRQ_HANDLED;
-}
-
-static void bq20z75_external_power_changed(struct power_supply *psy)
-{
-	struct bq20z75_info *bq20z75_device;
-
-	bq20z75_device = container_of(psy, struct bq20z75_info, power_supply);
-
-	if (bq20z75_device->ignore_changes > 0) {
-		bq20z75_device->ignore_changes--;
-		return;
-	}
-
-	/* cancel outstanding work */
-	cancel_delayed_work_sync(&bq20z75_device->work);
-
-	schedule_delayed_work(&bq20z75_device->work, HZ);
-	bq20z75_device->poll_time = bq20z75_device->pdata->poll_retry_count;
-}
-
-static void bq20z75_delayed_work(struct work_struct *work)
-{
-	struct bq20z75_info *bq20z75_device;
-	s32 ret;
-
-	bq20z75_device = container_of(work, struct bq20z75_info, work.work);
-
-	ret = bq20z75_read_word_data(bq20z75_device->client,
-				     bq20z75_data[REG_STATUS].addr);
-	/* if the read failed, give up on this work */
-	if (ret < 0) {
-		bq20z75_device->poll_time = 0;
-		return;
-	}
-
-	if (ret & BATTERY_FULL_CHARGED)
-		ret = POWER_SUPPLY_STATUS_FULL;
-	else if (ret & BATTERY_FULL_DISCHARGED)
-		ret = POWER_SUPPLY_STATUS_NOT_CHARGING;
-	else if (ret & BATTERY_DISCHARGING)
-		ret = POWER_SUPPLY_STATUS_DISCHARGING;
-	else
-		ret = POWER_SUPPLY_STATUS_CHARGING;
-
-	if (bq20z75_device->last_state != ret) {
-		bq20z75_device->poll_time = 0;
-		power_supply_changed(&bq20z75_device->power_supply);
-		return;
-	}
-	if (bq20z75_device->poll_time > 0) {
-		schedule_delayed_work(&bq20z75_device->work, HZ);
-		bq20z75_device->poll_time--;
-		return;
-	}
-}
-
-#if defined(CONFIG_OF)
-
-#include <linux/of_device.h>
-#include <linux/of_gpio.h>
-
-static const struct of_device_id bq20z75_dt_ids[] = {
-	{ .compatible = "ti,bq20z75" },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, bq20z75_dt_ids);
-
-static struct bq20z75_platform_data *bq20z75_of_populate_pdata(
-		struct i2c_client *client)
-{
-	struct device_node *of_node = client->dev.of_node;
-	struct bq20z75_platform_data *pdata = client->dev.platform_data;
-	enum of_gpio_flags gpio_flags;
-	int rc;
-	u32 prop;
-
-	/* verify this driver matches this device */
-	if (!of_node)
-		return NULL;
-
-	/* if platform data is set, honor it */
-	if (pdata)
-		return pdata;
-
-	/* first make sure at least one property is set, otherwise
-	 * it won't change behavior from running without pdata.
-	 */
-	if (!of_get_property(of_node, "ti,i2c-retry-count", NULL) &&
-		!of_get_property(of_node, "ti,poll-retry-count", NULL) &&
-		!of_get_property(of_node, "ti,battery-detect-gpios", NULL))
-		goto of_out;
-
-	pdata = devm_kzalloc(&client->dev, sizeof(struct bq20z75_platform_data),
-				GFP_KERNEL);
-	if (!pdata)
-		goto of_out;
-
-	rc = of_property_read_u32(of_node, "ti,i2c-retry-count", &prop);
-	if (!rc)
-		pdata->i2c_retry_count = prop;
-
-	rc = of_property_read_u32(of_node, "ti,poll-retry-count", &prop);
-	if (!rc)
-		pdata->poll_retry_count = prop;
-
-	if (!of_get_property(of_node, "ti,battery-detect-gpios", NULL)) {
-		pdata->battery_detect = -1;
-		goto of_out;
-	}
-
-	pdata->battery_detect = of_get_named_gpio_flags(of_node,
-			"ti,battery-detect-gpios", 0, &gpio_flags);
-
-	if (gpio_flags & OF_GPIO_ACTIVE_LOW)
-		pdata->battery_detect_present = 0;
-	else
-		pdata->battery_detect_present = 1;
-
-of_out:
-	return pdata;
-}
-#else
-#define bq20z75_dt_ids NULL
-static struct bq20z75_platform_data *bq20z75_of_populate_pdata(
-	struct i2c_client *client)
-{
-	return client->dev.platform_data;
-}
-#endif
-
-static int __devinit bq20z75_probe(struct i2c_client *client,
-	const struct i2c_device_id *id)
-{
-	struct bq20z75_info *bq20z75_device;
-	struct bq20z75_platform_data *pdata = client->dev.platform_data;
-	int rc;
-	int irq;
-
-	bq20z75_device = kzalloc(sizeof(struct bq20z75_info), GFP_KERNEL);
-	if (!bq20z75_device)
-		return -ENOMEM;
-
-	bq20z75_device->client = client;
-	bq20z75_device->enable_detection = false;
-	bq20z75_device->gpio_detect = false;
-	bq20z75_device->power_supply.name = "battery";
-	bq20z75_device->power_supply.type = POWER_SUPPLY_TYPE_BATTERY;
-	bq20z75_device->power_supply.properties = bq20z75_properties;
-	bq20z75_device->power_supply.num_properties =
-		ARRAY_SIZE(bq20z75_properties);
-	bq20z75_device->power_supply.get_property = bq20z75_get_property;
-	/* ignore first notification of external change, it is generated
-	 * from the power_supply_register call back
-	 */
-	bq20z75_device->ignore_changes = 1;
-	bq20z75_device->last_state = POWER_SUPPLY_STATUS_UNKNOWN;
-	bq20z75_device->power_supply.external_power_changed =
-		bq20z75_external_power_changed;
-
-	pdata = bq20z75_of_populate_pdata(client);
-
-	if (pdata) {
-		bq20z75_device->gpio_detect =
-			gpio_is_valid(pdata->battery_detect);
-		bq20z75_device->pdata = pdata;
-	}
-
-	i2c_set_clientdata(client, bq20z75_device);
-
-	if (!bq20z75_device->gpio_detect)
-		goto skip_gpio;
-
-	rc = gpio_request(pdata->battery_detect, dev_name(&client->dev));
-	if (rc) {
-		dev_warn(&client->dev, "Failed to request gpio: %d\n", rc);
-		bq20z75_device->gpio_detect = false;
-		goto skip_gpio;
-	}
-
-	rc = gpio_direction_input(pdata->battery_detect);
-	if (rc) {
-		dev_warn(&client->dev, "Failed to get gpio as input: %d\n", rc);
-		gpio_free(pdata->battery_detect);
-		bq20z75_device->gpio_detect = false;
-		goto skip_gpio;
-	}
-
-	irq = gpio_to_irq(pdata->battery_detect);
-	if (irq <= 0) {
-		dev_warn(&client->dev, "Failed to get gpio as irq: %d\n", irq);
-		gpio_free(pdata->battery_detect);
-		bq20z75_device->gpio_detect = false;
-		goto skip_gpio;
-	}
-
-	rc = request_irq(irq, bq20z75_irq,
-		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-		dev_name(&client->dev), &bq20z75_device->power_supply);
-	if (rc) {
-		dev_warn(&client->dev, "Failed to request irq: %d\n", rc);
-		gpio_free(pdata->battery_detect);
-		bq20z75_device->gpio_detect = false;
-		goto skip_gpio;
-	}
-
-	bq20z75_device->irq = irq;
-
-skip_gpio:
-
-	rc = power_supply_register(&client->dev, &bq20z75_device->power_supply);
-	if (rc) {
-		dev_err(&client->dev,
-			"%s: Failed to register power supply\n", __func__);
-		goto exit_psupply;
-	}
-
-	dev_info(&client->dev,
-		"%s: battery gas gauge device registered\n", client->name);
-
-	INIT_DELAYED_WORK(&bq20z75_device->work, bq20z75_delayed_work);
-
-	bq20z75_device->enable_detection = true;
-
-	return 0;
-
-exit_psupply:
-	if (bq20z75_device->irq)
-		free_irq(bq20z75_device->irq, &bq20z75_device->power_supply);
-	if (bq20z75_device->gpio_detect)
-		gpio_free(pdata->battery_detect);
-
-	kfree(bq20z75_device);
-
-	return rc;
-}
-
-static int __devexit bq20z75_remove(struct i2c_client *client)
-{
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
-
-	if (bq20z75_device->irq)
-		free_irq(bq20z75_device->irq, &bq20z75_device->power_supply);
-	if (bq20z75_device->gpio_detect)
-		gpio_free(bq20z75_device->pdata->battery_detect);
-
-	power_supply_unregister(&bq20z75_device->power_supply);
-
-	cancel_delayed_work_sync(&bq20z75_device->work);
-
-	kfree(bq20z75_device);
-	bq20z75_device = NULL;
-
-	return 0;
-}
-
-#if defined CONFIG_PM
-static int bq20z75_suspend(struct i2c_client *client,
-	pm_message_t state)
-{
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
-	s32 ret;
-
-	if (bq20z75_device->poll_time > 0)
-		cancel_delayed_work_sync(&bq20z75_device->work);
-
-	/* write to manufacturer access with sleep command */
-	ret = bq20z75_write_word_data(client,
-		bq20z75_data[REG_MANUFACTURER_DATA].addr,
-		MANUFACTURER_ACCESS_SLEEP);
-	if (bq20z75_device->is_present && ret < 0)
-		return ret;
-
-	return 0;
-}
-#else
-#define bq20z75_suspend		NULL
-#endif
-/* any smbus transaction will wake up bq20z75 */
-#define bq20z75_resume		NULL
-
-static const struct i2c_device_id bq20z75_id[] = {
-	{ "bq20z75", 0 },
-	{}
-};
-MODULE_DEVICE_TABLE(i2c, bq20z75_id);
-
-static struct i2c_driver bq20z75_battery_driver = {
-	.probe		= bq20z75_probe,
-	.remove		= __devexit_p(bq20z75_remove),
-	.suspend	= bq20z75_suspend,
-	.resume		= bq20z75_resume,
-	.id_table	= bq20z75_id,
-	.driver = {
-		.name	= "bq20z75-battery",
-		.of_match_table = bq20z75_dt_ids,
-	},
-};
-
-static int __init bq20z75_battery_init(void)
-{
-	return i2c_add_driver(&bq20z75_battery_driver);
-}
-module_init(bq20z75_battery_init);
-
-static void __exit bq20z75_battery_exit(void)
-{
-	i2c_del_driver(&bq20z75_battery_driver);
-}
-module_exit(bq20z75_battery_exit);
-
-MODULE_DESCRIPTION("BQ20z75 battery monitor driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/power/sbs-battery.c b/drivers/power/sbs-battery.c
new file mode 100644
index 000000000000..ce95ff791016
--- /dev/null
+++ b/drivers/power/sbs-battery.c
@@ -0,0 +1,871 @@
+/*
+ * Gas Gauge driver for TI's BQ20Z75
+ *
+ * Copyright (c) 2010, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/power_supply.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+
+#include <linux/power/bq20z75.h>
+
+enum {
+	REG_MANUFACTURER_DATA,
+	REG_TEMPERATURE,
+	REG_VOLTAGE,
+	REG_CURRENT,
+	REG_CAPACITY,
+	REG_TIME_TO_EMPTY,
+	REG_TIME_TO_FULL,
+	REG_STATUS,
+	REG_CYCLE_COUNT,
+	REG_SERIAL_NUMBER,
+	REG_REMAINING_CAPACITY,
+	REG_REMAINING_CAPACITY_CHARGE,
+	REG_FULL_CHARGE_CAPACITY,
+	REG_FULL_CHARGE_CAPACITY_CHARGE,
+	REG_DESIGN_CAPACITY,
+	REG_DESIGN_CAPACITY_CHARGE,
+	REG_DESIGN_VOLTAGE,
+};
+
+/* Battery Mode defines */
+#define BATTERY_MODE_OFFSET		0x03
+#define BATTERY_MODE_MASK		0x8000
+enum bq20z75_battery_mode {
+	BATTERY_MODE_AMPS,
+	BATTERY_MODE_WATTS
+};
+
+/* manufacturer access defines */
+#define MANUFACTURER_ACCESS_STATUS	0x0006
+#define MANUFACTURER_ACCESS_SLEEP	0x0011
+
+/* battery status value bits */
+#define BATTERY_DISCHARGING		0x40
+#define BATTERY_FULL_CHARGED		0x20
+#define BATTERY_FULL_DISCHARGED		0x10
+
+#define BQ20Z75_DATA(_psp, _addr, _min_value, _max_value) { \
+	.psp = _psp, \
+	.addr = _addr, \
+	.min_value = _min_value, \
+	.max_value = _max_value, \
+}
+
+static const struct bq20z75_device_data {
+	enum power_supply_property psp;
+	u8 addr;
+	int min_value;
+	int max_value;
+} bq20z75_data[] = {
+	[REG_MANUFACTURER_DATA] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_PRESENT, 0x00, 0, 65535),
+	[REG_TEMPERATURE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_TEMP, 0x08, 0, 65535),
+	[REG_VOLTAGE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_VOLTAGE_NOW, 0x09, 0, 20000),
+	[REG_CURRENT] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CURRENT_NOW, 0x0A, -32768,
+			32767),
+	[REG_CAPACITY] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CAPACITY, 0x0E, 0, 100),
+	[REG_REMAINING_CAPACITY] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_NOW, 0x0F, 0, 65535),
+	[REG_REMAINING_CAPACITY_CHARGE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_NOW, 0x0F, 0, 65535),
+	[REG_FULL_CHARGE_CAPACITY] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_FULL, 0x10, 0, 65535),
+	[REG_FULL_CHARGE_CAPACITY_CHARGE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_FULL, 0x10, 0, 65535),
+	[REG_TIME_TO_EMPTY] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG, 0x12, 0,
+			65535),
+	[REG_TIME_TO_FULL] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_TIME_TO_FULL_AVG, 0x13, 0,
+			65535),
+	[REG_STATUS] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_STATUS, 0x16, 0, 65535),
+	[REG_CYCLE_COUNT] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CYCLE_COUNT, 0x17, 0, 65535),
+	[REG_DESIGN_CAPACITY] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, 0x18, 0,
+			65535),
+	[REG_DESIGN_CAPACITY_CHARGE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, 0x18, 0,
+			65535),
+	[REG_DESIGN_VOLTAGE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, 0x19, 0,
+			65535),
+	[REG_SERIAL_NUMBER] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_SERIAL_NUMBER, 0x1C, 0, 65535),
+};
+
+static enum power_supply_property bq20z75_properties[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_CYCLE_COUNT,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_TEMP,
+	POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG,
+	POWER_SUPPLY_PROP_TIME_TO_FULL_AVG,
+	POWER_SUPPLY_PROP_SERIAL_NUMBER,
+	POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
+	POWER_SUPPLY_PROP_ENERGY_NOW,
+	POWER_SUPPLY_PROP_ENERGY_FULL,
+	POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN,
+	POWER_SUPPLY_PROP_CHARGE_NOW,
+	POWER_SUPPLY_PROP_CHARGE_FULL,
+	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
+};
+
+struct bq20z75_info {
+	struct i2c_client		*client;
+	struct power_supply		power_supply;
+	struct bq20z75_platform_data	*pdata;
+	bool				is_present;
+	bool				gpio_detect;
+	bool				enable_detection;
+	int				irq;
+	int				last_state;
+	int				poll_time;
+	struct delayed_work		work;
+	int				ignore_changes;
+};
+
+static int bq20z75_read_word_data(struct i2c_client *client, u8 address)
+{
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	s32 ret = 0;
+	int retries = 1;
+
+	if (bq20z75_device->pdata)
+		retries = max(bq20z75_device->pdata->i2c_retry_count + 1, 1);
+
+	while (retries > 0) {
+		ret = i2c_smbus_read_word_data(client, address);
+		if (ret >= 0)
+			break;
+		retries--;
+	}
+
+	if (ret < 0) {
+		dev_dbg(&client->dev,
+			"%s: i2c read at address 0x%x failed\n",
+			__func__, address);
+		return ret;
+	}
+
+	return le16_to_cpu(ret);
+}
+
+static int bq20z75_write_word_data(struct i2c_client *client, u8 address,
+	u16 value)
+{
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	s32 ret = 0;
+	int retries = 1;
+
+	if (bq20z75_device->pdata)
+		retries = max(bq20z75_device->pdata->i2c_retry_count + 1, 1);
+
+	while (retries > 0) {
+		ret = i2c_smbus_write_word_data(client, address,
+			le16_to_cpu(value));
+		if (ret >= 0)
+			break;
+		retries--;
+	}
+
+	if (ret < 0) {
+		dev_dbg(&client->dev,
+			"%s: i2c write to address 0x%x failed\n",
+			__func__, address);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int bq20z75_get_battery_presence_and_health(
+	struct i2c_client *client, enum power_supply_property psp,
+	union power_supply_propval *val)
+{
+	s32 ret;
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+
+	if (psp == POWER_SUPPLY_PROP_PRESENT &&
+		bq20z75_device->gpio_detect) {
+		ret = gpio_get_value(
+			bq20z75_device->pdata->battery_detect);
+		if (ret == bq20z75_device->pdata->battery_detect_present)
+			val->intval = 1;
+		else
+			val->intval = 0;
+		bq20z75_device->is_present = val->intval;
+		return ret;
+	}
+
+	/* Write to ManufacturerAccess with
+	 * ManufacturerAccess command and then
+	 * read the status */
+	ret = bq20z75_write_word_data(client,
+		bq20z75_data[REG_MANUFACTURER_DATA].addr,
+		MANUFACTURER_ACCESS_STATUS);
+	if (ret < 0) {
+		if (psp == POWER_SUPPLY_PROP_PRESENT)
+			val->intval = 0; /* battery removed */
+		return ret;
+	}
+
+	ret = bq20z75_read_word_data(client,
+		bq20z75_data[REG_MANUFACTURER_DATA].addr);
+	if (ret < 0)
+		return ret;
+
+	if (ret < bq20z75_data[REG_MANUFACTURER_DATA].min_value ||
+	    ret > bq20z75_data[REG_MANUFACTURER_DATA].max_value) {
+		val->intval = 0;
+		return 0;
+	}
+
+	/* Mask the upper nibble of 2nd byte and
+	 * lower byte of response then
+	 * shift the result by 8 to get status*/
+	ret &= 0x0F00;
+	ret >>= 8;
+	if (psp == POWER_SUPPLY_PROP_PRESENT) {
+		if (ret == 0x0F)
+			/* battery removed */
+			val->intval = 0;
+		else
+			val->intval = 1;
+	} else if (psp == POWER_SUPPLY_PROP_HEALTH) {
+		if (ret == 0x09)
+			val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
+		else if (ret == 0x0B)
+			val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
+		else if (ret == 0x0C)
+			val->intval = POWER_SUPPLY_HEALTH_DEAD;
+		else
+			val->intval = POWER_SUPPLY_HEALTH_GOOD;
+	}
+
+	return 0;
+}
+
+static int bq20z75_get_battery_property(struct i2c_client *client,
+	int reg_offset, enum power_supply_property psp,
+	union power_supply_propval *val)
+{
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	s32 ret;
+
+	ret = bq20z75_read_word_data(client,
+		bq20z75_data[reg_offset].addr);
+	if (ret < 0)
+		return ret;
+
+	/* returned values are 16 bit */
+	if (bq20z75_data[reg_offset].min_value < 0)
+		ret = (s16)ret;
+
+	if (ret >= bq20z75_data[reg_offset].min_value &&
+	    ret <= bq20z75_data[reg_offset].max_value) {
+		val->intval = ret;
+		if (psp != POWER_SUPPLY_PROP_STATUS)
+			return 0;
+
+		if (ret & BATTERY_FULL_CHARGED)
+			val->intval = POWER_SUPPLY_STATUS_FULL;
+		else if (ret & BATTERY_FULL_DISCHARGED)
+			val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		else if (ret & BATTERY_DISCHARGING)
+			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		else
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+
+		if (bq20z75_device->poll_time == 0)
+			bq20z75_device->last_state = val->intval;
+		else if (bq20z75_device->last_state != val->intval) {
+			cancel_delayed_work_sync(&bq20z75_device->work);
+			power_supply_changed(&bq20z75_device->power_supply);
+			bq20z75_device->poll_time = 0;
+		}
+	} else {
+		if (psp == POWER_SUPPLY_PROP_STATUS)
+			val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
+		else
+			val->intval = 0;
+	}
+
+	return 0;
+}
+
+static void  bq20z75_unit_adjustment(struct i2c_client *client,
+	enum power_supply_property psp, union power_supply_propval *val)
+{
+#define BASE_UNIT_CONVERSION		1000
+#define BATTERY_MODE_CAP_MULT_WATT	(10 * BASE_UNIT_CONVERSION)
+#define TIME_UNIT_CONVERSION		60
+#define TEMP_KELVIN_TO_CELSIUS		2731
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ENERGY_NOW:
+	case POWER_SUPPLY_PROP_ENERGY_FULL:
+	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+		/* bq20z75 provides energy in units of 10mWh.
+		 * Convert to µWh
+		 */
+		val->intval *= BATTERY_MODE_CAP_MULT_WATT;
+		break;
+
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+		val->intval *= BASE_UNIT_CONVERSION;
+		break;
+
+	case POWER_SUPPLY_PROP_TEMP:
+		/* bq20z75 provides battery temperature in 0.1K
+		 * so convert it to 0.1°C
+		 */
+		val->intval -= TEMP_KELVIN_TO_CELSIUS;
+		break;
+
+	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
+	case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
+		/* bq20z75 provides time to empty and time to full in minutes.
+		 * Convert to seconds
+		 */
+		val->intval *= TIME_UNIT_CONVERSION;
+		break;
+
+	default:
+		dev_dbg(&client->dev,
+			"%s: no need for unit conversion %d\n", __func__, psp);
+	}
+}
+
+static enum bq20z75_battery_mode
+bq20z75_set_battery_mode(struct i2c_client *client,
+	enum bq20z75_battery_mode mode)
+{
+	int ret, original_val;
+
+	original_val = bq20z75_read_word_data(client, BATTERY_MODE_OFFSET);
+	if (original_val < 0)
+		return original_val;
+
+	if ((original_val & BATTERY_MODE_MASK) == mode)
+		return mode;
+
+	if (mode == BATTERY_MODE_AMPS)
+		ret = original_val & ~BATTERY_MODE_MASK;
+	else
+		ret = original_val | BATTERY_MODE_MASK;
+
+	ret = bq20z75_write_word_data(client, BATTERY_MODE_OFFSET, ret);
+	if (ret < 0)
+		return ret;
+
+	return original_val & BATTERY_MODE_MASK;
+}
+
+static int bq20z75_get_battery_capacity(struct i2c_client *client,
+	int reg_offset, enum power_supply_property psp,
+	union power_supply_propval *val)
+{
+	s32 ret;
+	enum bq20z75_battery_mode mode = BATTERY_MODE_WATTS;
+
+	if (power_supply_is_amp_property(psp))
+		mode = BATTERY_MODE_AMPS;
+
+	mode = bq20z75_set_battery_mode(client, mode);
+	if (mode < 0)
+		return mode;
+
+	ret = bq20z75_read_word_data(client, bq20z75_data[reg_offset].addr);
+	if (ret < 0)
+		return ret;
+
+	if (psp == POWER_SUPPLY_PROP_CAPACITY) {
+		/* bq20z75 spec says that this can be >100 %
+		* even if max value is 100 % */
+		val->intval = min(ret, 100);
+	} else
+		val->intval = ret;
+
+	ret = bq20z75_set_battery_mode(client, mode);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static char bq20z75_serial[5];
+static int bq20z75_get_battery_serial_number(struct i2c_client *client,
+	union power_supply_propval *val)
+{
+	int ret;
+
+	ret = bq20z75_read_word_data(client,
+		bq20z75_data[REG_SERIAL_NUMBER].addr);
+	if (ret < 0)
+		return ret;
+
+	ret = sprintf(bq20z75_serial, "%04x", ret);
+	val->strval = bq20z75_serial;
+
+	return 0;
+}
+
+static int bq20z75_get_property_index(struct i2c_client *client,
+	enum power_supply_property psp)
+{
+	int count;
+	for (count = 0; count < ARRAY_SIZE(bq20z75_data); count++)
+		if (psp == bq20z75_data[count].psp)
+			return count;
+
+	dev_warn(&client->dev,
+		"%s: Invalid Property - %d\n", __func__, psp);
+
+	return -EINVAL;
+}
+
+static int bq20z75_get_property(struct power_supply *psy,
+	enum power_supply_property psp,
+	union power_supply_propval *val)
+{
+	int ret = 0;
+	struct bq20z75_info *bq20z75_device = container_of(psy,
+				struct bq20z75_info, power_supply);
+	struct i2c_client *client = bq20z75_device->client;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_PRESENT:
+	case POWER_SUPPLY_PROP_HEALTH:
+		ret = bq20z75_get_battery_presence_and_health(client, psp, val);
+		if (psp == POWER_SUPPLY_PROP_PRESENT)
+			return 0;
+		break;
+
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = POWER_SUPPLY_TECHNOLOGY_LION;
+		break;
+
+	case POWER_SUPPLY_PROP_ENERGY_NOW:
+	case POWER_SUPPLY_PROP_ENERGY_FULL:
+	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+	case POWER_SUPPLY_PROP_CAPACITY:
+		ret = bq20z75_get_property_index(client, psp);
+		if (ret < 0)
+			break;
+
+		ret = bq20z75_get_battery_capacity(client, ret, psp, val);
+		break;
+
+	case POWER_SUPPLY_PROP_SERIAL_NUMBER:
+		ret = bq20z75_get_battery_serial_number(client, val);
+		break;
+
+	case POWER_SUPPLY_PROP_STATUS:
+	case POWER_SUPPLY_PROP_CYCLE_COUNT:
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+	case POWER_SUPPLY_PROP_TEMP:
+	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
+	case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+		ret = bq20z75_get_property_index(client, psp);
+		if (ret < 0)
+			break;
+
+		ret = bq20z75_get_battery_property(client, ret, psp, val);
+		break;
+
+	default:
+		dev_err(&client->dev,
+			"%s: INVALID property\n", __func__);
+		return -EINVAL;
+	}
+
+	if (!bq20z75_device->enable_detection)
+		goto done;
+
+	if (!bq20z75_device->gpio_detect &&
+		bq20z75_device->is_present != (ret >= 0)) {
+		bq20z75_device->is_present = (ret >= 0);
+		power_supply_changed(&bq20z75_device->power_supply);
+	}
+
+done:
+	if (!ret) {
+		/* Convert units to match requirements for power supply class */
+		bq20z75_unit_adjustment(client, psp, val);
+	}
+
+	dev_dbg(&client->dev,
+		"%s: property = %d, value = %x\n", __func__, psp, val->intval);
+
+	if (ret && bq20z75_device->is_present)
+		return ret;
+
+	/* battery not present, so return NODATA for properties */
+	if (ret)
+		return -ENODATA;
+
+	return 0;
+}
+
+static irqreturn_t bq20z75_irq(int irq, void *devid)
+{
+	struct power_supply *battery = devid;
+
+	power_supply_changed(battery);
+
+	return IRQ_HANDLED;
+}
+
+static void bq20z75_external_power_changed(struct power_supply *psy)
+{
+	struct bq20z75_info *bq20z75_device;
+
+	bq20z75_device = container_of(psy, struct bq20z75_info, power_supply);
+
+	if (bq20z75_device->ignore_changes > 0) {
+		bq20z75_device->ignore_changes--;
+		return;
+	}
+
+	/* cancel outstanding work */
+	cancel_delayed_work_sync(&bq20z75_device->work);
+
+	schedule_delayed_work(&bq20z75_device->work, HZ);
+	bq20z75_device->poll_time = bq20z75_device->pdata->poll_retry_count;
+}
+
+static void bq20z75_delayed_work(struct work_struct *work)
+{
+	struct bq20z75_info *bq20z75_device;
+	s32 ret;
+
+	bq20z75_device = container_of(work, struct bq20z75_info, work.work);
+
+	ret = bq20z75_read_word_data(bq20z75_device->client,
+				     bq20z75_data[REG_STATUS].addr);
+	/* if the read failed, give up on this work */
+	if (ret < 0) {
+		bq20z75_device->poll_time = 0;
+		return;
+	}
+
+	if (ret & BATTERY_FULL_CHARGED)
+		ret = POWER_SUPPLY_STATUS_FULL;
+	else if (ret & BATTERY_FULL_DISCHARGED)
+		ret = POWER_SUPPLY_STATUS_NOT_CHARGING;
+	else if (ret & BATTERY_DISCHARGING)
+		ret = POWER_SUPPLY_STATUS_DISCHARGING;
+	else
+		ret = POWER_SUPPLY_STATUS_CHARGING;
+
+	if (bq20z75_device->last_state != ret) {
+		bq20z75_device->poll_time = 0;
+		power_supply_changed(&bq20z75_device->power_supply);
+		return;
+	}
+	if (bq20z75_device->poll_time > 0) {
+		schedule_delayed_work(&bq20z75_device->work, HZ);
+		bq20z75_device->poll_time--;
+		return;
+	}
+}
+
+#if defined(CONFIG_OF)
+
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+
+static const struct of_device_id bq20z75_dt_ids[] = {
+	{ .compatible = "ti,bq20z75" },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, bq20z75_dt_ids);
+
+static struct bq20z75_platform_data *bq20z75_of_populate_pdata(
+		struct i2c_client *client)
+{
+	struct device_node *of_node = client->dev.of_node;
+	struct bq20z75_platform_data *pdata = client->dev.platform_data;
+	enum of_gpio_flags gpio_flags;
+	int rc;
+	u32 prop;
+
+	/* verify this driver matches this device */
+	if (!of_node)
+		return NULL;
+
+	/* if platform data is set, honor it */
+	if (pdata)
+		return pdata;
+
+	/* first make sure at least one property is set, otherwise
+	 * it won't change behavior from running without pdata.
+	 */
+	if (!of_get_property(of_node, "ti,i2c-retry-count", NULL) &&
+		!of_get_property(of_node, "ti,poll-retry-count", NULL) &&
+		!of_get_property(of_node, "ti,battery-detect-gpios", NULL))
+		goto of_out;
+
+	pdata = devm_kzalloc(&client->dev, sizeof(struct bq20z75_platform_data),
+				GFP_KERNEL);
+	if (!pdata)
+		goto of_out;
+
+	rc = of_property_read_u32(of_node, "ti,i2c-retry-count", &prop);
+	if (!rc)
+		pdata->i2c_retry_count = prop;
+
+	rc = of_property_read_u32(of_node, "ti,poll-retry-count", &prop);
+	if (!rc)
+		pdata->poll_retry_count = prop;
+
+	if (!of_get_property(of_node, "ti,battery-detect-gpios", NULL)) {
+		pdata->battery_detect = -1;
+		goto of_out;
+	}
+
+	pdata->battery_detect = of_get_named_gpio_flags(of_node,
+			"ti,battery-detect-gpios", 0, &gpio_flags);
+
+	if (gpio_flags & OF_GPIO_ACTIVE_LOW)
+		pdata->battery_detect_present = 0;
+	else
+		pdata->battery_detect_present = 1;
+
+of_out:
+	return pdata;
+}
+#else
+#define bq20z75_dt_ids NULL
+static struct bq20z75_platform_data *bq20z75_of_populate_pdata(
+	struct i2c_client *client)
+{
+	return client->dev.platform_data;
+}
+#endif
+
+static int __devinit bq20z75_probe(struct i2c_client *client,
+	const struct i2c_device_id *id)
+{
+	struct bq20z75_info *bq20z75_device;
+	struct bq20z75_platform_data *pdata = client->dev.platform_data;
+	int rc;
+	int irq;
+
+	bq20z75_device = kzalloc(sizeof(struct bq20z75_info), GFP_KERNEL);
+	if (!bq20z75_device)
+		return -ENOMEM;
+
+	bq20z75_device->client = client;
+	bq20z75_device->enable_detection = false;
+	bq20z75_device->gpio_detect = false;
+	bq20z75_device->power_supply.name = "battery";
+	bq20z75_device->power_supply.type = POWER_SUPPLY_TYPE_BATTERY;
+	bq20z75_device->power_supply.properties = bq20z75_properties;
+	bq20z75_device->power_supply.num_properties =
+		ARRAY_SIZE(bq20z75_properties);
+	bq20z75_device->power_supply.get_property = bq20z75_get_property;
+	/* ignore first notification of external change, it is generated
+	 * from the power_supply_register call back
+	 */
+	bq20z75_device->ignore_changes = 1;
+	bq20z75_device->last_state = POWER_SUPPLY_STATUS_UNKNOWN;
+	bq20z75_device->power_supply.external_power_changed =
+		bq20z75_external_power_changed;
+
+	pdata = bq20z75_of_populate_pdata(client);
+
+	if (pdata) {
+		bq20z75_device->gpio_detect =
+			gpio_is_valid(pdata->battery_detect);
+		bq20z75_device->pdata = pdata;
+	}
+
+	i2c_set_clientdata(client, bq20z75_device);
+
+	if (!bq20z75_device->gpio_detect)
+		goto skip_gpio;
+
+	rc = gpio_request(pdata->battery_detect, dev_name(&client->dev));
+	if (rc) {
+		dev_warn(&client->dev, "Failed to request gpio: %d\n", rc);
+		bq20z75_device->gpio_detect = false;
+		goto skip_gpio;
+	}
+
+	rc = gpio_direction_input(pdata->battery_detect);
+	if (rc) {
+		dev_warn(&client->dev, "Failed to get gpio as input: %d\n", rc);
+		gpio_free(pdata->battery_detect);
+		bq20z75_device->gpio_detect = false;
+		goto skip_gpio;
+	}
+
+	irq = gpio_to_irq(pdata->battery_detect);
+	if (irq <= 0) {
+		dev_warn(&client->dev, "Failed to get gpio as irq: %d\n", irq);
+		gpio_free(pdata->battery_detect);
+		bq20z75_device->gpio_detect = false;
+		goto skip_gpio;
+	}
+
+	rc = request_irq(irq, bq20z75_irq,
+		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+		dev_name(&client->dev), &bq20z75_device->power_supply);
+	if (rc) {
+		dev_warn(&client->dev, "Failed to request irq: %d\n", rc);
+		gpio_free(pdata->battery_detect);
+		bq20z75_device->gpio_detect = false;
+		goto skip_gpio;
+	}
+
+	bq20z75_device->irq = irq;
+
+skip_gpio:
+
+	rc = power_supply_register(&client->dev, &bq20z75_device->power_supply);
+	if (rc) {
+		dev_err(&client->dev,
+			"%s: Failed to register power supply\n", __func__);
+		goto exit_psupply;
+	}
+
+	dev_info(&client->dev,
+		"%s: battery gas gauge device registered\n", client->name);
+
+	INIT_DELAYED_WORK(&bq20z75_device->work, bq20z75_delayed_work);
+
+	bq20z75_device->enable_detection = true;
+
+	return 0;
+
+exit_psupply:
+	if (bq20z75_device->irq)
+		free_irq(bq20z75_device->irq, &bq20z75_device->power_supply);
+	if (bq20z75_device->gpio_detect)
+		gpio_free(pdata->battery_detect);
+
+	kfree(bq20z75_device);
+
+	return rc;
+}
+
+static int __devexit bq20z75_remove(struct i2c_client *client)
+{
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+
+	if (bq20z75_device->irq)
+		free_irq(bq20z75_device->irq, &bq20z75_device->power_supply);
+	if (bq20z75_device->gpio_detect)
+		gpio_free(bq20z75_device->pdata->battery_detect);
+
+	power_supply_unregister(&bq20z75_device->power_supply);
+
+	cancel_delayed_work_sync(&bq20z75_device->work);
+
+	kfree(bq20z75_device);
+	bq20z75_device = NULL;
+
+	return 0;
+}
+
+#if defined CONFIG_PM
+static int bq20z75_suspend(struct i2c_client *client,
+	pm_message_t state)
+{
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	s32 ret;
+
+	if (bq20z75_device->poll_time > 0)
+		cancel_delayed_work_sync(&bq20z75_device->work);
+
+	/* write to manufacturer access with sleep command */
+	ret = bq20z75_write_word_data(client,
+		bq20z75_data[REG_MANUFACTURER_DATA].addr,
+		MANUFACTURER_ACCESS_SLEEP);
+	if (bq20z75_device->is_present && ret < 0)
+		return ret;
+
+	return 0;
+}
+#else
+#define bq20z75_suspend		NULL
+#endif
+/* any smbus transaction will wake up bq20z75 */
+#define bq20z75_resume		NULL
+
+static const struct i2c_device_id bq20z75_id[] = {
+	{ "bq20z75", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, bq20z75_id);
+
+static struct i2c_driver bq20z75_battery_driver = {
+	.probe		= bq20z75_probe,
+	.remove		= __devexit_p(bq20z75_remove),
+	.suspend	= bq20z75_suspend,
+	.resume		= bq20z75_resume,
+	.id_table	= bq20z75_id,
+	.driver = {
+		.name	= "bq20z75-battery",
+		.of_match_table = bq20z75_dt_ids,
+	},
+};
+
+static int __init bq20z75_battery_init(void)
+{
+	return i2c_add_driver(&bq20z75_battery_driver);
+}
+module_init(bq20z75_battery_init);
+
+static void __exit bq20z75_battery_exit(void)
+{
+	i2c_del_driver(&bq20z75_battery_driver);
+}
+module_exit(bq20z75_battery_exit);
+
+MODULE_DESCRIPTION("BQ20z75 battery monitor driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/power/bq20z75.h b/include/linux/power/bq20z75.h
deleted file mode 100644
index 1398eb004e83..000000000000
--- a/include/linux/power/bq20z75.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Gas Gauge driver for TI's BQ20Z75
- *
- * Copyright (c) 2010, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#ifndef __LINUX_POWER_BQ20Z75_H_
-#define __LINUX_POWER_BQ20Z75_H_
-
-#include <linux/power_supply.h>
-#include <linux/types.h>
-
-/**
- * struct bq20z75_platform_data - platform data for bq20z75 devices
- * @battery_detect:		GPIO which is used to detect battery presence
- * @battery_detect_present:	gpio state when battery is present (0 / 1)
- * @i2c_retry_count:		# of times to retry on i2c IO failure
- * @poll_retry_count:		# of times to retry looking for new status after
- *				external change notification
- */
-struct bq20z75_platform_data {
-	int battery_detect;
-	int battery_detect_present;
-	int i2c_retry_count;
-	int poll_retry_count;
-};
-
-#endif
diff --git a/include/linux/power/sbs-battery.h b/include/linux/power/sbs-battery.h
new file mode 100644
index 000000000000..1398eb004e83
--- /dev/null
+++ b/include/linux/power/sbs-battery.h
@@ -0,0 +1,42 @@
+/*
+ * Gas Gauge driver for TI's BQ20Z75
+ *
+ * Copyright (c) 2010, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __LINUX_POWER_BQ20Z75_H_
+#define __LINUX_POWER_BQ20Z75_H_
+
+#include <linux/power_supply.h>
+#include <linux/types.h>
+
+/**
+ * struct bq20z75_platform_data - platform data for bq20z75 devices
+ * @battery_detect:		GPIO which is used to detect battery presence
+ * @battery_detect_present:	gpio state when battery is present (0 / 1)
+ * @i2c_retry_count:		# of times to retry on i2c IO failure
+ * @poll_retry_count:		# of times to retry looking for new status after
+ *				external change notification
+ */
+struct bq20z75_platform_data {
+	int battery_detect;
+	int battery_detect_present;
+	int i2c_retry_count;
+	int poll_retry_count;
+};
+
+#endif
-- 
cgit v1.2.3


From 3ddca062f8d71724529b0d52609994c9886f1a18 Mon Sep 17 00:00:00 2001
From: Rhyland Klein <rklein@nvidia.com>
Date: Mon, 5 Dec 2011 17:50:46 -0800
Subject: sbs-battery: Rename internals to new name

Now that this driver is named more generally, this change updates
the internal variables, defines and functions to use this new name.

Signed-off-by: Rhyland Klein <rklein@nvidia.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/sbs-battery.c       | 423 ++++++++++++++++++--------------------
 include/linux/power/sbs-battery.h |  10 +-
 2 files changed, 209 insertions(+), 224 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/sbs-battery.c b/drivers/power/sbs-battery.c
index ce95ff791016..00bd9e079e80 100644
--- a/drivers/power/sbs-battery.c
+++ b/drivers/power/sbs-battery.c
@@ -1,5 +1,5 @@
 /*
- * Gas Gauge driver for TI's BQ20Z75
+ * Gas Gauge driver for SBS Compliant Batteries
  *
  * Copyright (c) 2010, NVIDIA Corporation.
  *
@@ -28,7 +28,7 @@
 #include <linux/interrupt.h>
 #include <linux/gpio.h>
 
-#include <linux/power/bq20z75.h>
+#include <linux/power/sbs-battery.h>
 
 enum {
 	REG_MANUFACTURER_DATA,
@@ -53,7 +53,7 @@ enum {
 /* Battery Mode defines */
 #define BATTERY_MODE_OFFSET		0x03
 #define BATTERY_MODE_MASK		0x8000
-enum bq20z75_battery_mode {
+enum sbs_battery_mode {
 	BATTERY_MODE_AMPS,
 	BATTERY_MODE_WATTS
 };
@@ -67,62 +67,56 @@ enum bq20z75_battery_mode {
 #define BATTERY_FULL_CHARGED		0x20
 #define BATTERY_FULL_DISCHARGED		0x10
 
-#define BQ20Z75_DATA(_psp, _addr, _min_value, _max_value) { \
+#define SBS_DATA(_psp, _addr, _min_value, _max_value) { \
 	.psp = _psp, \
 	.addr = _addr, \
 	.min_value = _min_value, \
 	.max_value = _max_value, \
 }
 
-static const struct bq20z75_device_data {
+static const struct chip_data {
 	enum power_supply_property psp;
 	u8 addr;
 	int min_value;
 	int max_value;
-} bq20z75_data[] = {
+} sbs_data[] = {
 	[REG_MANUFACTURER_DATA] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_PRESENT, 0x00, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_PRESENT, 0x00, 0, 65535),
 	[REG_TEMPERATURE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_TEMP, 0x08, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_TEMP, 0x08, 0, 65535),
 	[REG_VOLTAGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_VOLTAGE_NOW, 0x09, 0, 20000),
+		SBS_DATA(POWER_SUPPLY_PROP_VOLTAGE_NOW, 0x09, 0, 20000),
 	[REG_CURRENT] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CURRENT_NOW, 0x0A, -32768,
-			32767),
+		SBS_DATA(POWER_SUPPLY_PROP_CURRENT_NOW, 0x0A, -32768, 32767),
 	[REG_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CAPACITY, 0x0E, 0, 100),
+		SBS_DATA(POWER_SUPPLY_PROP_CAPACITY, 0x0E, 0, 100),
 	[REG_REMAINING_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_NOW, 0x0F, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_ENERGY_NOW, 0x0F, 0, 65535),
 	[REG_REMAINING_CAPACITY_CHARGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_NOW, 0x0F, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_CHARGE_NOW, 0x0F, 0, 65535),
 	[REG_FULL_CHARGE_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_FULL, 0x10, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_ENERGY_FULL, 0x10, 0, 65535),
 	[REG_FULL_CHARGE_CAPACITY_CHARGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_FULL, 0x10, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_CHARGE_FULL, 0x10, 0, 65535),
 	[REG_TIME_TO_EMPTY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG, 0x12, 0,
-			65535),
+		SBS_DATA(POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG, 0x12, 0, 65535),
 	[REG_TIME_TO_FULL] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_TIME_TO_FULL_AVG, 0x13, 0,
-			65535),
+		SBS_DATA(POWER_SUPPLY_PROP_TIME_TO_FULL_AVG, 0x13, 0, 65535),
 	[REG_STATUS] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_STATUS, 0x16, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_STATUS, 0x16, 0, 65535),
 	[REG_CYCLE_COUNT] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CYCLE_COUNT, 0x17, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_CYCLE_COUNT, 0x17, 0, 65535),
 	[REG_DESIGN_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, 0x18, 0,
-			65535),
+		SBS_DATA(POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, 0x18, 0, 65535),
 	[REG_DESIGN_CAPACITY_CHARGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, 0x18, 0,
-			65535),
+		SBS_DATA(POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, 0x18, 0, 65535),
 	[REG_DESIGN_VOLTAGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, 0x19, 0,
-			65535),
+		SBS_DATA(POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, 0x19, 0, 65535),
 	[REG_SERIAL_NUMBER] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_SERIAL_NUMBER, 0x1C, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_SERIAL_NUMBER, 0x1C, 0, 65535),
 };
 
-static enum power_supply_property bq20z75_properties[] = {
+static enum power_supply_property sbs_properties[] = {
 	POWER_SUPPLY_PROP_STATUS,
 	POWER_SUPPLY_PROP_HEALTH,
 	POWER_SUPPLY_PROP_PRESENT,
@@ -144,10 +138,10 @@ static enum power_supply_property bq20z75_properties[] = {
 	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
 };
 
-struct bq20z75_info {
+struct sbs_info {
 	struct i2c_client		*client;
 	struct power_supply		power_supply;
-	struct bq20z75_platform_data	*pdata;
+	struct sbs_platform_data	*pdata;
 	bool				is_present;
 	bool				gpio_detect;
 	bool				enable_detection;
@@ -158,14 +152,14 @@ struct bq20z75_info {
 	int				ignore_changes;
 };
 
-static int bq20z75_read_word_data(struct i2c_client *client, u8 address)
+static int sbs_read_word_data(struct i2c_client *client, u8 address)
 {
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	struct sbs_info *chip = i2c_get_clientdata(client);
 	s32 ret = 0;
 	int retries = 1;
 
-	if (bq20z75_device->pdata)
-		retries = max(bq20z75_device->pdata->i2c_retry_count + 1, 1);
+	if (chip->pdata)
+		retries = max(chip->pdata->i2c_retry_count + 1, 1);
 
 	while (retries > 0) {
 		ret = i2c_smbus_read_word_data(client, address);
@@ -184,15 +178,15 @@ static int bq20z75_read_word_data(struct i2c_client *client, u8 address)
 	return le16_to_cpu(ret);
 }
 
-static int bq20z75_write_word_data(struct i2c_client *client, u8 address,
+static int sbs_write_word_data(struct i2c_client *client, u8 address,
 	u16 value)
 {
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	struct sbs_info *chip = i2c_get_clientdata(client);
 	s32 ret = 0;
 	int retries = 1;
 
-	if (bq20z75_device->pdata)
-		retries = max(bq20z75_device->pdata->i2c_retry_count + 1, 1);
+	if (chip->pdata)
+		retries = max(chip->pdata->i2c_retry_count + 1, 1);
 
 	while (retries > 0) {
 		ret = i2c_smbus_write_word_data(client, address,
@@ -212,44 +206,41 @@ static int bq20z75_write_word_data(struct i2c_client *client, u8 address,
 	return 0;
 }
 
-static int bq20z75_get_battery_presence_and_health(
+static int sbs_get_battery_presence_and_health(
 	struct i2c_client *client, enum power_supply_property psp,
 	union power_supply_propval *val)
 {
 	s32 ret;
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	struct sbs_info *chip = i2c_get_clientdata(client);
 
 	if (psp == POWER_SUPPLY_PROP_PRESENT &&
-		bq20z75_device->gpio_detect) {
-		ret = gpio_get_value(
-			bq20z75_device->pdata->battery_detect);
-		if (ret == bq20z75_device->pdata->battery_detect_present)
+		chip->gpio_detect) {
+		ret = gpio_get_value(chip->pdata->battery_detect);
+		if (ret == chip->pdata->battery_detect_present)
 			val->intval = 1;
 		else
 			val->intval = 0;
-		bq20z75_device->is_present = val->intval;
+		chip->is_present = val->intval;
 		return ret;
 	}
 
 	/* Write to ManufacturerAccess with
 	 * ManufacturerAccess command and then
 	 * read the status */
-	ret = bq20z75_write_word_data(client,
-		bq20z75_data[REG_MANUFACTURER_DATA].addr,
-		MANUFACTURER_ACCESS_STATUS);
+	ret = sbs_write_word_data(client, sbs_data[REG_MANUFACTURER_DATA].addr,
+					MANUFACTURER_ACCESS_STATUS);
 	if (ret < 0) {
 		if (psp == POWER_SUPPLY_PROP_PRESENT)
 			val->intval = 0; /* battery removed */
 		return ret;
 	}
 
-	ret = bq20z75_read_word_data(client,
-		bq20z75_data[REG_MANUFACTURER_DATA].addr);
+	ret = sbs_read_word_data(client, sbs_data[REG_MANUFACTURER_DATA].addr);
 	if (ret < 0)
 		return ret;
 
-	if (ret < bq20z75_data[REG_MANUFACTURER_DATA].min_value ||
-	    ret > bq20z75_data[REG_MANUFACTURER_DATA].max_value) {
+	if (ret < sbs_data[REG_MANUFACTURER_DATA].min_value ||
+	    ret > sbs_data[REG_MANUFACTURER_DATA].max_value) {
 		val->intval = 0;
 		return 0;
 	}
@@ -279,24 +270,23 @@ static int bq20z75_get_battery_presence_and_health(
 	return 0;
 }
 
-static int bq20z75_get_battery_property(struct i2c_client *client,
+static int sbs_get_battery_property(struct i2c_client *client,
 	int reg_offset, enum power_supply_property psp,
 	union power_supply_propval *val)
 {
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	struct sbs_info *chip = i2c_get_clientdata(client);
 	s32 ret;
 
-	ret = bq20z75_read_word_data(client,
-		bq20z75_data[reg_offset].addr);
+	ret = sbs_read_word_data(client, sbs_data[reg_offset].addr);
 	if (ret < 0)
 		return ret;
 
 	/* returned values are 16 bit */
-	if (bq20z75_data[reg_offset].min_value < 0)
+	if (sbs_data[reg_offset].min_value < 0)
 		ret = (s16)ret;
 
-	if (ret >= bq20z75_data[reg_offset].min_value &&
-	    ret <= bq20z75_data[reg_offset].max_value) {
+	if (ret >= sbs_data[reg_offset].min_value &&
+	    ret <= sbs_data[reg_offset].max_value) {
 		val->intval = ret;
 		if (psp != POWER_SUPPLY_PROP_STATUS)
 			return 0;
@@ -310,12 +300,12 @@ static int bq20z75_get_battery_property(struct i2c_client *client,
 		else
 			val->intval = POWER_SUPPLY_STATUS_CHARGING;
 
-		if (bq20z75_device->poll_time == 0)
-			bq20z75_device->last_state = val->intval;
-		else if (bq20z75_device->last_state != val->intval) {
-			cancel_delayed_work_sync(&bq20z75_device->work);
-			power_supply_changed(&bq20z75_device->power_supply);
-			bq20z75_device->poll_time = 0;
+		if (chip->poll_time == 0)
+			chip->last_state = val->intval;
+		else if (chip->last_state != val->intval) {
+			cancel_delayed_work_sync(&chip->work);
+			power_supply_changed(&chip->power_supply);
+			chip->poll_time = 0;
 		}
 	} else {
 		if (psp == POWER_SUPPLY_PROP_STATUS)
@@ -327,7 +317,7 @@ static int bq20z75_get_battery_property(struct i2c_client *client,
 	return 0;
 }
 
-static void  bq20z75_unit_adjustment(struct i2c_client *client,
+static void  sbs_unit_adjustment(struct i2c_client *client,
 	enum power_supply_property psp, union power_supply_propval *val)
 {
 #define BASE_UNIT_CONVERSION		1000
@@ -338,7 +328,7 @@ static void  bq20z75_unit_adjustment(struct i2c_client *client,
 	case POWER_SUPPLY_PROP_ENERGY_NOW:
 	case POWER_SUPPLY_PROP_ENERGY_FULL:
 	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
-		/* bq20z75 provides energy in units of 10mWh.
+		/* sbs provides energy in units of 10mWh.
 		 * Convert to µWh
 		 */
 		val->intval *= BATTERY_MODE_CAP_MULT_WATT;
@@ -354,7 +344,7 @@ static void  bq20z75_unit_adjustment(struct i2c_client *client,
 		break;
 
 	case POWER_SUPPLY_PROP_TEMP:
-		/* bq20z75 provides battery temperature in 0.1K
+		/* sbs provides battery temperature in 0.1K
 		 * so convert it to 0.1°C
 		 */
 		val->intval -= TEMP_KELVIN_TO_CELSIUS;
@@ -362,7 +352,7 @@ static void  bq20z75_unit_adjustment(struct i2c_client *client,
 
 	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
 	case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
-		/* bq20z75 provides time to empty and time to full in minutes.
+		/* sbs provides time to empty and time to full in minutes.
 		 * Convert to seconds
 		 */
 		val->intval *= TIME_UNIT_CONVERSION;
@@ -374,13 +364,12 @@ static void  bq20z75_unit_adjustment(struct i2c_client *client,
 	}
 }
 
-static enum bq20z75_battery_mode
-bq20z75_set_battery_mode(struct i2c_client *client,
-	enum bq20z75_battery_mode mode)
+static enum sbs_battery_mode sbs_set_battery_mode(struct i2c_client *client,
+	enum sbs_battery_mode mode)
 {
 	int ret, original_val;
 
-	original_val = bq20z75_read_word_data(client, BATTERY_MODE_OFFSET);
+	original_val = sbs_read_word_data(client, BATTERY_MODE_OFFSET);
 	if (original_val < 0)
 		return original_val;
 
@@ -392,68 +381,67 @@ bq20z75_set_battery_mode(struct i2c_client *client,
 	else
 		ret = original_val | BATTERY_MODE_MASK;
 
-	ret = bq20z75_write_word_data(client, BATTERY_MODE_OFFSET, ret);
+	ret = sbs_write_word_data(client, BATTERY_MODE_OFFSET, ret);
 	if (ret < 0)
 		return ret;
 
 	return original_val & BATTERY_MODE_MASK;
 }
 
-static int bq20z75_get_battery_capacity(struct i2c_client *client,
+static int sbs_get_battery_capacity(struct i2c_client *client,
 	int reg_offset, enum power_supply_property psp,
 	union power_supply_propval *val)
 {
 	s32 ret;
-	enum bq20z75_battery_mode mode = BATTERY_MODE_WATTS;
+	enum sbs_battery_mode mode = BATTERY_MODE_WATTS;
 
 	if (power_supply_is_amp_property(psp))
 		mode = BATTERY_MODE_AMPS;
 
-	mode = bq20z75_set_battery_mode(client, mode);
+	mode = sbs_set_battery_mode(client, mode);
 	if (mode < 0)
 		return mode;
 
-	ret = bq20z75_read_word_data(client, bq20z75_data[reg_offset].addr);
+	ret = sbs_read_word_data(client, sbs_data[reg_offset].addr);
 	if (ret < 0)
 		return ret;
 
 	if (psp == POWER_SUPPLY_PROP_CAPACITY) {
-		/* bq20z75 spec says that this can be >100 %
+		/* sbs spec says that this can be >100 %
 		* even if max value is 100 % */
 		val->intval = min(ret, 100);
 	} else
 		val->intval = ret;
 
-	ret = bq20z75_set_battery_mode(client, mode);
+	ret = sbs_set_battery_mode(client, mode);
 	if (ret < 0)
 		return ret;
 
 	return 0;
 }
 
-static char bq20z75_serial[5];
-static int bq20z75_get_battery_serial_number(struct i2c_client *client,
+static char sbs_serial[5];
+static int sbs_get_battery_serial_number(struct i2c_client *client,
 	union power_supply_propval *val)
 {
 	int ret;
 
-	ret = bq20z75_read_word_data(client,
-		bq20z75_data[REG_SERIAL_NUMBER].addr);
+	ret = sbs_read_word_data(client, sbs_data[REG_SERIAL_NUMBER].addr);
 	if (ret < 0)
 		return ret;
 
-	ret = sprintf(bq20z75_serial, "%04x", ret);
-	val->strval = bq20z75_serial;
+	ret = sprintf(sbs_serial, "%04x", ret);
+	val->strval = sbs_serial;
 
 	return 0;
 }
 
-static int bq20z75_get_property_index(struct i2c_client *client,
+static int sbs_get_property_index(struct i2c_client *client,
 	enum power_supply_property psp)
 {
 	int count;
-	for (count = 0; count < ARRAY_SIZE(bq20z75_data); count++)
-		if (psp == bq20z75_data[count].psp)
+	for (count = 0; count < ARRAY_SIZE(sbs_data); count++)
+		if (psp == sbs_data[count].psp)
 			return count;
 
 	dev_warn(&client->dev,
@@ -462,19 +450,19 @@ static int bq20z75_get_property_index(struct i2c_client *client,
 	return -EINVAL;
 }
 
-static int bq20z75_get_property(struct power_supply *psy,
+static int sbs_get_property(struct power_supply *psy,
 	enum power_supply_property psp,
 	union power_supply_propval *val)
 {
 	int ret = 0;
-	struct bq20z75_info *bq20z75_device = container_of(psy,
-				struct bq20z75_info, power_supply);
-	struct i2c_client *client = bq20z75_device->client;
+	struct sbs_info *chip = container_of(psy,
+				struct sbs_info, power_supply);
+	struct i2c_client *client = chip->client;
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_PRESENT:
 	case POWER_SUPPLY_PROP_HEALTH:
-		ret = bq20z75_get_battery_presence_and_health(client, psp, val);
+		ret = sbs_get_battery_presence_and_health(client, psp, val);
 		if (psp == POWER_SUPPLY_PROP_PRESENT)
 			return 0;
 		break;
@@ -490,15 +478,15 @@ static int bq20z75_get_property(struct power_supply *psy,
 	case POWER_SUPPLY_PROP_CHARGE_FULL:
 	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
 	case POWER_SUPPLY_PROP_CAPACITY:
-		ret = bq20z75_get_property_index(client, psp);
+		ret = sbs_get_property_index(client, psp);
 		if (ret < 0)
 			break;
 
-		ret = bq20z75_get_battery_capacity(client, ret, psp, val);
+		ret = sbs_get_battery_capacity(client, ret, psp, val);
 		break;
 
 	case POWER_SUPPLY_PROP_SERIAL_NUMBER:
-		ret = bq20z75_get_battery_serial_number(client, val);
+		ret = sbs_get_battery_serial_number(client, val);
 		break;
 
 	case POWER_SUPPLY_PROP_STATUS:
@@ -509,11 +497,11 @@ static int bq20z75_get_property(struct power_supply *psy,
 	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
 	case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
 	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
-		ret = bq20z75_get_property_index(client, psp);
+		ret = sbs_get_property_index(client, psp);
 		if (ret < 0)
 			break;
 
-		ret = bq20z75_get_battery_property(client, ret, psp, val);
+		ret = sbs_get_battery_property(client, ret, psp, val);
 		break;
 
 	default:
@@ -522,25 +510,25 @@ static int bq20z75_get_property(struct power_supply *psy,
 		return -EINVAL;
 	}
 
-	if (!bq20z75_device->enable_detection)
+	if (!chip->enable_detection)
 		goto done;
 
-	if (!bq20z75_device->gpio_detect &&
-		bq20z75_device->is_present != (ret >= 0)) {
-		bq20z75_device->is_present = (ret >= 0);
-		power_supply_changed(&bq20z75_device->power_supply);
+	if (!chip->gpio_detect &&
+		chip->is_present != (ret >= 0)) {
+		chip->is_present = (ret >= 0);
+		power_supply_changed(&chip->power_supply);
 	}
 
 done:
 	if (!ret) {
 		/* Convert units to match requirements for power supply class */
-		bq20z75_unit_adjustment(client, psp, val);
+		sbs_unit_adjustment(client, psp, val);
 	}
 
 	dev_dbg(&client->dev,
 		"%s: property = %d, value = %x\n", __func__, psp, val->intval);
 
-	if (ret && bq20z75_device->is_present)
+	if (ret && chip->is_present)
 		return ret;
 
 	/* battery not present, so return NODATA for properties */
@@ -550,7 +538,7 @@ done:
 	return 0;
 }
 
-static irqreturn_t bq20z75_irq(int irq, void *devid)
+static irqreturn_t sbs_irq(int irq, void *devid)
 {
 	struct power_supply *battery = devid;
 
@@ -559,36 +547,35 @@ static irqreturn_t bq20z75_irq(int irq, void *devid)
 	return IRQ_HANDLED;
 }
 
-static void bq20z75_external_power_changed(struct power_supply *psy)
+static void sbs_external_power_changed(struct power_supply *psy)
 {
-	struct bq20z75_info *bq20z75_device;
+	struct sbs_info *chip;
 
-	bq20z75_device = container_of(psy, struct bq20z75_info, power_supply);
+	chip = container_of(psy, struct sbs_info, power_supply);
 
-	if (bq20z75_device->ignore_changes > 0) {
-		bq20z75_device->ignore_changes--;
+	if (chip->ignore_changes > 0) {
+		chip->ignore_changes--;
 		return;
 	}
 
 	/* cancel outstanding work */
-	cancel_delayed_work_sync(&bq20z75_device->work);
+	cancel_delayed_work_sync(&chip->work);
 
-	schedule_delayed_work(&bq20z75_device->work, HZ);
-	bq20z75_device->poll_time = bq20z75_device->pdata->poll_retry_count;
+	schedule_delayed_work(&chip->work, HZ);
+	chip->poll_time = chip->pdata->poll_retry_count;
 }
 
-static void bq20z75_delayed_work(struct work_struct *work)
+static void sbs_delayed_work(struct work_struct *work)
 {
-	struct bq20z75_info *bq20z75_device;
+	struct sbs_info *chip;
 	s32 ret;
 
-	bq20z75_device = container_of(work, struct bq20z75_info, work.work);
+	chip = container_of(work, struct sbs_info, work.work);
 
-	ret = bq20z75_read_word_data(bq20z75_device->client,
-				     bq20z75_data[REG_STATUS].addr);
+	ret = sbs_read_word_data(chip->client, sbs_data[REG_STATUS].addr);
 	/* if the read failed, give up on this work */
 	if (ret < 0) {
-		bq20z75_device->poll_time = 0;
+		chip->poll_time = 0;
 		return;
 	}
 
@@ -601,14 +588,14 @@ static void bq20z75_delayed_work(struct work_struct *work)
 	else
 		ret = POWER_SUPPLY_STATUS_CHARGING;
 
-	if (bq20z75_device->last_state != ret) {
-		bq20z75_device->poll_time = 0;
-		power_supply_changed(&bq20z75_device->power_supply);
+	if (chip->last_state != ret) {
+		chip->poll_time = 0;
+		power_supply_changed(&chip->power_supply);
 		return;
 	}
-	if (bq20z75_device->poll_time > 0) {
-		schedule_delayed_work(&bq20z75_device->work, HZ);
-		bq20z75_device->poll_time--;
+	if (chip->poll_time > 0) {
+		schedule_delayed_work(&chip->work, HZ);
+		chip->poll_time--;
 		return;
 	}
 }
@@ -618,17 +605,18 @@ static void bq20z75_delayed_work(struct work_struct *work)
 #include <linux/of_device.h>
 #include <linux/of_gpio.h>
 
-static const struct of_device_id bq20z75_dt_ids[] = {
+static const struct of_device_id sbs_dt_ids[] = {
+	{ .compatible = "sbs,sbs-battery" },
 	{ .compatible = "ti,bq20z75" },
 	{ }
 };
-MODULE_DEVICE_TABLE(i2c, bq20z75_dt_ids);
+MODULE_DEVICE_TABLE(i2c, sbs_dt_ids);
 
-static struct bq20z75_platform_data *bq20z75_of_populate_pdata(
+static struct sbs_platform_data *sbs_of_populate_pdata(
 		struct i2c_client *client)
 {
 	struct device_node *of_node = client->dev.of_node;
-	struct bq20z75_platform_data *pdata = client->dev.platform_data;
+	struct sbs_platform_data *pdata = client->dev.platform_data;
 	enum of_gpio_flags gpio_flags;
 	int rc;
 	u32 prop;
@@ -644,31 +632,31 @@ static struct bq20z75_platform_data *bq20z75_of_populate_pdata(
 	/* first make sure at least one property is set, otherwise
 	 * it won't change behavior from running without pdata.
 	 */
-	if (!of_get_property(of_node, "ti,i2c-retry-count", NULL) &&
-		!of_get_property(of_node, "ti,poll-retry-count", NULL) &&
-		!of_get_property(of_node, "ti,battery-detect-gpios", NULL))
+	if (!of_get_property(of_node, "sbs,i2c-retry-count", NULL) &&
+		!of_get_property(of_node, "sbs,poll-retry-count", NULL) &&
+		!of_get_property(of_node, "sbs,battery-detect-gpios", NULL))
 		goto of_out;
 
-	pdata = devm_kzalloc(&client->dev, sizeof(struct bq20z75_platform_data),
+	pdata = devm_kzalloc(&client->dev, sizeof(struct sbs_platform_data),
 				GFP_KERNEL);
 	if (!pdata)
 		goto of_out;
 
-	rc = of_property_read_u32(of_node, "ti,i2c-retry-count", &prop);
+	rc = of_property_read_u32(of_node, "sbs,i2c-retry-count", &prop);
 	if (!rc)
 		pdata->i2c_retry_count = prop;
 
-	rc = of_property_read_u32(of_node, "ti,poll-retry-count", &prop);
+	rc = of_property_read_u32(of_node, "sbs,poll-retry-count", &prop);
 	if (!rc)
 		pdata->poll_retry_count = prop;
 
-	if (!of_get_property(of_node, "ti,battery-detect-gpios", NULL)) {
+	if (!of_get_property(of_node, "sbs,battery-detect-gpios", NULL)) {
 		pdata->battery_detect = -1;
 		goto of_out;
 	}
 
 	pdata->battery_detect = of_get_named_gpio_flags(of_node,
-			"ti,battery-detect-gpios", 0, &gpio_flags);
+			"sbs,battery-detect-gpios", 0, &gpio_flags);
 
 	if (gpio_flags & OF_GPIO_ACTIVE_LOW)
 		pdata->battery_detect_present = 0;
@@ -679,60 +667,57 @@ of_out:
 	return pdata;
 }
 #else
-#define bq20z75_dt_ids NULL
-static struct bq20z75_platform_data *bq20z75_of_populate_pdata(
+#define sbs_dt_ids NULL
+static struct sbs_platform_data *sbs_of_populate_pdata(
 	struct i2c_client *client)
 {
 	return client->dev.platform_data;
 }
 #endif
 
-static int __devinit bq20z75_probe(struct i2c_client *client,
+static int __devinit sbs_probe(struct i2c_client *client,
 	const struct i2c_device_id *id)
 {
-	struct bq20z75_info *bq20z75_device;
-	struct bq20z75_platform_data *pdata = client->dev.platform_data;
+	struct sbs_info *chip;
+	struct sbs_platform_data *pdata = client->dev.platform_data;
 	int rc;
 	int irq;
 
-	bq20z75_device = kzalloc(sizeof(struct bq20z75_info), GFP_KERNEL);
-	if (!bq20z75_device)
+	chip = kzalloc(sizeof(struct sbs_info), GFP_KERNEL);
+	if (!chip)
 		return -ENOMEM;
 
-	bq20z75_device->client = client;
-	bq20z75_device->enable_detection = false;
-	bq20z75_device->gpio_detect = false;
-	bq20z75_device->power_supply.name = "battery";
-	bq20z75_device->power_supply.type = POWER_SUPPLY_TYPE_BATTERY;
-	bq20z75_device->power_supply.properties = bq20z75_properties;
-	bq20z75_device->power_supply.num_properties =
-		ARRAY_SIZE(bq20z75_properties);
-	bq20z75_device->power_supply.get_property = bq20z75_get_property;
+	chip->client = client;
+	chip->enable_detection = false;
+	chip->gpio_detect = false;
+	chip->power_supply.name = "battery";
+	chip->power_supply.type = POWER_SUPPLY_TYPE_BATTERY;
+	chip->power_supply.properties = sbs_properties;
+	chip->power_supply.num_properties = ARRAY_SIZE(sbs_properties);
+	chip->power_supply.get_property = sbs_get_property;
 	/* ignore first notification of external change, it is generated
 	 * from the power_supply_register call back
 	 */
-	bq20z75_device->ignore_changes = 1;
-	bq20z75_device->last_state = POWER_SUPPLY_STATUS_UNKNOWN;
-	bq20z75_device->power_supply.external_power_changed =
-		bq20z75_external_power_changed;
+	chip->ignore_changes = 1;
+	chip->last_state = POWER_SUPPLY_STATUS_UNKNOWN;
+	chip->power_supply.external_power_changed = sbs_external_power_changed;
 
-	pdata = bq20z75_of_populate_pdata(client);
+	pdata = sbs_of_populate_pdata(client);
 
 	if (pdata) {
-		bq20z75_device->gpio_detect =
-			gpio_is_valid(pdata->battery_detect);
-		bq20z75_device->pdata = pdata;
+		chip->gpio_detect = gpio_is_valid(pdata->battery_detect);
+		chip->pdata = pdata;
 	}
 
-	i2c_set_clientdata(client, bq20z75_device);
+	i2c_set_clientdata(client, chip);
 
-	if (!bq20z75_device->gpio_detect)
+	if (!chip->gpio_detect)
 		goto skip_gpio;
 
 	rc = gpio_request(pdata->battery_detect, dev_name(&client->dev));
 	if (rc) {
 		dev_warn(&client->dev, "Failed to request gpio: %d\n", rc);
-		bq20z75_device->gpio_detect = false;
+		chip->gpio_detect = false;
 		goto skip_gpio;
 	}
 
@@ -740,7 +725,7 @@ static int __devinit bq20z75_probe(struct i2c_client *client,
 	if (rc) {
 		dev_warn(&client->dev, "Failed to get gpio as input: %d\n", rc);
 		gpio_free(pdata->battery_detect);
-		bq20z75_device->gpio_detect = false;
+		chip->gpio_detect = false;
 		goto skip_gpio;
 	}
 
@@ -748,25 +733,25 @@ static int __devinit bq20z75_probe(struct i2c_client *client,
 	if (irq <= 0) {
 		dev_warn(&client->dev, "Failed to get gpio as irq: %d\n", irq);
 		gpio_free(pdata->battery_detect);
-		bq20z75_device->gpio_detect = false;
+		chip->gpio_detect = false;
 		goto skip_gpio;
 	}
 
-	rc = request_irq(irq, bq20z75_irq,
+	rc = request_irq(irq, sbs_irq,
 		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-		dev_name(&client->dev), &bq20z75_device->power_supply);
+		dev_name(&client->dev), &chip->power_supply);
 	if (rc) {
 		dev_warn(&client->dev, "Failed to request irq: %d\n", rc);
 		gpio_free(pdata->battery_detect);
-		bq20z75_device->gpio_detect = false;
+		chip->gpio_detect = false;
 		goto skip_gpio;
 	}
 
-	bq20z75_device->irq = irq;
+	chip->irq = irq;
 
 skip_gpio:
 
-	rc = power_supply_register(&client->dev, &bq20z75_device->power_supply);
+	rc = power_supply_register(&client->dev, &chip->power_supply);
 	if (rc) {
 		dev_err(&client->dev,
 			"%s: Failed to register power supply\n", __func__);
@@ -776,96 +761,96 @@ skip_gpio:
 	dev_info(&client->dev,
 		"%s: battery gas gauge device registered\n", client->name);
 
-	INIT_DELAYED_WORK(&bq20z75_device->work, bq20z75_delayed_work);
+	INIT_DELAYED_WORK(&chip->work, sbs_delayed_work);
 
-	bq20z75_device->enable_detection = true;
+	chip->enable_detection = true;
 
 	return 0;
 
 exit_psupply:
-	if (bq20z75_device->irq)
-		free_irq(bq20z75_device->irq, &bq20z75_device->power_supply);
-	if (bq20z75_device->gpio_detect)
+	if (chip->irq)
+		free_irq(chip->irq, &chip->power_supply);
+	if (chip->gpio_detect)
 		gpio_free(pdata->battery_detect);
 
-	kfree(bq20z75_device);
+	kfree(chip);
 
 	return rc;
 }
 
-static int __devexit bq20z75_remove(struct i2c_client *client)
+static int __devexit sbs_remove(struct i2c_client *client)
 {
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	struct sbs_info *chip = i2c_get_clientdata(client);
 
-	if (bq20z75_device->irq)
-		free_irq(bq20z75_device->irq, &bq20z75_device->power_supply);
-	if (bq20z75_device->gpio_detect)
-		gpio_free(bq20z75_device->pdata->battery_detect);
+	if (chip->irq)
+		free_irq(chip->irq, &chip->power_supply);
+	if (chip->gpio_detect)
+		gpio_free(chip->pdata->battery_detect);
 
-	power_supply_unregister(&bq20z75_device->power_supply);
+	power_supply_unregister(&chip->power_supply);
 
-	cancel_delayed_work_sync(&bq20z75_device->work);
+	cancel_delayed_work_sync(&chip->work);
 
-	kfree(bq20z75_device);
-	bq20z75_device = NULL;
+	kfree(chip);
+	chip = NULL;
 
 	return 0;
 }
 
 #if defined CONFIG_PM
-static int bq20z75_suspend(struct i2c_client *client,
+static int sbs_suspend(struct i2c_client *client,
 	pm_message_t state)
 {
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	struct sbs_info *chip = i2c_get_clientdata(client);
 	s32 ret;
 
-	if (bq20z75_device->poll_time > 0)
-		cancel_delayed_work_sync(&bq20z75_device->work);
+	if (chip->poll_time > 0)
+		cancel_delayed_work_sync(&chip->work);
 
 	/* write to manufacturer access with sleep command */
-	ret = bq20z75_write_word_data(client,
-		bq20z75_data[REG_MANUFACTURER_DATA].addr,
+	ret = sbs_write_word_data(client, sbs_data[REG_MANUFACTURER_DATA].addr,
 		MANUFACTURER_ACCESS_SLEEP);
-	if (bq20z75_device->is_present && ret < 0)
+	if (chip->is_present && ret < 0)
 		return ret;
 
 	return 0;
 }
 #else
-#define bq20z75_suspend		NULL
+#define sbs_suspend		NULL
 #endif
-/* any smbus transaction will wake up bq20z75 */
-#define bq20z75_resume		NULL
+/* any smbus transaction will wake up sbs */
+#define sbs_resume		NULL
 
-static const struct i2c_device_id bq20z75_id[] = {
+static const struct i2c_device_id sbs_id[] = {
 	{ "bq20z75", 0 },
+	{ "sbs-battery", 1 },
 	{}
 };
-MODULE_DEVICE_TABLE(i2c, bq20z75_id);
-
-static struct i2c_driver bq20z75_battery_driver = {
-	.probe		= bq20z75_probe,
-	.remove		= __devexit_p(bq20z75_remove),
-	.suspend	= bq20z75_suspend,
-	.resume		= bq20z75_resume,
-	.id_table	= bq20z75_id,
+MODULE_DEVICE_TABLE(i2c, sbs_id);
+
+static struct i2c_driver sbs_battery_driver = {
+	.probe		= sbs_probe,
+	.remove		= __devexit_p(sbs_remove),
+	.suspend	= sbs_suspend,
+	.resume		= sbs_resume,
+	.id_table	= sbs_id,
 	.driver = {
-		.name	= "bq20z75-battery",
-		.of_match_table = bq20z75_dt_ids,
+		.name	= "sbs-battery",
+		.of_match_table = sbs_dt_ids,
 	},
 };
 
-static int __init bq20z75_battery_init(void)
+static int __init sbs_battery_init(void)
 {
-	return i2c_add_driver(&bq20z75_battery_driver);
+	return i2c_add_driver(&sbs_battery_driver);
 }
-module_init(bq20z75_battery_init);
+module_init(sbs_battery_init);
 
-static void __exit bq20z75_battery_exit(void)
+static void __exit sbs_battery_exit(void)
 {
-	i2c_del_driver(&bq20z75_battery_driver);
+	i2c_del_driver(&sbs_battery_driver);
 }
-module_exit(bq20z75_battery_exit);
+module_exit(sbs_battery_exit);
 
-MODULE_DESCRIPTION("BQ20z75 battery monitor driver");
+MODULE_DESCRIPTION("SBS battery monitor driver");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/power/sbs-battery.h b/include/linux/power/sbs-battery.h
index 1398eb004e83..2b0a9d9ff57e 100644
--- a/include/linux/power/sbs-battery.h
+++ b/include/linux/power/sbs-battery.h
@@ -1,5 +1,5 @@
 /*
- * Gas Gauge driver for TI's BQ20Z75
+ * Gas Gauge driver for SBS Compliant Gas Gauges
  *
  * Copyright (c) 2010, NVIDIA Corporation.
  *
@@ -18,21 +18,21 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 
-#ifndef __LINUX_POWER_BQ20Z75_H_
-#define __LINUX_POWER_BQ20Z75_H_
+#ifndef __LINUX_POWER_SBS_BATTERY_H_
+#define __LINUX_POWER_SBS_BATTERY_H_
 
 #include <linux/power_supply.h>
 #include <linux/types.h>
 
 /**
- * struct bq20z75_platform_data - platform data for bq20z75 devices
+ * struct sbs_platform_data - platform data for sbs devices
  * @battery_detect:		GPIO which is used to detect battery presence
  * @battery_detect_present:	gpio state when battery is present (0 / 1)
  * @i2c_retry_count:		# of times to retry on i2c IO failure
  * @poll_retry_count:		# of times to retry looking for new status after
  *				external change notification
  */
-struct bq20z75_platform_data {
+struct sbs_platform_data {
 	int battery_detect;
 	int battery_detect_present;
 	int i2c_retry_count;
-- 
cgit v1.2.3


From 34aed73df3a9e75e313a7510b201f6755ae3e6bc Mon Sep 17 00:00:00 2001
From: Heiko Stübner <heiko@sntech.de>
Date: Thu, 29 Dec 2011 12:52:07 +0100
Subject: s3c_adc_battery: Average over more than one adc sample

Some sources for adc battery information provide only inaccurate results
where the read value differs from the real value with positive and negative
offsets. For such sources it can be more accurate to collect two or more
value sample and use the average of all collected values.

This patch adds pdata options volt_samples, current_samples and
backup_volt_samples to specifiy the number of samples to collect,
reads the specified number of samples and calculates the average of those.
For unset sample-number-values a default of 1 is assumed.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/s3c_adc_battery.c | 25 ++++++++++++++++++++++---
 include/linux/s3c_adc_battery.h |  4 ++++
 2 files changed, 26 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/s3c_adc_battery.c b/drivers/power/s3c_adc_battery.c
index e687ee7f18f2..8b804a566756 100644
--- a/drivers/power/s3c_adc_battery.c
+++ b/drivers/power/s3c_adc_battery.c
@@ -47,6 +47,22 @@ static void s3c_adc_bat_ext_power_changed(struct power_supply *psy)
 		msecs_to_jiffies(JITTER_DELAY));
 }
 
+static int gather_samples(struct s3c_adc_client *client, int num, int channel)
+{
+	int value, i;
+
+	/* default to 1 if nothing is set */
+	if (num < 1)
+		num = 1;
+
+	value = 0;
+	for (i = 0; i < num; i++)
+		value += s3c_adc_read(client, channel);
+	value /= num;
+
+	return value;
+}
+
 static enum power_supply_property s3c_adc_backup_bat_props[] = {
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
 	POWER_SUPPLY_PROP_VOLTAGE_MIN,
@@ -67,7 +83,8 @@ static int s3c_adc_backup_bat_get_property(struct power_supply *psy,
 	if (bat->volt_value < 0 ||
 		jiffies_to_msecs(jiffies - bat->timestamp) >
 			BAT_POLL_INTERVAL) {
-		bat->volt_value = s3c_adc_read(bat->client,
+		bat->volt_value = gather_samples(bat->client,
+			bat->pdata->backup_volt_samples,
 			bat->pdata->backup_volt_channel);
 		bat->volt_value *= bat->pdata->backup_volt_mult;
 		bat->timestamp = jiffies;
@@ -139,9 +156,11 @@ static int s3c_adc_bat_get_property(struct power_supply *psy,
 	if (bat->volt_value < 0 || bat->cur_value < 0 ||
 		jiffies_to_msecs(jiffies - bat->timestamp) >
 			BAT_POLL_INTERVAL) {
-		bat->volt_value = s3c_adc_read(bat->client,
+		bat->volt_value = gather_samples(bat->client,
+			bat->pdata->volt_samples,
 			bat->pdata->volt_channel) * bat->pdata->volt_mult;
-		bat->cur_value = s3c_adc_read(bat->client,
+		bat->cur_value = gather_samples(bat->client,
+			bat->pdata->current_samples,
 			bat->pdata->current_channel) * bat->pdata->current_mult;
 		bat->timestamp = jiffies;
 	}
diff --git a/include/linux/s3c_adc_battery.h b/include/linux/s3c_adc_battery.h
index fbe58b7e63eb..99dadbffdd4f 100644
--- a/include/linux/s3c_adc_battery.h
+++ b/include/linux/s3c_adc_battery.h
@@ -25,6 +25,10 @@ struct s3c_adc_bat_pdata {
 	const unsigned int current_channel;
 	const unsigned int backup_volt_channel;
 
+	const unsigned int volt_samples;
+	const unsigned int current_samples;
+	const unsigned int backup_volt_samples;
+
 	const unsigned int volt_mult;
 	const unsigned int current_mult;
 	const unsigned int backup_volt_mult;
-- 
cgit v1.2.3


From c11b46c32c8a9bf05fdb76d70d8dc74fcbfd02d1 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 4 Jan 2012 15:34:17 +0100
Subject: dma: shdma: fix runtime PM: clear channel buffers on reset

On platforms, supporting power domains, if the domain, containing a DMAC
instance is powered down, the driver fails to resume correctly. On those
platforms DMAC channels have an additional CHCLR register for clearing
channel buffers. Using this register during runtime resume fixes the
problem.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/shdma.c    | 47 +++++++++++++++++++++++++++++++----------------
 include/linux/sh_dma.h |  2 ++
 2 files changed, 33 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 592304fb41a6..54043cd831c8 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -56,6 +56,15 @@ static LIST_HEAD(sh_dmae_devices);
 static unsigned long sh_dmae_slave_used[BITS_TO_LONGS(SH_DMA_SLAVE_NUMBER)];
 
 static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all);
+static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan);
+
+static void chclr_write(struct sh_dmae_chan *sh_dc, u32 data)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+	__raw_writel(data, shdev->chan_reg +
+		     shdev->pdata->channel[sh_dc->id].chclr_offset);
+}
 
 static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
 {
@@ -128,6 +137,15 @@ static int sh_dmae_rst(struct sh_dmae_device *shdev)
 
 	dmaor = dmaor_read(shdev) & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME);
 
+	if (shdev->pdata->chclr_present) {
+		int i;
+		for (i = 0; i < shdev->pdata->channel_num; i++) {
+			struct sh_dmae_chan *sh_chan = shdev->chan[i];
+			if (sh_chan)
+				chclr_write(sh_chan, 0);
+		}
+	}
+
 	dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init);
 
 	dmaor = dmaor_read(shdev);
@@ -138,6 +156,10 @@ static int sh_dmae_rst(struct sh_dmae_device *shdev)
 		dev_warn(shdev->common.dev, "Can't initialize DMAOR.\n");
 		return -EIO;
 	}
+	if (shdev->pdata->dmaor_init & ~dmaor)
+		dev_warn(shdev->common.dev,
+			 "DMAOR=0x%x hasn't latched the initial value 0x%x.\n",
+			 dmaor, shdev->pdata->dmaor_init);
 	return 0;
 }
 
@@ -258,8 +280,6 @@ static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
 	return 0;
 }
 
-static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan);
-
 static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c;
@@ -339,6 +359,8 @@ static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
 				sh_chan_xfer_ld_queue(sh_chan);
 			sh_chan->pm_state = DMAE_PM_ESTABLISHED;
 		}
+	} else {
+		sh_chan->pm_state = DMAE_PM_PENDING;
 	}
 
 	spin_unlock_irq(&sh_chan->desc_lock);
@@ -1224,6 +1246,8 @@ static int __init sh_dmae_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, shdev);
 
+	shdev->common.dev = &pdev->dev;
+
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_get_sync(&pdev->dev);
 
@@ -1253,7 +1277,6 @@ static int __init sh_dmae_probe(struct platform_device *pdev)
 	shdev->common.device_prep_slave_sg = sh_dmae_prep_slave_sg;
 	shdev->common.device_control = sh_dmae_control;
 
-	shdev->common.dev = &pdev->dev;
 	/* Default transfer size of 32 bytes requires 32-byte alignment */
 	shdev->common.copy_align = LOG2_DEFAULT_XFER_SIZE;
 
@@ -1434,22 +1457,17 @@ static int sh_dmae_runtime_resume(struct device *dev)
 #ifdef CONFIG_PM
 static int sh_dmae_suspend(struct device *dev)
 {
-	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
-	int i;
-
-	for (i = 0; i < shdev->pdata->channel_num; i++) {
-		struct sh_dmae_chan *sh_chan = shdev->chan[i];
-		if (sh_chan->descs_allocated)
-			sh_chan->pm_error = pm_runtime_put_sync(dev);
-	}
-
 	return 0;
 }
 
 static int sh_dmae_resume(struct device *dev)
 {
 	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
-	int i;
+	int i, ret;
+
+	ret = sh_dmae_rst(shdev);
+	if (ret < 0)
+		dev_err(dev, "Failed to reset!\n");
 
 	for (i = 0; i < shdev->pdata->channel_num; i++) {
 		struct sh_dmae_chan *sh_chan = shdev->chan[i];
@@ -1458,9 +1476,6 @@ static int sh_dmae_resume(struct device *dev)
 		if (!sh_chan->descs_allocated)
 			continue;
 
-		if (!sh_chan->pm_error)
-			pm_runtime_get_sync(dev);
-
 		if (param) {
 			const struct sh_dmae_slave_config *cfg = param->config;
 			dmae_set_dmars(sh_chan, cfg->mid_rid);
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
index 62ef6938da10..8cd7fe59cf1a 100644
--- a/include/linux/sh_dma.h
+++ b/include/linux/sh_dma.h
@@ -48,6 +48,7 @@ struct sh_dmae_channel {
 	unsigned int	offset;
 	unsigned int	dmars;
 	unsigned int	dmars_bit;
+	unsigned int	chclr_offset;
 };
 
 struct sh_dmae_pdata {
@@ -68,6 +69,7 @@ struct sh_dmae_pdata {
 	unsigned int dmaor_is_32bit:1;
 	unsigned int needs_tend_set:1;
 	unsigned int no_dmars:1;
+	unsigned int chclr_present:1;
 };
 
 /* DMA register */
-- 
cgit v1.2.3


From d15bd7ee445d0702ad801fdaece348fdb79e6581 Mon Sep 17 00:00:00 2001
From: Sumit Semwal <sumit.semwal@ti.com>
Date: Mon, 26 Dec 2011 14:53:15 +0530
Subject: dma-buf: Introduce dma buffer sharing mechanism

This is the first step in defining a dma buffer sharing mechanism.

A new buffer object dma_buf is added, with operations and API to allow easy
sharing of this buffer object across devices.

The framework allows:
- creation of a buffer object, its association with a file pointer, and
   associated allocator-defined operations on that buffer. This operation is
   called the 'export' operation.
- different devices to 'attach' themselves to this exported buffer object, to
  facilitate backing storage negotiation, using dma_buf_attach() API.
- the exported buffer object to be shared with the other entity by asking for
   its 'file-descriptor (fd)', and sharing the fd across.
- a received fd to get the buffer object back, where it can be accessed using
   the associated exporter-defined operations.
- the exporter and user to share the scatterlist associated with this buffer
   object using map_dma_buf and unmap_dma_buf operations.

Atleast one 'attach()' call is required to be made prior to calling the
map_dma_buf() operation.

Couple of building blocks in map_dma_buf() are added to ease introduction
of sync'ing across exporter and users, and late allocation by the exporter.

For this first version, this framework will work with certain conditions:
- *ONLY* exporter will be allowed to mmap to userspace (outside of this
   framework - mmap is not a buffer object operation),
- currently, *ONLY* users that do not need CPU access to the buffer are
   allowed.

More details are there in the documentation patch.

This is based on design suggestions from many people at the mini-summits[1],
most notably from Arnd Bergmann <arnd@arndb.de>, Rob Clark <rob@ti.com> and
Daniel Vetter <daniel@ffwll.ch>.

The implementation is inspired from proof-of-concept patch-set from
Tomasz Stanislawski <t.stanislaws@samsung.com>, who demonstrated buffer sharing
between two v4l2 devices. [2]

[1]: https://wiki.linaro.org/OfficeofCTO/MemoryManagement
[2]: http://lwn.net/Articles/454389

Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Sumit Semwal <sumit.semwal@ti.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Reviewed-and-Tested-by: Rob Clark <rob.clark@linaro.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/base/Kconfig    |  10 ++
 drivers/base/Makefile   |   1 +
 drivers/base/dma-buf.c  | 291 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dma-buf.h | 176 +++++++++++++++++++++++++++++
 4 files changed, 478 insertions(+)
 create mode 100644 drivers/base/dma-buf.c
 create mode 100644 include/linux/dma-buf.h

(limited to 'include/linux')

diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 21cf46f45245..8a0e87fd9064 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -174,4 +174,14 @@ config SYS_HYPERVISOR
 
 source "drivers/base/regmap/Kconfig"
 
+config DMA_SHARED_BUFFER
+	bool "Buffer framework to be shared between drivers"
+	default n
+	select ANON_INODES
+	help
+	  This option enables the framework for buffer-sharing between
+	  multiple drivers. A buffer is associated with a file using driver
+	  APIs extension; the file's descriptor can then be passed on to other
+	  driver.
+
 endmenu
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 99a375ad2cc9..d0df046a70d8 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_DEVTMPFS)	+= devtmpfs.o
 obj-y			+= power/
 obj-$(CONFIG_HAS_DMA)	+= dma-mapping.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
+obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf.o
 obj-$(CONFIG_ISA)	+= isa.o
 obj-$(CONFIG_FW_LOADER)	+= firmware_class.o
 obj-$(CONFIG_NUMA)	+= node.o
diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
new file mode 100644
index 000000000000..e38ad243b4bb
--- /dev/null
+++ b/drivers/base/dma-buf.c
@@ -0,0 +1,291 @@
+/*
+ * Framework for buffer objects that can be shared across devices/subsystems.
+ *
+ * Copyright(C) 2011 Linaro Limited. All rights reserved.
+ * Author: Sumit Semwal <sumit.semwal@ti.com>
+ *
+ * Many thanks to linaro-mm-sig list, and specially
+ * Arnd Bergmann <arnd@arndb.de>, Rob Clark <rob@ti.com> and
+ * Daniel Vetter <daniel@ffwll.ch> for their support in creation and
+ * refining of this idea.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/dma-buf.h>
+#include <linux/anon_inodes.h>
+#include <linux/export.h>
+
+static inline int is_dma_buf_file(struct file *);
+
+static int dma_buf_release(struct inode *inode, struct file *file)
+{
+	struct dma_buf *dmabuf;
+
+	if (!is_dma_buf_file(file))
+		return -EINVAL;
+
+	dmabuf = file->private_data;
+
+	dmabuf->ops->release(dmabuf);
+	kfree(dmabuf);
+	return 0;
+}
+
+static const struct file_operations dma_buf_fops = {
+	.release	= dma_buf_release,
+};
+
+/*
+ * is_dma_buf_file - Check if struct file* is associated with dma_buf
+ */
+static inline int is_dma_buf_file(struct file *file)
+{
+	return file->f_op == &dma_buf_fops;
+}
+
+/**
+ * dma_buf_export - Creates a new dma_buf, and associates an anon file
+ * with this buffer, so it can be exported.
+ * Also connect the allocator specific data and ops to the buffer.
+ *
+ * @priv:	[in]	Attach private data of allocator to this buffer
+ * @ops:	[in]	Attach allocator-defined dma buf ops to the new buffer.
+ * @size:	[in]	Size of the buffer
+ * @flags:	[in]	mode flags for the file.
+ *
+ * Returns, on success, a newly created dma_buf object, which wraps the
+ * supplied private data and operations for dma_buf_ops. On either missing
+ * ops, or error in allocating struct dma_buf, will return negative error.
+ *
+ */
+struct dma_buf *dma_buf_export(void *priv, struct dma_buf_ops *ops,
+				size_t size, int flags)
+{
+	struct dma_buf *dmabuf;
+	struct file *file;
+
+	if (WARN_ON(!priv || !ops
+			  || !ops->map_dma_buf
+			  || !ops->unmap_dma_buf
+			  || !ops->release)) {
+		return ERR_PTR(-EINVAL);
+	}
+
+	dmabuf = kzalloc(sizeof(struct dma_buf), GFP_KERNEL);
+	if (dmabuf == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	dmabuf->priv = priv;
+	dmabuf->ops = ops;
+	dmabuf->size = size;
+
+	file = anon_inode_getfile("dmabuf", &dma_buf_fops, dmabuf, flags);
+
+	dmabuf->file = file;
+
+	mutex_init(&dmabuf->lock);
+	INIT_LIST_HEAD(&dmabuf->attachments);
+
+	return dmabuf;
+}
+EXPORT_SYMBOL_GPL(dma_buf_export);
+
+
+/**
+ * dma_buf_fd - returns a file descriptor for the given dma_buf
+ * @dmabuf:	[in]	pointer to dma_buf for which fd is required.
+ *
+ * On success, returns an associated 'fd'. Else, returns error.
+ */
+int dma_buf_fd(struct dma_buf *dmabuf)
+{
+	int error, fd;
+
+	if (!dmabuf || !dmabuf->file)
+		return -EINVAL;
+
+	error = get_unused_fd();
+	if (error < 0)
+		return error;
+	fd = error;
+
+	fd_install(fd, dmabuf->file);
+
+	return fd;
+}
+EXPORT_SYMBOL_GPL(dma_buf_fd);
+
+/**
+ * dma_buf_get - returns the dma_buf structure related to an fd
+ * @fd:	[in]	fd associated with the dma_buf to be returned
+ *
+ * On success, returns the dma_buf structure associated with an fd; uses
+ * file's refcounting done by fget to increase refcount. returns ERR_PTR
+ * otherwise.
+ */
+struct dma_buf *dma_buf_get(int fd)
+{
+	struct file *file;
+
+	file = fget(fd);
+
+	if (!file)
+		return ERR_PTR(-EBADF);
+
+	if (!is_dma_buf_file(file)) {
+		fput(file);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return file->private_data;
+}
+EXPORT_SYMBOL_GPL(dma_buf_get);
+
+/**
+ * dma_buf_put - decreases refcount of the buffer
+ * @dmabuf:	[in]	buffer to reduce refcount of
+ *
+ * Uses file's refcounting done implicitly by fput()
+ */
+void dma_buf_put(struct dma_buf *dmabuf)
+{
+	if (WARN_ON(!dmabuf || !dmabuf->file))
+		return;
+
+	fput(dmabuf->file);
+}
+EXPORT_SYMBOL_GPL(dma_buf_put);
+
+/**
+ * dma_buf_attach - Add the device to dma_buf's attachments list; optionally,
+ * calls attach() of dma_buf_ops to allow device-specific attach functionality
+ * @dmabuf:	[in]	buffer to attach device to.
+ * @dev:	[in]	device to be attached.
+ *
+ * Returns struct dma_buf_attachment * for this attachment; may return negative
+ * error codes.
+ *
+ */
+struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+					  struct device *dev)
+{
+	struct dma_buf_attachment *attach;
+	int ret;
+
+	if (WARN_ON(!dmabuf || !dev || !dmabuf->ops))
+		return ERR_PTR(-EINVAL);
+
+	attach = kzalloc(sizeof(struct dma_buf_attachment), GFP_KERNEL);
+	if (attach == NULL)
+		goto err_alloc;
+
+	mutex_lock(&dmabuf->lock);
+
+	attach->dev = dev;
+	attach->dmabuf = dmabuf;
+	if (dmabuf->ops->attach) {
+		ret = dmabuf->ops->attach(dmabuf, dev, attach);
+		if (ret)
+			goto err_attach;
+	}
+	list_add(&attach->node, &dmabuf->attachments);
+
+	mutex_unlock(&dmabuf->lock);
+	return attach;
+
+err_alloc:
+	return ERR_PTR(-ENOMEM);
+err_attach:
+	kfree(attach);
+	mutex_unlock(&dmabuf->lock);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(dma_buf_attach);
+
+/**
+ * dma_buf_detach - Remove the given attachment from dmabuf's attachments list;
+ * optionally calls detach() of dma_buf_ops for device-specific detach
+ * @dmabuf:	[in]	buffer to detach from.
+ * @attach:	[in]	attachment to be detached; is free'd after this call.
+ *
+ */
+void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
+{
+	if (WARN_ON(!dmabuf || !attach || !dmabuf->ops))
+		return;
+
+	mutex_lock(&dmabuf->lock);
+	list_del(&attach->node);
+	if (dmabuf->ops->detach)
+		dmabuf->ops->detach(dmabuf, attach);
+
+	mutex_unlock(&dmabuf->lock);
+	kfree(attach);
+}
+EXPORT_SYMBOL_GPL(dma_buf_detach);
+
+/**
+ * dma_buf_map_attachment - Returns the scatterlist table of the attachment;
+ * mapped into _device_ address space. Is a wrapper for map_dma_buf() of the
+ * dma_buf_ops.
+ * @attach:	[in]	attachment whose scatterlist is to be returned
+ * @direction:	[in]	direction of DMA transfer
+ *
+ * Returns sg_table containing the scatterlist to be returned; may return NULL
+ * or ERR_PTR.
+ *
+ */
+struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
+					enum dma_data_direction direction)
+{
+	struct sg_table *sg_table = ERR_PTR(-EINVAL);
+
+	might_sleep();
+
+	if (WARN_ON(!attach || !attach->dmabuf || !attach->dmabuf->ops))
+		return ERR_PTR(-EINVAL);
+
+	mutex_lock(&attach->dmabuf->lock);
+	if (attach->dmabuf->ops->map_dma_buf)
+		sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
+	mutex_unlock(&attach->dmabuf->lock);
+
+	return sg_table;
+}
+EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
+
+/**
+ * dma_buf_unmap_attachment - unmaps and decreases usecount of the buffer;might
+ * deallocate the scatterlist associated. Is a wrapper for unmap_dma_buf() of
+ * dma_buf_ops.
+ * @attach:	[in]	attachment to unmap buffer from
+ * @sg_table:	[in]	scatterlist info of the buffer to unmap
+ *
+ */
+void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
+				struct sg_table *sg_table)
+{
+	if (WARN_ON(!attach || !attach->dmabuf || !sg_table
+			    || !attach->dmabuf->ops))
+		return;
+
+	mutex_lock(&attach->dmabuf->lock);
+	if (attach->dmabuf->ops->unmap_dma_buf)
+		attach->dmabuf->ops->unmap_dma_buf(attach, sg_table);
+	mutex_unlock(&attach->dmabuf->lock);
+
+}
+EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
new file mode 100644
index 000000000000..f8ac076afa52
--- /dev/null
+++ b/include/linux/dma-buf.h
@@ -0,0 +1,176 @@
+/*
+ * Header file for dma buffer sharing framework.
+ *
+ * Copyright(C) 2011 Linaro Limited. All rights reserved.
+ * Author: Sumit Semwal <sumit.semwal@ti.com>
+ *
+ * Many thanks to linaro-mm-sig list, and specially
+ * Arnd Bergmann <arnd@arndb.de>, Rob Clark <rob@ti.com> and
+ * Daniel Vetter <daniel@ffwll.ch> for their support in creation and
+ * refining of this idea.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __DMA_BUF_H__
+#define __DMA_BUF_H__
+
+#include <linux/file.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/scatterlist.h>
+#include <linux/list.h>
+#include <linux/dma-mapping.h>
+
+struct dma_buf;
+struct dma_buf_attachment;
+
+/**
+ * struct dma_buf_ops - operations possible on struct dma_buf
+ * @attach: [optional] allows different devices to 'attach' themselves to the
+ *	    given buffer. It might return -EBUSY to signal that backing storage
+ *	    is already allocated and incompatible with the requirements
+ *	    of requesting device.
+ * @detach: [optional] detach a given device from this buffer.
+ * @map_dma_buf: returns list of scatter pages allocated, increases usecount
+ *		 of the buffer. Requires atleast one attach to be called
+ *		 before. Returned sg list should already be mapped into
+ *		 _device_ address space. This call may sleep. May also return
+ *		 -EINTR. Should return -EINVAL if attach hasn't been called yet.
+ * @unmap_dma_buf: decreases usecount of buffer, might deallocate scatter
+ *		   pages.
+ * @release: release this buffer; to be called after the last dma_buf_put.
+ */
+struct dma_buf_ops {
+	int (*attach)(struct dma_buf *, struct device *,
+			struct dma_buf_attachment *);
+
+	void (*detach)(struct dma_buf *, struct dma_buf_attachment *);
+
+	/* For {map,unmap}_dma_buf below, any specific buffer attributes
+	 * required should get added to device_dma_parameters accessible
+	 * via dev->dma_params.
+	 */
+	struct sg_table * (*map_dma_buf)(struct dma_buf_attachment *,
+						enum dma_data_direction);
+	void (*unmap_dma_buf)(struct dma_buf_attachment *,
+						struct sg_table *);
+	/* TODO: Add try_map_dma_buf version, to return immed with -EBUSY
+	 * if the call would block.
+	 */
+
+	/* after final dma_buf_put() */
+	void (*release)(struct dma_buf *);
+
+};
+
+/**
+ * struct dma_buf - shared buffer object
+ * @size: size of the buffer
+ * @file: file pointer used for sharing buffers across, and for refcounting.
+ * @attachments: list of dma_buf_attachment that denotes all devices attached.
+ * @ops: dma_buf_ops associated with this buffer object.
+ * @priv: exporter specific private data for this buffer object.
+ */
+struct dma_buf {
+	size_t size;
+	struct file *file;
+	struct list_head attachments;
+	const struct dma_buf_ops *ops;
+	/* mutex to serialize list manipulation and other ops */
+	struct mutex lock;
+	void *priv;
+};
+
+/**
+ * struct dma_buf_attachment - holds device-buffer attachment data
+ * @dmabuf: buffer for this attachment.
+ * @dev: device attached to the buffer.
+ * @node: list of dma_buf_attachment.
+ * @priv: exporter specific attachment data.
+ *
+ * This structure holds the attachment information between the dma_buf buffer
+ * and its user device(s). The list contains one attachment struct per device
+ * attached to the buffer.
+ */
+struct dma_buf_attachment {
+	struct dma_buf *dmabuf;
+	struct device *dev;
+	struct list_head node;
+	void *priv;
+};
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+							struct device *dev);
+void dma_buf_detach(struct dma_buf *dmabuf,
+				struct dma_buf_attachment *dmabuf_attach);
+struct dma_buf *dma_buf_export(void *priv, struct dma_buf_ops *ops,
+			size_t size, int flags);
+int dma_buf_fd(struct dma_buf *dmabuf);
+struct dma_buf *dma_buf_get(int fd);
+void dma_buf_put(struct dma_buf *dmabuf);
+
+struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
+					enum dma_data_direction);
+void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *);
+#else
+
+static inline struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+							struct device *dev)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline void dma_buf_detach(struct dma_buf *dmabuf,
+				  struct dma_buf_attachment *dmabuf_attach)
+{
+	return;
+}
+
+static inline struct dma_buf *dma_buf_export(void *priv,
+						struct dma_buf_ops *ops,
+						size_t size, int flags)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline int dma_buf_fd(struct dma_buf *dmabuf)
+{
+	return -ENODEV;
+}
+
+static inline struct dma_buf *dma_buf_get(int fd)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline void dma_buf_put(struct dma_buf *dmabuf)
+{
+	return;
+}
+
+static inline struct sg_table *dma_buf_map_attachment(
+	struct dma_buf_attachment *attach, enum dma_data_direction write)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
+						struct sg_table *sg)
+{
+	return;
+}
+
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+#endif /* __DMA_BUF_H__ */
-- 
cgit v1.2.3


From fe3449a4aa4e62404cc1c57c945fd56152b2f877 Mon Sep 17 00:00:00 2001
From: Theodore Kilgore <kilgota@banach.math.auburn.edu>
Date: Tue, 13 Dec 2011 18:09:15 -0300
Subject: [media] gspca: Add jl2005bcd sub driver

Written by Theodore Kilgore

With minor changes by Hans de Goede:
-Code style fixes
-Correct the verbose level on various PDEBUG messages
-Make error messages use pr_err instead of PDEBUG
-Document the jl20 pixel format

Signed-off-by: Theodore Kilgore <kilgota@auburn.edu>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/v4l/pixfmt.xml |   5 +
 Documentation/video4linux/gspca.txt        |   1 +
 drivers/media/video/gspca/Kconfig          |  10 +
 drivers/media/video/gspca/Makefile         |   2 +
 drivers/media/video/gspca/jl2005bcd.c      | 554 +++++++++++++++++++++++++++++
 include/linux/videodev2.h                  |   1 +
 6 files changed, 573 insertions(+)
 create mode 100644 drivers/media/video/gspca/jl2005bcd.c

(limited to 'include/linux')

diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml
index a33a4b22173b..9ddc57cb2ef9 100644
--- a/Documentation/DocBook/media/v4l/pixfmt.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt.xml
@@ -890,6 +890,11 @@ kernel sources in the file <filename>Documentation/video4linux/cx2341x/README.hm
 	    <entry>'M310'</entry>
 	    <entry>Compressed BGGR Bayer format used by the gspca driver.</entry>
 	  </row>
+	  <row id="V4L2-PIX-FMT-JL2005BCD">
+	    <entry><constant>V4L2_PIX_FMT_JL2005BCD</constant></entry>
+	    <entry>'JL20'</entry>
+	    <entry>JPEG compressed RGGB Bayer format used by the gspca driver.</entry>
+	  </row>
 	  <row id="V4L2-PIX-FMT-OV511">
 	    <entry><constant>V4L2_PIX_FMT_OV511</constant></entry>
 	    <entry>'O511'</entry>
diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt
index 393815b62810..f2060f0dc02c 100644
--- a/Documentation/video4linux/gspca.txt
+++ b/Documentation/video4linux/gspca.txt
@@ -279,6 +279,7 @@ pac7302		093a:2628	Genius iLook 300
 pac7302		093a:2629	Genious iSlim 300
 pac7302		093a:262a	Webcam 300k
 pac7302		093a:262c	Philips SPC 230 NC
+jl2005bcd	0979:0227	Various brands, 19 known cameras supported
 jeilinj		0979:0280	Sakar 57379
 jeilinj		0979:0280	Sportscam DV15
 zc3xx		0ac8:0302	Z-star Vimicro zc0302
diff --git a/drivers/media/video/gspca/Kconfig b/drivers/media/video/gspca/Kconfig
index 103af3fe5aa0..dfe268bfa4f8 100644
--- a/drivers/media/video/gspca/Kconfig
+++ b/drivers/media/video/gspca/Kconfig
@@ -77,6 +77,16 @@ config USB_GSPCA_JEILINJ
 	  To compile this driver as a module, choose M here: the
 	  module will be called gspca_jeilinj.
 
+config USB_GSPCA_JL2005BCD
+	tristate "JL2005B/C/D USB V4L2 driver"
+	depends on VIDEO_V4L2 && USB_GSPCA
+	help
+	  Say Y here if you want support for cameras based the
+	  JL2005B, JL2005C, or JL2005D chip.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called gspca_jl2005bcd.
+
 config USB_GSPCA_KINECT
 	tristate "Kinect sensor device USB Camera Driver"
 	depends on VIDEO_V4L2 && USB_GSPCA
diff --git a/drivers/media/video/gspca/Makefile b/drivers/media/video/gspca/Makefile
index f345f494d0f3..79ebe46e1ad7 100644
--- a/drivers/media/video/gspca/Makefile
+++ b/drivers/media/video/gspca/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_USB_GSPCA_CPIA1)    += gspca_cpia1.o
 obj-$(CONFIG_USB_GSPCA_ETOMS)    += gspca_etoms.o
 obj-$(CONFIG_USB_GSPCA_FINEPIX)  += gspca_finepix.o
 obj-$(CONFIG_USB_GSPCA_JEILINJ)  += gspca_jeilinj.o
+obj-$(CONFIG_USB_GSPCA_JL2005BCD) += gspca_jl2005bcd.o
 obj-$(CONFIG_USB_GSPCA_KINECT)   += gspca_kinect.o
 obj-$(CONFIG_USB_GSPCA_KONICA)   += gspca_konica.o
 obj-$(CONFIG_USB_GSPCA_MARS)     += gspca_mars.o
@@ -49,6 +50,7 @@ gspca_cpia1-objs    := cpia1.o
 gspca_etoms-objs    := etoms.o
 gspca_finepix-objs  := finepix.o
 gspca_jeilinj-objs  := jeilinj.o
+gspca_jl2005bcd-objs  := jl2005bcd.o
 gspca_kinect-objs   := kinect.o
 gspca_konica-objs   := konica.o
 gspca_mars-objs     := mars.o
diff --git a/drivers/media/video/gspca/jl2005bcd.c b/drivers/media/video/gspca/jl2005bcd.c
new file mode 100644
index 000000000000..53f58ef367cf
--- /dev/null
+++ b/drivers/media/video/gspca/jl2005bcd.c
@@ -0,0 +1,554 @@
+/*
+ * Jeilin JL2005B/C/D library
+ *
+ * Copyright (C) 2011 Theodore Kilgore <kilgota@auburn.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#define MODULE_NAME "jl2005bcd"
+
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include "gspca.h"
+
+
+MODULE_AUTHOR("Theodore Kilgore <kilgota@auburn.edu>");
+MODULE_DESCRIPTION("JL2005B/C/D USB Camera Driver");
+MODULE_LICENSE("GPL");
+
+/* Default timeouts, in ms */
+#define JL2005C_CMD_TIMEOUT 500
+#define JL2005C_DATA_TIMEOUT 1000
+
+/* Maximum transfer size to use. */
+#define JL2005C_MAX_TRANSFER 0x200
+#define FRAME_HEADER_LEN 16
+
+
+/* specific webcam descriptor */
+struct sd {
+	struct gspca_dev gspca_dev;  /* !! must be the first item */
+	unsigned char firmware_id[6];
+	const struct v4l2_pix_format *cap_mode;
+	/* Driver stuff */
+	struct work_struct work_struct;
+	struct workqueue_struct *work_thread;
+	u8 frame_brightness;
+	int block_size;	/* block size of camera */
+	int vga;	/* 1 if vga cam, 0 if cif cam */
+};
+
+
+/* Camera has two resolution settings. What they are depends on model. */
+static const struct v4l2_pix_format cif_mode[] = {
+	{176, 144, V4L2_PIX_FMT_JL2005BCD, V4L2_FIELD_NONE,
+		.bytesperline = 176,
+		.sizeimage = 176 * 144,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.priv = 0},
+	{352, 288, V4L2_PIX_FMT_JL2005BCD, V4L2_FIELD_NONE,
+		.bytesperline = 352,
+		.sizeimage = 352 * 288,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.priv = 0},
+};
+
+static const struct v4l2_pix_format vga_mode[] = {
+	{320, 240, V4L2_PIX_FMT_JL2005BCD, V4L2_FIELD_NONE,
+		.bytesperline = 320,
+		.sizeimage = 320 * 240,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.priv = 0},
+	{640, 480, V4L2_PIX_FMT_JL2005BCD, V4L2_FIELD_NONE,
+		.bytesperline = 640,
+		.sizeimage = 640 * 480,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.priv = 0},
+};
+
+/*
+ * cam uses endpoint 0x03 to send commands, 0x84 for read commands,
+ * and 0x82 for bulk data transfer.
+ */
+
+/* All commands are two bytes only */
+static int jl2005c_write2(struct gspca_dev *gspca_dev, unsigned char *command)
+{
+	int retval;
+
+	memcpy(gspca_dev->usb_buf, command, 2);
+	retval = usb_bulk_msg(gspca_dev->dev,
+			usb_sndbulkpipe(gspca_dev->dev, 3),
+			gspca_dev->usb_buf, 2, NULL, 500);
+	if (retval < 0)
+		pr_err("command write [%02x] error %d\n",
+		       gspca_dev->usb_buf[0], retval);
+	return retval;
+}
+
+/* Response to a command is one byte in usb_buf[0], only if requested. */
+static int jl2005c_read1(struct gspca_dev *gspca_dev)
+{
+	int retval;
+
+	retval = usb_bulk_msg(gspca_dev->dev,
+				usb_rcvbulkpipe(gspca_dev->dev, 0x84),
+				gspca_dev->usb_buf, 1, NULL, 500);
+	if (retval < 0)
+		pr_err("read command [0x%02x] error %d\n",
+		       gspca_dev->usb_buf[0], retval);
+	return retval;
+}
+
+/* Response appears in gspca_dev->usb_buf[0] */
+static int jl2005c_read_reg(struct gspca_dev *gspca_dev, unsigned char reg)
+{
+	int retval;
+
+	static u8 instruction[2] = {0x95, 0x00};
+	/* put register to read in byte 1 */
+	instruction[1] = reg;
+	/* Send the read request */
+	retval = jl2005c_write2(gspca_dev, instruction);
+	if (retval < 0)
+		return retval;
+	retval = jl2005c_read1(gspca_dev);
+
+	return retval;
+}
+
+static int jl2005c_start_new_frame(struct gspca_dev *gspca_dev)
+{
+	int i;
+	int retval;
+	int frame_brightness = 0;
+
+	static u8 instruction[2] = {0x7f, 0x01};
+
+	retval = jl2005c_write2(gspca_dev, instruction);
+	if (retval < 0)
+		return retval;
+
+	i = 0;
+	while (i < 20 && !frame_brightness) {
+		/* If we tried 20 times, give up. */
+		retval = jl2005c_read_reg(gspca_dev, 0x7e);
+		if (retval < 0)
+			return retval;
+		frame_brightness = gspca_dev->usb_buf[0];
+		retval = jl2005c_read_reg(gspca_dev, 0x7d);
+		if (retval < 0)
+			return retval;
+		i++;
+	}
+	PDEBUG(D_FRAM, "frame_brightness is 0x%02x", gspca_dev->usb_buf[0]);
+	return retval;
+}
+
+static int jl2005c_write_reg(struct gspca_dev *gspca_dev, unsigned char reg,
+						    unsigned char value)
+{
+	int retval;
+	u8 instruction[2];
+
+	instruction[0] = reg;
+	instruction[1] = value;
+
+	retval = jl2005c_write2(gspca_dev, instruction);
+	if (retval < 0)
+			return retval;
+
+	return retval;
+}
+
+static int jl2005c_get_firmware_id(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *)gspca_dev;
+	int i = 0;
+	int retval = -1;
+	unsigned char regs_to_read[] = {0x57, 0x02, 0x03, 0x5d, 0x5e, 0x5f};
+
+	PDEBUG(D_PROBE, "Running jl2005c_get_firmware_id");
+	/* Read the first ID byte once for warmup */
+	retval = jl2005c_read_reg(gspca_dev, regs_to_read[0]);
+	PDEBUG(D_PROBE, "response is %02x", gspca_dev->usb_buf[0]);
+	if (retval < 0)
+		return retval;
+	/* Now actually get the ID string */
+	for (i = 0; i < 6; i++) {
+		retval = jl2005c_read_reg(gspca_dev, regs_to_read[i]);
+		if (retval < 0)
+			return retval;
+		sd->firmware_id[i] = gspca_dev->usb_buf[0];
+	}
+	PDEBUG(D_PROBE, "firmware ID is %02x%02x%02x%02x%02x%02x",
+						sd->firmware_id[0],
+						sd->firmware_id[1],
+						sd->firmware_id[2],
+						sd->firmware_id[3],
+						sd->firmware_id[4],
+						sd->firmware_id[5]);
+	return 0;
+}
+
+static int jl2005c_stream_start_vga_lg
+		    (struct gspca_dev *gspca_dev)
+{
+	int i;
+	int retval = -1;
+	static u8 instruction[][2] = {
+		{0x05, 0x00},
+		{0x7c, 0x00},
+		{0x7d, 0x18},
+		{0x02, 0x00},
+		{0x01, 0x00},
+		{0x04, 0x52},
+	};
+
+	for (i = 0; i < ARRAY_SIZE(instruction); i++) {
+		msleep(60);
+		retval = jl2005c_write2(gspca_dev, instruction[i]);
+		if (retval < 0)
+			return retval;
+	}
+	msleep(60);
+	return retval;
+}
+
+static int jl2005c_stream_start_vga_small(struct gspca_dev *gspca_dev)
+{
+	int i;
+	int retval = -1;
+	static u8 instruction[][2] = {
+		{0x06, 0x00},
+		{0x7c, 0x00},
+		{0x7d, 0x1a},
+		{0x02, 0x00},
+		{0x01, 0x00},
+		{0x04, 0x52},
+	};
+
+	for (i = 0; i < ARRAY_SIZE(instruction); i++) {
+		msleep(60);
+		retval = jl2005c_write2(gspca_dev, instruction[i]);
+		if (retval < 0)
+			return retval;
+	}
+	msleep(60);
+	return retval;
+}
+
+static int jl2005c_stream_start_cif_lg(struct gspca_dev *gspca_dev)
+{
+	int i;
+	int retval = -1;
+	static u8 instruction[][2] = {
+		{0x05, 0x00},
+		{0x7c, 0x00},
+		{0x7d, 0x30},
+		{0x02, 0x00},
+		{0x01, 0x00},
+		{0x04, 0x42},
+	};
+
+	for (i = 0; i < ARRAY_SIZE(instruction); i++) {
+		msleep(60);
+		retval = jl2005c_write2(gspca_dev, instruction[i]);
+		if (retval < 0)
+			return retval;
+	}
+	msleep(60);
+	return retval;
+}
+
+static int jl2005c_stream_start_cif_small(struct gspca_dev *gspca_dev)
+{
+	int i;
+	int retval = -1;
+	static u8 instruction[][2] = {
+		{0x06, 0x00},
+		{0x7c, 0x00},
+		{0x7d, 0x32},
+		{0x02, 0x00},
+		{0x01, 0x00},
+		{0x04, 0x42},
+	};
+
+	for (i = 0; i < ARRAY_SIZE(instruction); i++) {
+		msleep(60);
+		retval = jl2005c_write2(gspca_dev, instruction[i]);
+		if (retval < 0)
+			return retval;
+	}
+	msleep(60);
+	return retval;
+}
+
+
+static int jl2005c_stop(struct gspca_dev *gspca_dev)
+{
+	int retval;
+
+	retval = jl2005c_write_reg(gspca_dev, 0x07, 0x00);
+	return retval;
+}
+
+/* This function is called as a workqueue function and runs whenever the camera
+ * is streaming data. Because it is a workqueue function it is allowed to sleep
+ * so we can use synchronous USB calls. To avoid possible collisions with other
+ * threads attempting to use the camera's USB interface the gspca usb_lock is
+ * used when performing the one USB control operation inside the workqueue,
+ * which tells the camera to close the stream. In practice the only thing
+ * which needs to be protected against is the usb_set_interface call that
+ * gspca makes during stream_off. Otherwise the camera doesn't provide any
+ * controls that the user could try to change.
+ */
+static void jl2005c_dostream(struct work_struct *work)
+{
+	struct sd *dev = container_of(work, struct sd, work_struct);
+	struct gspca_dev *gspca_dev = &dev->gspca_dev;
+	int bytes_left = 0; /* bytes remaining in current frame. */
+	int data_len;   /* size to use for the next read. */
+	int header_read = 0;
+	unsigned char header_sig[2] = {0x4a, 0x4c};
+	int act_len;
+	int packet_type;
+	int ret;
+	u8 *buffer;
+
+	buffer = kmalloc(JL2005C_MAX_TRANSFER, GFP_KERNEL | GFP_DMA);
+	if (!buffer) {
+		pr_err("Couldn't allocate USB buffer\n");
+		goto quit_stream;
+	}
+
+	while (gspca_dev->present && gspca_dev->streaming) {
+		/* Check if this is a new frame. If so, start the frame first */
+		if (!header_read) {
+			mutex_lock(&gspca_dev->usb_lock);
+			ret = jl2005c_start_new_frame(gspca_dev);
+			mutex_unlock(&gspca_dev->usb_lock);
+			if (ret < 0)
+				goto quit_stream;
+			ret = usb_bulk_msg(gspca_dev->dev,
+				usb_rcvbulkpipe(gspca_dev->dev, 0x82),
+				buffer, JL2005C_MAX_TRANSFER, &act_len,
+				JL2005C_DATA_TIMEOUT);
+			PDEBUG(D_PACK,
+				"Got %d bytes out of %d for header",
+					act_len, JL2005C_MAX_TRANSFER);
+			if (ret < 0 || act_len < JL2005C_MAX_TRANSFER)
+				goto quit_stream;
+			/* Check whether we actually got the first blodk */
+			if (memcmp(header_sig, buffer, 2) != 0) {
+				pr_err("First block is not the first block\n");
+				goto quit_stream;
+			}
+			/* total size to fetch is byte 7, times blocksize
+			 * of which we already got act_len */
+			bytes_left = buffer[0x07] * dev->block_size - act_len;
+			PDEBUG(D_PACK, "bytes_left = 0x%x", bytes_left);
+			/* We keep the header. It has other information, too.*/
+			packet_type = FIRST_PACKET;
+			gspca_frame_add(gspca_dev, packet_type,
+					buffer, act_len);
+			header_read = 1;
+		}
+		while (bytes_left > 0 && gspca_dev->present) {
+			data_len = bytes_left > JL2005C_MAX_TRANSFER ?
+				JL2005C_MAX_TRANSFER : bytes_left;
+			ret = usb_bulk_msg(gspca_dev->dev,
+				usb_rcvbulkpipe(gspca_dev->dev, 0x82),
+				buffer, data_len, &act_len,
+				JL2005C_DATA_TIMEOUT);
+			if (ret < 0 || act_len < data_len)
+				goto quit_stream;
+			PDEBUG(D_PACK,
+				"Got %d bytes out of %d for frame",
+						data_len, bytes_left);
+			bytes_left -= data_len;
+			if (bytes_left == 0) {
+				packet_type = LAST_PACKET;
+				header_read = 0;
+			} else
+				packet_type = INTER_PACKET;
+			gspca_frame_add(gspca_dev, packet_type,
+					buffer, data_len);
+		}
+	}
+quit_stream:
+	if (gspca_dev->present) {
+		mutex_lock(&gspca_dev->usb_lock);
+		jl2005c_stop(gspca_dev);
+		mutex_unlock(&gspca_dev->usb_lock);
+	}
+	kfree(buffer);
+}
+
+
+
+
+/* This function is called at probe time */
+static int sd_config(struct gspca_dev *gspca_dev,
+			const struct usb_device_id *id)
+{
+	struct cam *cam;
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	cam = &gspca_dev->cam;
+	/* We don't use the buffer gspca allocates so make it small. */
+	cam->bulk_size = 64;
+	cam->bulk = 1;
+	/* For the rest, the camera needs to be detected */
+	jl2005c_get_firmware_id(gspca_dev);
+	/* Here are some known firmware IDs
+	 * First some JL2005B cameras
+	 * {0x41, 0x07, 0x04, 0x2c, 0xe8, 0xf2}	Sakar KidzCam
+	 * {0x45, 0x02, 0x08, 0xb9, 0x00, 0xd2}	No-name JL2005B
+	 * JL2005C cameras
+	 * {0x01, 0x0c, 0x16, 0x10, 0xf8, 0xc8}	Argus DC-1512
+	 * {0x12, 0x04, 0x03, 0xc0, 0x00, 0xd8}	ICarly
+	 * {0x86, 0x08, 0x05, 0x02, 0x00, 0xd4}	Jazz
+	 *
+	 * Based upon this scanty evidence, we can detect a CIF camera by
+	 * testing byte 0 for 0x4x.
+	 */
+	if ((sd->firmware_id[0] & 0xf0) == 0x40) {
+		cam->cam_mode	= cif_mode;
+		cam->nmodes	= ARRAY_SIZE(cif_mode);
+		sd->block_size	= 0x80;
+	} else {
+		cam->cam_mode	= vga_mode;
+		cam->nmodes	= ARRAY_SIZE(vga_mode);
+		sd->block_size	= 0x200;
+	}
+
+	INIT_WORK(&sd->work_struct, jl2005c_dostream);
+
+	return 0;
+}
+
+/* this function is called at probe and resume time */
+static int sd_init(struct gspca_dev *gspca_dev)
+{
+	return 0;
+}
+
+static int sd_start(struct gspca_dev *gspca_dev)
+{
+
+	struct sd *sd = (struct sd *) gspca_dev;
+	sd->cap_mode = gspca_dev->cam.cam_mode;
+
+	switch (gspca_dev->width) {
+	case 640:
+		PDEBUG(D_STREAM, "Start streaming at vga resolution");
+		jl2005c_stream_start_vga_lg(gspca_dev);
+		break;
+	case 320:
+		PDEBUG(D_STREAM, "Start streaming at qvga resolution");
+		jl2005c_stream_start_vga_small(gspca_dev);
+		break;
+	case 352:
+		PDEBUG(D_STREAM, "Start streaming at cif resolution");
+		jl2005c_stream_start_cif_lg(gspca_dev);
+		break;
+	case 176:
+		PDEBUG(D_STREAM, "Start streaming at qcif resolution");
+		jl2005c_stream_start_cif_small(gspca_dev);
+		break;
+	default:
+		pr_err("Unknown resolution specified\n");
+		return -1;
+	}
+
+	/* Start the workqueue function to do the streaming */
+	sd->work_thread = create_singlethread_workqueue(MODULE_NAME);
+	queue_work(sd->work_thread, &sd->work_struct);
+
+	return 0;
+}
+
+/* called on streamoff with alt==0 and on disconnect */
+/* the usb_lock is held at entry - restore on exit */
+static void sd_stop0(struct gspca_dev *gspca_dev)
+{
+	struct sd *dev = (struct sd *) gspca_dev;
+
+	/* wait for the work queue to terminate */
+	mutex_unlock(&gspca_dev->usb_lock);
+	/* This waits for sq905c_dostream to finish */
+	destroy_workqueue(dev->work_thread);
+	dev->work_thread = NULL;
+	mutex_lock(&gspca_dev->usb_lock);
+}
+
+
+
+/* sub-driver description */
+static const struct sd_desc sd_desc = {
+	.name = MODULE_NAME,
+	/* .ctrls = none have been detected */
+	/* .nctrls = ARRAY_SIZE(sd_ctrls),  */
+	.config = sd_config,
+	.init = sd_init,
+	.start = sd_start,
+	.stop0 = sd_stop0,
+};
+
+/* -- module initialisation -- */
+static const __devinitdata struct usb_device_id device_table[] = {
+	{USB_DEVICE(0x0979, 0x0227)},
+	{}
+};
+MODULE_DEVICE_TABLE(usb, device_table);
+
+/* -- device connect -- */
+static int sd_probe(struct usb_interface *intf,
+				const struct usb_device_id *id)
+{
+	return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd),
+				THIS_MODULE);
+}
+
+static struct usb_driver sd_driver = {
+	.name = MODULE_NAME,
+	.id_table = device_table,
+	.probe = sd_probe,
+	.disconnect = gspca_disconnect,
+#ifdef CONFIG_PM
+	.suspend = gspca_suspend,
+	.resume = gspca_resume,
+#endif
+};
+
+/* -- module insert / remove -- */
+static int __init sd_mod_init(void)
+{
+	int ret;
+
+	ret = usb_register(&sd_driver);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+static void __exit sd_mod_exit(void)
+{
+	usb_deregister(&sd_driver);
+}
+
+module_init(sd_mod_init);
+module_exit(sd_mod_exit);
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 2965906a02c9..6bfaa767a817 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -401,6 +401,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_SPCA561  v4l2_fourcc('S', '5', '6', '1') /* compressed GBRG bayer */
 #define V4L2_PIX_FMT_PAC207   v4l2_fourcc('P', '2', '0', '7') /* compressed BGGR bayer */
 #define V4L2_PIX_FMT_MR97310A v4l2_fourcc('M', '3', '1', '0') /* compressed BGGR bayer */
+#define V4L2_PIX_FMT_JL2005BCD v4l2_fourcc('J', 'L', '2', '0') /* compressed RGGB bayer */
 #define V4L2_PIX_FMT_SN9C2028 v4l2_fourcc('S', 'O', 'N', 'X') /* compressed GBRG bayer */
 #define V4L2_PIX_FMT_SQ905C   v4l2_fourcc('9', '0', '5', 'C') /* compressed RGGB bayer */
 #define V4L2_PIX_FMT_PJPG     v4l2_fourcc('P', 'J', 'P', 'G') /* Pixart 73xx JPEG */
-- 
cgit v1.2.3


From ff0b3cd4a416bc727b0797b95b229b278d2a28f2 Mon Sep 17 00:00:00 2001
From: Wim Van Sebroeck <wim@iguana.be>
Date: Tue, 29 Nov 2011 16:24:16 +0100
Subject: watchdog: add nowayout helpers to Watchdog Timer Driver Kernel API

Add two nowayout helpers for the Watchdog Timer Driver Kernel API.
And apply this to the already converted drivers.
Note: s3c2410_wdt lost the nowayout feature during the conversion.

Reviewed-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 Documentation/watchdog/watchdog-kernel-api.txt | 10 +++++++++-
 drivers/watchdog/s3c2410_wdt.c                 |  2 ++
 drivers/watchdog/wm831x_wdt.c                  |  4 +---
 include/linux/watchdog.h                       | 21 ++++++++++++++++-----
 4 files changed, 28 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt
index 4f7c894244d2..4b93c28e35c6 100644
--- a/Documentation/watchdog/watchdog-kernel-api.txt
+++ b/Documentation/watchdog/watchdog-kernel-api.txt
@@ -1,6 +1,6 @@
 The Linux WatchDog Timer Driver Core kernel API.
 ===============================================
-Last reviewed: 22-Jul-2011
+Last reviewed: 29-Nov-2011
 
 Wim Van Sebroeck <wim@iguana.be>
 
@@ -142,6 +142,14 @@ bit-operations. The status bits that are defined are:
 * WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog.
   If this bit is set then the watchdog timer will not be able to stop.
 
+  To set the WDOG_NO_WAY_OUT status bit (before registering your watchdog
+  timer device) you can either:
+  * set it statically in your watchdog_device struct with
+	.status = WATCHDOG_NOWAYOUT_INIT_STATUS,
+    (this will set the value the same as CONFIG_WATCHDOG_NOWAYOUT) or
+  * use the following helper function:
+  static inline void watchdog_set_nowayout(struct watchdog_device *wdd, int nowayout)
+
 Note: The WatchDog Timer Driver Core supports the magic close feature and
 the nowayout feature. To use the magic close feature you must set the
 WDIOF_MAGICCLOSE bit in the options field of the watchdog's info structure.
diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index a79e3840782a..4bc3744e14e4 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -378,6 +378,8 @@ static int __devinit s3c2410wdt_probe(struct platform_device *pdev)
 							"cannot start\n");
 	}
 
+	watchdog_set_nowayout(&s3c2410_wdd, nowayout);
+
 	ret = watchdog_register_device(&s3c2410_wdd);
 	if (ret) {
 		dev_err(dev, "cannot register watchdog (%d)\n", ret);
diff --git a/drivers/watchdog/wm831x_wdt.c b/drivers/watchdog/wm831x_wdt.c
index beb3ad2294d3..6cd1ba41b47c 100644
--- a/drivers/watchdog/wm831x_wdt.c
+++ b/drivers/watchdog/wm831x_wdt.c
@@ -213,11 +213,9 @@ static int __devinit wm831x_wdt_probe(struct platform_device *pdev)
 
 	wm831x_wdt->info = &wm831x_wdt_info;
 	wm831x_wdt->ops = &wm831x_wdt_ops;
+	watchdog_set_nowayout(wm831x_wdt, nowayout);
 	watchdog_set_drvdata(wm831x_wdt, driver_data);
 
-	if (nowayout)
-		wm831x_wdt->status |= WDOG_NO_WAY_OUT;
-
 	reg = wm831x_reg_read(wm831x, WM831X_WATCHDOG);
 	reg &= WM831X_WDOG_TO_MASK;
 	for (i = 0; i < ARRAY_SIZE(wm831x_wdt_cfgs); i++)
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 111843f88b2a..43ba5b3ce2a3 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -53,11 +53,7 @@ struct watchdog_info {
 
 #ifdef __KERNEL__
 
-#ifdef CONFIG_WATCHDOG_NOWAYOUT
-#define WATCHDOG_NOWAYOUT	1
-#else
-#define WATCHDOG_NOWAYOUT	0
-#endif
+#include <linux/bitops.h>
 
 struct watchdog_ops;
 struct watchdog_device;
@@ -122,6 +118,21 @@ struct watchdog_device {
 #define WDOG_NO_WAY_OUT		3	/* Is 'nowayout' feature set ? */
 };
 
+#ifdef CONFIG_WATCHDOG_NOWAYOUT
+#define WATCHDOG_NOWAYOUT		1
+#define WATCHDOG_NOWAYOUT_INIT_STATUS	(1 << WDOG_NO_WAY_OUT)
+#else
+#define WATCHDOG_NOWAYOUT		0
+#define WATCHDOG_NOWAYOUT_INIT_STATUS	0
+#endif
+
+/* Use the following function to set the nowayout feature */
+static inline void watchdog_set_nowayout(struct watchdog_device *wdd, int nowayout)
+{
+	if (nowayout)
+		set_bit(WDOG_NO_WAY_OUT, &wdd->status);
+}
+
 /* Use the following functions to manipulate watchdog driver specific data */
 static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data)
 {
-- 
cgit v1.2.3


From da8d1c8ba4dcb16d60be54b233deca9a7cac98dc Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Thu, 6 Oct 2011 14:08:18 -0400
Subject: PCI/sysfs: add per pci device msi[x] irq listing (v5)

This patch adds a per-pci-device subdirectory in sysfs called:
/sys/bus/pci/devices/<device>/msi_irqs

This sub-directory exports the set of msi vectors allocated by a given
pci device, by creating a numbered sub-directory for each vector beneath
msi_irqs.  For each vector various attributes can be exported.
Currently the only attribute is called mode, which tracks the
operational mode of that vector (msi vs. msix)

Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/ABI/testing/sysfs-bus-pci |  18 ++++++
 drivers/pci/msi.c                       | 111 ++++++++++++++++++++++++++++++++
 include/linux/msi.h                     |   3 +
 include/linux/pci.h                     |   1 +
 4 files changed, 133 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 349ecf26ce10..34f51100f029 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -66,6 +66,24 @@ Description:
 		re-discover previously removed devices.
 		Depends on CONFIG_HOTPLUG.
 
+What:		/sys/bus/pci/devices/.../msi_irqs/
+Date:		September, 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		The /sys/devices/.../msi_irqs directory contains a variable set
+		of sub-directories, with each sub-directory being named after a
+		corresponding msi irq vector allocated to that device.  Each
+		numbered sub-directory N contains attributes of that irq.
+		Note that this directory is not created for device drivers which
+		do not support msi irqs
+
+What:		/sys/bus/pci/devices/.../msi_irqs/<N>/mode
+Date:		September 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		This attribute indicates the mode that the irq vector named by
+		the parent directory is in (msi vs. msix)
+
 What:		/sys/bus/pci/devices/.../remove
 Date:		January 2009
 Contact:	Linux PCI developers <linux-pci@vger.kernel.org>
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 0e6d04d7ba4f..e6b6b9c67023 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -323,6 +323,8 @@ static void free_msi_irqs(struct pci_dev *dev)
 			if (list_is_last(&entry->list, &dev->msi_list))
 				iounmap(entry->mask_base);
 		}
+		kobject_del(&entry->kobj);
+		kobject_put(&entry->kobj);
 		list_del(&entry->list);
 		kfree(entry);
 	}
@@ -403,6 +405,98 @@ void pci_restore_msi_state(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_restore_msi_state);
 
+
+#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
+#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)
+
+struct msi_attribute {
+	struct attribute        attr;
+	ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
+			 const char *buf, size_t count);
+};
+
+static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
+			     char *buf)
+{
+	return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
+}
+
+static ssize_t msi_irq_attr_show(struct kobject *kobj,
+				 struct attribute *attr, char *buf)
+{
+	struct msi_attribute *attribute = to_msi_attr(attr);
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(entry, attribute, buf);
+}
+
+static const struct sysfs_ops msi_irq_sysfs_ops = {
+	.show = msi_irq_attr_show,
+};
+
+static struct msi_attribute mode_attribute =
+	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);
+
+
+struct attribute *msi_irq_default_attrs[] = {
+	&mode_attribute.attr,
+	NULL
+};
+
+void msi_kobj_release(struct kobject *kobj)
+{
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	pci_dev_put(entry->dev);
+}
+
+static struct kobj_type msi_irq_ktype = {
+	.release = msi_kobj_release,
+	.sysfs_ops = &msi_irq_sysfs_ops,
+	.default_attrs = msi_irq_default_attrs,
+};
+
+static int populate_msi_sysfs(struct pci_dev *pdev)
+{
+	struct msi_desc *entry;
+	struct kobject *kobj;
+	int ret;
+	int count = 0;
+
+	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
+	if (!pdev->msi_kset)
+		return -ENOMEM;
+
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		kobj = &entry->kobj;
+		kobj->kset = pdev->msi_kset;
+		pci_dev_get(pdev);
+		ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
+				     "%u", entry->irq);
+		if (ret)
+			goto out_unroll;
+
+		count++;
+	}
+
+	return 0;
+
+out_unroll:
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		if (!count)
+			break;
+		kobject_del(&entry->kobj);
+		kobject_put(&entry->kobj);
+		count--;
+	}
+	return ret;
+}
+
 /**
  * msi_capability_init - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
@@ -454,6 +548,13 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 		return ret;
 	}
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		msi_mask_irq(entry, mask, ~mask);
+		free_msi_irqs(dev);
+		return ret;
+	}
+
 	/* Set MSI enabled bits	 */
 	pci_intx_for_msi(dev, 0);
 	msi_set_enable(dev, pos, 1);
@@ -574,6 +675,12 @@ static int msix_capability_init(struct pci_dev *dev,
 
 	msix_program_entries(dev, entries);
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		ret = 0;
+		goto error;
+	}
+
 	/* Set MSI-X enabled bits and unmask the function */
 	pci_intx_for_msi(dev, 0);
 	dev->msix_enabled = 1;
@@ -732,6 +839,8 @@ void pci_disable_msi(struct pci_dev *dev)
 
 	pci_msi_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msi);
 
@@ -830,6 +939,8 @@ void pci_disable_msix(struct pci_dev *dev)
 
 	pci_msix_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msix);
 
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 05acced439a3..ce93a341337d 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -1,6 +1,7 @@
 #ifndef LINUX_MSI_H
 #define LINUX_MSI_H
 
+#include <linux/kobject.h>
 #include <linux/list.h>
 
 struct msi_msg {
@@ -44,6 +45,8 @@ struct msi_desc {
 
 	/* Last set MSI message */
 	struct msi_msg msg;
+
+	struct kobject kobj;
 };
 
 /*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7cda65b5f798..84225c756bd1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -336,6 +336,7 @@ struct pci_dev {
 	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
 #ifdef CONFIG_PCI_MSI
 	struct list_head msi_list;
+	struct kset *msi_kset;
 #endif
 	struct pci_vpd *vpd;
 #ifdef CONFIG_PCI_ATS
-- 
cgit v1.2.3


From cfa4d8cc56853ec945956d182ecb4c99102b110a Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 2 Nov 2011 14:07:15 -0600
Subject: PCI: Fix PRI and PASID consistency

These are extended capabilities, rename and move to proper
group for consistency.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/ats.c        | 20 ++++++++++----------
 include/linux/pci_regs.h |  4 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index b0dd08e6a9da..e11ebafaf774 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -175,7 +175,7 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
 	u32 max_requests;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
 
@@ -206,7 +206,7 @@ void pci_disable_pri(struct pci_dev *pdev)
 	u16 control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return;
 
@@ -227,7 +227,7 @@ bool pci_pri_enabled(struct pci_dev *pdev)
 	u16 control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return false;
 
@@ -249,7 +249,7 @@ int pci_reset_pri(struct pci_dev *pdev)
 	u16 control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
 
@@ -282,7 +282,7 @@ bool pci_pri_stopped(struct pci_dev *pdev)
 	u16 control, status;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return true;
 
@@ -311,7 +311,7 @@ int pci_pri_status(struct pci_dev *pdev)
 	u16 status, control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
 
@@ -342,7 +342,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 	u16 control, supported;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
 
@@ -376,7 +376,7 @@ void pci_disable_pasid(struct pci_dev *pdev)
 	u16 control = 0;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return;
 
@@ -400,7 +400,7 @@ int pci_pasid_features(struct pci_dev *pdev)
 	u16 supported;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
 
@@ -426,7 +426,7 @@ int pci_max_pasids(struct pci_dev *pdev)
 	u16 supported;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
 
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index b5d9657f3100..090d3a9f5b26 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -537,7 +537,9 @@
 #define PCI_EXT_CAP_ID_ARI	14
 #define PCI_EXT_CAP_ID_ATS	15
 #define PCI_EXT_CAP_ID_SRIOV	16
+#define PCI_EXT_CAP_ID_PRI	19
 #define PCI_EXT_CAP_ID_LTR	24
+#define PCI_EXT_CAP_ID_PASID	27
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
@@ -664,7 +666,6 @@
 #define  PCI_ATS_MIN_STU	12	/* shift of minimum STU block */
 
 /* Page Request Interface */
-#define PCI_PRI_CAP		0x13    /* PRI capability ID */
 #define PCI_PRI_CONTROL_OFF	0x04	/* Offset of control register */
 #define PCI_PRI_STATUS_OFF	0x06	/* Offset of status register */
 #define PCI_PRI_ENABLE		0x0001	/* Enable mask */
@@ -676,7 +677,6 @@
 #define PCI_PRI_ALLOC_REQ_OFF	0x0c	/* Cap offset for max reqs allowed */
 
 /* PASID capability */
-#define PCI_PASID_CAP		0x1b    /* PASID capability ID */
 #define PCI_PASID_CAP_OFF	0x04    /* PASID feature register */
 #define PCI_PASID_CONTROL_OFF   0x06    /* PASID control register */
 #define PCI_PASID_ENABLE	0x01	/* Enable/Supported bit */
-- 
cgit v1.2.3


From 10f6dc7eede9a8895626e9c1b4f2c3b75fbf2850 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Thu, 10 Nov 2011 16:38:33 -0500
Subject: PCI: Rework ASPM disable code

Right now we forcibly clear ASPM state on all devices if the BIOS indicates
that the feature isn't supported. Based on the Microsoft presentation
"PCI Express In Depth for Windows Vista and Beyond", I'm starting to think
that this may be an error. The implication is that unless the platform
grants full control via _OSC, Windows will not touch any PCIe features -
including ASPM. In that case clearing ASPM state would be an error unless
the platform has granted us that control.

This patch reworks the ASPM disabling code such that the actual clearing
of state is triggered by a successful handoff of PCIe control to the OS.
The general ASPM code undergoes some changes in order to ensure that the
ability to clear the bits isn't overridden by ASPM having already been
disabled. Further, this theoretically now allows for situations where
only a subset of PCIe roots hand over control, leaving the others in the
BIOS state.

It's difficult to know for sure that this is the right thing to do -
there's zero public documentation on the interaction between all of these
components. But enough vendors enable ASPM on platforms and then set this
bit that it seems likely that they're expecting the OS to leave them alone.

Measured to save around 5W on an idle Thinkpad X220.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/acpi/pci_root.c  |  7 ++++++
 drivers/pci/pci-acpi.c   |  1 -
 drivers/pci/pcie/aspm.c  | 58 ++++++++++++++++++++++++++++++------------------
 include/linux/pci-aspm.h |  4 ++--
 4 files changed, 46 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 2672c798272f..7aff6312ce7c 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -596,6 +596,13 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 		if (ACPI_SUCCESS(status)) {
 			dev_info(root->bus->bridge,
 				"ACPI _OSC control (0x%02x) granted\n", flags);
+			if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
+				/*
+				 * We have ASPM control, but the FADT indicates
+				 * that it's unsupported. Clear it.
+				 */
+				pcie_clear_aspm(root->bus);
+			}
 		} else {
 			dev_info(root->bus->bridge,
 				"ACPI _OSC request failed (%s), "
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 4ecb6408b0d6..c8e75851a314 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -395,7 +395,6 @@ static int __init acpi_pci_init(void)
 
 	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
 		printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
-		pcie_clear_aspm();
 		pcie_no_aspm();
 	}
 
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index cbfbab18be91..1cfbf228fbb1 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -68,7 +68,7 @@ struct pcie_link_state {
 	struct aspm_latency acceptable[8];
 };
 
-static int aspm_disabled, aspm_force, aspm_clear_state;
+static int aspm_disabled, aspm_force;
 static bool aspm_support_enabled = true;
 static DEFINE_MUTEX(aspm_lock);
 static LIST_HEAD(link_list);
@@ -500,9 +500,6 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
 	int pos;
 	u32 reg32;
 
-	if (aspm_clear_state)
-		return -EINVAL;
-
 	/*
 	 * Some functions in a slot might not all be PCIe functions,
 	 * very strange. Disable ASPM for the whole slot
@@ -574,9 +571,6 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
 	    pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
 		return;
 
-	if (aspm_disabled && !aspm_clear_state)
-		return;
-
 	/* VIA has a strange chipset, root port is under a bridge */
 	if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT &&
 	    pdev->bus->self)
@@ -608,7 +602,7 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
 	 * the BIOS's expectation, we'll do so once pci_enable_device() is
 	 * called.
 	 */
-	if (aspm_policy != POLICY_POWERSAVE || aspm_clear_state) {
+	if (aspm_policy != POLICY_POWERSAVE) {
 		pcie_config_aspm_path(link);
 		pcie_set_clkpm(link, policy_to_clkpm_state(link));
 	}
@@ -649,8 +643,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
 	struct pci_dev *parent = pdev->bus->self;
 	struct pcie_link_state *link, *root, *parent_link;
 
-	if ((aspm_disabled && !aspm_clear_state) || !pci_is_pcie(pdev) ||
-	    !parent || !parent->link_state)
+	if (!pci_is_pcie(pdev) || !parent || !parent->link_state)
 		return;
 	if ((parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT) &&
 	    (parent->pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
@@ -734,13 +727,18 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
  * pci_disable_link_state - disable pci device's link state, so the link will
  * never enter specific states
  */
-static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
+static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem,
+				     bool force)
 {
 	struct pci_dev *parent = pdev->bus->self;
 	struct pcie_link_state *link;
 
-	if (aspm_disabled || !pci_is_pcie(pdev))
+	if (aspm_disabled && !force)
+		return;
+
+	if (!pci_is_pcie(pdev))
 		return;
+
 	if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
 	    pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)
 		parent = pdev;
@@ -768,16 +766,31 @@ static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
 
 void pci_disable_link_state_locked(struct pci_dev *pdev, int state)
 {
-	__pci_disable_link_state(pdev, state, false);
+	__pci_disable_link_state(pdev, state, false, false);
 }
 EXPORT_SYMBOL(pci_disable_link_state_locked);
 
 void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
-	__pci_disable_link_state(pdev, state, true);
+	__pci_disable_link_state(pdev, state, true, false);
 }
 EXPORT_SYMBOL(pci_disable_link_state);
 
+void pcie_clear_aspm(struct pci_bus *bus)
+{
+	struct pci_dev *child;
+
+	/*
+	 * Clear any ASPM setup that the firmware has carried out on this bus
+	 */
+	list_for_each_entry(child, &bus->devices, bus_list) {
+		__pci_disable_link_state(child, PCIE_LINK_STATE_L0S |
+					 PCIE_LINK_STATE_L1 |
+					 PCIE_LINK_STATE_CLKPM,
+					 false, true);
+	}
+}
+
 static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
 {
 	int i;
@@ -935,6 +948,7 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
 static int __init pcie_aspm_disable(char *str)
 {
 	if (!strcmp(str, "off")) {
+		aspm_policy = POLICY_DEFAULT;
 		aspm_disabled = 1;
 		aspm_support_enabled = false;
 		printk(KERN_INFO "PCIe ASPM is disabled\n");
@@ -947,16 +961,18 @@ static int __init pcie_aspm_disable(char *str)
 
 __setup("pcie_aspm=", pcie_aspm_disable);
 
-void pcie_clear_aspm(void)
-{
-	if (!aspm_force)
-		aspm_clear_state = 1;
-}
-
 void pcie_no_aspm(void)
 {
-	if (!aspm_force)
+	/*
+	 * Disabling ASPM is intended to prevent the kernel from modifying
+	 * existing hardware state, not to clear existing state. To that end:
+	 * (a) set policy to POLICY_DEFAULT in order to avoid changing state
+	 * (b) prevent userspace from changing policy
+	 */
+	if (!aspm_force) {
+		aspm_policy = POLICY_DEFAULT;
 		aspm_disabled = 1;
+	}
 }
 
 /**
diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h
index 7cea7b6c1413..c8320144fe79 100644
--- a/include/linux/pci-aspm.h
+++ b/include/linux/pci-aspm.h
@@ -29,7 +29,7 @@ extern void pcie_aspm_pm_state_change(struct pci_dev *pdev);
 extern void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
 extern void pci_disable_link_state(struct pci_dev *pdev, int state);
 extern void pci_disable_link_state_locked(struct pci_dev *pdev, int state);
-extern void pcie_clear_aspm(void);
+extern void pcie_clear_aspm(struct pci_bus *bus);
 extern void pcie_no_aspm(void);
 #else
 static inline void pcie_aspm_init_link_state(struct pci_dev *pdev)
@@ -47,7 +47,7 @@ static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
 static inline void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
 }
-static inline void pcie_clear_aspm(void)
+static inline void pcie_clear_aspm(struct pci_bus *bus)
 {
 }
 static inline void pcie_no_aspm(void)
-- 
cgit v1.2.3


From 1830ea91c20b06608f7cdb2455ce05ba834b3214 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 16 Nov 2011 09:24:16 -0700
Subject: PCI: Fix PCI_EXP_TYPE_RC_EC value

Spec shows this as 1010b = 0xa

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci_regs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 090d3a9f5b26..9e37250a2a22 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -392,7 +392,7 @@
 #define  PCI_EXP_TYPE_DOWNSTREAM 0x6	/* Downstream Port */
 #define  PCI_EXP_TYPE_PCI_BRIDGE 0x7	/* PCI/PCI-X Bridge */
 #define  PCI_EXP_TYPE_RC_END	0x9	/* Root Complex Integrated Endpoint */
-#define  PCI_EXP_TYPE_RC_EC	0x10	/* Root Complex Event Collector */
+#define  PCI_EXP_TYPE_RC_EC	0xa	/* Root Complex Event Collector */
 #define PCI_EXP_FLAGS_SLOT	0x0100	/* Slot implemented */
 #define PCI_EXP_FLAGS_IRQ	0x3e00	/* Interrupt message number */
 #define PCI_EXP_DEVCAP		4	/* Device capabilities */
-- 
cgit v1.2.3


From fb51ccbf217c1c994607b6519c7d85250928553d Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 4 Nov 2011 09:45:59 +0100
Subject: PCI: Rework config space blocking services

pci_block_user_cfg_access was designed for the use case that a single
context, the IPR driver, temporarily delays user space accesses to the
config space via sysfs. This assumption became invalid by the time
pci_dev_reset was added as locking instance. Today, if you run two loops
in parallel that reset the same device via sysfs, you end up with a
kernel BUG as pci_block_user_cfg_access detect the broken assumption.

This reworks the pci_block_user_cfg_access to a sleeping service
pci_cfg_access_lock and an atomic-compatible variant called
pci_cfg_access_trylock. The former not only blocks user space access as
before but also waits if access was already locked. The latter service
just returns false in this case, allowing the caller to resolve the
conflict instead of raising a BUG.

Adaptions of the ipr driver were originally written by Brian King.

Acked-by: Brian King <brking@linux.vnet.ibm.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/access.c          | 74 ++++++++++++++++++++++++++++---------------
 drivers/pci/iov.c             | 12 +++----
 drivers/pci/pci.c             |  4 +--
 drivers/scsi/ipr.c            | 67 +++++++++++++++++++++++++++++++++++----
 drivers/scsi/ipr.h            |  1 +
 drivers/uio/uio_pci_generic.c |  9 +++---
 include/linux/pci.h           | 14 +++++---
 7 files changed, 131 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index fdaa42aac7c6..0c4c71712dfc 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -127,20 +127,20 @@ EXPORT_SYMBOL(pci_write_vpd);
  * We have a bit per device to indicate it's blocked and a global wait queue
  * for callers to sleep on until devices are unblocked.
  */
-static DECLARE_WAIT_QUEUE_HEAD(pci_ucfg_wait);
+static DECLARE_WAIT_QUEUE_HEAD(pci_cfg_wait);
 
-static noinline void pci_wait_ucfg(struct pci_dev *dev)
+static noinline void pci_wait_cfg(struct pci_dev *dev)
 {
 	DECLARE_WAITQUEUE(wait, current);
 
-	__add_wait_queue(&pci_ucfg_wait, &wait);
+	__add_wait_queue(&pci_cfg_wait, &wait);
 	do {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		raw_spin_unlock_irq(&pci_lock);
 		schedule();
 		raw_spin_lock_irq(&pci_lock);
-	} while (dev->block_ucfg_access);
-	__remove_wait_queue(&pci_ucfg_wait, &wait);
+	} while (dev->block_cfg_access);
+	__remove_wait_queue(&pci_cfg_wait, &wait);
 }
 
 /* Returns 0 on success, negative values indicate error. */
@@ -153,7 +153,8 @@ int pci_user_read_config_##size						\
 	if (PCI_##size##_BAD)						\
 		return -EINVAL;						\
 	raw_spin_lock_irq(&pci_lock);				\
-	if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev);	\
+	if (unlikely(dev->block_cfg_access))				\
+		pci_wait_cfg(dev);					\
 	ret = dev->bus->ops->read(dev->bus, dev->devfn,			\
 					pos, sizeof(type), &data);	\
 	raw_spin_unlock_irq(&pci_lock);				\
@@ -172,7 +173,8 @@ int pci_user_write_config_##size					\
 	if (PCI_##size##_BAD)						\
 		return -EINVAL;						\
 	raw_spin_lock_irq(&pci_lock);				\
-	if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev);	\
+	if (unlikely(dev->block_cfg_access))				\
+		pci_wait_cfg(dev);					\
 	ret = dev->bus->ops->write(dev->bus, dev->devfn,		\
 					pos, sizeof(type), val);	\
 	raw_spin_unlock_irq(&pci_lock);				\
@@ -401,36 +403,56 @@ int pci_vpd_truncate(struct pci_dev *dev, size_t size)
 EXPORT_SYMBOL(pci_vpd_truncate);
 
 /**
- * pci_block_user_cfg_access - Block userspace PCI config reads/writes
+ * pci_cfg_access_lock - Lock PCI config reads/writes
  * @dev:	pci device struct
  *
- * When user access is blocked, any reads or writes to config space will
- * sleep until access is unblocked again.  We don't allow nesting of
- * block/unblock calls.
+ * When access is locked, any userspace reads or writes to config
+ * space and concurrent lock requests will sleep until access is
+ * allowed via pci_cfg_access_unlocked again.
  */
-void pci_block_user_cfg_access(struct pci_dev *dev)
+void pci_cfg_access_lock(struct pci_dev *dev)
+{
+	might_sleep();
+
+	raw_spin_lock_irq(&pci_lock);
+	if (dev->block_cfg_access)
+		pci_wait_cfg(dev);
+	dev->block_cfg_access = 1;
+	raw_spin_unlock_irq(&pci_lock);
+}
+EXPORT_SYMBOL_GPL(pci_cfg_access_lock);
+
+/**
+ * pci_cfg_access_trylock - try to lock PCI config reads/writes
+ * @dev:	pci device struct
+ *
+ * Same as pci_cfg_access_lock, but will return 0 if access is
+ * already locked, 1 otherwise. This function can be used from
+ * atomic contexts.
+ */
+bool pci_cfg_access_trylock(struct pci_dev *dev)
 {
 	unsigned long flags;
-	int was_blocked;
+	bool locked = true;
 
 	raw_spin_lock_irqsave(&pci_lock, flags);
-	was_blocked = dev->block_ucfg_access;
-	dev->block_ucfg_access = 1;
+	if (dev->block_cfg_access)
+		locked = false;
+	else
+		dev->block_cfg_access = 1;
 	raw_spin_unlock_irqrestore(&pci_lock, flags);
 
-	/* If we BUG() inside the pci_lock, we're guaranteed to hose
-	 * the machine */
-	BUG_ON(was_blocked);
+	return locked;
 }
-EXPORT_SYMBOL_GPL(pci_block_user_cfg_access);
+EXPORT_SYMBOL_GPL(pci_cfg_access_trylock);
 
 /**
- * pci_unblock_user_cfg_access - Unblock userspace PCI config reads/writes
+ * pci_cfg_access_unlock - Unlock PCI config reads/writes
  * @dev:	pci device struct
  *
- * This function allows userspace PCI config accesses to resume.
+ * This function allows PCI config accesses to resume.
  */
-void pci_unblock_user_cfg_access(struct pci_dev *dev)
+void pci_cfg_access_unlock(struct pci_dev *dev)
 {
 	unsigned long flags;
 
@@ -438,10 +460,10 @@ void pci_unblock_user_cfg_access(struct pci_dev *dev)
 
 	/* This indicates a problem in the caller, but we don't need
 	 * to kill them, unlike a double-block above. */
-	WARN_ON(!dev->block_ucfg_access);
+	WARN_ON(!dev->block_cfg_access);
 
-	dev->block_ucfg_access = 0;
-	wake_up_all(&pci_ucfg_wait);
+	dev->block_cfg_access = 0;
+	wake_up_all(&pci_cfg_wait);
 	raw_spin_unlock_irqrestore(&pci_lock, flags);
 }
-EXPORT_SYMBOL_GPL(pci_unblock_user_cfg_access);
+EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 1969a3ee3058..6a4d70386a3d 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -348,10 +348,10 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
 	}
 
 	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
-	pci_block_user_cfg_access(dev);
+	pci_cfg_access_lock(dev);
 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
 	msleep(100);
-	pci_unblock_user_cfg_access(dev);
+	pci_cfg_access_unlock(dev);
 
 	iov->initial = initial;
 	if (nr_virtfn < initial)
@@ -379,10 +379,10 @@ failed:
 		virtfn_remove(dev, j, 0);
 
 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
-	pci_block_user_cfg_access(dev);
+	pci_cfg_access_lock(dev);
 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
 	ssleep(1);
-	pci_unblock_user_cfg_access(dev);
+	pci_cfg_access_unlock(dev);
 
 	if (iov->link != dev->devfn)
 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
@@ -405,10 +405,10 @@ static void sriov_disable(struct pci_dev *dev)
 		virtfn_remove(dev, i, 0);
 
 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
-	pci_block_user_cfg_access(dev);
+	pci_cfg_access_lock(dev);
 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
 	ssleep(1);
-	pci_unblock_user_cfg_access(dev);
+	pci_cfg_access_unlock(dev);
 
 	if (iov->link != dev->devfn)
 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 6d4a5319148d..c3cca7cdc6e5 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2965,7 +2965,7 @@ static int pci_dev_reset(struct pci_dev *dev, int probe)
 	might_sleep();
 
 	if (!probe) {
-		pci_block_user_cfg_access(dev);
+		pci_cfg_access_lock(dev);
 		/* block PM suspend, driver probe, etc. */
 		device_lock(&dev->dev);
 	}
@@ -2990,7 +2990,7 @@ static int pci_dev_reset(struct pci_dev *dev, int probe)
 done:
 	if (!probe) {
 		device_unlock(&dev->dev);
-		pci_unblock_user_cfg_access(dev);
+		pci_cfg_access_unlock(dev);
 	}
 
 	return rc;
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index fd860d952b28..67b169b7a5be 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -7638,8 +7638,12 @@ static int ipr_reset_restore_cfg_space(struct ipr_cmnd *ipr_cmd)
  **/
 static int ipr_reset_bist_done(struct ipr_cmnd *ipr_cmd)
 {
+	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
+
 	ENTER;
-	pci_unblock_user_cfg_access(ipr_cmd->ioa_cfg->pdev);
+	if (ioa_cfg->cfg_locked)
+		pci_cfg_access_unlock(ioa_cfg->pdev);
+	ioa_cfg->cfg_locked = 0;
 	ipr_cmd->job_step = ipr_reset_restore_cfg_space;
 	LEAVE;
 	return IPR_RC_JOB_CONTINUE;
@@ -7660,8 +7664,6 @@ static int ipr_reset_start_bist(struct ipr_cmnd *ipr_cmd)
 	int rc = PCIBIOS_SUCCESSFUL;
 
 	ENTER;
-	pci_block_user_cfg_access(ioa_cfg->pdev);
-
 	if (ioa_cfg->ipr_chip->bist_method == IPR_MMIO)
 		writel(IPR_UPROCI_SIS64_START_BIST,
 		       ioa_cfg->regs.set_uproc_interrupt_reg32);
@@ -7673,7 +7675,9 @@ static int ipr_reset_start_bist(struct ipr_cmnd *ipr_cmd)
 		ipr_reset_start_timer(ipr_cmd, IPR_WAIT_FOR_BIST_TIMEOUT);
 		rc = IPR_RC_JOB_RETURN;
 	} else {
-		pci_unblock_user_cfg_access(ipr_cmd->ioa_cfg->pdev);
+		if (ioa_cfg->cfg_locked)
+			pci_cfg_access_unlock(ipr_cmd->ioa_cfg->pdev);
+		ioa_cfg->cfg_locked = 0;
 		ipr_cmd->s.ioasa.hdr.ioasc = cpu_to_be32(IPR_IOASC_PCI_ACCESS_ERROR);
 		rc = IPR_RC_JOB_CONTINUE;
 	}
@@ -7716,7 +7720,6 @@ static int ipr_reset_slot_reset(struct ipr_cmnd *ipr_cmd)
 	struct pci_dev *pdev = ioa_cfg->pdev;
 
 	ENTER;
-	pci_block_user_cfg_access(pdev);
 	pci_set_pcie_reset_state(pdev, pcie_warm_reset);
 	ipr_cmd->job_step = ipr_reset_slot_reset_done;
 	ipr_reset_start_timer(ipr_cmd, IPR_PCI_RESET_TIMEOUT);
@@ -7724,6 +7727,56 @@ static int ipr_reset_slot_reset(struct ipr_cmnd *ipr_cmd)
 	return IPR_RC_JOB_RETURN;
 }
 
+/**
+ * ipr_reset_block_config_access_wait - Wait for permission to block config access
+ * @ipr_cmd:	ipr command struct
+ *
+ * Description: This attempts to block config access to the IOA.
+ *
+ * Return value:
+ * 	IPR_RC_JOB_CONTINUE / IPR_RC_JOB_RETURN
+ **/
+static int ipr_reset_block_config_access_wait(struct ipr_cmnd *ipr_cmd)
+{
+	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
+	int rc = IPR_RC_JOB_CONTINUE;
+
+	if (pci_cfg_access_trylock(ioa_cfg->pdev)) {
+		ioa_cfg->cfg_locked = 1;
+		ipr_cmd->job_step = ioa_cfg->reset;
+	} else {
+		if (ipr_cmd->u.time_left) {
+			rc = IPR_RC_JOB_RETURN;
+			ipr_cmd->u.time_left -= IPR_CHECK_FOR_RESET_TIMEOUT;
+			ipr_reset_start_timer(ipr_cmd,
+					      IPR_CHECK_FOR_RESET_TIMEOUT);
+		} else {
+			ipr_cmd->job_step = ioa_cfg->reset;
+			dev_err(&ioa_cfg->pdev->dev,
+				"Timed out waiting to lock config access. Resetting anyway.\n");
+		}
+	}
+
+	return rc;
+}
+
+/**
+ * ipr_reset_block_config_access - Block config access to the IOA
+ * @ipr_cmd:	ipr command struct
+ *
+ * Description: This attempts to block config access to the IOA
+ *
+ * Return value:
+ * 	IPR_RC_JOB_CONTINUE
+ **/
+static int ipr_reset_block_config_access(struct ipr_cmnd *ipr_cmd)
+{
+	ipr_cmd->ioa_cfg->cfg_locked = 0;
+	ipr_cmd->job_step = ipr_reset_block_config_access_wait;
+	ipr_cmd->u.time_left = IPR_WAIT_FOR_RESET_TIMEOUT;
+	return IPR_RC_JOB_CONTINUE;
+}
+
 /**
  * ipr_reset_allowed - Query whether or not IOA can be reset
  * @ioa_cfg:	ioa config struct
@@ -7763,7 +7816,7 @@ static int ipr_reset_wait_to_start_bist(struct ipr_cmnd *ipr_cmd)
 		ipr_cmd->u.time_left -= IPR_CHECK_FOR_RESET_TIMEOUT;
 		ipr_reset_start_timer(ipr_cmd, IPR_CHECK_FOR_RESET_TIMEOUT);
 	} else {
-		ipr_cmd->job_step = ioa_cfg->reset;
+		ipr_cmd->job_step = ipr_reset_block_config_access;
 		rc = IPR_RC_JOB_CONTINUE;
 	}
 
@@ -7796,7 +7849,7 @@ static int ipr_reset_alert(struct ipr_cmnd *ipr_cmd)
 		writel(IPR_UPROCI_RESET_ALERT, ioa_cfg->regs.set_uproc_interrupt_reg32);
 		ipr_cmd->job_step = ipr_reset_wait_to_start_bist;
 	} else {
-		ipr_cmd->job_step = ioa_cfg->reset;
+		ipr_cmd->job_step = ipr_reset_block_config_access;
 	}
 
 	ipr_cmd->u.time_left = IPR_WAIT_FOR_RESET_TIMEOUT;
diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index ac84736c1b9c..b13f9cc12279 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -1387,6 +1387,7 @@ struct ipr_ioa_cfg {
 	u8 msi_received:1;
 	u8 sis64:1;
 	u8 dump_timeout:1;
+	u8 cfg_locked:1;
 
 	u8 revid;
 
diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
index 02bd47bdee1c..56d00c6258f0 100644
--- a/drivers/uio/uio_pci_generic.c
+++ b/drivers/uio/uio_pci_generic.c
@@ -55,7 +55,8 @@ static irqreturn_t irqhandler(int irq, struct uio_info *info)
 	BUILD_BUG_ON(PCI_COMMAND % 4);
 	BUILD_BUG_ON(PCI_COMMAND + 2 != PCI_STATUS);
 
-	pci_block_user_cfg_access(pdev);
+	if (!pci_cfg_access_trylock(pdev))
+		goto error;
 
 	/* Read both command and status registers in a single 32-bit operation.
 	 * Note: we could cache the value for command and move the status read
@@ -79,7 +80,7 @@ static irqreturn_t irqhandler(int irq, struct uio_info *info)
 	ret = IRQ_HANDLED;
 done:
 
-	pci_unblock_user_cfg_access(pdev);
+	pci_cfg_access_lock(pdev);
 	return ret;
 }
 
@@ -91,7 +92,7 @@ static int __devinit verify_pci_2_3(struct pci_dev *pdev)
 	u16 orig, new;
 	int err = 0;
 
-	pci_block_user_cfg_access(pdev);
+	pci_cfg_access_lock(pdev);
 	pci_read_config_word(pdev, PCI_COMMAND, &orig);
 	pci_write_config_word(pdev, PCI_COMMAND,
 			      orig ^ PCI_COMMAND_INTX_DISABLE);
@@ -114,7 +115,7 @@ static int __devinit verify_pci_2_3(struct pci_dev *pdev)
 	/* Now restore the original value. */
 	pci_write_config_word(pdev, PCI_COMMAND, orig);
 err:
-	pci_unblock_user_cfg_access(pdev);
+	pci_cfg_access_unlock(pdev);
 	return err;
 }
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 84225c756bd1..72401596b2a8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -308,7 +308,7 @@ struct pci_dev {
 	unsigned int	is_added:1;
 	unsigned int	is_busmaster:1; /* device is busmaster */
 	unsigned int	no_msi:1;	/* device may not use msi */
-	unsigned int	block_ucfg_access:1;	/* userspace config space access is blocked */
+	unsigned int	block_cfg_access:1;	/* config space access is blocked */
 	unsigned int	broken_parity_status:1;	/* Device generates false positive parity */
 	unsigned int	irq_reroute_variant:2;	/* device needs IRQ rerouting variant */
 	unsigned int 	msi_enabled:1;
@@ -1085,8 +1085,9 @@ int  ht_create_irq(struct pci_dev *dev, int idx);
 void ht_destroy_irq(unsigned int irq);
 #endif /* CONFIG_HT_IRQ */
 
-extern void pci_block_user_cfg_access(struct pci_dev *dev);
-extern void pci_unblock_user_cfg_access(struct pci_dev *dev);
+extern void pci_cfg_access_lock(struct pci_dev *dev);
+extern bool pci_cfg_access_trylock(struct pci_dev *dev);
+extern void pci_cfg_access_unlock(struct pci_dev *dev);
 
 /*
  * PCI domain support.  Sometimes called PCI segment (eg by ACPI),
@@ -1283,10 +1284,13 @@ static inline void pci_release_regions(struct pci_dev *dev)
 
 #define pci_dma_burst_advice(pdev, strat, strategy_parameter) do { } while (0)
 
-static inline void pci_block_user_cfg_access(struct pci_dev *dev)
+static inline void pci_block_cfg_access(struct pci_dev *dev)
 { }
 
-static inline void pci_unblock_user_cfg_access(struct pci_dev *dev)
+static inline int pci_block_cfg_access_in_atomic(struct pci_dev *dev)
+{ return 0; }
+
+static inline void pci_unblock_cfg_access(struct pci_dev *dev)
 { }
 
 static inline struct pci_bus *pci_find_next_bus(const struct pci_bus *from)
-- 
cgit v1.2.3


From a2e27787f893621c5a6b865acf6b7766f8671328 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 4 Nov 2011 09:46:00 +0100
Subject: PCI: Introduce INTx check & mask API

These new PCI services allow to probe for 2.3-compliant INTx masking
support and then use the feature from PCI interrupt handlers. The
services are properly synchronized with concurrent config space access
via sysfs or on device reset.

This enables generic PCI device drivers like uio_pci_generic or KVM's
device assignment to implement the necessary kernel-side IRQ handling
without any knowledge about device-specific interrupt status and control
registers.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/access.c |   2 +-
 drivers/pci/pci.c    | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/pci.h    |   2 +
 include/linux/pci.h  |   3 ++
 4 files changed, 116 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 0c4c71712dfc..2a581642c237 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -13,7 +13,7 @@
  * configuration space.
  */
 
-static DEFINE_RAW_SPINLOCK(pci_lock);
+DEFINE_RAW_SPINLOCK(pci_lock);
 
 /*
  *  Wrappers for all PCI configuration access functions.  They just check
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c3cca7cdc6e5..924193ef4fe1 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2767,6 +2767,116 @@ pci_intx(struct pci_dev *pdev, int enable)
 	}
 }
 
+/**
+ * pci_intx_mask_supported - probe for INTx masking support
+ * @pdev: the PCI device to operate on
+ *
+ * Check if the device dev support INTx masking via the config space
+ * command word.
+ */
+bool pci_intx_mask_supported(struct pci_dev *dev)
+{
+	bool mask_supported = false;
+	u16 orig, new;
+
+	pci_cfg_access_lock(dev);
+
+	pci_read_config_word(dev, PCI_COMMAND, &orig);
+	pci_write_config_word(dev, PCI_COMMAND,
+			      orig ^ PCI_COMMAND_INTX_DISABLE);
+	pci_read_config_word(dev, PCI_COMMAND, &new);
+
+	/*
+	 * There's no way to protect against hardware bugs or detect them
+	 * reliably, but as long as we know what the value should be, let's
+	 * go ahead and check it.
+	 */
+	if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) {
+		dev_err(&dev->dev, "Command register changed from "
+			"0x%x to 0x%x: driver or hardware bug?\n", orig, new);
+	} else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) {
+		mask_supported = true;
+		pci_write_config_word(dev, PCI_COMMAND, orig);
+	}
+
+	pci_cfg_access_unlock(dev);
+	return mask_supported;
+}
+EXPORT_SYMBOL_GPL(pci_intx_mask_supported);
+
+static bool pci_check_and_set_intx_mask(struct pci_dev *dev, bool mask)
+{
+	struct pci_bus *bus = dev->bus;
+	bool mask_updated = true;
+	u32 cmd_status_dword;
+	u16 origcmd, newcmd;
+	unsigned long flags;
+	bool irq_pending;
+
+	/*
+	 * We do a single dword read to retrieve both command and status.
+	 * Document assumptions that make this possible.
+	 */
+	BUILD_BUG_ON(PCI_COMMAND % 4);
+	BUILD_BUG_ON(PCI_COMMAND + 2 != PCI_STATUS);
+
+	raw_spin_lock_irqsave(&pci_lock, flags);
+
+	bus->ops->read(bus, dev->devfn, PCI_COMMAND, 4, &cmd_status_dword);
+
+	irq_pending = (cmd_status_dword >> 16) & PCI_STATUS_INTERRUPT;
+
+	/*
+	 * Check interrupt status register to see whether our device
+	 * triggered the interrupt (when masking) or the next IRQ is
+	 * already pending (when unmasking).
+	 */
+	if (mask != irq_pending) {
+		mask_updated = false;
+		goto done;
+	}
+
+	origcmd = cmd_status_dword;
+	newcmd = origcmd & ~PCI_COMMAND_INTX_DISABLE;
+	if (mask)
+		newcmd |= PCI_COMMAND_INTX_DISABLE;
+	if (newcmd != origcmd)
+		bus->ops->write(bus, dev->devfn, PCI_COMMAND, 2, newcmd);
+
+done:
+	raw_spin_unlock_irqrestore(&pci_lock, flags);
+
+	return mask_updated;
+}
+
+/**
+ * pci_check_and_mask_intx - mask INTx on pending interrupt
+ * @pdev: the PCI device to operate on
+ *
+ * Check if the device dev has its INTx line asserted, mask it and
+ * return true in that case. False is returned if not interrupt was
+ * pending.
+ */
+bool pci_check_and_mask_intx(struct pci_dev *dev)
+{
+	return pci_check_and_set_intx_mask(dev, true);
+}
+EXPORT_SYMBOL_GPL(pci_check_and_mask_intx);
+
+/**
+ * pci_check_and_mask_intx - unmask INTx of no interrupt is pending
+ * @pdev: the PCI device to operate on
+ *
+ * Check if the device dev has its INTx line asserted, unmask it if not
+ * and return true. False is returned and the mask remains active if
+ * there was still an interrupt pending.
+ */
+bool pci_check_and_unmask_intx(struct pci_dev *dev)
+{
+	return pci_check_and_set_intx_mask(dev, false);
+}
+EXPORT_SYMBOL_GPL(pci_check_and_unmask_intx);
+
 /**
  * pci_msi_off - disables any msi or msix capabilities
  * @dev: the PCI device to operate on
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index b74084e9ca12..3b6e4ed306b6 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -136,6 +136,8 @@ static inline void pci_remove_legacy_files(struct pci_bus *bus) { return; }
 /* Lock for read/write access to pci device and bus lists */
 extern struct rw_semaphore pci_bus_sem;
 
+extern raw_spinlock_t pci_lock;
+
 extern unsigned int pci_pm_d3_delay;
 
 #ifdef CONFIG_PCI_MSI
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 72401596b2a8..4286b853956e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -804,6 +804,9 @@ int __must_check pci_set_mwi(struct pci_dev *dev);
 int pci_try_set_mwi(struct pci_dev *dev);
 void pci_clear_mwi(struct pci_dev *dev);
 void pci_intx(struct pci_dev *dev, int enable);
+bool pci_intx_mask_supported(struct pci_dev *dev);
+bool pci_check_and_mask_intx(struct pci_dev *dev);
+bool pci_check_and_unmask_intx(struct pci_dev *dev);
 void pci_msi_off(struct pci_dev *dev);
 int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size);
 int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask);
-- 
cgit v1.2.3


From cfce9fb808d7d25f6ea18a804eb71b08c7d777c1 Mon Sep 17 00:00:00 2001
From: Myron Stowe <mstowe@redhat.com>
Date: Fri, 28 Oct 2011 15:47:35 -0600
Subject: PCI: add declaration for pcibios_set_master() to pci core

Currently, pcibios_set_master() is implemented in architecture-
specific code.  There is nothing architecture-specific about PCI's
'latency timer'.

This patch adds a declaration for pcibios_set_master() to PCI's core
in preperation for pulling the function itself up into the core.
Without the addition of this declaration, subsequent patches that
remove inline definitions of pcibios_set_master() would be removing
the only declaration of such.

No functional change.

Signed-off-by: Myron Stowe <myron.stowe@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4286b853956e..569341d2d527 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1453,8 +1453,10 @@ extern u8 pci_cache_line_size;
 extern unsigned long pci_hotplug_io_size;
 extern unsigned long pci_hotplug_mem_size;
 
+/* Architecture specific versions may override these (weak) */
 int pcibios_add_platform_entries(struct pci_dev *dev);
 void pcibios_disable_device(struct pci_dev *dev);
+void pcibios_set_master(struct pci_dev *dev);
 int pcibios_set_pcie_reset_state(struct pci_dev *dev,
 				 enum pcie_reset_state state);
 
-- 
cgit v1.2.3


From 96c5590058d7fded14f43af2ab521436cecf3125 Mon Sep 17 00:00:00 2001
From: Myron Stowe <mstowe@redhat.com>
Date: Fri, 28 Oct 2011 15:48:38 -0600
Subject: PCI: Pull PCI 'latency timer' setup up into the core

The 'latency timer' of PCI devices, both Type 0 and Type 1,
is setup in architecture-specific code [see: 'pcibios_set_master()'].
There are two approaches being taken by all the architectures - check
if the 'latency timer' is currently set between 16 and 255 and if not
bring it within bounds, or, do nothing (and then there is the
gratuitously different PA-RISC implementation).

There is nothing architecture-specific about PCI's 'latency timer' so
this patch pulls its setup functionality up into the PCI core by
creating a generic 'pcibios_set_master()' function using the '__weak'
attribute which can be used by all architectures as a default which,
if necessary, can then be over-ridden by architecture-specific code.

No functional change.

Signed-off-by: Myron Stowe <myron.stowe@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/blackfin/include/asm/pci.h         |  4 ----
 arch/frv/mb93090-mb00/pci-frv.c         |  6 ------
 arch/frv/mb93090-mb00/pci-frv.h         |  2 --
 arch/h8300/include/asm/pci.h            |  5 -----
 arch/mips/pci/pci.c                     |  6 ------
 arch/mn10300/unit-asb2305/pci-asb2305.c |  6 ------
 arch/mn10300/unit-asb2305/pci-asb2305.h |  2 --
 arch/sh/drivers/pci/pci.c               |  6 ------
 arch/x86/include/asm/pci_x86.h          |  2 --
 arch/x86/pci/i386.c                     |  6 ------
 drivers/pci/pci.c                       | 29 +++++++++++++++++++++++++++++
 include/linux/pci.h                     |  3 +++
 12 files changed, 32 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/arch/blackfin/include/asm/pci.h b/arch/blackfin/include/asm/pci.h
index 99cae2e3bac7..74352c4597d9 100644
--- a/arch/blackfin/include/asm/pci.h
+++ b/arch/blackfin/include/asm/pci.h
@@ -10,10 +10,6 @@
 #define PCIBIOS_MIN_IO 0x00001000
 #define PCIBIOS_MIN_MEM 0x10000000
 
-static inline void pcibios_set_master(struct pci_dev *dev)
-{
-	/* No special bus mastering setup handling */
-}
 static inline void pcibios_penalize_isa_irq(int irq)
 {
 	/* We don't do dynamic PCI IRQ allocation */
diff --git a/arch/frv/mb93090-mb00/pci-frv.c b/arch/frv/mb93090-mb00/pci-frv.c
index 6b4fb28e9f99..6a0cd644d7cc 100644
--- a/arch/frv/mb93090-mb00/pci-frv.c
+++ b/arch/frv/mb93090-mb00/pci-frv.c
@@ -195,12 +195,6 @@ void __init pcibios_resource_survey(void)
 	pcibios_assign_resources();
 }
 
-/*
- *  If we set up a device for bus mastering, we need to check the latency
- *  timer as certain crappy BIOSes forget to set it properly.
- */
-unsigned int pcibios_max_latency = 255;
-
 void pcibios_set_master(struct pci_dev *dev)
 {
 	u8 lat;
diff --git a/arch/frv/mb93090-mb00/pci-frv.h b/arch/frv/mb93090-mb00/pci-frv.h
index f3fe55914793..089eeba4f3bc 100644
--- a/arch/frv/mb93090-mb00/pci-frv.h
+++ b/arch/frv/mb93090-mb00/pci-frv.h
@@ -26,8 +26,6 @@ extern unsigned int __nongpreldata pci_probe;
 
 /* pci-frv.c */
 
-extern unsigned int pcibios_max_latency;
-
 void pcibios_resource_survey(void);
 
 /* pci-vdk.c */
diff --git a/arch/h8300/include/asm/pci.h b/arch/h8300/include/asm/pci.h
index cc9762091c0a..0b2acaa3dd84 100644
--- a/arch/h8300/include/asm/pci.h
+++ b/arch/h8300/include/asm/pci.h
@@ -9,11 +9,6 @@
 
 #define pcibios_assign_all_busses()	0
 
-static inline void pcibios_set_master(struct pci_dev *dev)
-{
-	/* No special bus mastering setup handling */
-}
-
 static inline void pcibios_penalize_isa_irq(int irq, int active)
 {
 	/* We don't do dynamic PCI IRQ allocation */
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index 41af7fa2887b..f93f749b92e3 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -205,12 +205,6 @@ static int pcibios_enable_resources(struct pci_dev *dev, int mask)
 	return 0;
 }
 
-/*
- *  If we set up a device for bus mastering, we need to check the latency
- *  timer as certain crappy BIOSes forget to set it properly.
- */
-static unsigned int pcibios_max_latency = 255;
-
 void pcibios_set_master(struct pci_dev *dev)
 {
 	u8 lat;
diff --git a/arch/mn10300/unit-asb2305/pci-asb2305.c b/arch/mn10300/unit-asb2305/pci-asb2305.c
index 8e6763e6f250..2b299c413ae5 100644
--- a/arch/mn10300/unit-asb2305/pci-asb2305.c
+++ b/arch/mn10300/unit-asb2305/pci-asb2305.c
@@ -213,12 +213,6 @@ void __init pcibios_resource_survey(void)
 	pcibios_allocate_resources(1);
 }
 
-/*
- *  If we set up a device for bus mastering, we need to check the latency
- *  timer as certain crappy BIOSes forget to set it properly.
- */
-unsigned int pcibios_max_latency = 255;
-
 void pcibios_set_master(struct pci_dev *dev)
 {
 	u8 lat;
diff --git a/arch/mn10300/unit-asb2305/pci-asb2305.h b/arch/mn10300/unit-asb2305/pci-asb2305.h
index c3fa294b6e28..1194fe486b01 100644
--- a/arch/mn10300/unit-asb2305/pci-asb2305.h
+++ b/arch/mn10300/unit-asb2305/pci-asb2305.h
@@ -31,8 +31,6 @@ extern unsigned int pci_probe;
 
 /* pci-asb2305.c */
 
-extern unsigned int pcibios_max_latency;
-
 extern void pcibios_resource_survey(void);
 
 /* pci.c */
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index c2691afe8f79..cfdb2f652949 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -243,12 +243,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return pci_enable_resources(dev, mask);
 }
 
-/*
- *  If we set up a device for bus mastering, we need to check and set
- *  the latency timer as it may not be properly set.
- */
-static unsigned int pcibios_max_latency = 255;
-
 void pcibios_set_master(struct pci_dev *dev)
 {
 	u8 lat;
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index e38197806853..b3a531746026 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -44,8 +44,6 @@ enum pci_bf_sort_state {
 
 /* pci-i386.c */
 
-extern unsigned int pcibios_max_latency;
-
 void pcibios_resource_survey(void);
 void pcibios_set_cache_line_size(void);
 
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 794b092d01ae..dd5806b0fc8b 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -254,12 +254,6 @@ void __init pcibios_resource_survey(void)
  */
 fs_initcall(pcibios_assign_resources);
 
-/*
- *  If we set up a device for bus mastering, we need to check the latency
- *  timer as certain crappy BIOSes forget to set it properly.
- */
-unsigned int pcibios_max_latency = 255;
-
 void pcibios_set_master(struct pci_dev *dev)
 {
 	u8 lat;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 924193ef4fe1..f9abe84cf5e0 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -88,6 +88,12 @@ enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_TUNE_OFF;
 u8 pci_dfl_cache_line_size __devinitdata = L1_CACHE_BYTES >> 2;
 u8 pci_cache_line_size;
 
+/*
+ * If we set up a device for bus mastering, we need to check the latency
+ * timer as certain BIOSes forget to set it properly.
+ */
+unsigned int pcibios_max_latency = 255;
+
 /**
  * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
  * @bus: pointer to PCI bus structure to search
@@ -2595,6 +2601,29 @@ static void __pci_set_master(struct pci_dev *dev, bool enable)
 	dev->is_busmaster = enable;
 }
 
+/**
+ * pcibios_set_master - enable PCI bus-mastering for device dev
+ * @dev: the PCI device to enable
+ *
+ * Enables PCI bus-mastering for the device.  This is the default
+ * implementation.  Architecture specific implementations can override
+ * this if necessary.
+ */
+void __weak pcibios_set_master(struct pci_dev *dev)
+{
+	u8 lat;
+
+	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+	if (lat < 16)
+		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+	else if (lat > pcibios_max_latency)
+		lat = pcibios_max_latency;
+	else
+		return;
+	dev_printk(KERN_DEBUG, &dev->dev, "setting latency timer to %d\n", lat);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
 /**
  * pci_set_master - enables bus-mastering for device dev
  * @dev: the PCI device to enable
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 569341d2d527..4c16a5788998 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -795,8 +795,11 @@ static inline int pci_is_managed(struct pci_dev *pdev)
 }
 
 void pci_disable_device(struct pci_dev *dev);
+
+extern unsigned int pcibios_max_latency;
 void pci_set_master(struct pci_dev *dev);
 void pci_clear_master(struct pci_dev *dev);
+
 int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state);
 int pci_set_cacheline_size(struct pci_dev *dev);
 #define HAVE_PCI_SET_MWI
-- 
cgit v1.2.3


From 45ca9e9730c5acdb482dd95799fd8ac834481897 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 28 Oct 2011 16:25:35 -0600
Subject: PCI: add helpers for building PCI bus resource lists

We'd like to supply a list of resources when we create a new PCI bus,
e.g., the root bus under a PCI host bridge.  These are helpers for
constructing that list.

These are exported because the plan is to replace this exported interface:
    pci_scan_bus_parented()
with this one:
    pci_add_resource(resources, ...)
    pci_scan_root_bus(..., resources)

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/bus.c   | 32 +++++++++++++++++++++++++++-----
 include/linux/pci.h |  2 ++
 2 files changed, 29 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 1e2ad92a4752..398f5d859791 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -18,6 +18,32 @@
 
 #include "pci.h"
 
+void pci_add_resource(struct list_head *resources, struct resource *res)
+{
+	struct pci_bus_resource *bus_res;
+
+	bus_res = kzalloc(sizeof(struct pci_bus_resource), GFP_KERNEL);
+	if (!bus_res) {
+		printk(KERN_ERR "PCI: can't add bus resource %pR\n", res);
+		return;
+	}
+
+	bus_res->res = res;
+	list_add_tail(&bus_res->list, resources);
+}
+EXPORT_SYMBOL(pci_add_resource);
+
+void pci_free_resource_list(struct list_head *resources)
+{
+	struct pci_bus_resource *bus_res, *tmp;
+
+	list_for_each_entry_safe(bus_res, tmp, resources, list) {
+		list_del(&bus_res->list);
+		kfree(bus_res);
+	}
+}
+EXPORT_SYMBOL(pci_free_resource_list);
+
 void pci_bus_add_resource(struct pci_bus *bus, struct resource *res,
 			  unsigned int flags)
 {
@@ -52,16 +78,12 @@ EXPORT_SYMBOL_GPL(pci_bus_resource_n);
 
 void pci_bus_remove_resources(struct pci_bus *bus)
 {
-	struct pci_bus_resource *bus_res, *tmp;
 	int i;
 
 	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
 		bus->resource[i] = NULL;
 
-	list_for_each_entry_safe(bus_res, tmp, &bus->resources, list) {
-		list_del(&bus_res->list);
-		kfree(bus_res);
-	}
+	pci_free_resource_list(&bus->resources);
 }
 
 /**
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4c16a5788998..9daa79901122 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -917,6 +917,8 @@ int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *);
 void pci_release_selected_regions(struct pci_dev *, int);
 
 /* drivers/pci/bus.c */
+void pci_add_resource(struct list_head *resources, struct resource *res);
+void pci_free_resource_list(struct list_head *resources);
 void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, unsigned int flags);
 struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n);
 void pci_bus_remove_resources(struct pci_bus *bus);
-- 
cgit v1.2.3


From 166c6370754a0a92386e2ffb0eeb06e50ac8588d Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 28 Oct 2011 16:25:45 -0600
Subject: PCI: add pci_create_root_bus() that accepts resource list

pci_create_bus() assigns ioport_resource and iomem_resource as the default
bus resources, i.e., the entire address space.  Architectures fix these
later, typically in pcibios_fixup_bus() or after pci_scan_bus_parented()
returns, but code that runs in the interim sees incorrect resource
information.

This patch adds a new pci_create_root_bus() that sets the bus resources
correctly from a supplied list of resources.

I intend to remove pci_create_bus() after changing all callers.

Based on original patch by Deng-Cheng Zhu.

Reference: http://www.spinics.net/lists/mips/msg41654.html
Reference: https://lkml.org/lkml/2011/8/26/88
Signed-off-by: Deng-Cheng Zhu <dczhu@mips.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 25 +++++++++++++++++++++----
 include/linux/pci.h |  3 +++
 2 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 2f0b14451d9d..89ecded0581b 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1522,12 +1522,13 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus)
 	return max;
 }
 
-struct pci_bus * pci_create_bus(struct device *parent,
-		int bus, struct pci_ops *ops, void *sysdata)
+struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
+		struct pci_ops *ops, void *sysdata, struct list_head *resources)
 {
 	int error, i;
 	struct pci_bus *b, *b2;
 	struct device *dev;
+	struct pci_bus_resource *bus_res, *n;
 	struct resource *res;
 
 	b = pci_alloc_bus();
@@ -1578,8 +1579,10 @@ struct pci_bus * pci_create_bus(struct device *parent,
 	pci_create_legacy_files(b);
 
 	b->number = b->secondary = bus;
-	b->resource[0] = &ioport_resource;
-	b->resource[1] = &iomem_resource;
+
+	/* Add initial resources to the bus */
+	list_for_each_entry_safe(bus_res, n, resources, list)
+		list_move_tail(&bus_res->list, &b->resources);
 
 	if (parent)
 		dev_info(parent, "PCI host bridge to bus %s\n", dev_name(&b->dev));
@@ -1605,6 +1608,20 @@ err_out:
 	return NULL;
 }
 
+struct pci_bus *pci_create_bus(struct device *parent,
+		int bus, struct pci_ops *ops, void *sysdata)
+{
+	LIST_HEAD(resources);
+	struct pci_bus *b;
+
+	pci_add_resource(&resources, &ioport_resource);
+	pci_add_resource(&resources, &iomem_resource);
+	b = pci_create_root_bus(parent, bus, ops, sysdata, &resources);
+	if (!b)
+		pci_free_resource_list(&resources);
+	return b;
+}
+
 struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent,
 		int bus, struct pci_ops *ops, void *sysdata)
 {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9daa79901122..eacb1e51e11b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -670,6 +670,9 @@ static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *o
 		pci_bus_add_devices(root_bus);
 	return root_bus;
 }
+struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
+				    struct pci_ops *ops, void *sysdata,
+				    struct list_head *resources);
 struct pci_bus *pci_create_bus(struct device *parent, int bus,
 			       struct pci_ops *ops, void *sysdata);
 struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
-- 
cgit v1.2.3


From a2ebb827958a4ab3577443f89037f229683c644a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 28 Oct 2011 16:25:50 -0600
Subject: PCI: add pci_scan_root_bus() that accepts resource list

"Early" and "header" quirks often use incorrect bus resources because they
see the default resources assigned by pci_create_bus(), before the
architecture fixes them up (typically in pcibios_fixup_bus()).  Regions
reserved by these quirks end up with the wrong parents.

Here's the standard path for scanning a PCI root bus:

  pci_scan_bus or pci_scan_bus_parented
    pci_create_bus                     <-- A create with default resources
    pci_scan_child_bus
      pci_scan_slot
        pci_scan_single_device
          pci_scan_device
            pci_setup_device
              pci_fixup_device(early)  <-- B
          pci_device_add
            pci_fixup_device(header)   <-- C
      pcibios_fixup_bus                <-- D fill in correct resources

Early and header quirks at B and C use the default (incorrect) root bus
resources rather than those filled in at D.

This patch adds a new pci_scan_root_bus() function that sets the bus
resources correctly from a supplied list of resources.

I intend to remove pci_scan_bus() and pci_scan_bus_parented() after
fixing all callers.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 15 +++++++++++++++
 include/linux/pci.h |  3 +++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 89ecded0581b..7fc7e14118cc 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1608,6 +1608,21 @@ err_out:
 	return NULL;
 }
 
+struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus,
+		struct pci_ops *ops, void *sysdata, struct list_head *resources)
+{
+	struct pci_bus *b;
+
+	b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
+	if (!b)
+		return NULL;
+
+	b->subordinate = pci_scan_child_bus(b);
+	pci_bus_add_devices(b);
+	return b;
+}
+EXPORT_SYMBOL(pci_scan_root_bus);
+
 struct pci_bus *pci_create_bus(struct device *parent,
 		int bus, struct pci_ops *ops, void *sysdata)
 {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index eacb1e51e11b..5102d74f6bfc 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -673,6 +673,9 @@ static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *o
 struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 				    struct pci_ops *ops, void *sysdata,
 				    struct list_head *resources);
+struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus,
+					     struct pci_ops *ops, void *sysdata,
+					     struct list_head *resources);
 struct pci_bus *pci_create_bus(struct device *parent, int bus,
 			       struct pci_ops *ops, void *sysdata);
 struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
-- 
cgit v1.2.3


From de4b2f76d69673cea08be952dcb4df2f4c81c6e3 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 28 Oct 2011 16:25:55 -0600
Subject: PCI: convert pci_scan_bus() to use pci_create_root_bus()

I plan to deprecate pci_scan_bus_parented(), so use pci_create_root_bus()
directly instead.  pci_scan_bus() itself will be removed as soon as all
callers are gone, so this is just an interim step.

v2: export pci_scan_bus

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 19 +++++++++++++++++++
 include/linux/pci.h | 10 +---------
 2 files changed, 20 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 7fc7e14118cc..d09644b52d1c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1649,6 +1649,25 @@ struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent,
 }
 EXPORT_SYMBOL(pci_scan_bus_parented);
 
+struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops,
+					void *sysdata)
+{
+	LIST_HEAD(resources);
+	struct pci_bus *b;
+
+	pci_add_resource(&resources, &ioport_resource);
+	pci_add_resource(&resources, &iomem_resource);
+	b = pci_create_root_bus(NULL, bus, ops, sysdata, &resources);
+	if (b) {
+		b->subordinate = pci_scan_child_bus(b);
+		pci_bus_add_devices(b);
+	} else {
+		pci_free_resource_list(&resources);
+	}
+	return b;
+}
+EXPORT_SYMBOL(pci_scan_bus);
+
 #ifdef CONFIG_HOTPLUG
 /**
  * pci_rescan_bus - scan a PCI bus for devices.
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5102d74f6bfc..ff280e08690f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -661,15 +661,7 @@ extern struct pci_bus *pci_find_bus(int domain, int busnr);
 void pci_bus_add_devices(const struct pci_bus *bus);
 struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus,
 				      struct pci_ops *ops, void *sysdata);
-static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops,
-					   void *sysdata)
-{
-	struct pci_bus *root_bus;
-	root_bus = pci_scan_bus_parented(NULL, bus, ops, sysdata);
-	if (root_bus)
-		pci_bus_add_devices(root_bus);
-	return root_bus;
-}
+struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata);
 struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 				    struct pci_ops *ops, void *sysdata,
 				    struct list_head *resources);
-- 
cgit v1.2.3


From 118faafaf987f521832843d36c6be580983f9a6b Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 28 Oct 2011 16:28:24 -0600
Subject: PCI: remove pci_create_bus()

All users of pci_create_bus() have been converted to pci_create_root_bus(),
so remove it.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 14 --------------
 include/linux/pci.h |  2 --
 2 files changed, 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 944ea2170f48..7cc9e2f0f47c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1623,20 +1623,6 @@ struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus,
 }
 EXPORT_SYMBOL(pci_scan_root_bus);
 
-struct pci_bus *pci_create_bus(struct device *parent,
-		int bus, struct pci_ops *ops, void *sysdata)
-{
-	LIST_HEAD(resources);
-	struct pci_bus *b;
-
-	pci_add_resource(&resources, &ioport_resource);
-	pci_add_resource(&resources, &iomem_resource);
-	b = pci_create_root_bus(parent, bus, ops, sysdata, &resources);
-	if (!b)
-		pci_free_resource_list(&resources);
-	return b;
-}
-
 /* Deprecated; use pci_scan_root_bus() instead */
 struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent,
 		int bus, struct pci_ops *ops, void *sysdata)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ff280e08690f..174fe8aabdde 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -668,8 +668,6 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus,
 					     struct pci_ops *ops, void *sysdata,
 					     struct list_head *resources);
-struct pci_bus *pci_create_bus(struct device *parent, int bus,
-			       struct pci_ops *ops, void *sysdata);
 struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
 				int busnr);
 void pcie_update_link_speed(struct pci_bus *bus, u16 link_status);
-- 
cgit v1.2.3


From aecab53f45b84fbc7d6848957f9d83e1c3548b17 Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@gnudd.com>
Date: Fri, 6 Jan 2012 13:32:41 +0100
Subject: PCI: pci_ids: add device ids for STA2X11 device (aka ConneXT)

The chip is an I/O hub used by some Atom boards.  Most of those
symbols are used in arch/x86/platform/sta2x11/sta2x11.c (to be
introduced) and in the specific drivers as well.

Signed-off-by: Alessandro Rubini <rubini@gnudd.com>
Acked-by: Giancarlo Asnaghi <giancarlo.asnaghi@st.com>
Cc: Alan Cox <alan@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci_ids.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2aaee0ca9da8..31d77af2ef42 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -776,6 +776,29 @@
 #define PCI_DEVICE_ID_ELSA_QS3000	0x3000
 
 #define PCI_VENDOR_ID_STMICRO		0x104A
+#define PCI_DEVICE_ID_STMICRO_USB_HOST	0xCC00
+#define PCI_DEVICE_ID_STMICRO_USB_OHCI	0xCC01
+#define PCI_DEVICE_ID_STMICRO_USB_OTG	0xCC02
+#define PCI_DEVICE_ID_STMICRO_UART_HWFC 0xCC03
+#define PCI_DEVICE_ID_STMICRO_UART_NO_HWFC	0xCC04
+#define PCI_DEVICE_ID_STMICRO_SOC_DMA	0xCC05
+#define PCI_DEVICE_ID_STMICRO_SATA	0xCC06
+#define PCI_DEVICE_ID_STMICRO_I2C	0xCC07
+#define PCI_DEVICE_ID_STMICRO_SPI_HS	0xCC08
+#define PCI_DEVICE_ID_STMICRO_MAC	0xCC09
+#define PCI_DEVICE_ID_STMICRO_SDIO_EMMC 0xCC0A
+#define PCI_DEVICE_ID_STMICRO_SDIO	0xCC0B
+#define PCI_DEVICE_ID_STMICRO_GPIO	0xCC0C
+#define PCI_DEVICE_ID_STMICRO_VIP	0xCC0D
+#define PCI_DEVICE_ID_STMICRO_AUDIO_ROUTER_DMA	0xCC0E
+#define PCI_DEVICE_ID_STMICRO_AUDIO_ROUTER_SRCS 0xCC0F
+#define PCI_DEVICE_ID_STMICRO_AUDIO_ROUTER_MSPS 0xCC10
+#define PCI_DEVICE_ID_STMICRO_CAN	0xCC11
+#define PCI_DEVICE_ID_STMICRO_MLB	0xCC12
+#define PCI_DEVICE_ID_STMICRO_DBP	0xCC13
+#define PCI_DEVICE_ID_STMICRO_SATA_PHY	0xCC14
+#define PCI_DEVICE_ID_STMICRO_ESRAM	0xCC15
+#define PCI_DEVICE_ID_STMICRO_VIC	0xCC16
 
 #define PCI_VENDOR_ID_BUSLOGIC		      0x104B
 #define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC 0x0140
-- 
cgit v1.2.3


From cda57bf9348fdbf4b8a830d6f9eb7da81df2f486 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 4 Jan 2012 15:49:45 -0800
Subject: PCI: DEVICE_COUNT_RESOURCE should be equal to PCI_NUM_RESOURCES

Save some bytes for device resource array.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 174fe8aabdde..5d06e340da9a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -111,7 +111,7 @@ enum {
 	PCI_NUM_RESOURCES,
 
 	/* preserve this for compatibility */
-	DEVICE_COUNT_RESOURCE
+	DEVICE_COUNT_RESOURCE = PCI_NUM_RESOURCES,
 };
 
 typedef int __bitwise pci_power_t;
-- 
cgit v1.2.3


From fb7ebfe4108e2cdfa2bb88e57148087717463dfa Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 4 Jan 2012 15:50:02 -0800
Subject: PCI: Increase resource array mask bit size in pcim_iomap_regions()

DEVICE_COUNT_RESOURCE will be bigger than 16 when SRIOV supported is enabled.

Let them pass with int just like pci_enable_resources().

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 6 +++---
 lib/devres.c        | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5d06e340da9a..a16b1df3deff 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1432,10 +1432,10 @@ static inline void pci_fixup_device(enum pci_fixup_pass pass,
 void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen);
 void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr);
 void __iomem * const *pcim_iomap_table(struct pci_dev *pdev);
-int pcim_iomap_regions(struct pci_dev *pdev, u16 mask, const char *name);
-int pcim_iomap_regions_request_all(struct pci_dev *pdev, u16 mask,
+int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name);
+int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask,
 				   const char *name);
-void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask);
+void pcim_iounmap_regions(struct pci_dev *pdev, int mask);
 
 extern int pci_pci_problems;
 #define PCIPCI_FAIL		1	/* No PCI PCI DMA */
diff --git a/lib/devres.c b/lib/devres.c
index 7c0e953a7486..2c851bd903bd 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -253,7 +253,7 @@ EXPORT_SYMBOL(pcim_iounmap);
  *
  * Request and iomap regions specified by @mask.
  */
-int pcim_iomap_regions(struct pci_dev *pdev, u16 mask, const char *name)
+int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name)
 {
 	void __iomem * const *iomap;
 	int i, rc;
@@ -306,7 +306,7 @@ EXPORT_SYMBOL(pcim_iomap_regions);
  *
  * Request all PCI BARs and iomap regions specified by @mask.
  */
-int pcim_iomap_regions_request_all(struct pci_dev *pdev, u16 mask,
+int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask,
 				   const char *name)
 {
 	int request_mask = ((1 << 6) - 1) & ~mask;
@@ -330,7 +330,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all);
  *
  * Unmap and release regions specified by @mask.
  */
-void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask)
+void pcim_iounmap_regions(struct pci_dev *pdev, int mask)
 {
 	void __iomem * const *iomap;
 	int i;
-- 
cgit v1.2.3


From cdcf116d44e78c7216ba9f8be9af1cdfca7af728 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 8 Dec 2011 10:51:53 -0500
Subject: switch security_path_chmod() to struct path *

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c                |  2 +-
 include/linux/security.h | 10 +++-------
 security/apparmor/lsm.c  |  7 +++----
 security/capability.c    |  3 +--
 security/security.c      |  7 +++----
 security/tomoyo/tomoyo.c | 11 ++++-------
 6 files changed, 15 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/fs/open.c b/fs/open.c
index 2659f596f4c5..77becc041149 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -456,7 +456,7 @@ static int chmod_common(struct path *path, umode_t mode)
 	if (error)
 		return error;
 	mutex_lock(&inode->i_mutex);
-	error = security_path_chmod(path->dentry, path->mnt, mode);
+	error = security_path_chmod(path, mode);
 	if (error)
 		goto out_unlock;
 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
diff --git a/include/linux/security.h b/include/linux/security.h
index 535721cc374a..4298d2dbafa3 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1435,8 +1435,7 @@ struct security_operations {
 			  struct dentry *new_dentry);
 	int (*path_rename) (struct path *old_dir, struct dentry *old_dentry,
 			    struct path *new_dir, struct dentry *new_dentry);
-	int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt,
-			   umode_t mode);
+	int (*path_chmod) (struct path *path, umode_t mode);
 	int (*path_chown) (struct path *path, uid_t uid, gid_t gid);
 	int (*path_chroot) (struct path *path);
 #endif
@@ -2866,8 +2865,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
 		       struct dentry *new_dentry);
 int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
 			 struct path *new_dir, struct dentry *new_dentry);
-int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
-			umode_t mode);
+int security_path_chmod(struct path *path, umode_t mode);
 int security_path_chown(struct path *path, uid_t uid, gid_t gid);
 int security_path_chroot(struct path *path);
 #else	/* CONFIG_SECURITY_PATH */
@@ -2919,9 +2917,7 @@ static inline int security_path_rename(struct path *old_dir,
 	return 0;
 }
 
-static inline int security_path_chmod(struct dentry *dentry,
-				      struct vfsmount *mnt,
-				      umode_t mode)
+static inline int security_path_chmod(struct path *path, umode_t mode)
 {
 	return 0;
 }
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index c0a399ec1df9..2c0a0ff41399 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -344,13 +344,12 @@ static int apparmor_path_rename(struct path *old_dir, struct dentry *old_dentry,
 	return error;
 }
 
-static int apparmor_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
-			       umode_t mode)
+static int apparmor_path_chmod(struct path *path, umode_t mode)
 {
-	if (!mediated_filesystem(dentry->d_inode))
+	if (!mediated_filesystem(path->dentry->d_inode))
 		return 0;
 
-	return common_perm_mnt_dentry(OP_CHMOD, mnt, dentry, AA_MAY_CHMOD);
+	return common_perm_mnt_dentry(OP_CHMOD, path->mnt, path->dentry, AA_MAY_CHMOD);
 }
 
 static int apparmor_path_chown(struct path *path, uid_t uid, gid_t gid)
diff --git a/security/capability.c b/security/capability.c
index 156816d451ba..3b5883b7179f 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -279,8 +279,7 @@ static int cap_path_truncate(struct path *path)
 	return 0;
 }
 
-static int cap_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
-			  umode_t mode)
+static int cap_path_chmod(struct path *path, umode_t mode)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index 151152de1a0a..214502c772ab 100644
--- a/security/security.c
+++ b/security/security.c
@@ -454,12 +454,11 @@ int security_path_truncate(struct path *path)
 	return security_ops->path_truncate(path);
 }
 
-int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
-			umode_t mode)
+int security_path_chmod(struct path *path, umode_t mode)
 {
-	if (unlikely(IS_PRIVATE(dentry->d_inode)))
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
 		return 0;
-	return security_ops->path_chmod(dentry, mnt, mode);
+	return security_ops->path_chmod(path, mode);
 }
 
 int security_path_chown(struct path *path, uid_t uid, gid_t gid)
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 75c956a51e75..620d37c159a3 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -353,17 +353,14 @@ static int tomoyo_file_ioctl(struct file *file, unsigned int cmd,
 /**
  * tomoyo_path_chmod - Target for security_path_chmod().
  *
- * @dentry: Pointer to "struct dentry".
- * @mnt:    Pointer to "struct vfsmount".
- * @mode:   DAC permission mode.
+ * @path: Pointer to "struct path".
+ * @mode: DAC permission mode.
  *
  * Returns 0 on success, negative value otherwise.
  */
-static int tomoyo_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
-			     umode_t mode)
+static int tomoyo_path_chmod(struct path *path, umode_t mode)
 {
-	struct path path = { mnt, dentry };
-	return tomoyo_path_number_perm(TOMOYO_TYPE_CHMOD, &path,
+	return tomoyo_path_number_perm(TOMOYO_TYPE_CHMOD, path,
 				       mode & S_IALLUGO);
 }
 
-- 
cgit v1.2.3


From 64132379d509184425672e0dce1ac0a031e3f2a5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 8 Dec 2011 20:51:13 -0500
Subject: vfs: switch ->show_stats to struct dentry *

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/staging/pohmelfs/inode.c |  4 ++--
 fs/cifs/cifsfs.c                 |  2 +-
 fs/nfs/super.c                   | 14 +++++++-------
 fs/proc_namespace.c              | 11 ++++++-----
 include/linux/fs.h               |  2 +-
 5 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index 6c12516826ad..91ec29e112bc 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -1759,11 +1759,11 @@ err_out_exit:
 	return err;
 }
 
-static int pohmelfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
+static int pohmelfs_show_stats(struct seq_file *m, struct dentry *root)
 {
 	struct netfs_state *st;
 	struct pohmelfs_ctl *ctl;
-	struct pohmelfs_sb *psb = POHMELFS_SB(mnt->mnt_sb);
+	struct pohmelfs_sb *psb = POHMELFS_SB(root->d_sb);
 	struct pohmelfs_config *c;
 
 	mutex_lock(&psb->state_lock);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5bb961c13c4d..0cb89dc6526c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -488,7 +488,7 @@ static void cifs_umount_begin(struct super_block *sb)
 }
 
 #ifdef CONFIG_CIFS_STATS2
-static int cifs_show_stats(struct seq_file *s, struct vfsmount *mnt)
+static int cifs_show_stats(struct seq_file *s, struct dentry *root)
 {
 	/* BB FIXME */
 	return 0;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 0e6dd56a9f1e..dd74d3bc2eaa 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -265,7 +265,7 @@ static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
 static int  nfs_show_devname(struct seq_file *, struct vfsmount *);
 static int  nfs_show_path(struct seq_file *, struct vfsmount *);
-static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
+static int  nfs_show_stats(struct seq_file *, struct dentry *);
 static struct dentry *nfs_fs_mount(struct file_system_type *,
 		int, const char *, void *);
 static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
@@ -785,10 +785,10 @@ static int nfs_show_path(struct seq_file *m, struct vfsmount *mnt)
 /*
  * Present statistical information for this VFS mountpoint
  */
-static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
+static int nfs_show_stats(struct seq_file *m, struct dentry *root)
 {
 	int i, cpu;
-	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+	struct nfs_server *nfss = NFS_SB(root->d_sb);
 	struct rpc_auth *auth = nfss->client->cl_auth;
 	struct nfs_iostats totals = { };
 
@@ -798,10 +798,10 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 	 * Display all mount option settings
 	 */
 	seq_printf(m, "\n\topts:\t");
-	seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
-	seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
-	seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : "");
-	seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
+	seq_puts(m, root->d_sb->s_flags & MS_RDONLY ? "ro" : "rw");
+	seq_puts(m, root->d_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
+	seq_puts(m, root->d_sb->s_flags & MS_NOATIME ? ",noatime" : "");
+	seq_puts(m, root->d_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
 	nfs_show_mount_options(m, nfss, 1);
 
 	seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 9dcd9543ca12..61a09a6364ba 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -183,12 +183,13 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
 {
 	struct mount *r = real_mount(mnt);
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
+	struct super_block *sb = mnt_path.dentry->d_sb;
 	int err = 0;
 
 	/* device */
-	if (mnt->mnt_sb->s_op->show_devname) {
+	if (sb->s_op->show_devname) {
 		seq_puts(m, "device ");
-		err = mnt->mnt_sb->s_op->show_devname(m, mnt);
+		err = sb->s_op->show_devname(m, mnt);
 	} else {
 		if (r->mnt_devname) {
 			seq_puts(m, "device ");
@@ -204,13 +205,13 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
 
 	/* file system type */
 	seq_puts(m, "with fstype ");
-	show_type(m, mnt->mnt_sb);
+	show_type(m, sb);
 
 	/* optional statistics */
-	if (mnt->mnt_sb->s_op->show_stats) {
+	if (sb->s_op->show_stats) {
 		seq_putc(m, ' ');
 		if (!err)
-			err = mnt->mnt_sb->s_op->show_stats(m, mnt);
+			err = sb->s_op->show_stats(m, mnt_path.dentry);
 	}
 
 	seq_putc(m, '\n');
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 659be7d82617..b2e4b6f639e4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1675,7 +1675,7 @@ struct super_operations {
 	int (*show_options)(struct seq_file *, struct vfsmount *);
 	int (*show_devname)(struct seq_file *, struct vfsmount *);
 	int (*show_path)(struct seq_file *, struct vfsmount *);
-	int (*show_stats)(struct seq_file *, struct vfsmount *);
+	int (*show_stats)(struct seq_file *, struct dentry *);
 #ifdef CONFIG_QUOTA
 	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
-- 
cgit v1.2.3


From d861c630e99febe5ce6055290085556c5b714b06 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 8 Dec 2011 21:32:45 -0500
Subject: vfs: switch ->show_devname() to struct dentry *

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/super.c      |  6 +++---
 fs/proc_namespace.c | 17 +++++++++--------
 include/linux/fs.h  |  2 +-
 3 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index dd74d3bc2eaa..6e6faa17bd38 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -263,7 +263,7 @@ static match_table_t nfs_local_lock_tokens = {
 static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
-static int  nfs_show_devname(struct seq_file *, struct vfsmount *);
+static int  nfs_show_devname(struct seq_file *, struct dentry *);
 static int  nfs_show_path(struct seq_file *, struct vfsmount *);
 static int  nfs_show_stats(struct seq_file *, struct dentry *);
 static struct dentry *nfs_fs_mount(struct file_system_type *,
@@ -760,14 +760,14 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
 #endif
 #endif
 
-static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt)
+static int nfs_show_devname(struct seq_file *m, struct dentry *root)
 {
 	char *page = (char *) __get_free_page(GFP_KERNEL);
 	char *devname, *dummy;
 	int err = 0;
 	if (!page)
 		return -ENOMEM;
-	devname = nfs_path(&dummy, mnt->mnt_root, page, PAGE_SIZE);
+	devname = nfs_path(&dummy, root, page, PAGE_SIZE);
 	if (IS_ERR(devname))
 		err = PTR_ERR(devname);
 	else
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 61a09a6364ba..6d4583ddbeda 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -94,9 +94,10 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
 	struct mount *r = real_mount(mnt);
 	int err = 0;
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
+	struct super_block *sb = mnt_path.dentry->d_sb;
 
-	if (mnt->mnt_sb->s_op->show_devname) {
-		err = mnt->mnt_sb->s_op->show_devname(m, mnt);
+	if (sb->s_op->show_devname) {
+		err = sb->s_op->show_devname(m, mnt_path.dentry);
 		if (err)
 			goto out;
 	} else {
@@ -105,14 +106,14 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
 	seq_putc(m, ' ');
 	seq_path(m, &mnt_path, " \t\n\\");
 	seq_putc(m, ' ');
-	show_type(m, mnt->mnt_sb);
+	show_type(m, sb);
 	seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
-	err = show_sb_opts(m, mnt->mnt_sb);
+	err = show_sb_opts(m, sb);
 	if (err)
 		goto out;
 	show_mnt_opts(m, mnt);
-	if (mnt->mnt_sb->s_op->show_options)
-		err = mnt->mnt_sb->s_op->show_options(m, mnt);
+	if (sb->s_op->show_options)
+		err = sb->s_op->show_options(m, mnt);
 	seq_puts(m, " 0 0\n");
 out:
 	return err;
@@ -163,7 +164,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
 	show_type(m, sb);
 	seq_putc(m, ' ');
 	if (sb->s_op->show_devname)
-		err = sb->s_op->show_devname(m, mnt);
+		err = sb->s_op->show_devname(m, mnt->mnt_root);
 	else
 		mangle(m, r->mnt_devname ? r->mnt_devname : "none");
 	if (err)
@@ -189,7 +190,7 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
 	/* device */
 	if (sb->s_op->show_devname) {
 		seq_puts(m, "device ");
-		err = sb->s_op->show_devname(m, mnt);
+		err = sb->s_op->show_devname(m, mnt_path.dentry);
 	} else {
 		if (r->mnt_devname) {
 			seq_puts(m, "device ");
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b2e4b6f639e4..a8dff43d1b9d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1673,7 +1673,7 @@ struct super_operations {
 	void (*umount_begin) (struct super_block *);
 
 	int (*show_options)(struct seq_file *, struct vfsmount *);
-	int (*show_devname)(struct seq_file *, struct vfsmount *);
+	int (*show_devname)(struct seq_file *, struct dentry *);
 	int (*show_path)(struct seq_file *, struct vfsmount *);
 	int (*show_stats)(struct seq_file *, struct dentry *);
 #ifdef CONFIG_QUOTA
-- 
cgit v1.2.3


From a6322de67b58a00e3a783ad9c87c2a11b2d67b47 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 8 Dec 2011 21:37:57 -0500
Subject: vfs: switch ->show_path() to struct dentry *

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/super.c      | 4 ++--
 fs/proc_namespace.c | 2 +-
 include/linux/fs.h  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 6e6faa17bd38..02c693c77ab7 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -264,7 +264,7 @@ static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
 static int  nfs_show_devname(struct seq_file *, struct dentry *);
-static int  nfs_show_path(struct seq_file *, struct vfsmount *);
+static int  nfs_show_path(struct seq_file *, struct dentry *);
 static int  nfs_show_stats(struct seq_file *, struct dentry *);
 static struct dentry *nfs_fs_mount(struct file_system_type *,
 		int, const char *, void *);
@@ -776,7 +776,7 @@ static int nfs_show_devname(struct seq_file *m, struct dentry *root)
 	return err;
 }
 
-static int nfs_show_path(struct seq_file *m, struct vfsmount *mnt)
+static int nfs_show_path(struct seq_file *m, struct dentry *dentry)
 {
 	seq_puts(m, "/");
 	return 0;
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 6d4583ddbeda..8f8304b3f98a 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -131,7 +131,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
 	seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
 		   MAJOR(sb->s_dev), MINOR(sb->s_dev));
 	if (sb->s_op->show_path)
-		err = sb->s_op->show_path(m, mnt);
+		err = sb->s_op->show_path(m, mnt->mnt_root);
 	else
 		seq_dentry(m, mnt->mnt_root, " \t\n\\");
 	if (err)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a8dff43d1b9d..13721b073407 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1674,7 +1674,7 @@ struct super_operations {
 
 	int (*show_options)(struct seq_file *, struct vfsmount *);
 	int (*show_devname)(struct seq_file *, struct dentry *);
-	int (*show_path)(struct seq_file *, struct vfsmount *);
+	int (*show_path)(struct seq_file *, struct dentry *);
 	int (*show_stats)(struct seq_file *, struct dentry *);
 #ifdef CONFIG_QUOTA
 	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
-- 
cgit v1.2.3


From 34c80b1d93e6e20ca9dea0baf583a5b5510d92d4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 8 Dec 2011 21:32:45 -0500
Subject: vfs: switch ->show_options() to struct dentry *

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |  2 +-
 Documentation/filesystems/vfs.txt |  2 +-
 arch/s390/hypfs/inode.c           |  4 ++--
 drivers/staging/pohmelfs/inode.c  |  4 ++--
 drivers/usb/core/inode.c          |  2 +-
 fs/adfs/super.c                   |  4 ++--
 fs/autofs4/inode.c                |  6 +++---
 fs/btrfs/super.c                  |  4 ++--
 fs/ceph/super.c                   |  6 +++---
 fs/cifs/cifsfs.c                  |  6 +++---
 fs/devpts/inode.c                 |  4 ++--
 fs/ecryptfs/super.c               |  4 ++--
 fs/ext2/super.c                   |  4 ++--
 fs/ext3/super.c                   |  4 ++--
 fs/ext4/super.c                   |  4 ++--
 fs/fat/inode.c                    |  6 +++---
 fs/fuse/inode.c                   | 10 +++++-----
 fs/gfs2/super.c                   |  8 ++++----
 fs/hfs/super.c                    |  4 ++--
 fs/hfsplus/hfsplus_fs.h           |  2 +-
 fs/hfsplus/options.c              |  4 ++--
 fs/hostfs/hostfs_kern.c           |  4 ++--
 fs/jffs2/super.c                  |  4 ++--
 fs/jfs/super.c                    |  4 ++--
 fs/namespace.c                    |  4 ++--
 fs/ncpfs/inode.c                  |  6 +++---
 fs/nfs/super.c                    |  6 +++---
 fs/nilfs2/super.c                 |  6 +++---
 fs/ntfs/inode.c                   |  8 ++++----
 fs/ntfs/inode.h                   |  2 +-
 fs/ocfs2/super.c                  |  9 ++++-----
 fs/proc_namespace.c               |  4 ++--
 fs/ubifs/super.c                  |  4 ++--
 fs/udf/super.c                    |  6 +++---
 fs/ufs/super.c                    |  4 ++--
 fs/xfs/xfs_super.c                |  4 ++--
 include/linux/fs.h                |  4 ++--
 kernel/cgroup.c                   |  4 ++--
 mm/shmem.c                        |  4 ++--
 39 files changed, 90 insertions(+), 91 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 9e9f30b9f46b..4fca82e5276e 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -117,7 +117,7 @@ prototypes:
 	int (*statfs) (struct dentry *, struct kstatfs *);
 	int (*remount_fs) (struct super_block *, int *, char *);
 	void (*umount_begin) (struct super_block *);
-	int (*show_options)(struct seq_file *, struct vfsmount *);
+	int (*show_options)(struct seq_file *, struct dentry *);
 	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
 	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 4b9f0d092a79..3d9393b845b8 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -225,7 +225,7 @@ struct super_operations {
         void (*clear_inode) (struct inode *);
         void (*umount_begin) (struct super_block *);
 
-        int (*show_options)(struct seq_file *, struct vfsmount *);
+        int (*show_options)(struct seq_file *, struct dentry *);
 
         ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
         ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 98efd2d6207a..8a2a887478cc 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -259,9 +259,9 @@ static int hypfs_parse_options(char *options, struct super_block *sb)
 	return 0;
 }
 
-static int hypfs_show_options(struct seq_file *s, struct vfsmount *mnt)
+static int hypfs_show_options(struct seq_file *s, struct dentry *root)
 {
-	struct hypfs_sb_info *hypfs_info = mnt->mnt_sb->s_fs_info;
+	struct hypfs_sb_info *hypfs_info = root->d_sb->s_fs_info;
 
 	seq_printf(s, ",uid=%u", hypfs_info->uid);
 	seq_printf(s, ",gid=%u", hypfs_info->gid);
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index 91ec29e112bc..807e3f324113 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -1369,9 +1369,9 @@ static int pohmelfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static int pohmelfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int pohmelfs_show_options(struct seq_file *seq, struct dentry *root)
 {
-	struct pohmelfs_sb *psb = POHMELFS_SB(vfs->mnt_sb);
+	struct pohmelfs_sb *psb = POHMELFS_SB(root->d_sb);
 
 	seq_printf(seq, ",idx=%u", psb->idx);
 	seq_printf(seq, ",trans_scan_timeout=%u", jiffies_to_msecs(psb->trans_scan_timeout));
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 2b60af2ce3ba..9e186f3da839 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -65,7 +65,7 @@ static umode_t devmode = USBFS_DEFAULT_DEVMODE;
 static umode_t busmode = USBFS_DEFAULT_BUSMODE;
 static umode_t listmode = USBFS_DEFAULT_LISTMODE;
 
-static int usbfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
+static int usbfs_show_options(struct seq_file *seq, struct dentry *root)
 {
 	if (devuid != 0)
 		seq_printf(seq, ",devuid=%u", devuid);
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index c8bf36a1996a..8e3b36ace305 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -126,9 +126,9 @@ static void adfs_put_super(struct super_block *sb)
 	sb->s_fs_info = NULL;
 }
 
-static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
+static int adfs_show_options(struct seq_file *seq, struct dentry *root)
 {
-	struct adfs_sb_info *asb = ADFS_SB(mnt->mnt_sb);
+	struct adfs_sb_info *asb = ADFS_SB(root->d_sb);
 
 	if (asb->s_uid != 0)
 		seq_printf(seq, ",uid=%u", asb->s_uid);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index f799efad52a8..2ba44c79d548 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -70,10 +70,10 @@ out_kill_sb:
 	kill_litter_super(sb);
 }
 
-static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int autofs4_show_options(struct seq_file *m, struct dentry *root)
 {
-	struct autofs_sb_info *sbi = autofs4_sbi(mnt->mnt_sb);
-	struct inode *root_inode = mnt->mnt_sb->s_root->d_inode;
+	struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
+	struct inode *root_inode = root->d_sb->s_root->d_inode;
 
 	if (!sbi)
 		return 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index dc62d3cc68fd..ae488aa1966a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -661,9 +661,9 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 	return ret;
 }
 
-static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 {
-	struct btrfs_root *root = btrfs_sb(vfs->mnt_sb);
+	struct btrfs_root *root = btrfs_sb(dentry->d_sb);
 	struct btrfs_fs_info *info = root->fs_info;
 	char *compress_type;
 
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index b48f15f101a0..11bd0fc4853f 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -341,11 +341,11 @@ out:
 /**
  * ceph_show_options - Show mount options in /proc/mounts
  * @m: seq_file to write to
- * @mnt: mount descriptor
+ * @root: root of that (sub)tree
  */
-static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int ceph_show_options(struct seq_file *m, struct dentry *root)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb);
+	struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
 	struct ceph_mount_options *fsopt = fsc->mount_options;
 	struct ceph_options *opt = fsc->client->options;
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 0cb89dc6526c..b1fd382d1952 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -343,9 +343,9 @@ cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server)
  * ones are.
  */
 static int
-cifs_show_options(struct seq_file *s, struct vfsmount *m)
+cifs_show_options(struct seq_file *s, struct dentry *root)
 {
-	struct cifs_sb_info *cifs_sb = CIFS_SB(m->mnt_sb);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb);
 	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
 	struct sockaddr *srcaddr;
 	srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr;
@@ -430,7 +430,7 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
 		seq_printf(s, ",cifsacl");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
 		seq_printf(s, ",dynperm");
-	if (m->mnt_sb->s_flags & MS_POSIXACL)
+	if (root->d_sb->s_flags & MS_POSIXACL)
 		seq_printf(s, ",acl");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS)
 		seq_printf(s, ",mfsymlinks");
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index d5d5297efe97..79673eb71151 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -246,9 +246,9 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
 	return err;
 }
 
-static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int devpts_show_options(struct seq_file *seq, struct dentry *root)
 {
-	struct pts_fs_info *fsi = DEVPTS_SB(vfs->mnt_sb);
+	struct pts_fs_info *fsi = DEVPTS_SB(root->d_sb);
 	struct pts_mount_opts *opts = &fsi->mount_opts;
 
 	if (opts->setuid)
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index da485f0b4d1e..9df7fd6e0c39 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -131,9 +131,9 @@ static void ecryptfs_evict_inode(struct inode *inode)
  * Prints the mount options for a given superblock.
  * Returns zero; does not fail.
  */
-static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int ecryptfs_show_options(struct seq_file *m, struct dentry *root)
 {
-	struct super_block *sb = mnt->mnt_sb;
+	struct super_block *sb = root->d_sb;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
 		&ecryptfs_superblock_to_private(sb)->mount_crypt_stat;
 	struct ecryptfs_global_auth_tok *walker;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 67b5e752ec9d..9b403f064ce0 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -210,9 +210,9 @@ static void destroy_inodecache(void)
 	kmem_cache_destroy(ext2_inode_cachep);
 }
 
-static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int ext2_show_options(struct seq_file *seq, struct dentry *root)
 {
-	struct super_block *sb = vfs->mnt_sb;
+	struct super_block *sb = root->d_sb;
 	struct ext2_sb_info *sbi = EXT2_SB(sb);
 	struct ext2_super_block *es = sbi->s_es;
 	unsigned long def_mount_opts;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 7e8944ee67c6..3a10b884e1be 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -610,9 +610,9 @@ static char *data_mode_string(unsigned long mode)
  *  - it's set to a non-default value OR
  *  - if the per-sb default is different from the global default
  */
-static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int ext3_show_options(struct seq_file *seq, struct dentry *root)
 {
-	struct super_block *sb = vfs->mnt_sb;
+	struct super_block *sb = root->d_sb;
 	struct ext3_sb_info *sbi = EXT3_SB(sb);
 	struct ext3_super_block *es = sbi->s_es;
 	unsigned long def_mount_opts;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b739b210a616..6733b3736b3b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1032,11 +1032,11 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
  *  - it's set to a non-default value OR
  *  - if the per-sb default is different from the global default
  */
-static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int ext4_show_options(struct seq_file *seq, struct dentry *root)
 {
 	int def_errors;
 	unsigned long def_mount_opts;
-	struct super_block *sb = vfs->mnt_sb;
+	struct super_block *sb = root->d_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_super_block *es = sbi->s_es;
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index ef44e5f98ced..7873797cc76a 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -671,7 +671,7 @@ int fat_sync_inode(struct inode *inode)
 
 EXPORT_SYMBOL_GPL(fat_sync_inode);
 
-static int fat_show_options(struct seq_file *m, struct vfsmount *mnt);
+static int fat_show_options(struct seq_file *m, struct dentry *root);
 static const struct super_operations fat_sops = {
 	.alloc_inode	= fat_alloc_inode,
 	.destroy_inode	= fat_destroy_inode,
@@ -810,9 +810,9 @@ static const struct export_operations fat_export_ops = {
 	.get_parent	= fat_get_parent,
 };
 
-static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int fat_show_options(struct seq_file *m, struct dentry *root)
 {
-	struct msdos_sb_info *sbi = MSDOS_SB(mnt->mnt_sb);
+	struct msdos_sb_info *sbi = MSDOS_SB(root->d_sb);
 	struct fat_mount_options *opts = &sbi->options;
 	int isvfat = opts->isvfat;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3d3622a1ceac..64cf8d07393e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -497,9 +497,10 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
 	return 1;
 }
 
-static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int fuse_show_options(struct seq_file *m, struct dentry *root)
 {
-	struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb);
+	struct super_block *sb = root->d_sb;
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
 
 	seq_printf(m, ",user_id=%u", fc->user_id);
 	seq_printf(m, ",group_id=%u", fc->group_id);
@@ -509,9 +510,8 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
 		seq_puts(m, ",allow_other");
 	if (fc->max_read != ~0)
 		seq_printf(m, ",max_read=%u", fc->max_read);
-	if (mnt->mnt_sb->s_bdev &&
-	    mnt->mnt_sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
-		seq_printf(m, ",blksize=%lu", mnt->mnt_sb->s_blocksize);
+	if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
+		seq_printf(m, ",blksize=%lu", sb->s_blocksize);
 	return 0;
 }
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 9e89d94be003..10c7733a899b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1284,18 +1284,18 @@ static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
 /**
  * gfs2_show_options - Show mount options for /proc/mounts
  * @s: seq_file structure
- * @mnt: vfsmount
+ * @root: root of this (sub)tree
  *
  * Returns: 0 on success or error code
  */
 
-static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
+static int gfs2_show_options(struct seq_file *s, struct dentry *root)
 {
-	struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
+	struct gfs2_sbd *sdp = root->d_sb->s_fs_info;
 	struct gfs2_args *args = &sdp->sd_args;
 	int val;
 
-	if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
+	if (is_ancestor(root, sdp->sd_master_dir))
 		seq_printf(s, ",meta");
 	if (args->ar_lockproto[0])
 		seq_printf(s, ",lockproto=%s", args->ar_lockproto);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 32dc2fbb26d5..8137fb3e6780 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -133,9 +133,9 @@ static int hfs_remount(struct super_block *sb, int *flags, char *data)
 	return 0;
 }
 
-static int hfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
+static int hfs_show_options(struct seq_file *seq, struct dentry *root)
 {
-	struct hfs_sb_info *sbi = HFS_SB(mnt->mnt_sb);
+	struct hfs_sb_info *sbi = HFS_SB(root->d_sb);
 
 	if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f))
 		seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 3a6c025414e2..21a5b7fc6db4 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -419,7 +419,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
 int hfsplus_parse_options(char *, struct hfsplus_sb_info *);
 int hfsplus_parse_options_remount(char *input, int *force);
 void hfsplus_fill_defaults(struct hfsplus_sb_info *);
-int hfsplus_show_options(struct seq_file *, struct vfsmount *);
+int hfsplus_show_options(struct seq_file *, struct dentry *);
 
 /* super.c */
 struct inode *hfsplus_iget(struct super_block *, unsigned long);
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index bb62a5882147..06fa5618600c 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -206,9 +206,9 @@ done:
 	return 1;
 }
 
-int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt)
+int hfsplus_show_options(struct seq_file *seq, struct dentry *root)
 {
-	struct hfsplus_sb_info *sbi = HFSPLUS_SB(mnt->mnt_sb);
+	struct hfsplus_sb_info *sbi = HFSPLUS_SB(root->d_sb);
 
 	if (sbi->creator != HFSPLUS_DEF_CR_TYPE)
 		seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index a7340e710a90..e130bd46d671 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -258,9 +258,9 @@ static void hostfs_destroy_inode(struct inode *inode)
 	call_rcu(&inode->i_rcu, hostfs_i_callback);
 }
 
-static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
 {
-	const char *root_path = vfs->mnt_sb->s_fs_info;
+	const char *root_path = root->d_sb->s_fs_info;
 	size_t offset = strlen(root_ino) + 1;
 
 	if (strlen(root_path) > offset)
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 804e1292d63e..8be4925296cf 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -96,9 +96,9 @@ static const char *jffs2_compr_name(unsigned int compr)
 	}
 }
 
-static int jffs2_show_options(struct seq_file *s, struct vfsmount *mnt)
+static int jffs2_show_options(struct seq_file *s, struct dentry *root)
 {
-	struct jffs2_sb_info *c = JFFS2_SB_INFO(mnt->mnt_sb);
+	struct jffs2_sb_info *c = JFFS2_SB_INFO(root->d_sb);
 	struct jffs2_mount_opts *opts = &c->mount_opts;
 
 	if (opts->override_compr)
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 1b8f4ca29adf..682bca642f38 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -608,9 +608,9 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
 	return 0;
 }
 
-static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int jfs_show_options(struct seq_file *seq, struct dentry *root)
 {
-	struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb);
+	struct jfs_sb_info *sbi = JFS_SBI(root->d_sb);
 
 	if (sbi->uid != -1)
 		seq_printf(seq, ",uid=%d", sbi->uid);
diff --git a/fs/namespace.c b/fs/namespace.c
index 773435ca300d..db65e2e4921f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -836,12 +836,12 @@ static inline void mangle(struct seq_file *m, const char *s)
  *
  * See also save_mount_options().
  */
-int generic_show_options(struct seq_file *m, struct vfsmount *mnt)
+int generic_show_options(struct seq_file *m, struct dentry *root)
 {
 	const char *options;
 
 	rcu_read_lock();
-	options = rcu_dereference(mnt->mnt_sb->s_options);
+	options = rcu_dereference(root->d_sb->s_options);
 
 	if (options != NULL && options[0]) {
 		seq_putc(m, ',');
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index f3f07cd392b3..3d1e34f8a68e 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -44,7 +44,7 @@
 static void ncp_evict_inode(struct inode *);
 static void ncp_put_super(struct super_block *);
 static int  ncp_statfs(struct dentry *, struct kstatfs *);
-static int  ncp_show_options(struct seq_file *, struct vfsmount *);
+static int  ncp_show_options(struct seq_file *, struct dentry *);
 
 static struct kmem_cache * ncp_inode_cachep;
 
@@ -322,9 +322,9 @@ static void ncp_stop_tasks(struct ncp_server *server) {
 		flush_work_sync(&server->timeout_tq);
 }
 
-static int  ncp_show_options(struct seq_file *seq, struct vfsmount *mnt)
+static int  ncp_show_options(struct seq_file *seq, struct dentry *root)
 {
-	struct ncp_server *server = NCP_SBP(mnt->mnt_sb);
+	struct ncp_server *server = NCP_SBP(root->d_sb);
 	unsigned int tmp;
 
 	if (server->m.uid != 0)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 02c693c77ab7..e463967aafb8 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -262,7 +262,7 @@ static match_table_t nfs_local_lock_tokens = {
 
 static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
-static int  nfs_show_options(struct seq_file *, struct vfsmount *);
+static int  nfs_show_options(struct seq_file *, struct dentry *);
 static int  nfs_show_devname(struct seq_file *, struct dentry *);
 static int  nfs_show_path(struct seq_file *, struct dentry *);
 static int  nfs_show_stats(struct seq_file *, struct dentry *);
@@ -720,9 +720,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 /*
  * Describe the mount options on this VFS mountpoint
  */
-static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int nfs_show_options(struct seq_file *m, struct dentry *root)
 {
-	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+	struct nfs_server *nfss = NFS_SB(root->d_sb);
 
 	nfs_show_mount_options(m, nfss, 0);
 
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 5356c7169d50..08e3d4f9df18 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -648,11 +648,11 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry)
 {
-	struct super_block *sb = vfs->mnt_sb;
+	struct super_block *sb = dentry->d_sb;
 	struct the_nilfs *nilfs = sb->s_fs_info;
-	struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root;
+	struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root;
 
 	if (!nilfs_test_opt(nilfs, BARRIER))
 		seq_puts(seq, ",nobarrier");
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index fea40bb6fb68..2eaa66652944 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2300,16 +2300,16 @@ void ntfs_evict_big_inode(struct inode *vi)
 /**
  * ntfs_show_options - show mount options in /proc/mounts
  * @sf:		seq_file in which to write our mount options
- * @mnt:	vfs mount whose mount options to display
+ * @root:	root of the mounted tree whose mount options to display
  *
  * Called by the VFS once for each mounted ntfs volume when someone reads
  * /proc/mounts in order to display the NTFS specific mount options of each
- * mount. The mount options of the vfs mount @mnt are written to the seq file
+ * mount. The mount options of fs specified by @root are written to the seq file
  * @sf and success is returned.
  */
-int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
+int ntfs_show_options(struct seq_file *sf, struct dentry *root)
 {
-	ntfs_volume *vol = NTFS_SB(mnt->mnt_sb);
+	ntfs_volume *vol = NTFS_SB(root->d_sb);
 	int i;
 
 	seq_printf(sf, ",uid=%i", vol->uid);
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index fe8e7e928889..db29695f845c 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -298,7 +298,7 @@ extern void ntfs_clear_extent_inode(ntfs_inode *ni);
 
 extern int ntfs_read_inode_mount(struct inode *vi);
 
-extern int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt);
+extern int ntfs_show_options(struct seq_file *sf, struct dentry *root);
 
 #ifdef NTFS_RW
 
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index c05ff25c356c..604e12c4e979 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -108,7 +108,7 @@ static int ocfs2_parse_options(struct super_block *sb, char *options,
 			       int is_remount);
 static int ocfs2_check_set_options(struct super_block *sb,
 				   struct mount_options *options);
-static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt);
+static int ocfs2_show_options(struct seq_file *s, struct dentry *root);
 static void ocfs2_put_super(struct super_block *sb);
 static int ocfs2_mount_volume(struct super_block *sb);
 static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
@@ -1533,9 +1533,9 @@ bail:
 	return status;
 }
 
-static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
+static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
 {
-	struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb);
+	struct ocfs2_super *osb = OCFS2_SB(root->d_sb);
 	unsigned long opts = osb->s_mount_opt;
 	unsigned int local_alloc_megs;
 
@@ -1567,8 +1567,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 	if (osb->preferred_slot != OCFS2_INVALID_SLOT)
 		seq_printf(s, ",preferred_slot=%d", osb->preferred_slot);
 
-	if (!(mnt->mnt_flags & MNT_NOATIME) && !(mnt->mnt_flags & MNT_RELATIME))
-		seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
+	seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
 
 	if (osb->osb_commit_interval)
 		seq_printf(s, ",commit=%u",
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 8f8304b3f98a..12412852d88a 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -113,7 +113,7 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
 		goto out;
 	show_mnt_opts(m, mnt);
 	if (sb->s_op->show_options)
-		err = sb->s_op->show_options(m, mnt);
+		err = sb->s_op->show_options(m, mnt_path.dentry);
 	seq_puts(m, " 0 0\n");
 out:
 	return err;
@@ -174,7 +174,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
 	if (err)
 		goto out;
 	if (sb->s_op->show_options)
-		err = sb->s_op->show_options(m, mnt);
+		err = sb->s_op->show_options(m, mnt->mnt_root);
 	seq_putc(m, '\n');
 out:
 	return err;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index d93a3fadf53c..63765d58445b 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -419,9 +419,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
+static int ubifs_show_options(struct seq_file *s, struct dentry *root)
 {
-	struct ubifs_info *c = mnt->mnt_sb->s_fs_info;
+	struct ubifs_info *c = root->d_sb->s_fs_info;
 
 	if (c->mount_opts.unmount_mode == 2)
 		seq_printf(s, ",fast_unmount");
diff --git a/fs/udf/super.c b/fs/udf/super.c
index c94fc889a486..0c33225647a0 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -89,7 +89,7 @@ static void udf_open_lvid(struct super_block *);
 static void udf_close_lvid(struct super_block *);
 static unsigned int udf_count_free(struct super_block *);
 static int udf_statfs(struct dentry *, struct kstatfs *);
-static int udf_show_options(struct seq_file *, struct vfsmount *);
+static int udf_show_options(struct seq_file *, struct dentry *);
 
 struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
 {
@@ -249,9 +249,9 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
 	return 0;
 }
 
-static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt)
+static int udf_show_options(struct seq_file *seq, struct dentry *root)
 {
-	struct super_block *sb = mnt->mnt_sb;
+	struct super_block *sb = root->d_sb;
 	struct udf_sb_info *sbi = UDF_SB(sb);
 
 	if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT))
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index d6961eb5b774..5246ee3e5607 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1351,9 +1351,9 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 	return 0;
 }
 
-static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int ufs_show_options(struct seq_file *seq, struct dentry *root)
 {
-	struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
+	struct ufs_sb_info *sbi = UFS_SB(root->d_sb);
 	unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
 	const struct match_token *tp = tokens;
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 8a899496fd5f..7b7669507ee3 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1238,9 +1238,9 @@ xfs_fs_unfreeze(
 STATIC int
 xfs_fs_show_options(
 	struct seq_file		*m,
-	struct vfsmount		*mnt)
+	struct dentry		*root)
 {
-	return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
+	return -xfs_showargs(XFS_M(root->d_sb), m);
 }
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 13721b073407..cc1021fd19ef 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1672,7 +1672,7 @@ struct super_operations {
 	int (*remount_fs) (struct super_block *, int *, char *);
 	void (*umount_begin) (struct super_block *);
 
-	int (*show_options)(struct seq_file *, struct vfsmount *);
+	int (*show_options)(struct seq_file *, struct dentry *);
 	int (*show_devname)(struct seq_file *, struct dentry *);
 	int (*show_path)(struct seq_file *, struct dentry *);
 	int (*show_stats)(struct seq_file *, struct dentry *);
@@ -2592,7 +2592,7 @@ extern void setattr_copy(struct inode *inode, const struct iattr *attr);
 
 extern void file_update_time(struct file *file);
 
-extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt);
+extern int generic_show_options(struct seq_file *m, struct dentry *root);
 extern void save_mount_options(struct super_block *sb, char *options);
 extern void replace_mount_options(struct super_block *sb, char *options);
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 86ebacfd9431..7cab65f83f1d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1038,9 +1038,9 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 	return 0;
 }
 
-static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
 {
-	struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
+	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
 	struct cgroup_subsys *ss;
 
 	mutex_lock(&cgroup_mutex);
diff --git a/mm/shmem.c b/mm/shmem.c
index 86a19efc36fb..feead1943d92 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2118,9 +2118,9 @@ out:
 	return error;
 }
 
-static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 {
-	struct shmem_sb_info *sbinfo = SHMEM_SB(vfs->mnt_sb);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
 
 	if (sbinfo->max_blocks != shmem_default_max_blocks())
 		seq_printf(seq, ",size=%luk",
-- 
cgit v1.2.3


From 39f7c4db1d2d9e2e2a90abdf34811783089d217d Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 21 Nov 2011 12:11:30 +0100
Subject: vfs: keep list of mounts for each superblock

Keep track of vfsmounts belonging to a superblock.  List is protected
by vfsmount_lock.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Tested-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h         | 1 +
 fs/namespace.c     | 7 +++++++
 fs/super.c         | 2 ++
 include/linux/fs.h | 1 +
 4 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index 0921b51e27e2..4ef36d93e5a2 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -29,6 +29,7 @@ struct mount {
 #endif
 	struct list_head mnt_mounts;	/* list of children, anchored here */
 	struct list_head mnt_child;	/* and going through their mnt_child */
+	struct list_head mnt_instance;	/* mount instance on sb->s_mounts */
 	const char *mnt_devname;	/* Name of device e.g. /dev/dsk/hda1 */
 	struct list_head mnt_list;
 	struct list_head mnt_expire;	/* link in fs-specific expiry list */
diff --git a/fs/namespace.c b/fs/namespace.c
index db65e2e4921f..145217b088d1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -671,6 +671,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 	mnt->mnt.mnt_sb = root->d_sb;
 	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 	mnt->mnt_parent = mnt;
+	br_write_lock(vfsmount_lock);
+	list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
+	br_write_unlock(vfsmount_lock);
 	return &mnt->mnt;
 }
 EXPORT_SYMBOL_GPL(vfs_kern_mount);
@@ -699,6 +702,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 		mnt->mnt.mnt_root = dget(root);
 		mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 		mnt->mnt_parent = mnt;
+		br_write_lock(vfsmount_lock);
+		list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
+		br_write_unlock(vfsmount_lock);
 
 		if (flag & CL_SLAVE) {
 			list_add(&mnt->mnt_slave, &old->mnt_slave_list);
@@ -781,6 +787,7 @@ put_again:
 		acct_auto_close_mnt(&mnt->mnt);
 		goto put_again;
 	}
+	list_del(&mnt->mnt_instance);
 	br_write_unlock(vfsmount_lock);
 	mntfree(mnt);
 }
diff --git a/fs/super.c b/fs/super.c
index 0413f51a9f0f..993ca8f128d6 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -142,6 +142,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		INIT_LIST_HEAD(&s->s_dentry_lru);
 		INIT_LIST_HEAD(&s->s_inode_lru);
 		spin_lock_init(&s->s_inode_lru_lock);
+		INIT_LIST_HEAD(&s->s_mounts);
 		init_rwsem(&s->s_umount);
 		mutex_init(&s->s_lock);
 		lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -200,6 +201,7 @@ static inline void destroy_super(struct super_block *s)
 	free_percpu(s->s_files);
 #endif
 	security_sb_free(s);
+	WARN_ON(!list_empty(&s->s_mounts));
 	kfree(s->s_subtype);
 	kfree(s->s_options);
 	kfree(s);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cc1021fd19ef..03385acd71e8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1428,6 +1428,7 @@ struct super_block {
 #else
 	struct list_head	s_files;
 #endif
+	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
 	/* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
 	struct list_head	s_dentry_lru;	/* unused dentry lru */
 	int			s_nr_dentry_unused;	/* # of dentry on lru */
-- 
cgit v1.2.3


From 4ed5e82fe77f4147cf386327c9a63a2dd7eff518 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 21 Nov 2011 12:11:31 +0100
Subject: vfs: protect remounting superblock read-only

Currently remouting superblock read-only is racy in a major way.

With the per mount read-only infrastructure it is now possible to
prevent most races, which this patch attempts.

Before starting the remount read-only, iterate through all mounts
belonging to the superblock and if none of them have any pending
writes, set sb->s_readonly_remount.  This indicates that remount is in
progress and no further write requests are allowed.  If the remount
succeeds set MS_RDONLY and reset s_readonly_remount.

If the remounting is unsuccessful just reset s_readonly_remount.
This can result in transient EROFS errors, despite the fact the
remount failed.  Unfortunately hodling off writes is difficult as
remount itself may touch the filesystem (e.g. through load_nls())
which would deadlock.

A later patch deals with delayed writes due to nlink going to zero.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Tested-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h      |  1 +
 fs/namespace.c     | 40 +++++++++++++++++++++++++++++++++++++++-
 fs/super.c         | 22 ++++++++++++++++++----
 include/linux/fs.h |  3 +++
 4 files changed, 61 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/internal.h b/fs/internal.h
index 2523a4029452..9962c59ba280 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -52,6 +52,7 @@ extern int finish_automount(struct vfsmount *, struct path *);
 
 extern void mnt_make_longterm(struct vfsmount *);
 extern void mnt_make_shortterm(struct vfsmount *);
+extern int sb_prepare_remount_readonly(struct super_block *);
 
 extern void __init mnt_init(void);
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 145217b088d1..98ebc78b21ab 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -273,6 +273,15 @@ static unsigned int mnt_get_writers(struct mount *mnt)
 #endif
 }
 
+static int mnt_is_readonly(struct vfsmount *mnt)
+{
+	if (mnt->mnt_sb->s_readonly_remount)
+		return 1;
+	/* Order wrt setting s_flags/s_readonly_remount in do_remount() */
+	smp_rmb();
+	return __mnt_is_readonly(mnt);
+}
+
 /*
  * Most r/o checks on a fs are for operations that take
  * discrete amounts of time, like a write() or unlink().
@@ -312,7 +321,7 @@ int mnt_want_write(struct vfsmount *m)
 	 * MNT_WRITE_HOLD is cleared.
 	 */
 	smp_rmb();
-	if (__mnt_is_readonly(m)) {
+	if (mnt_is_readonly(m)) {
 		mnt_dec_writers(mnt);
 		ret = -EROFS;
 		goto out;
@@ -435,6 +444,35 @@ static void __mnt_unmake_readonly(struct mount *mnt)
 	br_write_unlock(vfsmount_lock);
 }
 
+int sb_prepare_remount_readonly(struct super_block *sb)
+{
+	struct mount *mnt;
+	int err = 0;
+
+	br_write_lock(vfsmount_lock);
+	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
+		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
+			mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
+			smp_mb();
+			if (mnt_get_writers(mnt) > 0) {
+				err = -EBUSY;
+				break;
+			}
+		}
+	}
+	if (!err) {
+		sb->s_readonly_remount = 1;
+		smp_wmb();
+	}
+	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
+		if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
+			mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
+	}
+	br_write_unlock(vfsmount_lock);
+
+	return err;
+}
+
 static void free_vfsmnt(struct mount *mnt)
 {
 	kfree(mnt->mnt_devname);
diff --git a/fs/super.c b/fs/super.c
index 993ca8f128d6..6acc02237e3e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -723,23 +723,33 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 	/* If we are remounting RDONLY and current sb is read/write,
 	   make sure there are no rw files opened */
 	if (remount_ro) {
-		if (force)
+		if (force) {
 			mark_files_ro(sb);
-		else if (!fs_may_remount_ro(sb))
-			return -EBUSY;
+		} else {
+			retval = sb_prepare_remount_readonly(sb);
+			if (retval)
+				return retval;
+
+			retval = -EBUSY;
+			if (!fs_may_remount_ro(sb))
+				goto cancel_readonly;
+		}
 	}
 
 	if (sb->s_op->remount_fs) {
 		retval = sb->s_op->remount_fs(sb, &flags, data);
 		if (retval) {
 			if (!force)
-				return retval;
+				goto cancel_readonly;
 			/* If forced remount, go ahead despite any errors */
 			WARN(1, "forced remount of a %s fs returned %i\n",
 			     sb->s_type->name, retval);
 		}
 	}
 	sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
+	/* Needs to be ordered wrt mnt_is_readonly() */
+	smp_wmb();
+	sb->s_readonly_remount = 0;
 
 	/*
 	 * Some filesystems modify their metadata via some other path than the
@@ -752,6 +762,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 	if (remount_ro && sb->s_bdev)
 		invalidate_bdev(sb->s_bdev);
 	return 0;
+
+cancel_readonly:
+	sb->s_readonly_remount = 0;
+	return retval;
 }
 
 static void do_emergency_remount(struct work_struct *work)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 03385acd71e8..7b8a681b1ef4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1482,6 +1482,9 @@ struct super_block {
 	int cleancache_poolid;
 
 	struct shrinker s_shrink;	/* per-sb shrinker handle */
+
+	/* Being remounted read-only */
+	int s_readonly_remount;
 };
 
 /* superblock cache pruning functions */
-- 
cgit v1.2.3


From 7ada4db88634429f4da690ad1c4eb73c93085f0c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 21 Nov 2011 12:11:32 +0100
Subject: vfs: count unlinked inodes

Add a new counter to the superblock that keeps track of unlinked but
not yet deleted inodes.

Do not WARN_ON if set_nlink is called with zero count, just do a
ratelimited printk.  This happens on xfs and probably other
filesystems after an unclean shutdown when the filesystem reads inodes
which already have zero i_nlink.  Reported by Christoph Hellwig.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Tested-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c         | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h | 61 +++++----------------------------------
 2 files changed, 92 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 961355d00e38..87535753ab04 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -26,6 +26,7 @@
 #include <linux/ima.h>
 #include <linux/cred.h>
 #include <linux/buffer_head.h> /* for inode_has_buffers */
+#include <linux/ratelimit.h>
 #include "internal.h"
 
 /*
@@ -242,6 +243,11 @@ void __destroy_inode(struct inode *inode)
 	BUG_ON(inode_has_buffers(inode));
 	security_inode_free(inode);
 	fsnotify_inode_delete(inode);
+	if (!inode->i_nlink) {
+		WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
+		atomic_long_dec(&inode->i_sb->s_remove_count);
+	}
+
 #ifdef CONFIG_FS_POSIX_ACL
 	if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
 		posix_acl_release(inode->i_acl);
@@ -268,6 +274,85 @@ static void destroy_inode(struct inode *inode)
 		call_rcu(&inode->i_rcu, i_callback);
 }
 
+/**
+ * drop_nlink - directly drop an inode's link count
+ * @inode: inode
+ *
+ * This is a low-level filesystem helper to replace any
+ * direct filesystem manipulation of i_nlink.  In cases
+ * where we are attempting to track writes to the
+ * filesystem, a decrement to zero means an imminent
+ * write when the file is truncated and actually unlinked
+ * on the filesystem.
+ */
+void drop_nlink(struct inode *inode)
+{
+	WARN_ON(inode->i_nlink == 0);
+	inode->__i_nlink--;
+	if (!inode->i_nlink)
+		atomic_long_inc(&inode->i_sb->s_remove_count);
+}
+EXPORT_SYMBOL(drop_nlink);
+
+/**
+ * clear_nlink - directly zero an inode's link count
+ * @inode: inode
+ *
+ * This is a low-level filesystem helper to replace any
+ * direct filesystem manipulation of i_nlink.  See
+ * drop_nlink() for why we care about i_nlink hitting zero.
+ */
+void clear_nlink(struct inode *inode)
+{
+	if (inode->i_nlink) {
+		inode->__i_nlink = 0;
+		atomic_long_inc(&inode->i_sb->s_remove_count);
+	}
+}
+EXPORT_SYMBOL(clear_nlink);
+
+/**
+ * set_nlink - directly set an inode's link count
+ * @inode: inode
+ * @nlink: new nlink (should be non-zero)
+ *
+ * This is a low-level filesystem helper to replace any
+ * direct filesystem manipulation of i_nlink.
+ */
+void set_nlink(struct inode *inode, unsigned int nlink)
+{
+	if (!nlink) {
+		printk_ratelimited(KERN_INFO
+			"set_nlink() clearing i_nlink on %s inode %li\n",
+			inode->i_sb->s_type->name, inode->i_ino);
+		clear_nlink(inode);
+	} else {
+		/* Yes, some filesystems do change nlink from zero to one */
+		if (inode->i_nlink == 0)
+			atomic_long_dec(&inode->i_sb->s_remove_count);
+
+		inode->__i_nlink = nlink;
+	}
+}
+EXPORT_SYMBOL(set_nlink);
+
+/**
+ * inc_nlink - directly increment an inode's link count
+ * @inode: inode
+ *
+ * This is a low-level filesystem helper to replace any
+ * direct filesystem manipulation of i_nlink.  Currently,
+ * it is only here for parity with dec_nlink().
+ */
+void inc_nlink(struct inode *inode)
+{
+	if (WARN_ON(inode->i_nlink == 0))
+		atomic_long_dec(&inode->i_sb->s_remove_count);
+
+	inode->__i_nlink++;
+}
+EXPORT_SYMBOL(inc_nlink);
+
 void address_space_init_once(struct address_space *mapping)
 {
 	memset(mapping, 0, sizeof(*mapping));
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7b8a681b1ef4..8ac40921f5ac 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1483,6 +1483,9 @@ struct super_block {
 
 	struct shrinker s_shrink;	/* per-sb shrinker handle */
 
+	/* Number of inodes with nlink == 0 but still referenced */
+	atomic_long_t s_remove_count;
+
 	/* Being remounted read-only */
 	int s_readonly_remount;
 };
@@ -1768,31 +1771,10 @@ static inline void mark_inode_dirty_sync(struct inode *inode)
 	__mark_inode_dirty(inode, I_DIRTY_SYNC);
 }
 
-/**
- * set_nlink - directly set an inode's link count
- * @inode: inode
- * @nlink: new nlink (should be non-zero)
- *
- * This is a low-level filesystem helper to replace any
- * direct filesystem manipulation of i_nlink.
- */
-static inline void set_nlink(struct inode *inode, unsigned int nlink)
-{
-	inode->__i_nlink = nlink;
-}
-
-/**
- * inc_nlink - directly increment an inode's link count
- * @inode: inode
- *
- * This is a low-level filesystem helper to replace any
- * direct filesystem manipulation of i_nlink.  Currently,
- * it is only here for parity with dec_nlink().
- */
-static inline void inc_nlink(struct inode *inode)
-{
-	inode->__i_nlink++;
-}
+extern void inc_nlink(struct inode *inode);
+extern void drop_nlink(struct inode *inode);
+extern void clear_nlink(struct inode *inode);
+extern void set_nlink(struct inode *inode, unsigned int nlink);
 
 static inline void inode_inc_link_count(struct inode *inode)
 {
@@ -1800,35 +1782,6 @@ static inline void inode_inc_link_count(struct inode *inode)
 	mark_inode_dirty(inode);
 }
 
-/**
- * drop_nlink - directly drop an inode's link count
- * @inode: inode
- *
- * This is a low-level filesystem helper to replace any
- * direct filesystem manipulation of i_nlink.  In cases
- * where we are attempting to track writes to the
- * filesystem, a decrement to zero means an imminent
- * write when the file is truncated and actually unlinked
- * on the filesystem.
- */
-static inline void drop_nlink(struct inode *inode)
-{
-	inode->__i_nlink--;
-}
-
-/**
- * clear_nlink - directly zero an inode's link count
- * @inode: inode
- *
- * This is a low-level filesystem helper to replace any
- * direct filesystem manipulation of i_nlink.  See
- * drop_nlink() for why we care about i_nlink hitting zero.
- */
-static inline void clear_nlink(struct inode *inode)
-{
-	inode->__i_nlink = 0;
-}
-
 static inline void inode_dec_link_count(struct inode *inode)
 {
 	drop_nlink(inode);
-- 
cgit v1.2.3


From 8e8b87964bc8dc5c14b6543fc933b7725f07d3ac Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 21 Nov 2011 12:11:33 +0100
Subject: vfs: prevent remount read-only if pending removes

If there are any inodes on the super block that have been unlinked
(i_nlink == 0) but have not yet been deleted then prevent the
remounting the super block read-only.

Reported-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Tested-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c    | 23 -----------------------
 fs/namespace.c     |  7 +++++++
 fs/super.c         |  4 ----
 include/linux/fs.h |  2 --
 4 files changed, 7 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index c322794f7360..20002e39754d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -474,29 +474,6 @@ void file_sb_list_del(struct file *file)
 
 #endif
 
-int fs_may_remount_ro(struct super_block *sb)
-{
-	struct file *file;
-	/* Check that no files are currently opened for writing. */
-	lg_global_lock(files_lglock);
-	do_file_list_for_each_entry(sb, file) {
-		struct inode *inode = file->f_path.dentry->d_inode;
-
-		/* File with pending delete? */
-		if (inode->i_nlink == 0)
-			goto too_bad;
-
-		/* Writeable file? */
-		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
-			goto too_bad;
-	} while_file_list_for_each_entry;
-	lg_global_unlock(files_lglock);
-	return 1; /* Tis' cool bro. */
-too_bad:
-	lg_global_unlock(files_lglock);
-	return 0;
-}
-
 /**
  *	mark_files_ro - mark all files read-only
  *	@sb: superblock in question
diff --git a/fs/namespace.c b/fs/namespace.c
index 98ebc78b21ab..7e6f2c9dc7c4 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -449,6 +449,10 @@ int sb_prepare_remount_readonly(struct super_block *sb)
 	struct mount *mnt;
 	int err = 0;
 
+	/* Racy optimization.  Recheck the counter under MNT_WRITE_HOLD */
+	if (atomic_long_read(&sb->s_remove_count))
+		return -EBUSY;
+
 	br_write_lock(vfsmount_lock);
 	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
 		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
@@ -460,6 +464,9 @@ int sb_prepare_remount_readonly(struct super_block *sb)
 			}
 		}
 	}
+	if (!err && atomic_long_read(&sb->s_remove_count))
+		err = -EBUSY;
+
 	if (!err) {
 		sb->s_readonly_remount = 1;
 		smp_wmb();
diff --git a/fs/super.c b/fs/super.c
index 6acc02237e3e..de41e1e46f09 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -729,10 +729,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 			retval = sb_prepare_remount_readonly(sb);
 			if (retval)
 				return retval;
-
-			retval = -EBUSY;
-			if (!fs_may_remount_ro(sb))
-				goto cancel_readonly;
 		}
 	}
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8ac40921f5ac..7aacf31418fe 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2150,8 +2150,6 @@ extern const struct file_operations read_pipefifo_fops;
 extern const struct file_operations write_pipefifo_fops;
 extern const struct file_operations rdwr_pipefifo_fops;
 
-extern int fs_may_remount_ro(struct super_block *);
-
 #ifdef CONFIG_BLOCK
 /*
  * return READ, READA, or WRITE
-- 
cgit v1.2.3


From c3aa077648e147783a7a53b409578234647db853 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 21 Dec 2011 20:17:10 +0100
Subject: reiserfs: Properly display mount options in /proc/mounts

Make reiserfs properly display mount options in /proc/mounts.

CC: reiserfs-devel@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/reiserfs/bitmap.c           |  91 ++++++++++++++++++++++++++++++++
 fs/reiserfs/super.c            | 116 +++++++++++++++++++++++++++++++++++++----
 include/linux/reiserfs_fs.h    |   7 +--
 include/linux/reiserfs_fs_sb.h |   4 +-
 4 files changed, 204 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index d1aca1df4f92..a945cd265228 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -13,6 +13,7 @@
 #include <linux/reiserfs_fs_sb.h>
 #include <linux/reiserfs_fs_i.h>
 #include <linux/quotaops.h>
+#include <linux/seq_file.h>
 
 #define PREALLOCATION_SIZE 9
 
@@ -634,6 +635,96 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options)
 	return 0;
 }
 
+static void print_sep(struct seq_file *seq, int *first)
+{
+	if (!*first)
+		seq_puts(seq, ":");
+	else
+		*first = 0;
+}
+
+void show_alloc_options(struct seq_file *seq, struct super_block *s)
+{
+	int first = 1;
+
+	if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
+		(1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
+		return;
+
+	seq_puts(seq, ",alloc=");
+
+	if (TEST_OPTION(concentrating_formatted_nodes, s)) {
+		print_sep(seq, &first);
+		if (REISERFS_SB(s)->s_alloc_options.border != 10) {
+			seq_printf(seq, "concentrating_formatted_nodes=%d",
+				100 / REISERFS_SB(s)->s_alloc_options.border);
+		} else
+			seq_puts(seq, "concentrating_formatted_nodes");
+	}
+	if (TEST_OPTION(displacing_large_files, s)) {
+		print_sep(seq, &first);
+		if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
+			seq_printf(seq, "displacing_large_files=%lu",
+			    REISERFS_SB(s)->s_alloc_options.large_file_size);
+		} else
+			seq_puts(seq, "displacing_large_files");
+	}
+	if (TEST_OPTION(displacing_new_packing_localities, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "displacing_new_packing_localities");
+	}
+	if (TEST_OPTION(old_hashed_relocation, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "old_hashed_relocation");
+	}
+	if (TEST_OPTION(new_hashed_relocation, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "new_hashed_relocation");
+	}
+	if (TEST_OPTION(dirid_groups, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "dirid_groups");
+	}
+	if (TEST_OPTION(oid_groups, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "oid_groups");
+	}
+	if (TEST_OPTION(packing_groups, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "packing_groups");
+	}
+	if (TEST_OPTION(hashed_formatted_nodes, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "hashed_formatted_nodes");
+	}
+	if (TEST_OPTION(skip_busy, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "skip_busy");
+	}
+	if (TEST_OPTION(hundredth_slices, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "hundredth_slices");
+	}
+	if (TEST_OPTION(old_way, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "old_way");
+	}
+	if (TEST_OPTION(displace_based_on_dirid, s)) {
+		print_sep(seq, &first);
+		seq_puts(seq, "displace_based_on_dirid");
+	}
+	if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
+		print_sep(seq, &first);
+		seq_printf(seq, "preallocmin=%d",
+				REISERFS_SB(s)->s_alloc_options.preallocmin);
+	}
+	if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
+		print_sep(seq, &first);
+		seq_printf(seq, "preallocsize=%d",
+				REISERFS_SB(s)->s_alloc_options.preallocsize);
+	}
+}
+
 static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 {
 	char *hash_in;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1abffa451529..19c454e61b79 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -28,6 +28,7 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/crc32.h>
+#include <linux/seq_file.h>
 
 struct file_system_type reiserfs_fs_type;
 
@@ -61,6 +62,7 @@ static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs)
 
 static int reiserfs_remount(struct super_block *s, int *flags, char *data);
 static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf);
+void show_alloc_options(struct seq_file *seq, struct super_block *s);
 
 static int reiserfs_sync_fs(struct super_block *s, int wait)
 {
@@ -596,6 +598,82 @@ out:
 	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
 }
 
+static int reiserfs_show_options(struct seq_file *seq, struct dentry *root)
+{
+	struct super_block *s = root->d_sb;
+	struct reiserfs_journal *journal = SB_JOURNAL(s);
+	long opts = REISERFS_SB(s)->s_mount_opt;
+
+	if (opts & (1 << REISERFS_LARGETAIL))
+		seq_puts(seq, ",tails=on");
+	else if (!(opts & (1 << REISERFS_SMALLTAIL)))
+		seq_puts(seq, ",notail");
+	/* tails=small is default so we don't show it */
+
+	if (!(opts & (1 << REISERFS_BARRIER_FLUSH)))
+		seq_puts(seq, ",barrier=none");
+	/* barrier=flush is default so we don't show it */
+
+	if (opts & (1 << REISERFS_ERROR_CONTINUE))
+		seq_puts(seq, ",errors=continue");
+	else if (opts & (1 << REISERFS_ERROR_PANIC))
+		seq_puts(seq, ",errors=panic");
+	/* errors=ro is default so we don't show it */
+
+	if (opts & (1 << REISERFS_DATA_LOG))
+		seq_puts(seq, ",data=journal");
+	else if (opts & (1 << REISERFS_DATA_WRITEBACK))
+		seq_puts(seq, ",data=writeback");
+	/* data=ordered is default so we don't show it */
+
+	if (opts & (1 << REISERFS_ATTRS))
+		seq_puts(seq, ",attrs");
+
+	if (opts & (1 << REISERFS_XATTRS_USER))
+		seq_puts(seq, ",user_xattr");
+
+	if (opts & (1 << REISERFS_EXPOSE_PRIVROOT))
+		seq_puts(seq, ",expose_privroot");
+
+	if (opts & (1 << REISERFS_POSIXACL))
+		seq_puts(seq, ",acl");
+
+	if (REISERFS_SB(s)->s_jdev)
+		seq_printf(seq, ",jdev=%s", REISERFS_SB(s)->s_jdev);
+
+	if (journal->j_max_commit_age != journal->j_default_max_commit_age)
+		seq_printf(seq, ",commit=%d", journal->j_max_commit_age);
+
+#ifdef CONFIG_QUOTA
+	if (REISERFS_SB(s)->s_qf_names[USRQUOTA])
+		seq_printf(seq, ",usrjquota=%s", REISERFS_SB(s)->s_qf_names[USRQUOTA]);
+	else if (opts & (1 << REISERFS_USRQUOTA))
+		seq_puts(seq, ",usrquota");
+	if (REISERFS_SB(s)->s_qf_names[GRPQUOTA])
+		seq_printf(seq, ",grpjquota=%s", REISERFS_SB(s)->s_qf_names[GRPQUOTA]);
+	else if (opts & (1 << REISERFS_GRPQUOTA))
+		seq_puts(seq, ",grpquota");
+	if (REISERFS_SB(s)->s_jquota_fmt) {
+		if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_OLD)
+			seq_puts(seq, ",jqfmt=vfsold");
+		else if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_V0)
+			seq_puts(seq, ",jqfmt=vfsv0");
+	}
+#endif
+
+	/* Block allocator options */
+	if (opts & (1 << REISERFS_NO_BORDER))
+		seq_puts(seq, ",block-allocator=noborder");
+	if (opts & (1 << REISERFS_NO_UNHASHED_RELOCATION))
+		seq_puts(seq, ",block-allocator=no_unhashed_relocation");
+	if (opts & (1 << REISERFS_HASHED_RELOCATION))
+		seq_puts(seq, ",block-allocator=hashed_relocation");
+	if (opts & (1 << REISERFS_TEST4))
+		seq_puts(seq, ",block-allocator=test4");
+	show_alloc_options(seq, s);
+	return 0;
+}
+
 #ifdef CONFIG_QUOTA
 static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
 				    size_t, loff_t);
@@ -616,7 +694,7 @@ static const struct super_operations reiserfs_sops = {
 	.unfreeze_fs = reiserfs_unfreeze,
 	.statfs = reiserfs_statfs,
 	.remount_fs = reiserfs_remount,
-	.show_options = generic_show_options,
+	.show_options = reiserfs_show_options,
 #ifdef CONFIG_QUOTA
 	.quota_read = reiserfs_quota_read,
 	.quota_write = reiserfs_quota_write,
@@ -914,9 +992,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 		{"jdev",.arg_required = 'j',.values = NULL},
 		{"nolargeio",.arg_required = 'w',.values = NULL},
 		{"commit",.arg_required = 'c',.values = NULL},
-		{"usrquota",.setmask = 1 << REISERFS_QUOTA},
-		{"grpquota",.setmask = 1 << REISERFS_QUOTA},
-		{"noquota",.clrmask = 1 << REISERFS_QUOTA},
+		{"usrquota",.setmask = 1 << REISERFS_USRQUOTA},
+		{"grpquota",.setmask = 1 << REISERFS_GRPQUOTA},
+		{"noquota",.clrmask = 1 << REISERFS_USRQUOTA | 1 << REISERFS_GRPQUOTA},
 		{"errors",.arg_required = 'e',.values = error_actions},
 		{"usrjquota",.arg_required =
 		 'u' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL},
@@ -1030,12 +1108,19 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 					return 0;
 				}
 				strcpy(qf_names[qtype], arg);
-				*mount_options |= 1 << REISERFS_QUOTA;
+				if (qtype == USRQUOTA)
+					*mount_options |= 1 << REISERFS_USRQUOTA;
+				else
+					*mount_options |= 1 << REISERFS_GRPQUOTA;
 			} else {
 				if (qf_names[qtype] !=
 				    REISERFS_SB(s)->s_qf_names[qtype])
 					kfree(qf_names[qtype]);
 				qf_names[qtype] = NULL;
+				if (qtype == USRQUOTA)
+					*mount_options &= ~(1 << REISERFS_USRQUOTA);
+				else
+					*mount_options &= ~(1 << REISERFS_GRPQUOTA);
 			}
 		}
 		if (c == 'f') {
@@ -1074,9 +1159,10 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 				 "journaled quota format not specified.");
 		return 0;
 	}
-	/* This checking is not precise wrt the quota type but for our purposes it is sufficient */
-	if (!(*mount_options & (1 << REISERFS_QUOTA))
-	    && sb_any_quota_loaded(s)) {
+	if ((!(*mount_options & (1 << REISERFS_USRQUOTA)) &&
+	       sb_has_quota_loaded(s, USRQUOTA)) ||
+	    (!(*mount_options & (1 << REISERFS_GRPQUOTA)) &&
+	       sb_has_quota_loaded(s, GRPQUOTA))) {
 		reiserfs_warning(s, "super-6516", "quota options must "
 				 "be present when quota is turned on.");
 		return 0;
@@ -1224,7 +1310,8 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 	safe_mask |= 1 << REISERFS_ERROR_RO;
 	safe_mask |= 1 << REISERFS_ERROR_CONTINUE;
 	safe_mask |= 1 << REISERFS_ERROR_PANIC;
-	safe_mask |= 1 << REISERFS_QUOTA;
+	safe_mask |= 1 << REISERFS_USRQUOTA;
+	safe_mask |= 1 << REISERFS_GRPQUOTA;
 
 	/* Update the bitmask, taking care to keep
 	 * the bits we're not allowed to change here */
@@ -1671,6 +1758,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 	     &commit_max_age, qf_names, &qfmt) == 0) {
 		goto error;
 	}
+	if (jdev_name && jdev_name[0]) {
+		REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL);
+		if (!REISERFS_SB(s)->s_jdev) {
+			SWARN(silent, s, "", "Cannot allocate memory for "
+				"journal device name");
+			goto error;
+		}
+	}
 #ifdef CONFIG_QUOTA
 	handle_quota_files(s, qf_names, &qfmt);
 #endif
@@ -2053,8 +2148,9 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 	int err;
 	struct inode *inode;
 	struct reiserfs_transaction_handle th;
+	int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;
 
-	if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
+	if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt)))
 		return -EINVAL;
 
 	/* Quotafile not on the same filesystem? */
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 26be28fd7b76..2213ddcce20c 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -1759,13 +1759,14 @@ struct reiserfs_journal_header {
 					      REISERFS_QUOTA_TRANS_BLOCKS(sb)))
 
 #ifdef CONFIG_QUOTA
+#define REISERFS_QUOTA_OPTS ((1 << REISERFS_USRQUOTA) | (1 << REISERFS_GRPQUOTA))
 /* We need to update data and inode (atime) */
-#define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & (1<<REISERFS_QUOTA) ? 2 : 0)
+#define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? 2 : 0)
 /* 1 balancing, 1 bitmap, 1 data per write + stat data update */
-#define REISERFS_QUOTA_INIT_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & (1<<REISERFS_QUOTA) ? \
+#define REISERFS_QUOTA_INIT_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \
 (DQUOT_INIT_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_INIT_REWRITE+1) : 0)
 /* same as with INIT */
-#define REISERFS_QUOTA_DEL_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & (1<<REISERFS_QUOTA) ? \
+#define REISERFS_QUOTA_DEL_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \
 (DQUOT_DEL_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_DEL_REWRITE+1) : 0)
 #else
 #define REISERFS_QUOTA_TRANS_BLOCKS(s) 0
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 52c83b6a758a..8c9e85c64b46 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -417,6 +417,7 @@ struct reiserfs_sb_info {
 	char *s_qf_names[MAXQUOTAS];
 	int s_jquota_fmt;
 #endif
+	char *s_jdev;		/* Stored jdev for mount option showing */
 #ifdef CONFIG_REISERFS_CHECK
 
 	struct tree_balance *cur_tb;	/*
@@ -482,7 +483,8 @@ enum reiserfs_mount_options {
 	REISERFS_ERROR_RO,
 	REISERFS_ERROR_CONTINUE,
 
-	REISERFS_QUOTA,		/* Some quota option specified */
+	REISERFS_USRQUOTA,	/* User quota option specified */
+	REISERFS_GRPQUOTA,	/* Group quota option specified */
 
 	REISERFS_TEST1,
 	REISERFS_TEST2,
-- 
cgit v1.2.3


From 6c06108be53ca5e94d8b0e93883d534dd9079646 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 5 Jan 2012 02:27:57 -0300
Subject: [media] V4L/DVB: v4l2-ioctl: integer overflow in video_usercopy()

If ctrls->count is too high the multiplication could overflow and
array_size would be lower than expected.  Mauro and Hans Verkuil
suggested that we cap it at 1024.  That comes from the maximum
number of controls with lots of room for expantion.

$ grep V4L2_CID include/linux/videodev2.h | wc -l
211

Cc: stable <stable@vger.kernel.org>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-ioctl.c | 4 ++++
 include/linux/videodev2.h        | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index e1da8fc9dd2f..639abeee3392 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -2226,6 +2226,10 @@ static int check_array_args(unsigned int cmd, void *parg, size_t *array_size,
 		struct v4l2_ext_controls *ctrls = parg;
 
 		if (ctrls->count != 0) {
+			if (ctrls->count > V4L2_CID_MAX_CTRLS) {
+				ret = -EINVAL;
+				break;
+			}
 			*user_ptr = (void __user *)ctrls->controls;
 			*kernel_ptr = (void *)&ctrls->controls;
 			*array_size = sizeof(struct v4l2_ext_control)
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 6bfaa767a817..b2e1331ca76b 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1132,6 +1132,7 @@ struct v4l2_querymenu {
 #define V4L2_CTRL_FLAG_NEXT_CTRL	0x80000000
 
 /*  User-class control IDs defined by V4L2 */
+#define V4L2_CID_MAX_CTRLS		1024
 #define V4L2_CID_BASE			(V4L2_CTRL_CLASS_USER | 0x900)
 #define V4L2_CID_USER_BASE 		V4L2_CID_BASE
 /*  IDs reserved for driver specific controls */
-- 
cgit v1.2.3


From 6926afd1925a54a13684ebe05987868890665e2b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 7 Jan 2012 13:22:46 -0500
Subject: NFSv4: Save the owner/group name string when doing open

...so that we can do the uid/gid mapping outside the asynchronous RPC
context.
This fixes a bug in the current NFSv4 atomic open code where the client
isn't able to determine what the true uid/gid fields of the file are,
(because the asynchronous nature of the OPEN call denies it the ability
to do an upcall) and so fills them with default values, marking the
inode as needing revalidation.
Unfortunately, in some cases, the VFS will do some additional sanity
checks on the file, and may override the server's decision to allow
the open because it sees the wrong owner/group fields.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/idmap.c            |  83 ++++++++++++++++++++++++++++++++++++
 fs/nfs/inode.c            |   2 +
 fs/nfs/nfs4proc.c         |  10 +++++
 fs/nfs/nfs4xdr.c          | 106 ++++++++++++++++++++--------------------------
 include/linux/nfs_idmap.h |   8 ++++
 include/linux/nfs_xdr.h   |  17 +++++---
 6 files changed, 162 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 47d1c6ff2d8e..2c05f1991e1e 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -38,6 +38,89 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/nfs_idmap.h>
+#include <linux/nfs_fs.h>
+
+/**
+ * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
+ * @fattr: fully initialised struct nfs_fattr
+ * @owner_name: owner name string cache
+ * @group_name: group name string cache
+ */
+void nfs_fattr_init_names(struct nfs_fattr *fattr,
+		struct nfs4_string *owner_name,
+		struct nfs4_string *group_name)
+{
+	fattr->owner_name = owner_name;
+	fattr->group_name = group_name;
+}
+
+static void nfs_fattr_free_owner_name(struct nfs_fattr *fattr)
+{
+	fattr->valid &= ~NFS_ATTR_FATTR_OWNER_NAME;
+	kfree(fattr->owner_name->data);
+}
+
+static void nfs_fattr_free_group_name(struct nfs_fattr *fattr)
+{
+	fattr->valid &= ~NFS_ATTR_FATTR_GROUP_NAME;
+	kfree(fattr->group_name->data);
+}
+
+static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+	struct nfs4_string *owner = fattr->owner_name;
+	__u32 uid;
+
+	if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME))
+		return false;
+	if (nfs_map_name_to_uid(server, owner->data, owner->len, &uid) == 0) {
+		fattr->uid = uid;
+		fattr->valid |= NFS_ATTR_FATTR_OWNER;
+	}
+	return true;
+}
+
+static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+	struct nfs4_string *group = fattr->group_name;
+	__u32 gid;
+
+	if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME))
+		return false;
+	if (nfs_map_group_to_gid(server, group->data, group->len, &gid) == 0) {
+		fattr->gid = gid;
+		fattr->valid |= NFS_ATTR_FATTR_GROUP;
+	}
+	return true;
+}
+
+/**
+ * nfs_fattr_free_names - free up the NFSv4 owner and group strings
+ * @fattr: a fully initialised nfs_fattr structure
+ */
+void nfs_fattr_free_names(struct nfs_fattr *fattr)
+{
+	if (fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)
+		nfs_fattr_free_owner_name(fattr);
+	if (fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)
+		nfs_fattr_free_group_name(fattr);
+}
+
+/**
+ * nfs_fattr_map_and_free_names - map owner/group strings into uid/gid and free
+ * @server: pointer to the filesystem nfs_server structure
+ * @fattr: a fully initialised nfs_fattr structure
+ *
+ * This helper maps the cached NFSv4 owner/group strings in fattr into
+ * their numeric uid/gid equivalents, and then frees the cached strings.
+ */
+void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+	if (nfs_fattr_map_owner_name(server, fattr))
+		nfs_fattr_free_owner_name(fattr);
+	if (nfs_fattr_map_group_name(server, fattr))
+		nfs_fattr_free_group_name(fattr);
+}
 
 static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
 {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 50a15fa8cf98..f59cab12a8ee 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1019,6 +1019,8 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
 	fattr->valid = 0;
 	fattr->time_start = jiffies;
 	fattr->gencount = nfs_inc_attr_generation_counter();
+	fattr->owner_name = NULL;
+	fattr->group_name = NULL;
 }
 
 struct nfs_fattr *nfs_alloc_fattr(void)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3b1080118452..df3d3068242e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -52,6 +52,7 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/module.h>
+#include <linux/nfs_idmap.h>
 #include <linux/sunrpc/bc_xprt.h>
 #include <linux/xattr.h>
 #include <linux/utsname.h>
@@ -760,6 +761,8 @@ struct nfs4_opendata {
 	struct nfs_openres o_res;
 	struct nfs_open_confirmargs c_arg;
 	struct nfs_open_confirmres c_res;
+	struct nfs4_string owner_name;
+	struct nfs4_string group_name;
 	struct nfs_fattr f_attr;
 	struct nfs_fattr dir_attr;
 	struct dentry *dir;
@@ -783,6 +786,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
 	p->o_res.server = p->o_arg.server;
 	nfs_fattr_init(&p->f_attr);
 	nfs_fattr_init(&p->dir_attr);
+	nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);
 }
 
 static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
@@ -814,6 +818,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	p->o_arg.name = &dentry->d_name;
 	p->o_arg.server = server;
 	p->o_arg.bitmask = server->attr_bitmask;
+	p->o_arg.dir_bitmask = server->cache_consistency_bitmask;
 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
 	if (flags & O_CREAT) {
 		u32 *s;
@@ -850,6 +855,7 @@ static void nfs4_opendata_free(struct kref *kref)
 	dput(p->dir);
 	dput(p->dentry);
 	nfs_sb_deactive(sb);
+	nfs_fattr_free_names(&p->f_attr);
 	kfree(p);
 }
 
@@ -1574,6 +1580,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
 	if (status != 0 || !data->rpc_done)
 		return status;
 
+	nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr);
+
 	nfs_refresh_inode(dir, o_res->dir_attr);
 
 	if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
@@ -1606,6 +1614,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		return status;
 	}
 
+	nfs_fattr_map_and_free_names(server, &data->f_attr);
+
 	if (o_arg->open_flags & O_CREAT) {
 		update_changeattr(dir, &o_res->cinfo);
 		nfs_post_op_update_inode(dir, o_res->dir_attr);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index dcaf69309d8e..95e92e438407 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2298,7 +2298,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_getfh(xdr, &hdr);
 	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_restorefh(xdr, &hdr);
-	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_getfattr(xdr, args->dir_bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -3792,7 +3792,8 @@ out_overflow:
 }
 
 static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
-		const struct nfs_server *server, uint32_t *uid, int may_sleep)
+		const struct nfs_server *server, uint32_t *uid,
+		struct nfs4_string *owner_name)
 {
 	uint32_t len;
 	__be32 *p;
@@ -3809,8 +3810,12 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
 		p = xdr_inline_decode(xdr, len);
 		if (unlikely(!p))
 			goto out_overflow;
-		if (!may_sleep) {
-			/* do nothing */
+		if (owner_name != NULL) {
+			owner_name->data = kmemdup(p, len, GFP_NOWAIT);
+			if (owner_name->data != NULL) {
+				owner_name->len = len;
+				ret = NFS_ATTR_FATTR_OWNER_NAME;
+			}
 		} else if (len < XDR_MAX_NETOBJ) {
 			if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0)
 				ret = NFS_ATTR_FATTR_OWNER;
@@ -3830,7 +3835,8 @@ out_overflow:
 }
 
 static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
-		const struct nfs_server *server, uint32_t *gid, int may_sleep)
+		const struct nfs_server *server, uint32_t *gid,
+		struct nfs4_string *group_name)
 {
 	uint32_t len;
 	__be32 *p;
@@ -3847,8 +3853,12 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
 		p = xdr_inline_decode(xdr, len);
 		if (unlikely(!p))
 			goto out_overflow;
-		if (!may_sleep) {
-			/* do nothing */
+		if (group_name != NULL) {
+			group_name->data = kmemdup(p, len, GFP_NOWAIT);
+			if (group_name->data != NULL) {
+				group_name->len = len;
+				ret = NFS_ATTR_FATTR_GROUP_NAME;
+			}
 		} else if (len < XDR_MAX_NETOBJ) {
 			if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0)
 				ret = NFS_ATTR_FATTR_GROUP;
@@ -4285,7 +4295,7 @@ xdr_error:
 
 static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
 		struct nfs_fattr *fattr, struct nfs_fh *fh,
-		const struct nfs_server *server, int may_sleep)
+		const struct nfs_server *server)
 {
 	int status;
 	umode_t fmode = 0;
@@ -4352,12 +4362,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
 		goto xdr_error;
 	fattr->valid |= status;
 
-	status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, may_sleep);
+	status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, fattr->owner_name);
 	if (status < 0)
 		goto xdr_error;
 	fattr->valid |= status;
 
-	status = decode_attr_group(xdr, bitmap, server, &fattr->gid, may_sleep);
+	status = decode_attr_group(xdr, bitmap, server, &fattr->gid, fattr->group_name);
 	if (status < 0)
 		goto xdr_error;
 	fattr->valid |= status;
@@ -4398,7 +4408,7 @@ xdr_error:
 }
 
 static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr,
-		struct nfs_fh *fh, const struct nfs_server *server, int may_sleep)
+		struct nfs_fh *fh, const struct nfs_server *server)
 {
 	__be32 *savep;
 	uint32_t attrlen,
@@ -4417,7 +4427,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat
 	if (status < 0)
 		goto xdr_error;
 
-	status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server, may_sleep);
+	status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server);
 	if (status < 0)
 		goto xdr_error;
 
@@ -4428,9 +4438,9 @@ xdr_error:
 }
 
 static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
-		const struct nfs_server *server, int may_sleep)
+		const struct nfs_server *server)
 {
-	return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep);
+	return decode_getfattr_generic(xdr, fattr, NULL, server);
 }
 
 /*
@@ -5711,8 +5721,7 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
 	status = decode_open_downgrade(xdr, res);
 	if (status != 0)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -5738,8 +5747,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_access(xdr, res);
 	if (status != 0)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -5768,8 +5776,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_getfh(xdr, res->fh);
 	if (status)
 		goto out;
-	status = decode_getfattr(xdr, res->fattr, res->server
-			,!RPC_IS_ASYNC(rqstp->rq_task));
+	status = decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -5795,8 +5802,7 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
 		goto out;
 	status = decode_getfh(xdr, res->fh);
 	if (status == 0)
-		status = decode_getfattr(xdr, res->fattr, res->server,
-				!RPC_IS_ASYNC(rqstp->rq_task));
+		status = decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -5822,8 +5828,7 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_remove(xdr, &res->cinfo);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->dir_attr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->dir_attr, res->server);
 out:
 	return status;
 }
@@ -5856,14 +5861,12 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	if (status)
 		goto out;
 	/* Current FH is target directory */
-	if (decode_getfattr(xdr, res->new_fattr, res->server,
-				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
+	if (decode_getfattr(xdr, res->new_fattr, res->server))
 		goto out;
 	status = decode_restorefh(xdr);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->old_fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->old_fattr, res->server);
 out:
 	return status;
 }
@@ -5899,14 +5902,12 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	 * Note order: OP_LINK leaves the directory as the current
 	 *             filehandle.
 	 */
-	if (decode_getfattr(xdr, res->dir_attr, res->server,
-				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
+	if (decode_getfattr(xdr, res->dir_attr, res->server))
 		goto out;
 	status = decode_restorefh(xdr);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -5938,14 +5939,12 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_getfh(xdr, res->fh);
 	if (status)
 		goto out;
-	if (decode_getfattr(xdr, res->fattr, res->server,
-				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
+	if (decode_getfattr(xdr, res->fattr, res->server))
 		goto out;
 	status = decode_restorefh(xdr);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->dir_fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->dir_fattr, res->server);
 out:
 	return status;
 }
@@ -5977,8 +5976,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	status = decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -6076,8 +6074,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	 * 	an ESTALE error. Shouldn't be a problem,
 	 * 	though, since fattr->valid will remain unset.
 	 */
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -6108,13 +6105,11 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		goto out;
 	if (decode_getfh(xdr, &res->fh) != 0)
 		goto out;
-	if (decode_getfattr(xdr, res->f_attr, res->server,
-				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
+	if (decode_getfattr(xdr, res->f_attr, res->server) != 0)
 		goto out;
 	if (decode_restorefh(xdr) != 0)
 		goto out;
-	decode_getfattr(xdr, res->dir_attr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->dir_attr, res->server);
 out:
 	return status;
 }
@@ -6162,8 +6157,7 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
 	status = decode_open(xdr, res);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->f_attr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->f_attr, res->server);
 out:
 	return status;
 }
@@ -6190,8 +6184,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
 	status = decode_setattr(xdr);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -6371,8 +6364,7 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	if (status)
 		goto out;
 	if (res->fattr)
-		decode_getfattr(xdr, res->fattr, res->server,
-				!RPC_IS_ASYNC(rqstp->rq_task));
+		decode_getfattr(xdr, res->fattr, res->server);
 	if (!status)
 		status = res->count;
 out:
@@ -6401,8 +6393,7 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	if (status)
 		goto out;
 	if (res->fattr)
-		decode_getfattr(xdr, res->fattr, res->server,
-				!RPC_IS_ASYNC(rqstp->rq_task));
+		decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -6561,8 +6552,7 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
 	status = decode_delegreturn(xdr);
 	if (status != 0)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -6591,8 +6581,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
 		goto out;
 	xdr_enter_page(xdr, PAGE_SIZE);
 	status = decode_getfattr(xdr, &res->fs_locations->fattr,
-				 res->fs_locations->server,
-				 !RPC_IS_ASYNC(req->rq_task));
+				 res->fs_locations->server);
 out:
 	return status;
 }
@@ -6841,8 +6830,7 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
 	status = decode_layoutcommit(xdr, rqstp, res);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -6973,7 +6961,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 		goto out_overflow;
 
 	if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
-					entry->server, 1) < 0)
+					entry->server) < 0)
 		goto out_overflow;
 	if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
 		entry->ino = entry->fattr->mounted_on_fileid;
diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h
index ae7d6a380dae..308c18877018 100644
--- a/include/linux/nfs_idmap.h
+++ b/include/linux/nfs_idmap.h
@@ -66,6 +66,8 @@ struct idmap_msg {
 /* Forward declaration to make this header independent of others */
 struct nfs_client;
 struct nfs_server;
+struct nfs_fattr;
+struct nfs4_string;
 
 #ifdef CONFIG_NFS_USE_NEW_IDMAPPER
 
@@ -97,6 +99,12 @@ void nfs_idmap_delete(struct nfs_client *);
 
 #endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
 
+void nfs_fattr_init_names(struct nfs_fattr *fattr,
+		struct nfs4_string *owner_name,
+		struct nfs4_string *group_name);
+void nfs_fattr_free_names(struct nfs_fattr *);
+void nfs_fattr_map_and_free_names(struct nfs_server *, struct nfs_fattr *);
+
 int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, __u32 *);
 int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, __u32 *);
 int nfs_map_uid_to_name(const struct nfs_server *, __u32, char *, size_t);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6c898afe6095..a764cef06b73 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -18,6 +18,11 @@
 /* Forward declaration for NFS v3 */
 struct nfs4_secinfo_flavors;
 
+struct nfs4_string {
+	unsigned int len;
+	char *data;
+};
+
 struct nfs_fsid {
 	uint64_t		major;
 	uint64_t		minor;
@@ -61,6 +66,8 @@ struct nfs_fattr {
 	struct timespec		pre_ctime;	/* pre_op_attr.ctime	  */
 	unsigned long		time_start;
 	unsigned long		gencount;
+	struct nfs4_string	*owner_name;
+	struct nfs4_string	*group_name;
 };
 
 #define NFS_ATTR_FATTR_TYPE		(1U << 0)
@@ -85,6 +92,8 @@ struct nfs_fattr {
 #define NFS_ATTR_FATTR_V4_REFERRAL	(1U << 19)	/* NFSv4 referral */
 #define NFS_ATTR_FATTR_MOUNTPOINT	(1U << 20)	/* Treat as mountpoint */
 #define NFS_ATTR_FATTR_MOUNTED_ON_FILEID		(1U << 21)
+#define NFS_ATTR_FATTR_OWNER_NAME	(1U << 22)
+#define NFS_ATTR_FATTR_GROUP_NAME	(1U << 23)
 
 #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \
 		| NFS_ATTR_FATTR_MODE \
@@ -324,6 +333,7 @@ struct nfs_openargs {
 	const struct qstr *	name;
 	const struct nfs_server *server;	 /* Needed for ID mapping */
 	const u32 *		bitmask;
+	const u32 *		dir_bitmask;
 	__u32			claim;
 	struct nfs4_sequence_args	seq_args;
 };
@@ -342,6 +352,8 @@ struct nfs_openres {
 	__u32			do_recall;
 	__u64			maxsize;
 	__u32			attrset[NFS4_BITMAP_SIZE];
+	struct nfs4_string	*owner;
+	struct nfs4_string	*group_owner;
 	struct nfs4_sequence_res	seq_res;
 };
 
@@ -778,11 +790,6 @@ struct nfs3_getaclres {
 	struct posix_acl *	acl_default;
 };
 
-struct nfs4_string {
-	unsigned int len;
-	char *data;
-};
-
 #ifdef CONFIG_NFS_V4
 
 typedef u64 clientid4;
-- 
cgit v1.2.3


From 96de37b62ca525cd77d2e85aea1472846ee31c4d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Sat, 7 Jan 2012 17:26:49 -0500
Subject: tracing: Fix compile error when static ftrace is enabled

The stack tracer uses the call ftrace_set_early_filter() function
to allow the stack tracer to pick its own functions on boot.
But this function is not defined if dynamic ftrace is not set.
This causes a compiler error when stack tracer is enabled and
dynamic ftrace is not.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 41df6f501656..028e26f0bf08 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -339,6 +339,7 @@ static inline int ftrace_text_reserved(void *start, void *end)
  * functions may still be called. Use a macro instead of inline.
  */
 #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
+#define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
 
 static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
 			    size_t cnt, loff_t *ppos) { return -ENODEV; }
-- 
cgit v1.2.3


From bc31b86a5923fad5f3fbb6192f767f410241ba27 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Sat, 7 Jan 2012 20:41:55 -0600
Subject: writeback: move MIN_WRITEBACK_PAGES to fs-writeback.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix compile error

 fs/fs-writeback.c:515:33: error: ‘PAGE_CACHE_SHIFT’ undeclared (first use in this function)

Reported-by: Randy Dunlap <rdunlap@xenotime.net>
Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 fs/fs-writeback.c         | 6 ++++++
 include/linux/writeback.h | 5 -----
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 92d353e069dc..22e2d42742a9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -20,6 +20,7 @@
 #include <linux/sched.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/pagemap.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/writeback.h>
@@ -29,6 +30,11 @@
 #include <linux/tracepoint.h>
 #include "internal.h"
 
+/*
+ * 4MB minimal write chunk size
+ */
+#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_CACHE_SHIFT - 10))
+
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b30419cd425e..4e0a55493023 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -25,11 +25,6 @@ DECLARE_PER_CPU(int, dirty_throttle_leaks);
 #define DIRTY_SCOPE		8
 #define DIRTY_FULL_SCOPE	(DIRTY_SCOPE / 2)
 
-/*
- * 4MB minimal write chunk size
- */
-#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_CACHE_SHIFT - 10))
-
 struct backing_dev_info;
 
 /*
-- 
cgit v1.2.3


From c5a92aa3eb7425da68797a820d208edad36551f7 Mon Sep 17 00:00:00 2001
From: Daniel Nicoletti <dantti12@gmail.com>
Date: Fri, 2 Dec 2011 03:52:22 -0200
Subject: hid-input: add support for HID devices reporting Battery Strength
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I've sent an email earlier asking for help with a GetFeature code, and now I
have a second patch on top of Jeremy's to provide the battery functionality
for devices that support reporting it.

If I understood correctly when talking to Jeremy he said his device
never actually reported the status as an input event (sorry if I didn't
understand it correctly), and after reading HID specs I believe it's
really because it was meant to be probed, I have an Apple Keyboard and
Magic Trackpad both bluetooth batteries operated, so using PacketLogger
I saw that Mac OSX always ask the battery status using the so called
GetFeature.

What my patch does is basically:
- store the report id that matches the battery_strength
- setup the battery if 0x6.0x20 is found, even if that is reported as a feature
  (as it was meant to be but only the MagicTrackpad does)
- when upower or someone access /sys/class/power_supply/hid-*/capacity it
  will probe the device and return it's status.

It works great for both devices, but I have two concerns:
- the report_features function has a duplicated code
- it would be nice if it was possible for specific drivers to provide their own
  probe as there might be some strange devices... (but maybe it's
already possible)

I've talked to the upower dev and he fixed it to be able to show the
right percentage.

Here how the uevent file (in /sys/class/power_supply/hid-*/) looks like:
POWER_SUPPLY_NAME=hid-00:22:41:D9:18:E7-battery
POWER_SUPPLY_PRESENT=1
POWER_SUPPLY_ONLINE=1
POWER_SUPPLY_CAPACITY=66
POWER_SUPPLY_MODEL_NAME=MacAdmin’s keyboard
POWER_SUPPLY_STATUS=Discharging

POWER_SUPPLY_NAME=hid-70:CD:60:F5:FF:3F-battery
POWER_SUPPLY_PRESENT=1
POWER_SUPPLY_ONLINE=1
POWER_SUPPLY_CAPACITY=62
POWER_SUPPLY_MODEL_NAME=nexx’s Trackpad
POWER_SUPPLY_STATUS=Discharging

Signed-off-by: Daniel Nicoletti <dantti12@gmail.com>
---
 drivers/hid/hid-input.c | 61 +++++++++++++++++++++++++++++++++++++------------
 include/linux/hid.h     |  1 +
 2 files changed, 47 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index b9b8c75a6f9a..8fac47cf42f1 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -277,6 +277,7 @@ static enum power_supply_property hidinput_battery_props[] = {
 	POWER_SUPPLY_PROP_ONLINE,
 	POWER_SUPPLY_PROP_CAPACITY,
 	POWER_SUPPLY_PROP_MODEL_NAME,
+	POWER_SUPPLY_PROP_STATUS
 };
 
 static int hidinput_get_battery_property(struct power_supply *psy,
@@ -285,6 +286,9 @@ static int hidinput_get_battery_property(struct power_supply *psy,
 {
 	struct hid_device *dev = container_of(psy, struct hid_device, battery);
 	int ret = 0;
+	int ret_rep;
+	__u8 *buf = NULL;
+	unsigned char report_number = dev->battery_report_id;
 
 	switch (prop) {
 	case POWER_SUPPLY_PROP_PRESENT:
@@ -293,28 +297,45 @@ static int hidinput_get_battery_property(struct power_supply *psy,
 		break;
 
 	case POWER_SUPPLY_PROP_CAPACITY:
-		if (dev->battery_min < dev->battery_max &&
-		    dev->battery_val >= dev->battery_min &&
-		    dev->battery_val <= dev->battery_max)
-			val->intval = (100 * (dev->battery_val - dev->battery_min)) /
-				(dev->battery_max - dev->battery_min);
-		else
+		buf = kmalloc(2 * sizeof(__u8), GFP_KERNEL);
+		if (!buf) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		memset(buf, 0, sizeof(buf));
+		ret_rep = dev->hid_get_raw_report(dev, report_number, buf, sizeof(buf), HID_FEATURE_REPORT);
+		if (ret_rep != 2) {
 			ret = -EINVAL;
+			break;
+		}
+
+		/* store the returned value */
+		/* I'm not calculating this using the logical_minimum and maximum */
+		/* because my device returns 0-100 even though the min and max are 0-255 */
+		val->intval = buf[1];
 		break;
 
 	case POWER_SUPPLY_PROP_MODEL_NAME:
 		val->strval = dev->name;
 		break;
 
+	case POWER_SUPPLY_PROP_STATUS:
+		val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
 	}
 
+	if (buf) {
+		kfree(buf);
+	}
 	return ret;
 }
 
-static void hidinput_setup_battery(struct hid_device *dev, s32 min, s32 max)
+static void hidinput_setup_battery(struct hid_device *dev, unsigned id, s32 min, s32 max)
 {
 	struct power_supply *battery = &dev->battery;
 	int ret;
@@ -326,7 +347,7 @@ static void hidinput_setup_battery(struct hid_device *dev, s32 min, s32 max)
 	if (battery->name == NULL)
 		return;
 
-	battery->type = POWER_SUPPLY_TYPE_BATTERY;
+	battery->type = POWER_SUPPLY_TYPE_USB;
 	battery->properties = hidinput_battery_props;
 	battery->num_properties = ARRAY_SIZE(hidinput_battery_props);
 	battery->use_for_apm = 0;
@@ -334,6 +355,7 @@ static void hidinput_setup_battery(struct hid_device *dev, s32 min, s32 max)
 
 	dev->battery_min = min;
 	dev->battery_max = max;
+	dev->battery_report_id = id;
 
 	ret = power_supply_register(&dev->dev, battery);
 	if (ret != 0) {
@@ -353,7 +375,7 @@ static void hidinput_cleanup_battery(struct hid_device *dev)
 	dev->battery.name = NULL;
 }
 #else  /* !CONFIG_HID_BATTERY_STRENGTH */
-static void hidinput_setup_battery(struct hid_device *dev, s32 min, s32 max)
+static void hidinput_setup_battery(struct hid_device *dev, unsigned id, s32 min, s32 max)
 {
 }
 
@@ -723,6 +745,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 	case HID_UP_GENDEVCTRLS:
 		if ((usage->hid & HID_USAGE) == 0x20) {	/* Battery Strength */
 			hidinput_setup_battery(device,
+					       field->report->id,
 					       field->logical_minimum,
 					       field->logical_maximum);
 			goto ignore;
@@ -997,15 +1020,23 @@ static void report_features(struct hid_device *hid)
 	struct hid_report *rep;
 	int i, j;
 
-	if (!drv->feature_mapping)
-		return;
-
 	rep_enum = &hid->report_enum[HID_FEATURE_REPORT];
 	list_for_each_entry(rep, &rep_enum->report_list, list)
 		for (i = 0; i < rep->maxfield; i++)
-			for (j = 0; j < rep->field[i]->maxusage; j++)
-				drv->feature_mapping(hid, rep->field[i],
-						     rep->field[i]->usage + j);
+			for (j = 0; j < rep->field[i]->maxusage; j++) {
+				/* Verify if Battery Strength feature is available */
+				if (((rep->field[i]->usage + j)->hid & HID_USAGE_PAGE) == HID_UP_GENDEVCTRLS &&
+					((rep->field[i]->usage + j)->hid & HID_USAGE) == 0x20) {
+					hidinput_setup_battery(hid,
+							       rep->id,
+							       rep->field[i]->logical_minimum,
+							       rep->field[i]->logical_maximum);
+				}
+
+				if (drv->feature_mapping)
+					drv->feature_mapping(hid, rep->field[i],
+							     rep->field[i]->usage + j);
+			}
 }
 
 /*
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 7f344c3da767..b5df198d87a5 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -496,6 +496,7 @@ struct hid_device {							/* device report descriptor */
 	__s32 battery_min;
 	__s32 battery_max;
 	__s32 battery_val;
+	__s32 battery_report_id;
 #endif
 
 	unsigned int status;						/* see STAT flags above */
-- 
cgit v1.2.3


From bbc21cfd55858d7c3e55bfaa91fa934b0b13ad4d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Fri, 2 Dec 2011 11:12:36 -0800
Subject: hid-input/battery: add quirks for battery

Some devices always report percentage, despite having 0/255 as their
min/max, so add a quirk for them.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
---
 drivers/hid/hid-core.c  |  2 +-
 drivers/hid/hid-input.c | 41 +++++++++++++++++++++++++++++++++++++----
 include/linux/hid.h     |  2 ++
 3 files changed, 40 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index c0ef2b49a00c..aa4a30b7c6af 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1157,7 +1157,7 @@ static bool hid_match_one_id(struct hid_device *hdev,
 		(id->product == HID_ANY_ID || id->product == hdev->product);
 }
 
-static const struct hid_device_id *hid_match_id(struct hid_device *hdev,
+const struct hid_device_id *hid_match_id(struct hid_device *hdev,
 		const struct hid_device_id *id)
 {
 	for (; id->bus; id++)
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index b108ce71583f..69dec476883a 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -32,6 +32,8 @@
 #include <linux/hid.h>
 #include <linux/hid-debug.h>
 
+#include "hid-ids.h"
+
 #define unk	KEY_UNKNOWN
 
 static const unsigned char hid_keyboard[256] = {
@@ -280,6 +282,28 @@ static enum power_supply_property hidinput_battery_props[] = {
 	POWER_SUPPLY_PROP_STATUS
 };
 
+#define HID_BATTERY_QUIRK_PERCENT	(1 << 0) /* always reports percent */
+
+static const struct hid_device_id hid_battery_quirks[] = {
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE),
+	  HID_BATTERY_QUIRK_PERCENT },
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD),
+	  HID_BATTERY_QUIRK_PERCENT },
+	{}
+};
+
+static unsigned find_battery_quirk(struct hid_device *hdev)
+{
+	unsigned quirks = 0;
+	const struct hid_device_id *match;
+
+	match = hid_match_id(hdev, hid_battery_quirks);
+	if (match != NULL)
+		quirks = match->driver_data;
+
+	return quirks;
+}
+
 static int hidinput_get_battery_property(struct power_supply *psy,
 					 enum power_supply_property prop,
 					 union power_supply_propval *val)
@@ -304,10 +328,11 @@ static int hidinput_get_battery_property(struct power_supply *psy,
 			break;
 		}
 
-		/* store the returned value */
-		/* I'm not calculating this using the logical_minimum and maximum */
-		/* because my device returns 0-100 even though the min and max are 0-255 */
-		val->intval = buf[1];
+		if (dev->battery_min < dev->battery_max &&
+		    buf[1] >= dev->battery_min &&
+		    buf[1] <= dev->battery_max)
+			val->intval = (100 * (buf[1] - dev->battery_min)) /
+				(dev->battery_max - dev->battery_min);
 		break;
 
 	case POWER_SUPPLY_PROP_MODEL_NAME:
@@ -330,6 +355,7 @@ static void hidinput_setup_battery(struct hid_device *dev, unsigned id, s32 min,
 {
 	struct power_supply *battery = &dev->battery;
 	int ret;
+	unsigned quirks;
 
 	if (battery->name != NULL)
 		return;		/* already initialized? */
@@ -344,6 +370,13 @@ static void hidinput_setup_battery(struct hid_device *dev, unsigned id, s32 min,
 	battery->use_for_apm = 0;
 	battery->get_property = hidinput_get_battery_property;
 
+	quirks = find_battery_quirk(dev);
+
+	if (quirks & HID_BATTERY_QUIRK_PERCENT) {
+		min = 0;
+		max = 100;
+	}
+
 	dev->battery_min = min;
 	dev->battery_max = max;
 	dev->battery_report_id = id;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index b5df198d87a5..fa772c86fa2c 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -735,6 +735,8 @@ int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size);
 int hid_check_keys_pressed(struct hid_device *hid);
 int hid_connect(struct hid_device *hid, unsigned int connect_mask);
 void hid_disconnect(struct hid_device *hid);
+const struct hid_device_id *hid_match_id(struct hid_device *hdev,
+					 const struct hid_device_id *id);
 
 /**
  * hid_map_usage - map usage input bits
-- 
cgit v1.2.3


From fb8ac91b4dccbdda0ad51d499079d05143783ba4 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Fri, 2 Dec 2011 11:18:45 -0800
Subject: hid-input/battery: deal with both FEATURE and INPUT report batteries

Some devices seem to report batteries as FEATUREs, others as INPUTs.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
---
 drivers/hid/hid-input.c | 20 +++++++++++++-------
 include/linux/hid.h     |  1 +
 2 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 69dec476883a..f5c3efcdcb02 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -312,7 +312,6 @@ static int hidinput_get_battery_property(struct power_supply *psy,
 	int ret = 0;
 	int ret_rep;
 	__u8 buf[2] = {};
-	unsigned char report_number = dev->battery_report_id;
 
 	switch (prop) {
 	case POWER_SUPPLY_PROP_PRESENT:
@@ -321,8 +320,9 @@ static int hidinput_get_battery_property(struct power_supply *psy,
 		break;
 
 	case POWER_SUPPLY_PROP_CAPACITY:
-		ret_rep = dev->hid_get_raw_report(dev, report_number,
-						  buf, sizeof(buf), HID_FEATURE_REPORT);
+		ret_rep = dev->hid_get_raw_report(dev, dev->battery_report_id,
+						  buf, sizeof(buf),
+						  dev->battery_report_type);
 		if (ret_rep != 2) {
 			ret = -EINVAL;
 			break;
@@ -351,7 +351,9 @@ static int hidinput_get_battery_property(struct power_supply *psy,
 	return ret;
 }
 
-static void hidinput_setup_battery(struct hid_device *dev, unsigned id, s32 min, s32 max)
+static void hidinput_setup_battery(struct hid_device *dev,
+				   unsigned report_type, unsigned report_id,
+				   s32 min, s32 max)
 {
 	struct power_supply *battery = &dev->battery;
 	int ret;
@@ -379,7 +381,8 @@ static void hidinput_setup_battery(struct hid_device *dev, unsigned id, s32 min,
 
 	dev->battery_min = min;
 	dev->battery_max = max;
-	dev->battery_report_id = id;
+	dev->battery_report_type = report_type;
+	dev->battery_report_id = report_id;
 
 	ret = power_supply_register(&dev->dev, battery);
 	if (ret != 0) {
@@ -399,7 +402,9 @@ static void hidinput_cleanup_battery(struct hid_device *dev)
 	dev->battery.name = NULL;
 }
 #else  /* !CONFIG_HID_BATTERY_STRENGTH */
-static void hidinput_setup_battery(struct hid_device *dev, unsigned id, s32 min, s32 max)
+static void hidinput_setup_battery(struct hid_device *dev,
+				   unsigned report_type, unsigned report_id,
+				   s32 min, s32 max)
 {
 }
 
@@ -769,6 +774,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 	case HID_UP_GENDEVCTRLS:
 		if ((usage->hid & HID_USAGE) == 0x20) {	/* Battery Strength */
 			hidinput_setup_battery(device,
+					       HID_INPUT_REPORT,
 					       field->report->id,
 					       field->logical_minimum,
 					       field->logical_maximum);
@@ -1052,7 +1058,7 @@ static void report_features(struct hid_device *hid)
 				if (((rep->field[i]->usage + j)->hid & HID_USAGE_PAGE) == HID_UP_GENDEVCTRLS &&
 					((rep->field[i]->usage + j)->hid & HID_USAGE) == 0x20) {
 					hidinput_setup_battery(hid,
-							       rep->id,
+							       HID_FEATURE_REPORT, rep->id,
 							       rep->field[i]->logical_minimum,
 							       rep->field[i]->logical_maximum);
 				}
diff --git a/include/linux/hid.h b/include/linux/hid.h
index fa772c86fa2c..9351d3d1d089 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -496,6 +496,7 @@ struct hid_device {							/* device report descriptor */
 	__s32 battery_min;
 	__s32 battery_max;
 	__s32 battery_val;
+	__s32 battery_report_type;
 	__s32 battery_report_id;
 #endif
 
-- 
cgit v1.2.3


From ce63920b395f1476e2d28cca16a56919289f0b62 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Fri, 2 Dec 2011 21:57:50 -0800
Subject: hid-input/battery: remove battery_val

hidinput_get_battery_property() now directly polls the device for the
current battery strength, so there's no need for battery_val, or the
code to set it on the input event path.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
---
 drivers/hid/hid-input.c | 8 --------
 include/linux/hid.h     | 1 -
 2 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index ceade58b8027..48785db10e87 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -917,14 +917,6 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct
 
 	input = field->hidinput->input;
 
-#ifdef CONFIG_HID_BATTERY_STRENGTH
-	if (usage->hid == HID_DC_BATTERYSTRENGTH) {
-		hid->battery_val = value;
-		hid_dbg(hid, "battery value is %d (range %d-%d)\n",
-			value, hid->battery_min, hid->battery_max);
-		return;
-	}
-#endif
 	if (!usage->type)
 		return;
 
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 9351d3d1d089..0e76f0ca110a 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -495,7 +495,6 @@ struct hid_device {							/* device report descriptor */
 	struct power_supply battery;
 	__s32 battery_min;
 	__s32 battery_max;
-	__s32 battery_val;
 	__s32 battery_report_type;
 	__s32 battery_report_id;
 #endif
-- 
cgit v1.2.3


From 73de16db43f8dcb833ab032ed274b60b23676680 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@st.com>
Date: Tue, 8 Nov 2011 09:44:06 +0530
Subject: mfd: Add support for irq over gpio pin to stmpe

On many boards, stmpe is present as an separate device (not as part of SoC).
Here gpio lines are mostly used for getting interrupts. This patch adds in
support to handle irq over gpio pin.

Signed-off-by: Viresh Kumar <viresh.kumar@st.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/stmpe.c       | 36 +++++++++++++++++++++++++++++-------
 include/linux/mfd/stmpe.h |  7 +++++++
 2 files changed, 36 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 2963689cf45c..39efa629a19d 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -5,6 +5,7 @@
  * Author: Rabin Vincent <rabin.vincent@stericsson.com> for ST-Ericsson
  */
 
+#include <linux/gpio.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -877,9 +878,10 @@ static int __devinit stmpe_devices_init(struct stmpe *stmpe)
 static int stmpe_suspend(struct device *dev)
 {
 	struct i2c_client *i2c = to_i2c_client(dev);
+	struct stmpe *stmpe = i2c_get_clientdata(i2c);
 
 	if (device_may_wakeup(&i2c->dev))
-		enable_irq_wake(i2c->irq);
+		enable_irq_wake(stmpe->irq);
 
 	return 0;
 }
@@ -887,9 +889,10 @@ static int stmpe_suspend(struct device *dev)
 static int stmpe_resume(struct device *dev)
 {
 	struct i2c_client *i2c = to_i2c_client(dev);
+	struct stmpe *stmpe = i2c_get_clientdata(i2c);
 
 	if (device_may_wakeup(&i2c->dev))
-		disable_irq_wake(i2c->irq);
+		disable_irq_wake(stmpe->irq);
 
 	return 0;
 }
@@ -925,15 +928,28 @@ static int __devinit stmpe_probe(struct i2c_client *i2c,
 
 	i2c_set_clientdata(i2c, stmpe);
 
+	if (pdata->irq_over_gpio) {
+		ret = gpio_request_one(pdata->irq_gpio, GPIOF_DIR_IN, "stmpe");
+		if (ret) {
+			dev_err(stmpe->dev, "failed to request IRQ GPIO: %d\n",
+					ret);
+			goto out_free;
+		}
+
+		stmpe->irq = gpio_to_irq(pdata->irq_gpio);
+	} else {
+		stmpe->irq = i2c->irq;
+	}
+
 	ret = stmpe_chip_init(stmpe);
 	if (ret)
-		goto out_free;
+		goto free_gpio;
 
 	ret = stmpe_irq_init(stmpe);
 	if (ret)
-		goto out_free;
+		goto free_gpio;
 
-	ret = request_threaded_irq(stmpe->i2c->irq, NULL, stmpe_irq,
+	ret = request_threaded_irq(stmpe->irq, NULL, stmpe_irq,
 				   pdata->irq_trigger | IRQF_ONESHOT,
 				   "stmpe", stmpe);
 	if (ret) {
@@ -951,9 +967,12 @@ static int __devinit stmpe_probe(struct i2c_client *i2c,
 
 out_removedevs:
 	mfd_remove_devices(stmpe->dev);
-	free_irq(stmpe->i2c->irq, stmpe);
+	free_irq(stmpe->irq, stmpe);
 out_removeirq:
 	stmpe_irq_remove(stmpe);
+free_gpio:
+	if (pdata->irq_over_gpio)
+		gpio_free(pdata->irq_gpio);
 out_free:
 	kfree(stmpe);
 	return ret;
@@ -965,9 +984,12 @@ static int __devexit stmpe_remove(struct i2c_client *client)
 
 	mfd_remove_devices(stmpe->dev);
 
-	free_irq(stmpe->i2c->irq, stmpe);
+	free_irq(stmpe->irq, stmpe);
 	stmpe_irq_remove(stmpe);
 
+	if (stmpe->pdata->irq_over_gpio)
+		gpio_free(stmpe->pdata->irq_gpio);
+
 	kfree(stmpe);
 
 	return 0;
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index be1af7c42e57..270d6613aadf 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -61,6 +61,7 @@ struct stmpe_variant_info;
  * @variant: the detected STMPE model number
  * @regs: list of addresses of registers which are at different addresses on
  *	  different variants.  Indexed by one of STMPE_IDX_*.
+ * @irq: irq number for stmpe
  * @irq_base: starting IRQ number for internal IRQs
  * @num_gpios: number of gpios, differs for variants
  * @ier: cache of IER registers for bus_lock
@@ -76,6 +77,7 @@ struct stmpe {
 	struct stmpe_variant_info *variant;
 	const u8 *regs;
 
+	int irq;
 	int irq_base;
 	int num_gpios;
 	u8 ier[2];
@@ -183,6 +185,9 @@ struct stmpe_ts_platform_data {
  * @autosleep_timeout: inactivity timeout in milliseconds for autosleep
  * @irq_base: base IRQ number.  %STMPE_NR_IRQS irqs will be used, or
  *	      %STMPE_NR_INTERNAL_IRQS if the GPIO driver is not used.
+ * @irq_over_gpio: true if gpio is used to get irq
+ * @irq_gpio: gpio number over which irq will be requested (significant only if
+ *	      irq_over_gpio is true)
  * @gpio: GPIO-specific platform data
  * @keypad: keypad-specific platform data
  * @ts: touchscreen-specific platform data
@@ -194,6 +199,8 @@ struct stmpe_platform_data {
 	unsigned int irq_trigger;
 	bool irq_invert_polarity;
 	bool autosleep;
+	bool irq_over_gpio;
+	int irq_gpio;
 	int autosleep_timeout;
 
 	struct stmpe_gpio_platform_data *gpio;
-- 
cgit v1.2.3


From 289aabdaf943f3676a16908e2c3cc1a1f9877ccb Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 3 Nov 2011 13:41:14 +0000
Subject: mfd: Disable more pulls on WM8994

Disable more pulls by default on WM8994 for a small current saving. Since
some designs do leave SPKMODE floating provide platform data to allow that
to be left enabled.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8994-core.c        | 11 ++++++++---
 include/linux/mfd/wm8994/pdata.h |  6 ++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index ff373dcda2c7..776298b313ab 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -374,6 +374,7 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 	struct wm8994_pdata *pdata = wm8994->dev->platform_data;
 	const char *devname;
 	int ret, i;
+	int pulls = 0;
 
 	dev_set_drvdata(wm8994->dev, wm8994);
 
@@ -516,12 +517,16 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 		}
 
 		wm8994->ldo_ena_always_driven = pdata->ldo_ena_always_driven;
+
+		if (pdata->spkmode_pu)
+			pulls |= WM8994_SPKMODE_PU;
 	}
 
-	/* Disable LDO pulldowns while the device is active */
+	/* Disable unneeded pulls */
 	wm8994_set_bits(wm8994, WM8994_PULL_CONTROL_2,
-			WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD,
-			0);
+			WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD |
+			WM8994_SPKMODE_PU | WM8994_CSNADDR_PD,
+			pulls);
 
 	/* In some system designs where the regulators are not in use,
 	 * we can achieve a small reduction in leakage currents by
diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index ea32f306dca6..54e2fef587d5 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -174,6 +174,12 @@ struct wm8994_pdata {
 	 * consumption will rise.
 	 */
 	bool ldo_ena_always_driven;
+
+	/*
+	 * SPKMODE must be pulled internally by the device on this
+	 * system.
+	 */
+	bool spkmode_pu;
 };
 
 #endif
-- 
cgit v1.2.3


From b46a36c0e0adc92c8be2c8a6fa68d979f6eee124 Mon Sep 17 00:00:00 2001
From: "Jett.Zhou" <jtzhou@marvell.com>
Date: Fri, 11 Nov 2011 15:38:27 +0800
Subject: mfd: Convert 88pm860x to use regmap api

Convert the 88pm860x normal bank register read/write to
use the register map API.

Signed-off-by: Jett.Zhou <jtzhou@marvell.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/88pm860x-i2c.c   | 105 ++++++++++++++++++-------------------------
 drivers/mfd/Kconfig          |   1 +
 include/linux/mfd/88pm860x.h |   3 +-
 3 files changed, 47 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/88pm860x-i2c.c b/drivers/mfd/88pm860x-i2c.c
index f629d6f4e3e9..630f1b545fc4 100644
--- a/drivers/mfd/88pm860x-i2c.c
+++ b/drivers/mfd/88pm860x-i2c.c
@@ -12,51 +12,20 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/regmap.h>
 #include <linux/mfd/88pm860x.h>
 #include <linux/slab.h>
 
-static inline int pm860x_read_device(struct i2c_client *i2c,
-				     int reg, int bytes, void *dest)
-{
-	unsigned char data;
-	int ret;
-
-	data = (unsigned char)reg;
-	ret = i2c_master_send(i2c, &data, 1);
-	if (ret < 0)
-		return ret;
-
-	ret = i2c_master_recv(i2c, dest, bytes);
-	if (ret < 0)
-		return ret;
-	return 0;
-}
-
-static inline int pm860x_write_device(struct i2c_client *i2c,
-				      int reg, int bytes, void *src)
-{
-	unsigned char buf[bytes + 1];
-	int ret;
-
-	buf[0] = (unsigned char)reg;
-	memcpy(&buf[1], src, bytes);
-
-	ret = i2c_master_send(i2c, buf, bytes + 1);
-	if (ret < 0)
-		return ret;
-	return 0;
-}
-
 int pm860x_reg_read(struct i2c_client *i2c, int reg)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
-	unsigned char data;
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
+	unsigned int data;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_read_device(i2c, reg, 1, &data);
-	mutex_unlock(&chip->io_lock);
-
+	ret = regmap_read(map, reg, &data);
 	if (ret < 0)
 		return ret;
 	else
@@ -68,12 +37,11 @@ int pm860x_reg_write(struct i2c_client *i2c, int reg,
 		     unsigned char data)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_write_device(i2c, reg, 1, &data);
-	mutex_unlock(&chip->io_lock);
-
+	ret = regmap_write(map, reg, data);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_reg_write);
@@ -82,12 +50,11 @@ int pm860x_bulk_read(struct i2c_client *i2c, int reg,
 		     int count, unsigned char *buf)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_read_device(i2c, reg, count, buf);
-	mutex_unlock(&chip->io_lock);
-
+	ret = regmap_raw_read(map, reg, buf, count);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_bulk_read);
@@ -96,12 +63,11 @@ int pm860x_bulk_write(struct i2c_client *i2c, int reg,
 		      int count, unsigned char *buf)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_write_device(i2c, reg, count, buf);
-	mutex_unlock(&chip->io_lock);
-
+	ret = regmap_raw_write(map, reg, buf, count);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_bulk_write);
@@ -110,18 +76,11 @@ int pm860x_set_bits(struct i2c_client *i2c, int reg,
 		    unsigned char mask, unsigned char data)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
-	unsigned char value;
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_read_device(i2c, reg, 1, &value);
-	if (ret < 0)
-		goto out;
-	value &= ~mask;
-	value |= data;
-	ret = pm860x_write_device(i2c, reg, 1, &value);
-out:
-	mutex_unlock(&chip->io_lock);
+	ret = regmap_update_bits(map, reg, mask, data);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_set_bits);
@@ -300,11 +259,17 @@ static int verify_addr(struct i2c_client *i2c)
 	return 0;
 }
 
+static struct regmap_config pm860x_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
+
 static int __devinit pm860x_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
 	struct pm860x_platform_data *pdata = client->dev.platform_data;
 	struct pm860x_chip *chip;
+	int ret;
 
 	if (!pdata) {
 		pr_info("No platform data in %s!\n", __func__);
@@ -316,10 +281,16 @@ static int __devinit pm860x_probe(struct i2c_client *client,
 		return -ENOMEM;
 
 	chip->id = verify_addr(client);
+	chip->regmap = regmap_init_i2c(client, &pm860x_regmap_config);
+	if (IS_ERR(chip->regmap)) {
+		ret = PTR_ERR(chip->regmap);
+		dev_err(&client->dev, "Failed to allocate register map: %d\n",
+				ret);
+		return ret;
+	}
 	chip->client = client;
 	i2c_set_clientdata(client, chip);
 	chip->dev = &client->dev;
-	mutex_init(&chip->io_lock);
 	dev_set_drvdata(chip->dev, chip);
 
 	/*
@@ -333,6 +304,14 @@ static int __devinit pm860x_probe(struct i2c_client *client,
 		chip->companion_addr = pdata->companion_addr;
 		chip->companion = i2c_new_dummy(chip->client->adapter,
 						chip->companion_addr);
+		chip->regmap_companion = regmap_init_i2c(chip->companion,
+							&pm860x_regmap_config);
+		if (IS_ERR(chip->regmap_companion)) {
+			ret = PTR_ERR(chip->regmap_companion);
+			dev_err(&chip->companion->dev,
+				"Failed to allocate register map: %d\n", ret);
+			return ret;
+		}
 		i2c_set_clientdata(chip->companion, chip);
 	}
 
@@ -345,7 +324,11 @@ static int __devexit pm860x_remove(struct i2c_client *client)
 	struct pm860x_chip *chip = i2c_get_clientdata(client);
 
 	pm860x_device_exit(chip);
-	i2c_unregister_device(chip->companion);
+	if (chip->companion) {
+		regmap_exit(chip->regmap_companion);
+		i2c_unregister_device(chip->companion);
+	}
+	regmap_exit(chip->regmap);
 	kfree(chip);
 	return 0;
 }
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index f1391c21ef26..c9acd32fc0a4 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -12,6 +12,7 @@ config MFD_CORE
 config MFD_88PM860X
 	bool "Support Marvell 88PM8606/88PM8607"
 	depends on I2C=y && GENERIC_HARDIRQS
+	select REGMAP_I2C
 	select MFD_CORE
 	help
 	  This supports for Marvell 88PM8606/88PM8607 Power Management IC.
diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h
index 63b4fb8e3b6f..92be3476c9f5 100644
--- a/include/linux/mfd/88pm860x.h
+++ b/include/linux/mfd/88pm860x.h
@@ -297,10 +297,11 @@ enum {
 
 struct pm860x_chip {
 	struct device		*dev;
-	struct mutex		io_lock;
 	struct mutex		irq_lock;
 	struct i2c_client	*client;
 	struct i2c_client	*companion;	/* companion chip client */
+	struct regmap           *regmap;
+	struct regmap           *regmap_companion;
 
 	int			buck3_double;	/* DVC ramp slope double */
 	unsigned short		companion_addr;
-- 
cgit v1.2.3


From 2439d9714e8f4f106c8965e093f9892cfb347d76 Mon Sep 17 00:00:00 2001
From: Donggeun Kim <dg77.kim@samsung.com>
Date: Thu, 24 Nov 2011 18:12:17 +0900
Subject: mfd: Add platform data for MAX8997 MUIC driver

Currently, MAX8997 device does not support MUIC function of it.
To add MAX8997 MUIC driver, header file should be updated.

Signed-off-by: Donggeun Kim <dg77.kim@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/max8997.h | 58 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index 0bbd13dbe336..49d2a0bfd7fe 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h
@@ -77,6 +77,60 @@ struct max8997_regulator_data {
 	struct regulator_init_data *initdata;
 };
 
+enum max8997_muic_usb_type {
+	MAX8997_USB_HOST,
+	MAX8997_USB_DEVICE,
+};
+
+enum max8997_muic_charger_type {
+	MAX8997_CHARGER_TYPE_NONE = 0,
+	MAX8997_CHARGER_TYPE_USB,
+	MAX8997_CHARGER_TYPE_DOWNSTREAM_PORT,
+	MAX8997_CHARGER_TYPE_DEDICATED_CHG,
+	MAX8997_CHARGER_TYPE_500MA,
+	MAX8997_CHARGER_TYPE_1A,
+	MAX8997_CHARGER_TYPE_DEAD_BATTERY = 7,
+};
+
+struct max8997_muic_reg_data {
+	u8 addr;
+	u8 data;
+};
+
+/**
+ * struct max8997_muic_platform_data
+ * @usb_callback: callback function for USB
+ *		  inform callee of USB type (HOST or DEVICE)
+ *		  and attached state(true or false)
+ * @charger_callback: callback function for charger
+ *		  inform callee of charger_type
+ *		  and attached state(true or false)
+ * @deskdock_callback: callback function for desk dock
+ *		  inform callee of attached state(true or false)
+ * @cardock_callback: callback function for car dock
+ *		  inform callee of attached state(true or false)
+ * @mhl_callback: callback function for MHL (Mobile High-definition Link)
+ *		  inform callee of attached state(true or false)
+ * @uart_callback: callback function for JIG UART
+ *		   inform callee of attached state(true or false)
+ * @init_data: array of max8997_muic_reg_data
+ *	       used for initializing registers of MAX8997 MUIC device
+ * @num_init_data: array size of init_data
+ */
+struct max8997_muic_platform_data {
+	void (*usb_callback)(enum max8997_muic_usb_type usb_type,
+		bool attached);
+	void (*charger_callback)(bool attached,
+		enum max8997_muic_charger_type charger_type);
+	void (*deskdock_callback) (bool attached);
+	void (*cardock_callback) (bool attached);
+	void (*mhl_callback) (bool attached);
+	void (*uart_callback) (bool attached);
+
+	struct max8997_muic_reg_data *init_data;
+	int num_init_data;
+};
+
 struct max8997_platform_data {
 	/* IRQ */
 	int irq_base;
@@ -113,7 +167,9 @@ struct max8997_platform_data {
 	/* charge Full Timeout */
 	int timeout; /* 0 (no timeout), 5, 6, 7 hours */
 
-	/* MUIC: Not implemented */
+	/* ---- MUIC ---- */
+	struct max8997_muic_platform_data *muic_pdata;
+
 	/* HAPTIC: Not implemented */
 	/* RTC: Not implemented */
 	/* Flash: Not implemented */
-- 
cgit v1.2.3


From 2161891a0a7bcad6ee8819bb324ee4a031bc8a95 Mon Sep 17 00:00:00 2001
From: Robin van der Gracht <robin@protonic.nl>
Date: Tue, 29 Nov 2011 12:09:03 +0100
Subject: mfd: Fixed unconditional reset of the mc13xxx ADC reading enable bits

When the ADC is being prepared for a single or multiple channel reading,
the adc0 register is reconfigured without taking the lithium cell, charge
current and battery current reading enable bits into account. Which results
in clearing the bits.

Signed-off-by: Robin van der Gracht <robin@protonic.nl>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/mc13xxx-core.c  | 4 ++--
 include/linux/mfd/mc13xxx.h | 8 ++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c
index 441738048458..d0d3dfafba5c 100644
--- a/drivers/mfd/mc13xxx-core.c
+++ b/drivers/mfd/mc13xxx-core.c
@@ -615,13 +615,13 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode,
 		break;
 
 	case MC13XXX_ADC_MODE_SINGLE_CHAN:
-		adc0 |= old_adc0 & MC13XXX_ADC0_TSMOD_MASK;
+		adc0 |= old_adc0 & MC13XXX_ADC0_CONFIG_MASK;
 		adc1 |= (channel & 0x7) << MC13XXX_ADC1_CHAN0_SHIFT;
 		adc1 |= MC13XXX_ADC1_RAND;
 		break;
 
 	case MC13XXX_ADC_MODE_MULT_CHAN:
-		adc0 |= old_adc0 & MC13XXX_ADC0_TSMOD_MASK;
+		adc0 |= old_adc0 & MC13XXX_ADC0_CONFIG_MASK;
 		adc1 |= 4 << MC13XXX_ADC1_CHAN1_SHIFT;
 		break;
 
diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h
index 3816c2fac0ad..261fc117b40a 100644
--- a/include/linux/mfd/mc13xxx.h
+++ b/include/linux/mfd/mc13xxx.h
@@ -173,6 +173,9 @@ struct mc13xxx_platform_data {
 #define MC13XXX_ADC_MODE_MULT_CHAN	3
 
 #define MC13XXX_ADC0		43
+#define MC13XXX_ADC0_LICELLCON		(1 << 0)
+#define MC13XXX_ADC0_CHRGICON		(1 << 1)
+#define MC13XXX_ADC0_BATICON		(1 << 2)
 #define MC13XXX_ADC0_ADREFEN		(1 << 10)
 #define MC13XXX_ADC0_TSMOD0		(1 << 12)
 #define MC13XXX_ADC0_TSMOD1		(1 << 13)
@@ -184,4 +187,9 @@ struct mc13xxx_platform_data {
 					MC13XXX_ADC0_TSMOD1 | \
 					MC13XXX_ADC0_TSMOD2)
 
+#define MC13XXX_ADC0_CONFIG_MASK	(MC13XXX_ADC0_TSMOD_MASK | \
+					MC13XXX_ADC0_LICELLCON | \
+					MC13XXX_ADC0_CHRGICON | \
+					MC13XXX_ADC0_BATICON)
+
 #endif /* ifndef __LINUX_MFD_MC13XXX_H */
-- 
cgit v1.2.3


From 5dd7bf59e0e8563265b3e5b33276099ef628fcc7 Mon Sep 17 00:00:00 2001
From: Jochen Friedrich <jochen@scram.de>
Date: Sun, 27 Nov 2011 22:00:54 +0100
Subject: ARM: sa11x0: Implement autoloading of codec and codec pdata for mcp
 bus.

Signed-off-by: Jochen Friedrich <jochen@scram.de>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 arch/arm/mach-sa1100/assabet.c          |  1 +
 arch/arm/mach-sa1100/cerf.c             |  1 +
 arch/arm/mach-sa1100/collie.c           |  8 +++++-
 arch/arm/mach-sa1100/include/mach/mcp.h |  2 ++
 arch/arm/mach-sa1100/lart.c             |  1 +
 arch/arm/mach-sa1100/shannon.c          |  1 +
 arch/arm/mach-sa1100/simpad.c           |  8 +++++-
 drivers/mfd/mcp-core.c                  | 44 ++++++++++++++++++++++++++++--
 drivers/mfd/mcp-sa11x0.c                |  7 +++--
 drivers/mfd/ucb1x00-core.c              | 48 +++++++++++++++++++++++++--------
 drivers/mfd/ucb1x00-ts.c                |  2 +-
 include/linux/mfd/mcp.h                 |  7 +++--
 include/linux/mfd/ucb1x00.h             |  5 +++-
 include/linux/mod_devicetable.h         | 11 ++++++++
 scripts/mod/file2alias.c                | 13 +++++++++
 15 files changed, 138 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index 3dd133f18415..14b31f116ef9 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -202,6 +202,7 @@ static struct irda_platform_data assabet_irda_data = {
 static struct mcp_plat_data assabet_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
+	.codec		= "ucb1x00",
 };
 
 static void __init assabet_init(void)
diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c
index 7f3da4b11ec9..b7db7cd08305 100644
--- a/arch/arm/mach-sa1100/cerf.c
+++ b/arch/arm/mach-sa1100/cerf.c
@@ -124,6 +124,7 @@ static void __init cerf_map_io(void)
 static struct mcp_plat_data cerf_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
+	.codec		= "ucb1x00",
 };
 
 static void __init cerf_init(void)
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 2965cc9d424e..b0b5efee683b 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -27,6 +27,7 @@
 #include <linux/timer.h>
 #include <linux/gpio.h>
 #include <linux/pda_power.h>
+#include <linux/mfd/ucb1x00.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -85,10 +86,15 @@ static struct scoop_pcmcia_config collie_pcmcia_config = {
 	.num_devs	= 1,
 };
 
+static struct ucb1x00_plat_data collie_ucb1x00_data = {
+	.gpio_base	= COLLIE_TC35143_GPIO_BASE,
+};
+
 static struct mcp_plat_data collie_mcp_data = {
 	.mccr0		= MCCR0_ADM | MCCR0_ExtClk,
 	.sclk_rate	= 9216000,
-	.gpio_base	= COLLIE_TC35143_GPIO_BASE,
+	.codec		= "ucb1x00",
+	.codec_pdata	= &collie_ucb1x00_data,
 };
 
 /*
diff --git a/arch/arm/mach-sa1100/include/mach/mcp.h b/arch/arm/mach-sa1100/include/mach/mcp.h
index ed1a331508a7..586cec898b35 100644
--- a/arch/arm/mach-sa1100/include/mach/mcp.h
+++ b/arch/arm/mach-sa1100/include/mach/mcp.h
@@ -17,6 +17,8 @@ struct mcp_plat_data {
 	u32 mccr1;
 	unsigned int sclk_rate;
 	int gpio_base;
+	const char *codec;
+	void *codec_pdata;
 };
 
 #endif
diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c
index 5bc59d0947ba..34bbdd986e43 100644
--- a/arch/arm/mach-sa1100/lart.c
+++ b/arch/arm/mach-sa1100/lart.c
@@ -24,6 +24,7 @@
 static struct mcp_plat_data lart_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
+	.codec		= "ucb1x00",
 };
 
 static void __init lart_init(void)
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index 1cccbf5b9e9a..252faa5e2395 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -55,6 +55,7 @@ static struct resource shannon_flash_resource = {
 static struct mcp_plat_data shannon_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
+	.codec		= "ucb1x00",
 };
 
 static void __init shannon_init(void)
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index 4790f3f3d008..7eac8ebab94e 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -14,6 +14,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/io.h>
 #include <linux/gpio.h>
+#include <linux/mfd/ucb1x00.h>
 
 #include <asm/irq.h>
 #include <mach/hardware.h>
@@ -187,10 +188,15 @@ static struct resource simpad_flash_resources [] = {
 	}
 };
 
+static struct ucb1x00_plat_data simpad_ucb1x00_data = {
+	.gpio_base	= SIMPAD_UCB1X00_GPIO_BASE,
+};
+
 static struct mcp_plat_data simpad_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.gpio_base	= SIMPAD_UCB1X00_GPIO_BASE,
+	.codec		= "ucb1300",
+	.codec_pdata	= &simpad_ucb1x00_data,
 };
 
 
diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c
index 84815f9ef636..63be60bc3455 100644
--- a/drivers/mfd/mcp-core.c
+++ b/drivers/mfd/mcp-core.c
@@ -26,9 +26,35 @@
 #define to_mcp(d)		container_of(d, struct mcp, attached_device)
 #define to_mcp_driver(d)	container_of(d, struct mcp_driver, drv)
 
+static const struct mcp_device_id *mcp_match_id(const struct mcp_device_id *id,
+						const char *codec)
+{
+	while (id->name[0]) {
+		if (strcmp(codec, id->name) == 0)
+			return id;
+		id++;
+	}
+	return NULL;
+}
+
+const struct mcp_device_id *mcp_get_device_id(const struct mcp *mcp)
+{
+	const struct mcp_driver *driver =
+		to_mcp_driver(mcp->attached_device.driver);
+
+	return mcp_match_id(driver->id_table, mcp->codec);
+}
+EXPORT_SYMBOL(mcp_get_device_id);
+
 static int mcp_bus_match(struct device *dev, struct device_driver *drv)
 {
-	return 1;
+	const struct mcp *mcp = to_mcp(dev);
+	const struct mcp_driver *driver = to_mcp_driver(drv);
+
+	if (driver->id_table)
+		return !!mcp_match_id(driver->id_table, mcp->codec);
+
+	return 0;
 }
 
 static int mcp_bus_probe(struct device *dev)
@@ -74,9 +100,18 @@ static int mcp_bus_resume(struct device *dev)
 	return ret;
 }
 
+static int mcp_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct mcp *mcp = to_mcp(dev);
+
+	add_uevent_var(env, "MODALIAS=%s%s", MCP_MODULE_PREFIX, mcp->codec);
+	return 0;
+}
+
 static struct bus_type mcp_bus_type = {
 	.name		= "mcp",
 	.match		= mcp_bus_match,
+	.uevent		= mcp_bus_uevent,
 	.probe		= mcp_bus_probe,
 	.remove		= mcp_bus_remove,
 	.suspend	= mcp_bus_suspend,
@@ -212,9 +247,14 @@ struct mcp *mcp_host_alloc(struct device *parent, size_t size)
 }
 EXPORT_SYMBOL(mcp_host_alloc);
 
-int mcp_host_register(struct mcp *mcp)
+int mcp_host_register(struct mcp *mcp, void *pdata)
 {
+	if (!mcp->codec)
+		return -EINVAL;
+
+	mcp->attached_device.platform_data = pdata;
 	dev_set_name(&mcp->attached_device, "mcp0");
+	request_module("%s%s", MCP_MODULE_PREFIX, mcp->codec);
 	return device_register(&mcp->attached_device);
 }
 EXPORT_SYMBOL(mcp_host_register);
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index 02c53a0766c4..da4e077a1bee 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -146,6 +146,9 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	if (!data)
 		return -ENODEV;
 
+	if (!data->codec)
+		return -ENODEV;
+
 	if (!request_mem_region(0x80060000, 0x60, "sa11x0-mcp"))
 		return -EBUSY;
 
@@ -162,7 +165,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	mcp->dma_audio_wr	= DMA_Ser4MCP0Wr;
 	mcp->dma_telco_rd	= DMA_Ser4MCP1Rd;
 	mcp->dma_telco_wr	= DMA_Ser4MCP1Wr;
-	mcp->gpio_base		= data->gpio_base;
+	mcp->codec		= data->codec;
 
 	platform_set_drvdata(pdev, mcp);
 
@@ -195,7 +198,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	mcp->rw_timeout = (64 * 3 * 1000000 + mcp->sclk_rate - 1) /
 			  mcp->sclk_rate;
 
-	ret = mcp_host_register(mcp);
+	ret = mcp_host_register(mcp, data->codec_pdata);
 	if (ret == 0)
 		goto out;
 
diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c
index b281217334eb..91c4f25e0e55 100644
--- a/drivers/mfd/ucb1x00-core.c
+++ b/drivers/mfd/ucb1x00-core.c
@@ -36,6 +36,15 @@ static DEFINE_MUTEX(ucb1x00_mutex);
 static LIST_HEAD(ucb1x00_drivers);
 static LIST_HEAD(ucb1x00_devices);
 
+static struct mcp_device_id ucb1x00_id[] = {
+	{ "ucb1x00", 0 },  /* auto-detection */
+	{ "ucb1200", UCB_ID_1200 },
+	{ "ucb1300", UCB_ID_1300 },
+	{ "tc35143", UCB_ID_TC35143 },
+	{ }
+};
+MODULE_DEVICE_TABLE(mcp, ucb1x00_id);
+
 /**
  *	ucb1x00_io_set_dir - set IO direction
  *	@ucb: UCB1x00 structure describing chip
@@ -527,17 +536,33 @@ static struct class ucb1x00_class = {
 
 static int ucb1x00_probe(struct mcp *mcp)
 {
+	const struct mcp_device_id *mid;
 	struct ucb1x00 *ucb;
 	struct ucb1x00_driver *drv;
+	struct ucb1x00_plat_data *pdata;
 	unsigned int id;
 	int ret = -ENODEV;
 	int temp;
 
 	mcp_enable(mcp);
 	id = mcp_reg_read(mcp, UCB_ID);
+	mid = mcp_get_device_id(mcp);
 
-	if (id != UCB_ID_1200 && id != UCB_ID_1300 && id != UCB_ID_TC35143) {
-		printk(KERN_WARNING "UCB1x00 ID not found: %04x\n", id);
+	if (mid && mid->driver_data) {
+		if (id != mid->driver_data) {
+			printk(KERN_WARNING "%s wrong ID %04x found: %04x\n",
+				mid->name, (unsigned int) mid->driver_data, id);
+			goto err_disable;
+		}
+	} else {
+		mid = &ucb1x00_id[1];
+		while (mid->driver_data) {
+			if (id == mid->driver_data)
+				break;
+			mid++;
+		}
+		printk(KERN_WARNING "%s ID not found: %04x\n",
+			ucb1x00_id[0].name, id);
 		goto err_disable;
 	}
 
@@ -546,28 +571,28 @@ static int ucb1x00_probe(struct mcp *mcp)
 	if (!ucb)
 		goto err_disable;
 
-
+	pdata = mcp->attached_device.platform_data;
 	ucb->dev.class = &ucb1x00_class;
 	ucb->dev.parent = &mcp->attached_device;
-	dev_set_name(&ucb->dev, "ucb1x00");
+	dev_set_name(&ucb->dev, mid->name);
 
 	spin_lock_init(&ucb->lock);
 	spin_lock_init(&ucb->io_lock);
 	sema_init(&ucb->adc_sem, 1);
 
-	ucb->id  = id;
+	ucb->id  = mid;
 	ucb->mcp = mcp;
 	ucb->irq = ucb1x00_detect_irq(ucb);
 	if (ucb->irq == NO_IRQ) {
-		printk(KERN_ERR "UCB1x00: IRQ probe failed\n");
+		printk(KERN_ERR "%s: IRQ probe failed\n", mid->name);
 		ret = -ENODEV;
 		goto err_free;
 	}
 
 	ucb->gpio.base = -1;
-	if (mcp->gpio_base != 0) {
+	if (pdata && (pdata->gpio_base >= 0)) {
 		ucb->gpio.label = dev_name(&ucb->dev);
-		ucb->gpio.base = mcp->gpio_base;
+		ucb->gpio.base = pdata->gpio_base;
 		ucb->gpio.ngpio = 10;
 		ucb->gpio.set = ucb1x00_gpio_set;
 		ucb->gpio.get = ucb1x00_gpio_get;
@@ -580,10 +605,10 @@ static int ucb1x00_probe(struct mcp *mcp)
 		dev_info(&ucb->dev, "gpio_base not set so no gpiolib support");
 
 	ret = request_irq(ucb->irq, ucb1x00_irq, IRQF_TRIGGER_RISING,
-			  "UCB1x00", ucb);
+			  mid->name, ucb);
 	if (ret) {
-		printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n",
-			ucb->irq, ret);
+		printk(KERN_ERR "%s: unable to grab irq%d: %d\n",
+			mid->name, ucb->irq, ret);
 		goto err_gpio;
 	}
 
@@ -705,6 +730,7 @@ static struct mcp_driver ucb1x00_driver = {
 	.remove		= ucb1x00_remove,
 	.suspend	= ucb1x00_suspend,
 	.resume		= ucb1x00_resume,
+	.id_table	= ucb1x00_id,
 };
 
 static int __init ucb1x00_init(void)
diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index 38ffbd50a0d2..40ec3c118868 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c
@@ -382,7 +382,7 @@ static int ucb1x00_ts_add(struct ucb1x00_dev *dev)
 	ts->adcsync = adcsync ? UCB_SYNC : UCB_NOSYNC;
 
 	idev->name       = "Touchscreen panel";
-	idev->id.product = ts->ucb->id;
+	idev->id.product = ts->ucb->id->driver_data;
 	idev->open       = ucb1x00_ts_open;
 	idev->close      = ucb1x00_ts_close;
 
diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h
index ee496708e38b..1515e64e3663 100644
--- a/include/linux/mfd/mcp.h
+++ b/include/linux/mfd/mcp.h
@@ -10,6 +10,7 @@
 #ifndef MCP_H
 #define MCP_H
 
+#include <linux/mod_devicetable.h>
 #include <mach/dma.h>
 
 struct mcp_ops;
@@ -26,7 +27,7 @@ struct mcp {
 	dma_device_t	dma_telco_rd;
 	dma_device_t	dma_telco_wr;
 	struct device	attached_device;
-	int		gpio_base;
+	const char	*codec;
 };
 
 struct mcp_ops {
@@ -44,10 +45,11 @@ void mcp_reg_write(struct mcp *, unsigned int, unsigned int);
 unsigned int mcp_reg_read(struct mcp *, unsigned int);
 void mcp_enable(struct mcp *);
 void mcp_disable(struct mcp *);
+const struct mcp_device_id *mcp_get_device_id(const struct mcp *mcp);
 #define mcp_get_sclk_rate(mcp)	((mcp)->sclk_rate)
 
 struct mcp *mcp_host_alloc(struct device *, size_t);
-int mcp_host_register(struct mcp *);
+int mcp_host_register(struct mcp *, void *);
 void mcp_host_unregister(struct mcp *);
 
 struct mcp_driver {
@@ -56,6 +58,7 @@ struct mcp_driver {
 	void (*remove)(struct mcp *);
 	int (*suspend)(struct mcp *, pm_message_t);
 	int (*resume)(struct mcp *);
+	const struct mcp_device_id *id_table;
 };
 
 int mcp_driver_register(struct mcp_driver *);
diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h
index 4321f044d1e4..bc19e5fb7ea8 100644
--- a/include/linux/mfd/ucb1x00.h
+++ b/include/linux/mfd/ucb1x00.h
@@ -104,6 +104,9 @@
 #define UCB_MODE_DYN_VFLAG_ENA	(1 << 12)
 #define UCB_MODE_AUD_OFF_CAN	(1 << 13)
 
+struct ucb1x00_plat_data {
+	int		gpio_base;
+};
 
 struct ucb1x00_irq {
 	void *devid;
@@ -116,7 +119,7 @@ struct ucb1x00 {
 	unsigned int		irq;
 	struct semaphore	adc_sem;
 	spinlock_t		io_lock;
-	u16			id;
+	const struct mcp_device_id *id;
 	u16			io_dir;
 	u16			io_out;
 	u16			adc_cr;
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 468819cdde87..bc50d9a80d89 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -436,6 +436,17 @@ struct spi_device_id {
 			__attribute__((aligned(sizeof(kernel_ulong_t))));
 };
 
+/* mcp */
+
+#define MCP_NAME_SIZE	20
+#define MCP_MODULE_PREFIX "mcp:"
+
+struct mcp_device_id {
+	char name[MCP_NAME_SIZE];
+	kernel_ulong_t driver_data	/* Data private to the driver */
+			__attribute__((aligned(sizeof(kernel_ulong_t))));
+};
+
 /* dmi */
 enum dmi_field {
 	DMI_NONE,
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index f936d1fa969d..e8c7eb16c0ea 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -774,6 +774,15 @@ static int do_spi_entry(const char *filename, struct spi_device_id *id,
 	return 1;
 }
 
+/* Looks like: mcp:S */
+static int do_mcp_entry(const char *filename, struct mcp_device_id *id,
+			char *alias)
+{
+	sprintf(alias, MCP_MODULE_PREFIX "%s", id->name);
+
+	return 1;
+}
+
 static const struct dmifield {
 	const char *prefix;
 	int field;
@@ -1027,6 +1036,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info,
 		do_table(symval, sym->st_size,
 			 sizeof(struct spi_device_id), "spi",
 			 do_spi_entry, mod);
+	else if (sym_is(symname, "__mod_mcp_device_table"))
+		do_table(symval, sym->st_size,
+			 sizeof(struct mcp_device_id), "mcp",
+			 do_mcp_entry, mod);
 	else if (sym_is(symname, "__mod_dmi_device_table"))
 		do_table(symval, sym->st_size,
 			 sizeof(struct dmi_system_id), "dmi",
-- 
cgit v1.2.3


From bdd6a67a3936f4da5ef382ec640b8d1eb1f209fa Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 28 Nov 2011 23:26:47 +0000
Subject: mfd: Constify WM8994 regulator_init_data

The driver has no need to modify the regulator_init_data so declare it
const to allow machine code to do so.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8994/pdata.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index 54e2fef587d5..b00897a6c461 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -23,7 +23,7 @@ struct wm8994_ldo_pdata {
 	int enable;
 
 	const char *supply;
-	struct regulator_init_data *init_data;
+	const struct regulator_init_data *init_data;
 };
 
 #define WM8994_CONFIGURE_GPIO 0x10000
-- 
cgit v1.2.3


From 83051b7287e43241ccb6adaaa92615a87274898b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 1 Dec 2011 13:56:53 +0000
Subject: mfd: Add missing mutex.h inclusion to WM8994 core.h

struct wm8994 includes a mutex so we need to include mutex.h before we
declare it. All current users rely on this being done implicitly.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8994/core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index f44bdb7273bd..8eb1b75cb755 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -15,6 +15,7 @@
 #ifndef __MFD_WM8994_CORE_H__
 #define __MFD_WM8994_CORE_H__
 
+#include <linux/mutex.h>
 #include <linux/interrupt.h>
 
 enum wm8994_type {
-- 
cgit v1.2.3


From ee66e653ca7425bc8ffca4e00f19a8057cd14e4d Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 2 Dec 2011 14:16:33 +0100
Subject: mfd: Unify abx500 headers in mfd/abx500

This moves all the header files related to the abx500 family into
a common include directory below mfd. From now on we place any
subchip header in that directory. Headers previously in e.g.
<linux/mfd/ab8500/gpio.h> get prefixed and are now e.g.
<linux/mfd/abx500/ab8500-gpio.h>. The top-level abstract interface
remains in <linux/mfd/abx500.h>.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 arch/arm/mach-ux500/board-mop500.c                 |   4 +-
 arch/arm/mach-ux500/board-u5500.c                  |   2 +-
 .../mach-ux500/include/mach/irqs-board-mop500.h    |   2 +-
 drivers/input/misc/ab8500-ponkey.c                 |   2 +-
 drivers/mfd/ab5500-core.c                          |   2 +-
 drivers/mfd/ab5500-debugfs.c                       |   2 +-
 drivers/mfd/ab8500-core.c                          |   2 +-
 drivers/mfd/ab8500-debugfs.c                       |   2 +-
 drivers/mfd/ab8500-gpadc.c                         |   4 +-
 drivers/mfd/ab8500-i2c.c                           |   2 +-
 drivers/mfd/ab8500-sysctrl.c                       |   4 +-
 drivers/misc/ab8500-pwm.c                          |   2 +-
 drivers/regulator/ab8500.c                         |   2 +-
 drivers/rtc/rtc-ab8500.c                           |   2 +-
 drivers/usb/otg/ab8500-usb.c                       |   2 +-
 include/linux/mfd/ab5500/ab5500.h                  | 140 ------------
 include/linux/mfd/ab8500.h                         | 201 ----------------
 include/linux/mfd/ab8500/gpadc.h                   |  35 ---
 include/linux/mfd/ab8500/gpio.h                    |  21 --
 include/linux/mfd/ab8500/sysctrl.h                 | 254 ---------------------
 include/linux/mfd/abx500/ab5500.h                  | 140 ++++++++++++
 include/linux/mfd/abx500/ab8500-gpadc.h            |  35 +++
 include/linux/mfd/abx500/ab8500-gpio.h             |  21 ++
 include/linux/mfd/abx500/ab8500-sysctrl.h          | 254 +++++++++++++++++++++
 include/linux/mfd/abx500/ab8500.h                  | 201 ++++++++++++++++
 25 files changed, 669 insertions(+), 669 deletions(-)
 delete mode 100644 include/linux/mfd/ab5500/ab5500.h
 delete mode 100644 include/linux/mfd/ab8500.h
 delete mode 100644 include/linux/mfd/ab8500/gpadc.h
 delete mode 100644 include/linux/mfd/ab8500/gpio.h
 delete mode 100644 include/linux/mfd/ab8500/sysctrl.h
 create mode 100644 include/linux/mfd/abx500/ab5500.h
 create mode 100644 include/linux/mfd/abx500/ab8500-gpadc.h
 create mode 100644 include/linux/mfd/abx500/ab8500-gpio.h
 create mode 100644 include/linux/mfd/abx500/ab8500-sysctrl.h
 create mode 100644 include/linux/mfd/abx500/ab8500.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index bdd7b80dd7ad..80cef36d71ce 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -19,11 +19,11 @@
 #include <linux/amba/pl022.h>
 #include <linux/amba/serial.h>
 #include <linux/spi/spi.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/regulator/ab8500.h>
 #include <linux/mfd/tc3589x.h>
 #include <linux/mfd/tps6105x.h>
-#include <linux/mfd/ab8500/gpio.h>
+#include <linux/mfd/abx500/ab8500-gpio.h>
 #include <linux/leds-lp5521.h>
 #include <linux/input.h>
 #include <linux/smsc911x.h>
diff --git a/arch/arm/mach-ux500/board-u5500.c b/arch/arm/mach-ux500/board-u5500.c
index 82025ba70c03..4ecb07a93f14 100644
--- a/arch/arm/mach-ux500/board-u5500.c
+++ b/arch/arm/mach-ux500/board-u5500.c
@@ -10,7 +10,7 @@
 #include <linux/amba/bus.h>
 #include <linux/irq.h>
 #include <linux/i2c.h>
-#include <linux/mfd/ab5500/ab5500.h>
+#include <linux/mfd/abx500/ab5500.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h b/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h
index 47969909836c..d2d4131435a6 100644
--- a/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h
+++ b/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h
@@ -9,7 +9,7 @@
 #define __MACH_IRQS_BOARD_MOP500_H
 
 /* Number of AB8500 irqs is taken from header file */
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 
 #define MOP500_AB8500_IRQ_BASE		IRQ_BOARD_START
 #define MOP500_AB8500_IRQ_END		(MOP500_AB8500_IRQ_BASE \
diff --git a/drivers/input/misc/ab8500-ponkey.c b/drivers/input/misc/ab8500-ponkey.c
index 3d3288a78fdc..3f199e1539bf 100644
--- a/drivers/input/misc/ab8500-ponkey.c
+++ b/drivers/input/misc/ab8500-ponkey.c
@@ -12,7 +12,7 @@
 #include <linux/platform_device.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/slab.h>
 
 /**
diff --git a/drivers/mfd/ab5500-core.c b/drivers/mfd/ab5500-core.c
index ec10629a0b0b..bd56a764dea1 100644
--- a/drivers/mfd/ab5500-core.c
+++ b/drivers/mfd/ab5500-core.c
@@ -22,8 +22,8 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/random.h>
-#include <linux/mfd/ab5500/ab5500.h>
 #include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab5500.h>
 #include <linux/list.h>
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
diff --git a/drivers/mfd/ab5500-debugfs.c b/drivers/mfd/ab5500-debugfs.c
index b7b2d3483fd4..72006940937a 100644
--- a/drivers/mfd/ab5500-debugfs.c
+++ b/drivers/mfd/ab5500-debugfs.c
@@ -7,8 +7,8 @@
 #include <linux/module.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
-#include <linux/mfd/ab5500/ab5500.h>
 #include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab5500.h>
 #include <linux/uaccess.h>
 
 #include "ab5500-core.h"
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index d3d572b2317b..53e2a80f42fa 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -17,7 +17,7 @@
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/regulator/ab8500.h>
 
 /*
diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c
index dedb7f65cea6..9a0211aa8897 100644
--- a/drivers/mfd/ab8500-debugfs.c
+++ b/drivers/mfd/ab8500-debugfs.c
@@ -13,7 +13,7 @@
 #include <linux/platform_device.h>
 
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 
 static u32 debug_bank;
 static u32 debug_address;
diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c
index e985d1701a83..c39fc716e1dc 100644
--- a/drivers/mfd/ab8500-gpadc.c
+++ b/drivers/mfd/ab8500-gpadc.c
@@ -18,9 +18,9 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/list.h>
-#include <linux/mfd/ab8500.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500/gpadc.h>
+#include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-gpadc.h>
 
 /*
  * GPADC register offsets
diff --git a/drivers/mfd/ab8500-i2c.c b/drivers/mfd/ab8500-i2c.c
index 9be541c6b004..087fecd71ce0 100644
--- a/drivers/mfd/ab8500-i2c.c
+++ b/drivers/mfd/ab8500-i2c.c
@@ -10,7 +10,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/mfd/db8500-prcmu.h>
 
 static int ab8500_i2c_write(struct ab8500 *ab8500, u16 addr, u8 data)
diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c
index f20feefac190..c28d4eb1eff0 100644
--- a/drivers/mfd/ab8500-sysctrl.c
+++ b/drivers/mfd/ab8500-sysctrl.c
@@ -7,9 +7,9 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-#include <linux/mfd/ab8500.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500/sysctrl.h>
+#include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-sysctrl.h>
 
 static struct device *sysctrl_dev;
 
diff --git a/drivers/misc/ab8500-pwm.c b/drivers/misc/ab8500-pwm.c
index 2208a9d52622..d7a9aa14e5d5 100644
--- a/drivers/misc/ab8500-pwm.c
+++ b/drivers/misc/ab8500-pwm.c
@@ -8,8 +8,8 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/pwm.h>
-#include <linux/mfd/ab8500.h>
 #include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/module.h>
 
 /*
diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c
index 6e1ae69646b3..80d08237a5a7 100644
--- a/drivers/regulator/ab8500.c
+++ b/drivers/regulator/ab8500.c
@@ -16,8 +16,8 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/platform_device.h>
-#include <linux/mfd/ab8500.h>
 #include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include <linux/regulator/ab8500.h>
diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c
index e346705aae92..db16ce212d6b 100644
--- a/drivers/rtc/rtc-ab8500.c
+++ b/drivers/rtc/rtc-ab8500.c
@@ -15,7 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/delay.h>
 
 #define AB8500_RTC_SOFF_STAT_REG	0x00
diff --git a/drivers/usb/otg/ab8500-usb.c b/drivers/usb/otg/ab8500-usb.c
index 07ccea9ada40..74fe6e62e0f7 100644
--- a/drivers/usb/otg/ab8500-usb.c
+++ b/drivers/usb/otg/ab8500-usb.c
@@ -30,7 +30,7 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 
 #define AB8500_MAIN_WD_CTRL_REG 0x01
 #define AB8500_USB_LINE_STAT_REG 0x80
diff --git a/include/linux/mfd/ab5500/ab5500.h b/include/linux/mfd/ab5500/ab5500.h
deleted file mode 100644
index a720051ae933..000000000000
--- a/include/linux/mfd/ab5500/ab5500.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (C) ST-Ericsson 2011
- *
- * License Terms: GNU General Public License v2
- */
-#ifndef MFD_AB5500_H
-#define MFD_AB5500_H
-
-#include <linux/device.h>
-
-enum ab5500_devid {
-	AB5500_DEVID_ADC,
-	AB5500_DEVID_LEDS,
-	AB5500_DEVID_POWER,
-	AB5500_DEVID_REGULATORS,
-	AB5500_DEVID_SIM,
-	AB5500_DEVID_RTC,
-	AB5500_DEVID_CHARGER,
-	AB5500_DEVID_FUELGAUGE,
-	AB5500_DEVID_VIBRATOR,
-	AB5500_DEVID_CODEC,
-	AB5500_DEVID_USB,
-	AB5500_DEVID_OTP,
-	AB5500_DEVID_VIDEO,
-	AB5500_DEVID_DBIECI,
-	AB5500_DEVID_ONSWA,
-	AB5500_NUM_DEVICES,
-};
-
-enum ab5500_banks {
-	AB5500_BANK_VIT_IO_I2C_CLK_TST_OTP = 0,
-	AB5500_BANK_VDDDIG_IO_I2C_CLK_TST = 1,
-	AB5500_BANK_VDENC = 2,
-	AB5500_BANK_SIM_USBSIM  = 3,
-	AB5500_BANK_LED = 4,
-	AB5500_BANK_ADC  = 5,
-	AB5500_BANK_RTC  = 6,
-	AB5500_BANK_STARTUP  = 7,
-	AB5500_BANK_DBI_ECI  = 8,
-	AB5500_BANK_CHG  = 9,
-	AB5500_BANK_FG_BATTCOM_ACC = 10,
-	AB5500_BANK_USB = 11,
-	AB5500_BANK_IT = 12,
-	AB5500_BANK_VIBRA = 13,
-	AB5500_BANK_AUDIO_HEADSETUSB = 14,
-	AB5500_NUM_BANKS = 15,
-};
-
-enum ab5500_banks_addr {
-	AB5500_ADDR_VIT_IO_I2C_CLK_TST_OTP = 0x4A,
-	AB5500_ADDR_VDDDIG_IO_I2C_CLK_TST = 0x4B,
-	AB5500_ADDR_VDENC = 0x06,
-	AB5500_ADDR_SIM_USBSIM  = 0x04,
-	AB5500_ADDR_LED = 0x10,
-	AB5500_ADDR_ADC  = 0x0A,
-	AB5500_ADDR_RTC  = 0x0F,
-	AB5500_ADDR_STARTUP  = 0x03,
-	AB5500_ADDR_DBI_ECI  = 0x07,
-	AB5500_ADDR_CHG  = 0x0B,
-	AB5500_ADDR_FG_BATTCOM_ACC = 0x0C,
-	AB5500_ADDR_USB = 0x05,
-	AB5500_ADDR_IT = 0x0E,
-	AB5500_ADDR_VIBRA = 0x02,
-	AB5500_ADDR_AUDIO_HEADSETUSB = 0x0D,
-};
-
-/*
- * Interrupt register offsets
- * Bank : 0x0E
- */
-#define AB5500_IT_SOURCE0_REG		0x20
-#define AB5500_IT_SOURCE1_REG		0x21
-#define AB5500_IT_SOURCE2_REG		0x22
-#define AB5500_IT_SOURCE3_REG		0x23
-#define AB5500_IT_SOURCE4_REG		0x24
-#define AB5500_IT_SOURCE5_REG		0x25
-#define AB5500_IT_SOURCE6_REG		0x26
-#define AB5500_IT_SOURCE7_REG		0x27
-#define AB5500_IT_SOURCE8_REG		0x28
-#define AB5500_IT_SOURCE9_REG		0x29
-#define AB5500_IT_SOURCE10_REG		0x2A
-#define AB5500_IT_SOURCE11_REG		0x2B
-#define AB5500_IT_SOURCE12_REG		0x2C
-#define AB5500_IT_SOURCE13_REG		0x2D
-#define AB5500_IT_SOURCE14_REG		0x2E
-#define AB5500_IT_SOURCE15_REG		0x2F
-#define AB5500_IT_SOURCE16_REG		0x30
-#define AB5500_IT_SOURCE17_REG		0x31
-#define AB5500_IT_SOURCE18_REG		0x32
-#define AB5500_IT_SOURCE19_REG		0x33
-#define AB5500_IT_SOURCE20_REG		0x34
-#define AB5500_IT_SOURCE21_REG		0x35
-#define AB5500_IT_SOURCE22_REG		0x36
-#define AB5500_IT_SOURCE23_REG		0x37
-
-#define AB5500_NUM_IRQ_REGS		23
-
-/**
- * struct ab5500
- * @access_mutex: lock out concurrent accesses to the AB registers
- * @dev: a pointer to the device struct for this chip driver
- * @ab5500_irq: the analog baseband irq
- * @irq_base: the platform configuration irq base for subdevices
- * @chip_name: name of this chip variant
- * @chip_id: 8 bit chip ID for this chip variant
- * @irq_lock: a lock to protect the mask
- * @abb_events: a local bit mask of the prcmu wakeup events
- * @event_mask: a local copy of the mask event registers
- * @last_event_mask: a copy of the last event_mask written to hardware
- * @startup_events: a copy of the first reading of the event registers
- * @startup_events_read: whether the first events have been read
- */
-struct ab5500 {
-	struct mutex access_mutex;
-	struct device *dev;
-	unsigned int ab5500_irq;
-	unsigned int irq_base;
-	char chip_name[32];
-	u8 chip_id;
-	struct mutex irq_lock;
-	u32 abb_events;
-	u8 mask[AB5500_NUM_IRQ_REGS];
-	u8 oldmask[AB5500_NUM_IRQ_REGS];
-	u8 startup_events[AB5500_NUM_IRQ_REGS];
-	bool startup_events_read;
-#ifdef CONFIG_DEBUG_FS
-	unsigned int debug_bank;
-	unsigned int debug_address;
-#endif
-};
-
-struct ab5500_platform_data {
-	struct {unsigned int base; unsigned int count; } irq;
-	void *dev_data[AB5500_NUM_DEVICES];
-	struct abx500_init_settings *init_settings;
-	unsigned int init_settings_sz;
-	bool pm_power_off;
-};
-
-#endif /* MFD_AB5500_H */
diff --git a/include/linux/mfd/ab8500.h b/include/linux/mfd/ab8500.h
deleted file mode 100644
index 838c6b487cc5..000000000000
--- a/include/linux/mfd/ab8500.h
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright (C) ST-Ericsson SA 2010
- *
- * License Terms: GNU General Public License v2
- * Author: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
- */
-#ifndef MFD_AB8500_H
-#define MFD_AB8500_H
-
-#include <linux/device.h>
-
-/*
- * AB8500 bank addresses
- */
-#define AB8500_SYS_CTRL1_BLOCK	0x1
-#define AB8500_SYS_CTRL2_BLOCK	0x2
-#define AB8500_REGU_CTRL1	0x3
-#define AB8500_REGU_CTRL2	0x4
-#define AB8500_USB		0x5
-#define AB8500_TVOUT		0x6
-#define AB8500_DBI		0x7
-#define AB8500_ECI_AV_ACC	0x8
-#define AB8500_RESERVED		0x9
-#define AB8500_GPADC		0xA
-#define AB8500_CHARGER		0xB
-#define AB8500_GAS_GAUGE	0xC
-#define AB8500_AUDIO		0xD
-#define AB8500_INTERRUPT	0xE
-#define AB8500_RTC		0xF
-#define AB8500_MISC		0x10
-#define AB8500_DEVELOPMENT	0x11
-#define AB8500_DEBUG		0x12
-#define AB8500_PROD_TEST	0x13
-#define AB8500_OTP_EMUL		0x15
-
-/*
- * Interrupts
- */
-
-#define AB8500_INT_MAIN_EXT_CH_NOT_OK	0
-#define AB8500_INT_UN_PLUG_TV_DET	1
-#define AB8500_INT_PLUG_TV_DET		2
-#define AB8500_INT_TEMP_WARM		3
-#define AB8500_INT_PON_KEY2DB_F		4
-#define AB8500_INT_PON_KEY2DB_R		5
-#define AB8500_INT_PON_KEY1DB_F		6
-#define AB8500_INT_PON_KEY1DB_R		7
-#define AB8500_INT_BATT_OVV		8
-#define AB8500_INT_MAIN_CH_UNPLUG_DET	10
-#define AB8500_INT_MAIN_CH_PLUG_DET	11
-#define AB8500_INT_USB_ID_DET_F		12
-#define AB8500_INT_USB_ID_DET_R		13
-#define AB8500_INT_VBUS_DET_F		14
-#define AB8500_INT_VBUS_DET_R		15
-#define AB8500_INT_VBUS_CH_DROP_END	16
-#define AB8500_INT_RTC_60S		17
-#define AB8500_INT_RTC_ALARM		18
-#define AB8500_INT_BAT_CTRL_INDB	20
-#define AB8500_INT_CH_WD_EXP		21
-#define AB8500_INT_VBUS_OVV		22
-#define AB8500_INT_MAIN_CH_DROP_END	23
-#define AB8500_INT_CCN_CONV_ACC		24
-#define AB8500_INT_INT_AUD		25
-#define AB8500_INT_CCEOC		26
-#define AB8500_INT_CC_INT_CALIB		27
-#define AB8500_INT_LOW_BAT_F		28
-#define AB8500_INT_LOW_BAT_R		29
-#define AB8500_INT_BUP_CHG_NOT_OK	30
-#define AB8500_INT_BUP_CHG_OK		31
-#define AB8500_INT_GP_HW_ADC_CONV_END	32
-#define AB8500_INT_ACC_DETECT_1DB_F	33
-#define AB8500_INT_ACC_DETECT_1DB_R	34
-#define AB8500_INT_ACC_DETECT_22DB_F	35
-#define AB8500_INT_ACC_DETECT_22DB_R	36
-#define AB8500_INT_ACC_DETECT_21DB_F	37
-#define AB8500_INT_ACC_DETECT_21DB_R	38
-#define AB8500_INT_GP_SW_ADC_CONV_END	39
-#define AB8500_INT_GPIO6R		40
-#define AB8500_INT_GPIO7R		41
-#define AB8500_INT_GPIO8R		42
-#define AB8500_INT_GPIO9R		43
-#define AB8500_INT_GPIO10R		44
-#define AB8500_INT_GPIO11R		45
-#define AB8500_INT_GPIO12R		46
-#define AB8500_INT_GPIO13R		47
-#define AB8500_INT_GPIO24R		48
-#define AB8500_INT_GPIO25R		49
-#define AB8500_INT_GPIO36R		50
-#define AB8500_INT_GPIO37R		51
-#define AB8500_INT_GPIO38R		52
-#define AB8500_INT_GPIO39R		53
-#define AB8500_INT_GPIO40R		54
-#define AB8500_INT_GPIO41R		55
-#define AB8500_INT_GPIO6F		56
-#define AB8500_INT_GPIO7F		57
-#define AB8500_INT_GPIO8F		58
-#define AB8500_INT_GPIO9F		59
-#define AB8500_INT_GPIO10F		60
-#define AB8500_INT_GPIO11F		61
-#define AB8500_INT_GPIO12F		62
-#define AB8500_INT_GPIO13F		63
-#define AB8500_INT_GPIO24F		64
-#define AB8500_INT_GPIO25F		65
-#define AB8500_INT_GPIO36F		66
-#define AB8500_INT_GPIO37F		67
-#define AB8500_INT_GPIO38F		68
-#define AB8500_INT_GPIO39F		69
-#define AB8500_INT_GPIO40F		70
-#define AB8500_INT_GPIO41F		71
-#define AB8500_INT_ADP_SOURCE_ERROR	72
-#define AB8500_INT_ADP_SINK_ERROR	73
-#define AB8500_INT_ADP_PROBE_PLUG	74
-#define AB8500_INT_ADP_PROBE_UNPLUG	75
-#define AB8500_INT_ADP_SENSE_OFF	76
-#define AB8500_INT_USB_PHY_POWER_ERR	78
-#define AB8500_INT_USB_LINK_STATUS	79
-#define AB8500_INT_BTEMP_LOW		80
-#define AB8500_INT_BTEMP_LOW_MEDIUM	81
-#define AB8500_INT_BTEMP_MEDIUM_HIGH	82
-#define AB8500_INT_BTEMP_HIGH		83
-#define AB8500_INT_USB_CHARGER_NOT_OK	89
-#define AB8500_INT_ID_WAKEUP_R		90
-#define AB8500_INT_ID_DET_R1R		92
-#define AB8500_INT_ID_DET_R2R		93
-#define AB8500_INT_ID_DET_R3R		94
-#define AB8500_INT_ID_DET_R4R		95
-#define AB8500_INT_ID_WAKEUP_F		96
-#define AB8500_INT_ID_DET_R1F		98
-#define AB8500_INT_ID_DET_R2F		99
-#define AB8500_INT_ID_DET_R3F		100
-#define AB8500_INT_ID_DET_R4F		101
-#define AB8500_INT_USB_CHG_DET_DONE	102
-#define AB8500_INT_USB_CH_TH_PROT_F	104
-#define AB8500_INT_USB_CH_TH_PROT_R    105
-#define AB8500_INT_MAIN_CH_TH_PROT_F   106
-#define AB8500_INT_MAIN_CH_TH_PROT_R	107
-#define AB8500_INT_USB_CHARGER_NOT_OKF	111
-
-#define AB8500_NR_IRQS			112
-#define AB8500_NUM_IRQ_REGS		14
-
-/**
- * struct ab8500 - ab8500 internal structure
- * @dev: parent device
- * @lock: read/write operations lock
- * @irq_lock: genirq bus lock
- * @irq: irq line
- * @chip_id: chip revision id
- * @write: register write
- * @read: register read
- * @rx_buf: rx buf for SPI
- * @tx_buf: tx buf for SPI
- * @mask: cache of IRQ regs for bus lock
- * @oldmask: cache of previous IRQ regs for bus lock
- */
-struct ab8500 {
-	struct device	*dev;
-	struct mutex	lock;
-	struct mutex	irq_lock;
-
-	int		irq_base;
-	int		irq;
-	u8		chip_id;
-
-	int (*write) (struct ab8500 *a8500, u16 addr, u8 data);
-	int (*read) (struct ab8500 *a8500, u16 addr);
-
-	unsigned long	tx_buf[4];
-	unsigned long	rx_buf[4];
-
-	u8 mask[AB8500_NUM_IRQ_REGS];
-	u8 oldmask[AB8500_NUM_IRQ_REGS];
-};
-
-struct regulator_reg_init;
-struct regulator_init_data;
-struct ab8500_gpio_platform_data;
-
-/**
- * struct ab8500_platform_data - AB8500 platform data
- * @irq_base: start of AB8500 IRQs, AB8500_NR_IRQS will be used
- * @init: board-specific initialization after detection of ab8500
- * @num_regulator_reg_init: number of regulator init registers
- * @regulator_reg_init: regulator init registers
- * @num_regulator: number of regulators
- * @regulator: machine-specific constraints for regulators
- */
-struct ab8500_platform_data {
-	int irq_base;
-	void (*init) (struct ab8500 *);
-	int num_regulator_reg_init;
-	struct ab8500_regulator_reg_init *regulator_reg_init;
-	int num_regulator;
-	struct regulator_init_data *regulator;
-	struct ab8500_gpio_platform_data *gpio;
-};
-
-extern int __devinit ab8500_init(struct ab8500 *ab8500);
-extern int __devexit ab8500_exit(struct ab8500 *ab8500);
-
-#endif /* MFD_AB8500_H */
diff --git a/include/linux/mfd/ab8500/gpadc.h b/include/linux/mfd/ab8500/gpadc.h
deleted file mode 100644
index 252966769d93..000000000000
--- a/include/linux/mfd/ab8500/gpadc.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2010 ST-Ericsson SA
- * Licensed under GPLv2.
- *
- * Author: Arun R Murthy <arun.murthy@stericsson.com>
- * Author: Daniel Willerud <daniel.willerud@stericsson.com>
- */
-
-#ifndef	_AB8500_GPADC_H
-#define _AB8500_GPADC_H
-
-/* GPADC source: From datasheet(ADCSwSel[4:0] in GPADCCtrl2) */
-#define BAT_CTRL	0x01
-#define BTEMP_BALL	0x02
-#define MAIN_CHARGER_V	0x03
-#define ACC_DETECT1	0x04
-#define ACC_DETECT2	0x05
-#define ADC_AUX1	0x06
-#define ADC_AUX2	0x07
-#define MAIN_BAT_V	0x08
-#define VBUS_V		0x09
-#define MAIN_CHARGER_C	0x0A
-#define USB_CHARGER_C	0x0B
-#define BK_BAT_V	0x0C
-#define DIE_TEMP	0x0D
-
-struct ab8500_gpadc;
-
-struct ab8500_gpadc *ab8500_gpadc_get(char *name);
-int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 channel);
-int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel);
-int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc,
-    u8 channel, int ad_value);
-
-#endif /* _AB8500_GPADC_H */
diff --git a/include/linux/mfd/ab8500/gpio.h b/include/linux/mfd/ab8500/gpio.h
deleted file mode 100644
index 488a8c920a29..000000000000
--- a/include/linux/mfd/ab8500/gpio.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright ST-Ericsson 2010.
- *
- * Author: Bibek Basu <bibek.basu@stericsson.com>
- * Licensed under GPLv2.
- */
-
-#ifndef _AB8500_GPIO_H
-#define _AB8500_GPIO_H
-
-/*
- * Platform data to register a block: only the initial gpio/irq number.
- */
-
-struct ab8500_gpio_platform_data {
-	int gpio_base;
-	u32 irq_base;
-	u8  config_reg[7];
-};
-
-#endif /* _AB8500_GPIO_H */
diff --git a/include/linux/mfd/ab8500/sysctrl.h b/include/linux/mfd/ab8500/sysctrl.h
deleted file mode 100644
index 10da0291f8f8..000000000000
--- a/include/linux/mfd/ab8500/sysctrl.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright (C) ST-Ericsson SA 2010
- * Author: Mattias Nilsson <mattias.i.nilsson@stericsson.com> for ST Ericsson.
- * License terms: GNU General Public License (GPL) version 2
- */
-#ifndef __AB8500_SYSCTRL_H
-#define __AB8500_SYSCTRL_H
-
-#include <linux/bitops.h>
-
-#ifdef CONFIG_AB8500_CORE
-
-int ab8500_sysctrl_read(u16 reg, u8 *value);
-int ab8500_sysctrl_write(u16 reg, u8 mask, u8 value);
-
-#else
-
-static inline int ab8500_sysctrl_read(u16 reg, u8 *value)
-{
-	return 0;
-}
-
-static inline int ab8500_sysctrl_write(u16 reg, u8 mask, u8 value)
-{
-	return 0;
-}
-
-#endif /* CONFIG_AB8500_CORE */
-
-static inline int ab8500_sysctrl_set(u16 reg, u8 bits)
-{
-	return ab8500_sysctrl_write(reg, bits, bits);
-}
-
-static inline int ab8500_sysctrl_clear(u16 reg, u8 bits)
-{
-	return ab8500_sysctrl_write(reg, bits, 0);
-}
-
-/* Registers */
-#define AB8500_TURNONSTATUS		0x100
-#define AB8500_RESETSTATUS		0x101
-#define AB8500_PONKEY1PRESSSTATUS	0x102
-#define AB8500_SYSCLKREQSTATUS		0x142
-#define AB8500_STW4500CTRL1		0x180
-#define AB8500_STW4500CTRL2		0x181
-#define AB8500_STW4500CTRL3		0x200
-#define AB8500_MAINWDOGCTRL		0x201
-#define AB8500_MAINWDOGTIMER		0x202
-#define AB8500_LOWBAT			0x203
-#define AB8500_BATTOK			0x204
-#define AB8500_SYSCLKTIMER		0x205
-#define AB8500_SMPSCLKCTRL		0x206
-#define AB8500_SMPSCLKSEL1		0x207
-#define AB8500_SMPSCLKSEL2		0x208
-#define AB8500_SMPSCLKSEL3		0x209
-#define AB8500_SYSULPCLKCONF		0x20A
-#define AB8500_SYSULPCLKCTRL1		0x20B
-#define AB8500_SYSCLKCTRL		0x20C
-#define AB8500_SYSCLKREQ1VALID		0x20D
-#define AB8500_SYSTEMCTRLSUP		0x20F
-#define AB8500_SYSCLKREQ1RFCLKBUF	0x210
-#define AB8500_SYSCLKREQ2RFCLKBUF	0x211
-#define AB8500_SYSCLKREQ3RFCLKBUF	0x212
-#define AB8500_SYSCLKREQ4RFCLKBUF	0x213
-#define AB8500_SYSCLKREQ5RFCLKBUF	0x214
-#define AB8500_SYSCLKREQ6RFCLKBUF	0x215
-#define AB8500_SYSCLKREQ7RFCLKBUF	0x216
-#define AB8500_SYSCLKREQ8RFCLKBUF	0x217
-#define AB8500_DITHERCLKCTRL		0x220
-#define AB8500_SWATCTRL			0x230
-#define AB8500_HIQCLKCTRL		0x232
-#define AB8500_VSIMSYSCLKCTRL		0x233
-
-/* Bits */
-#define AB8500_TURNONSTATUS_PORNVBAT BIT(0)
-#define AB8500_TURNONSTATUS_PONKEY1DBF BIT(1)
-#define AB8500_TURNONSTATUS_PONKEY2DBF BIT(2)
-#define AB8500_TURNONSTATUS_RTCALARM BIT(3)
-#define AB8500_TURNONSTATUS_MAINCHDET BIT(4)
-#define AB8500_TURNONSTATUS_VBUSDET BIT(5)
-#define AB8500_TURNONSTATUS_USBIDDETECT BIT(6)
-
-#define AB8500_RESETSTATUS_RESETN4500NSTATUS BIT(0)
-#define AB8500_RESETSTATUS_SWRESETN4500NSTATUS BIT(2)
-
-#define AB8500_PONKEY1PRESSSTATUS_PONKEY1PRESSTIME_MASK 0x7F
-#define AB8500_PONKEY1PRESSSTATUS_PONKEY1PRESSTIME_SHIFT 0
-
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ1STATUS BIT(0)
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ2STATUS BIT(1)
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ3STATUS BIT(2)
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ4STATUS BIT(3)
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ5STATUS BIT(4)
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ6STATUS BIT(5)
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ7STATUS BIT(6)
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ8STATUS BIT(7)
-
-#define AB8500_STW4500CTRL1_SWOFF BIT(0)
-#define AB8500_STW4500CTRL1_SWRESET4500N BIT(1)
-#define AB8500_STW4500CTRL1_THDB8500SWOFF BIT(2)
-
-#define AB8500_STW4500CTRL2_RESETNVAUX1VALID BIT(0)
-#define AB8500_STW4500CTRL2_RESETNVAUX2VALID BIT(1)
-#define AB8500_STW4500CTRL2_RESETNVAUX3VALID BIT(2)
-#define AB8500_STW4500CTRL2_RESETNVMODVALID BIT(3)
-#define AB8500_STW4500CTRL2_RESETNVEXTSUPPLY1VALID BIT(4)
-#define AB8500_STW4500CTRL2_RESETNVEXTSUPPLY2VALID BIT(5)
-#define AB8500_STW4500CTRL2_RESETNVEXTSUPPLY3VALID BIT(6)
-#define AB8500_STW4500CTRL2_RESETNVSMPS1VALID BIT(7)
-
-#define AB8500_STW4500CTRL3_CLK32KOUT2DIS BIT(0)
-#define AB8500_STW4500CTRL3_RESETAUDN BIT(1)
-#define AB8500_STW4500CTRL3_RESETDENCN BIT(2)
-#define AB8500_STW4500CTRL3_THSDENA BIT(3)
-
-#define AB8500_MAINWDOGCTRL_MAINWDOGENA BIT(0)
-#define AB8500_MAINWDOGCTRL_MAINWDOGKICK BIT(1)
-#define AB8500_MAINWDOGCTRL_WDEXPTURNONVALID BIT(4)
-
-#define AB8500_MAINWDOGTIMER_MAINWDOGTIMER_MASK 0x7F
-#define AB8500_MAINWDOGTIMER_MAINWDOGTIMER_SHIFT 0
-
-#define AB8500_LOWBAT_LOWBATENA BIT(0)
-#define AB8500_LOWBAT_LOWBAT_MASK 0x7E
-#define AB8500_LOWBAT_LOWBAT_SHIFT 1
-
-#define AB8500_BATTOK_BATTOKSEL0THF_MASK 0x0F
-#define AB8500_BATTOK_BATTOKSEL0THF_SHIFT 0
-#define AB8500_BATTOK_BATTOKSEL1THF_MASK 0xF0
-#define AB8500_BATTOK_BATTOKSEL1THF_SHIFT 4
-
-#define AB8500_SYSCLKTIMER_SYSCLKTIMER_MASK 0x0F
-#define AB8500_SYSCLKTIMER_SYSCLKTIMER_SHIFT 0
-#define AB8500_SYSCLKTIMER_SYSCLKTIMERADJ_MASK 0xF0
-#define AB8500_SYSCLKTIMER_SYSCLKTIMERADJ_SHIFT 4
-
-#define AB8500_SMPSCLKCTRL_SMPSCLKINTSEL_MASK 0x03
-#define AB8500_SMPSCLKCTRL_SMPSCLKINTSEL_SHIFT 0
-#define AB8500_SMPSCLKCTRL_3M2CLKINTENA BIT(2)
-
-#define AB8500_SMPSCLKSEL1_VARMCLKSEL_MASK 0x07
-#define AB8500_SMPSCLKSEL1_VARMCLKSEL_SHIFT 0
-#define AB8500_SMPSCLKSEL1_VAPECLKSEL_MASK 0x38
-#define AB8500_SMPSCLKSEL1_VAPECLKSEL_SHIFT 3
-
-#define AB8500_SMPSCLKSEL2_VMODCLKSEL_MASK 0x07
-#define AB8500_SMPSCLKSEL2_VMODCLKSEL_SHIFT 0
-#define AB8500_SMPSCLKSEL2_VSMPS1CLKSEL_MASK 0x38
-#define AB8500_SMPSCLKSEL2_VSMPS1CLKSEL_SHIFT 3
-
-#define AB8500_SMPSCLKSEL3_VSMPS2CLKSEL_MASK 0x07
-#define AB8500_SMPSCLKSEL3_VSMPS2CLKSEL_SHIFT 0
-#define AB8500_SMPSCLKSEL3_VSMPS3CLKSEL_MASK 0x38
-#define AB8500_SMPSCLKSEL3_VSMPS3CLKSEL_SHIFT 3
-
-#define AB8500_SYSULPCLKCONF_ULPCLKCONF_MASK 0x03
-#define AB8500_SYSULPCLKCONF_ULPCLKCONF_SHIFT 0
-#define AB8500_SYSULPCLKCONF_CLK27MHZSTRE BIT(2)
-#define AB8500_SYSULPCLKCONF_TVOUTCLKDELN BIT(3)
-#define AB8500_SYSULPCLKCONF_TVOUTCLKINV BIT(4)
-#define AB8500_SYSULPCLKCONF_ULPCLKSTRE BIT(5)
-#define AB8500_SYSULPCLKCONF_CLK27MHZBUFENA BIT(6)
-#define AB8500_SYSULPCLKCONF_CLK27MHZPDENA BIT(7)
-
-#define AB8500_SYSULPCLKCTRL1_SYSULPCLKINTSEL_MASK 0x03
-#define AB8500_SYSULPCLKCTRL1_SYSULPCLKINTSEL_SHIFT 0
-#define AB8500_SYSULPCLKCTRL1_ULPCLKREQ BIT(2)
-#define AB8500_SYSULPCLKCTRL1_4500SYSCLKREQ BIT(3)
-#define AB8500_SYSULPCLKCTRL1_AUDIOCLKENA BIT(4)
-#define AB8500_SYSULPCLKCTRL1_SYSCLKBUF2REQ BIT(5)
-#define AB8500_SYSULPCLKCTRL1_SYSCLKBUF3REQ BIT(6)
-#define AB8500_SYSULPCLKCTRL1_SYSCLKBUF4REQ BIT(7)
-
-#define AB8500_SYSCLKCTRL_TVOUTPLLENA BIT(0)
-#define AB8500_SYSCLKCTRL_TVOUTCLKENA BIT(1)
-#define AB8500_SYSCLKCTRL_USBCLKENA BIT(2)
-
-#define AB8500_SYSCLKREQ1VALID_SYSCLKREQ1VALID BIT(0)
-#define AB8500_SYSCLKREQ1VALID_ULPCLKREQ1VALID BIT(1)
-#define AB8500_SYSCLKREQ1VALID_USBSYSCLKREQ1VALID BIT(2)
-
-#define AB8500_SYSTEMCTRLSUP_EXTSUP12LPNCLKSEL_MASK 0x03
-#define AB8500_SYSTEMCTRLSUP_EXTSUP12LPNCLKSEL_SHIFT 0
-#define AB8500_SYSTEMCTRLSUP_EXTSUP3LPNCLKSEL_MASK 0x0C
-#define AB8500_SYSTEMCTRLSUP_EXTSUP3LPNCLKSEL_SHIFT 2
-#define AB8500_SYSTEMCTRLSUP_INTDB8500NOD BIT(4)
-
-#define AB8500_SYSCLKREQ1RFCLKBUF_SYSCLKREQ1RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ1RFCLKBUF_SYSCLKREQ1RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ1RFCLKBUF_SYSCLKREQ1RFCLKBUF4 BIT(4)
-
-#define AB8500_SYSCLKREQ2RFCLKBUF_SYSCLKREQ2RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ2RFCLKBUF_SYSCLKREQ2RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ2RFCLKBUF_SYSCLKREQ2RFCLKBUF4 BIT(4)
-
-#define AB8500_SYSCLKREQ3RFCLKBUF_SYSCLKREQ3RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ3RFCLKBUF_SYSCLKREQ3RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ3RFCLKBUF_SYSCLKREQ3RFCLKBUF4 BIT(4)
-
-#define AB8500_SYSCLKREQ4RFCLKBUF_SYSCLKREQ4RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ4RFCLKBUF_SYSCLKREQ4RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ4RFCLKBUF_SYSCLKREQ4RFCLKBUF4 BIT(4)
-
-#define AB8500_SYSCLKREQ5RFCLKBUF_SYSCLKREQ5RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ5RFCLKBUF_SYSCLKREQ5RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ5RFCLKBUF_SYSCLKREQ5RFCLKBUF4 BIT(4)
-
-#define AB8500_SYSCLKREQ6RFCLKBUF_SYSCLKREQ6RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ6RFCLKBUF_SYSCLKREQ6RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ6RFCLKBUF_SYSCLKREQ6RFCLKBUF4 BIT(4)
-
-#define AB8500_SYSCLKREQ7RFCLKBUF_SYSCLKREQ7RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ7RFCLKBUF_SYSCLKREQ7RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ7RFCLKBUF_SYSCLKREQ7RFCLKBUF4 BIT(4)
-
-#define AB8500_SYSCLKREQ8RFCLKBUF_SYSCLKREQ8RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ8RFCLKBUF_SYSCLKREQ8RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ8RFCLKBUF_SYSCLKREQ8RFCLKBUF4 BIT(4)
-
-#define AB8500_DITHERCLKCTRL_VARMDITHERENA BIT(0)
-#define AB8500_DITHERCLKCTRL_VSMPS3DITHERENA BIT(1)
-#define AB8500_DITHERCLKCTRL_VSMPS1DITHERENA BIT(2)
-#define AB8500_DITHERCLKCTRL_VSMPS2DITHERENA BIT(3)
-#define AB8500_DITHERCLKCTRL_VMODDITHERENA BIT(4)
-#define AB8500_DITHERCLKCTRL_VAPEDITHERENA BIT(5)
-#define AB8500_DITHERCLKCTRL_DITHERDEL_MASK 0xC0
-#define AB8500_DITHERCLKCTRL_DITHERDEL_SHIFT 6
-
-#define AB8500_SWATCTRL_UPDATERF BIT(0)
-#define AB8500_SWATCTRL_SWATENABLE BIT(1)
-#define AB8500_SWATCTRL_RFOFFTIMER_MASK 0x1C
-#define AB8500_SWATCTRL_RFOFFTIMER_SHIFT 2
-#define AB8500_SWATCTRL_SWATBIT5 BIT(6)
-
-#define AB8500_HIQCLKCTRL_SYSCLKREQ1HIQENAVALID BIT(0)
-#define AB8500_HIQCLKCTRL_SYSCLKREQ2HIQENAVALID BIT(1)
-#define AB8500_HIQCLKCTRL_SYSCLKREQ3HIQENAVALID BIT(2)
-#define AB8500_HIQCLKCTRL_SYSCLKREQ4HIQENAVALID BIT(3)
-#define AB8500_HIQCLKCTRL_SYSCLKREQ5HIQENAVALID BIT(4)
-#define AB8500_HIQCLKCTRL_SYSCLKREQ6HIQENAVALID BIT(5)
-#define AB8500_HIQCLKCTRL_SYSCLKREQ7HIQENAVALID BIT(6)
-#define AB8500_HIQCLKCTRL_SYSCLKREQ8HIQENAVALID BIT(7)
-
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ1VALID BIT(0)
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ2VALID BIT(1)
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ3VALID BIT(2)
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ4VALID BIT(3)
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ5VALID BIT(4)
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ6VALID BIT(5)
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ7VALID BIT(6)
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ8VALID BIT(7)
-
-#endif /* __AB8500_SYSCTRL_H */
diff --git a/include/linux/mfd/abx500/ab5500.h b/include/linux/mfd/abx500/ab5500.h
new file mode 100644
index 000000000000..a720051ae933
--- /dev/null
+++ b/include/linux/mfd/abx500/ab5500.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) ST-Ericsson 2011
+ *
+ * License Terms: GNU General Public License v2
+ */
+#ifndef MFD_AB5500_H
+#define MFD_AB5500_H
+
+#include <linux/device.h>
+
+enum ab5500_devid {
+	AB5500_DEVID_ADC,
+	AB5500_DEVID_LEDS,
+	AB5500_DEVID_POWER,
+	AB5500_DEVID_REGULATORS,
+	AB5500_DEVID_SIM,
+	AB5500_DEVID_RTC,
+	AB5500_DEVID_CHARGER,
+	AB5500_DEVID_FUELGAUGE,
+	AB5500_DEVID_VIBRATOR,
+	AB5500_DEVID_CODEC,
+	AB5500_DEVID_USB,
+	AB5500_DEVID_OTP,
+	AB5500_DEVID_VIDEO,
+	AB5500_DEVID_DBIECI,
+	AB5500_DEVID_ONSWA,
+	AB5500_NUM_DEVICES,
+};
+
+enum ab5500_banks {
+	AB5500_BANK_VIT_IO_I2C_CLK_TST_OTP = 0,
+	AB5500_BANK_VDDDIG_IO_I2C_CLK_TST = 1,
+	AB5500_BANK_VDENC = 2,
+	AB5500_BANK_SIM_USBSIM  = 3,
+	AB5500_BANK_LED = 4,
+	AB5500_BANK_ADC  = 5,
+	AB5500_BANK_RTC  = 6,
+	AB5500_BANK_STARTUP  = 7,
+	AB5500_BANK_DBI_ECI  = 8,
+	AB5500_BANK_CHG  = 9,
+	AB5500_BANK_FG_BATTCOM_ACC = 10,
+	AB5500_BANK_USB = 11,
+	AB5500_BANK_IT = 12,
+	AB5500_BANK_VIBRA = 13,
+	AB5500_BANK_AUDIO_HEADSETUSB = 14,
+	AB5500_NUM_BANKS = 15,
+};
+
+enum ab5500_banks_addr {
+	AB5500_ADDR_VIT_IO_I2C_CLK_TST_OTP = 0x4A,
+	AB5500_ADDR_VDDDIG_IO_I2C_CLK_TST = 0x4B,
+	AB5500_ADDR_VDENC = 0x06,
+	AB5500_ADDR_SIM_USBSIM  = 0x04,
+	AB5500_ADDR_LED = 0x10,
+	AB5500_ADDR_ADC  = 0x0A,
+	AB5500_ADDR_RTC  = 0x0F,
+	AB5500_ADDR_STARTUP  = 0x03,
+	AB5500_ADDR_DBI_ECI  = 0x07,
+	AB5500_ADDR_CHG  = 0x0B,
+	AB5500_ADDR_FG_BATTCOM_ACC = 0x0C,
+	AB5500_ADDR_USB = 0x05,
+	AB5500_ADDR_IT = 0x0E,
+	AB5500_ADDR_VIBRA = 0x02,
+	AB5500_ADDR_AUDIO_HEADSETUSB = 0x0D,
+};
+
+/*
+ * Interrupt register offsets
+ * Bank : 0x0E
+ */
+#define AB5500_IT_SOURCE0_REG		0x20
+#define AB5500_IT_SOURCE1_REG		0x21
+#define AB5500_IT_SOURCE2_REG		0x22
+#define AB5500_IT_SOURCE3_REG		0x23
+#define AB5500_IT_SOURCE4_REG		0x24
+#define AB5500_IT_SOURCE5_REG		0x25
+#define AB5500_IT_SOURCE6_REG		0x26
+#define AB5500_IT_SOURCE7_REG		0x27
+#define AB5500_IT_SOURCE8_REG		0x28
+#define AB5500_IT_SOURCE9_REG		0x29
+#define AB5500_IT_SOURCE10_REG		0x2A
+#define AB5500_IT_SOURCE11_REG		0x2B
+#define AB5500_IT_SOURCE12_REG		0x2C
+#define AB5500_IT_SOURCE13_REG		0x2D
+#define AB5500_IT_SOURCE14_REG		0x2E
+#define AB5500_IT_SOURCE15_REG		0x2F
+#define AB5500_IT_SOURCE16_REG		0x30
+#define AB5500_IT_SOURCE17_REG		0x31
+#define AB5500_IT_SOURCE18_REG		0x32
+#define AB5500_IT_SOURCE19_REG		0x33
+#define AB5500_IT_SOURCE20_REG		0x34
+#define AB5500_IT_SOURCE21_REG		0x35
+#define AB5500_IT_SOURCE22_REG		0x36
+#define AB5500_IT_SOURCE23_REG		0x37
+
+#define AB5500_NUM_IRQ_REGS		23
+
+/**
+ * struct ab5500
+ * @access_mutex: lock out concurrent accesses to the AB registers
+ * @dev: a pointer to the device struct for this chip driver
+ * @ab5500_irq: the analog baseband irq
+ * @irq_base: the platform configuration irq base for subdevices
+ * @chip_name: name of this chip variant
+ * @chip_id: 8 bit chip ID for this chip variant
+ * @irq_lock: a lock to protect the mask
+ * @abb_events: a local bit mask of the prcmu wakeup events
+ * @event_mask: a local copy of the mask event registers
+ * @last_event_mask: a copy of the last event_mask written to hardware
+ * @startup_events: a copy of the first reading of the event registers
+ * @startup_events_read: whether the first events have been read
+ */
+struct ab5500 {
+	struct mutex access_mutex;
+	struct device *dev;
+	unsigned int ab5500_irq;
+	unsigned int irq_base;
+	char chip_name[32];
+	u8 chip_id;
+	struct mutex irq_lock;
+	u32 abb_events;
+	u8 mask[AB5500_NUM_IRQ_REGS];
+	u8 oldmask[AB5500_NUM_IRQ_REGS];
+	u8 startup_events[AB5500_NUM_IRQ_REGS];
+	bool startup_events_read;
+#ifdef CONFIG_DEBUG_FS
+	unsigned int debug_bank;
+	unsigned int debug_address;
+#endif
+};
+
+struct ab5500_platform_data {
+	struct {unsigned int base; unsigned int count; } irq;
+	void *dev_data[AB5500_NUM_DEVICES];
+	struct abx500_init_settings *init_settings;
+	unsigned int init_settings_sz;
+	bool pm_power_off;
+};
+
+#endif /* MFD_AB5500_H */
diff --git a/include/linux/mfd/abx500/ab8500-gpadc.h b/include/linux/mfd/abx500/ab8500-gpadc.h
new file mode 100644
index 000000000000..252966769d93
--- /dev/null
+++ b/include/linux/mfd/abx500/ab8500-gpadc.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2010 ST-Ericsson SA
+ * Licensed under GPLv2.
+ *
+ * Author: Arun R Murthy <arun.murthy@stericsson.com>
+ * Author: Daniel Willerud <daniel.willerud@stericsson.com>
+ */
+
+#ifndef	_AB8500_GPADC_H
+#define _AB8500_GPADC_H
+
+/* GPADC source: From datasheet(ADCSwSel[4:0] in GPADCCtrl2) */
+#define BAT_CTRL	0x01
+#define BTEMP_BALL	0x02
+#define MAIN_CHARGER_V	0x03
+#define ACC_DETECT1	0x04
+#define ACC_DETECT2	0x05
+#define ADC_AUX1	0x06
+#define ADC_AUX2	0x07
+#define MAIN_BAT_V	0x08
+#define VBUS_V		0x09
+#define MAIN_CHARGER_C	0x0A
+#define USB_CHARGER_C	0x0B
+#define BK_BAT_V	0x0C
+#define DIE_TEMP	0x0D
+
+struct ab8500_gpadc;
+
+struct ab8500_gpadc *ab8500_gpadc_get(char *name);
+int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 channel);
+int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel);
+int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc,
+    u8 channel, int ad_value);
+
+#endif /* _AB8500_GPADC_H */
diff --git a/include/linux/mfd/abx500/ab8500-gpio.h b/include/linux/mfd/abx500/ab8500-gpio.h
new file mode 100644
index 000000000000..488a8c920a29
--- /dev/null
+++ b/include/linux/mfd/abx500/ab8500-gpio.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright ST-Ericsson 2010.
+ *
+ * Author: Bibek Basu <bibek.basu@stericsson.com>
+ * Licensed under GPLv2.
+ */
+
+#ifndef _AB8500_GPIO_H
+#define _AB8500_GPIO_H
+
+/*
+ * Platform data to register a block: only the initial gpio/irq number.
+ */
+
+struct ab8500_gpio_platform_data {
+	int gpio_base;
+	u32 irq_base;
+	u8  config_reg[7];
+};
+
+#endif /* _AB8500_GPIO_H */
diff --git a/include/linux/mfd/abx500/ab8500-sysctrl.h b/include/linux/mfd/abx500/ab8500-sysctrl.h
new file mode 100644
index 000000000000..10da0291f8f8
--- /dev/null
+++ b/include/linux/mfd/abx500/ab8500-sysctrl.h
@@ -0,0 +1,254 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2010
+ * Author: Mattias Nilsson <mattias.i.nilsson@stericsson.com> for ST Ericsson.
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#ifndef __AB8500_SYSCTRL_H
+#define __AB8500_SYSCTRL_H
+
+#include <linux/bitops.h>
+
+#ifdef CONFIG_AB8500_CORE
+
+int ab8500_sysctrl_read(u16 reg, u8 *value);
+int ab8500_sysctrl_write(u16 reg, u8 mask, u8 value);
+
+#else
+
+static inline int ab8500_sysctrl_read(u16 reg, u8 *value)
+{
+	return 0;
+}
+
+static inline int ab8500_sysctrl_write(u16 reg, u8 mask, u8 value)
+{
+	return 0;
+}
+
+#endif /* CONFIG_AB8500_CORE */
+
+static inline int ab8500_sysctrl_set(u16 reg, u8 bits)
+{
+	return ab8500_sysctrl_write(reg, bits, bits);
+}
+
+static inline int ab8500_sysctrl_clear(u16 reg, u8 bits)
+{
+	return ab8500_sysctrl_write(reg, bits, 0);
+}
+
+/* Registers */
+#define AB8500_TURNONSTATUS		0x100
+#define AB8500_RESETSTATUS		0x101
+#define AB8500_PONKEY1PRESSSTATUS	0x102
+#define AB8500_SYSCLKREQSTATUS		0x142
+#define AB8500_STW4500CTRL1		0x180
+#define AB8500_STW4500CTRL2		0x181
+#define AB8500_STW4500CTRL3		0x200
+#define AB8500_MAINWDOGCTRL		0x201
+#define AB8500_MAINWDOGTIMER		0x202
+#define AB8500_LOWBAT			0x203
+#define AB8500_BATTOK			0x204
+#define AB8500_SYSCLKTIMER		0x205
+#define AB8500_SMPSCLKCTRL		0x206
+#define AB8500_SMPSCLKSEL1		0x207
+#define AB8500_SMPSCLKSEL2		0x208
+#define AB8500_SMPSCLKSEL3		0x209
+#define AB8500_SYSULPCLKCONF		0x20A
+#define AB8500_SYSULPCLKCTRL1		0x20B
+#define AB8500_SYSCLKCTRL		0x20C
+#define AB8500_SYSCLKREQ1VALID		0x20D
+#define AB8500_SYSTEMCTRLSUP		0x20F
+#define AB8500_SYSCLKREQ1RFCLKBUF	0x210
+#define AB8500_SYSCLKREQ2RFCLKBUF	0x211
+#define AB8500_SYSCLKREQ3RFCLKBUF	0x212
+#define AB8500_SYSCLKREQ4RFCLKBUF	0x213
+#define AB8500_SYSCLKREQ5RFCLKBUF	0x214
+#define AB8500_SYSCLKREQ6RFCLKBUF	0x215
+#define AB8500_SYSCLKREQ7RFCLKBUF	0x216
+#define AB8500_SYSCLKREQ8RFCLKBUF	0x217
+#define AB8500_DITHERCLKCTRL		0x220
+#define AB8500_SWATCTRL			0x230
+#define AB8500_HIQCLKCTRL		0x232
+#define AB8500_VSIMSYSCLKCTRL		0x233
+
+/* Bits */
+#define AB8500_TURNONSTATUS_PORNVBAT BIT(0)
+#define AB8500_TURNONSTATUS_PONKEY1DBF BIT(1)
+#define AB8500_TURNONSTATUS_PONKEY2DBF BIT(2)
+#define AB8500_TURNONSTATUS_RTCALARM BIT(3)
+#define AB8500_TURNONSTATUS_MAINCHDET BIT(4)
+#define AB8500_TURNONSTATUS_VBUSDET BIT(5)
+#define AB8500_TURNONSTATUS_USBIDDETECT BIT(6)
+
+#define AB8500_RESETSTATUS_RESETN4500NSTATUS BIT(0)
+#define AB8500_RESETSTATUS_SWRESETN4500NSTATUS BIT(2)
+
+#define AB8500_PONKEY1PRESSSTATUS_PONKEY1PRESSTIME_MASK 0x7F
+#define AB8500_PONKEY1PRESSSTATUS_PONKEY1PRESSTIME_SHIFT 0
+
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ1STATUS BIT(0)
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ2STATUS BIT(1)
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ3STATUS BIT(2)
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ4STATUS BIT(3)
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ5STATUS BIT(4)
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ6STATUS BIT(5)
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ7STATUS BIT(6)
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ8STATUS BIT(7)
+
+#define AB8500_STW4500CTRL1_SWOFF BIT(0)
+#define AB8500_STW4500CTRL1_SWRESET4500N BIT(1)
+#define AB8500_STW4500CTRL1_THDB8500SWOFF BIT(2)
+
+#define AB8500_STW4500CTRL2_RESETNVAUX1VALID BIT(0)
+#define AB8500_STW4500CTRL2_RESETNVAUX2VALID BIT(1)
+#define AB8500_STW4500CTRL2_RESETNVAUX3VALID BIT(2)
+#define AB8500_STW4500CTRL2_RESETNVMODVALID BIT(3)
+#define AB8500_STW4500CTRL2_RESETNVEXTSUPPLY1VALID BIT(4)
+#define AB8500_STW4500CTRL2_RESETNVEXTSUPPLY2VALID BIT(5)
+#define AB8500_STW4500CTRL2_RESETNVEXTSUPPLY3VALID BIT(6)
+#define AB8500_STW4500CTRL2_RESETNVSMPS1VALID BIT(7)
+
+#define AB8500_STW4500CTRL3_CLK32KOUT2DIS BIT(0)
+#define AB8500_STW4500CTRL3_RESETAUDN BIT(1)
+#define AB8500_STW4500CTRL3_RESETDENCN BIT(2)
+#define AB8500_STW4500CTRL3_THSDENA BIT(3)
+
+#define AB8500_MAINWDOGCTRL_MAINWDOGENA BIT(0)
+#define AB8500_MAINWDOGCTRL_MAINWDOGKICK BIT(1)
+#define AB8500_MAINWDOGCTRL_WDEXPTURNONVALID BIT(4)
+
+#define AB8500_MAINWDOGTIMER_MAINWDOGTIMER_MASK 0x7F
+#define AB8500_MAINWDOGTIMER_MAINWDOGTIMER_SHIFT 0
+
+#define AB8500_LOWBAT_LOWBATENA BIT(0)
+#define AB8500_LOWBAT_LOWBAT_MASK 0x7E
+#define AB8500_LOWBAT_LOWBAT_SHIFT 1
+
+#define AB8500_BATTOK_BATTOKSEL0THF_MASK 0x0F
+#define AB8500_BATTOK_BATTOKSEL0THF_SHIFT 0
+#define AB8500_BATTOK_BATTOKSEL1THF_MASK 0xF0
+#define AB8500_BATTOK_BATTOKSEL1THF_SHIFT 4
+
+#define AB8500_SYSCLKTIMER_SYSCLKTIMER_MASK 0x0F
+#define AB8500_SYSCLKTIMER_SYSCLKTIMER_SHIFT 0
+#define AB8500_SYSCLKTIMER_SYSCLKTIMERADJ_MASK 0xF0
+#define AB8500_SYSCLKTIMER_SYSCLKTIMERADJ_SHIFT 4
+
+#define AB8500_SMPSCLKCTRL_SMPSCLKINTSEL_MASK 0x03
+#define AB8500_SMPSCLKCTRL_SMPSCLKINTSEL_SHIFT 0
+#define AB8500_SMPSCLKCTRL_3M2CLKINTENA BIT(2)
+
+#define AB8500_SMPSCLKSEL1_VARMCLKSEL_MASK 0x07
+#define AB8500_SMPSCLKSEL1_VARMCLKSEL_SHIFT 0
+#define AB8500_SMPSCLKSEL1_VAPECLKSEL_MASK 0x38
+#define AB8500_SMPSCLKSEL1_VAPECLKSEL_SHIFT 3
+
+#define AB8500_SMPSCLKSEL2_VMODCLKSEL_MASK 0x07
+#define AB8500_SMPSCLKSEL2_VMODCLKSEL_SHIFT 0
+#define AB8500_SMPSCLKSEL2_VSMPS1CLKSEL_MASK 0x38
+#define AB8500_SMPSCLKSEL2_VSMPS1CLKSEL_SHIFT 3
+
+#define AB8500_SMPSCLKSEL3_VSMPS2CLKSEL_MASK 0x07
+#define AB8500_SMPSCLKSEL3_VSMPS2CLKSEL_SHIFT 0
+#define AB8500_SMPSCLKSEL3_VSMPS3CLKSEL_MASK 0x38
+#define AB8500_SMPSCLKSEL3_VSMPS3CLKSEL_SHIFT 3
+
+#define AB8500_SYSULPCLKCONF_ULPCLKCONF_MASK 0x03
+#define AB8500_SYSULPCLKCONF_ULPCLKCONF_SHIFT 0
+#define AB8500_SYSULPCLKCONF_CLK27MHZSTRE BIT(2)
+#define AB8500_SYSULPCLKCONF_TVOUTCLKDELN BIT(3)
+#define AB8500_SYSULPCLKCONF_TVOUTCLKINV BIT(4)
+#define AB8500_SYSULPCLKCONF_ULPCLKSTRE BIT(5)
+#define AB8500_SYSULPCLKCONF_CLK27MHZBUFENA BIT(6)
+#define AB8500_SYSULPCLKCONF_CLK27MHZPDENA BIT(7)
+
+#define AB8500_SYSULPCLKCTRL1_SYSULPCLKINTSEL_MASK 0x03
+#define AB8500_SYSULPCLKCTRL1_SYSULPCLKINTSEL_SHIFT 0
+#define AB8500_SYSULPCLKCTRL1_ULPCLKREQ BIT(2)
+#define AB8500_SYSULPCLKCTRL1_4500SYSCLKREQ BIT(3)
+#define AB8500_SYSULPCLKCTRL1_AUDIOCLKENA BIT(4)
+#define AB8500_SYSULPCLKCTRL1_SYSCLKBUF2REQ BIT(5)
+#define AB8500_SYSULPCLKCTRL1_SYSCLKBUF3REQ BIT(6)
+#define AB8500_SYSULPCLKCTRL1_SYSCLKBUF4REQ BIT(7)
+
+#define AB8500_SYSCLKCTRL_TVOUTPLLENA BIT(0)
+#define AB8500_SYSCLKCTRL_TVOUTCLKENA BIT(1)
+#define AB8500_SYSCLKCTRL_USBCLKENA BIT(2)
+
+#define AB8500_SYSCLKREQ1VALID_SYSCLKREQ1VALID BIT(0)
+#define AB8500_SYSCLKREQ1VALID_ULPCLKREQ1VALID BIT(1)
+#define AB8500_SYSCLKREQ1VALID_USBSYSCLKREQ1VALID BIT(2)
+
+#define AB8500_SYSTEMCTRLSUP_EXTSUP12LPNCLKSEL_MASK 0x03
+#define AB8500_SYSTEMCTRLSUP_EXTSUP12LPNCLKSEL_SHIFT 0
+#define AB8500_SYSTEMCTRLSUP_EXTSUP3LPNCLKSEL_MASK 0x0C
+#define AB8500_SYSTEMCTRLSUP_EXTSUP3LPNCLKSEL_SHIFT 2
+#define AB8500_SYSTEMCTRLSUP_INTDB8500NOD BIT(4)
+
+#define AB8500_SYSCLKREQ1RFCLKBUF_SYSCLKREQ1RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ1RFCLKBUF_SYSCLKREQ1RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ1RFCLKBUF_SYSCLKREQ1RFCLKBUF4 BIT(4)
+
+#define AB8500_SYSCLKREQ2RFCLKBUF_SYSCLKREQ2RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ2RFCLKBUF_SYSCLKREQ2RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ2RFCLKBUF_SYSCLKREQ2RFCLKBUF4 BIT(4)
+
+#define AB8500_SYSCLKREQ3RFCLKBUF_SYSCLKREQ3RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ3RFCLKBUF_SYSCLKREQ3RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ3RFCLKBUF_SYSCLKREQ3RFCLKBUF4 BIT(4)
+
+#define AB8500_SYSCLKREQ4RFCLKBUF_SYSCLKREQ4RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ4RFCLKBUF_SYSCLKREQ4RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ4RFCLKBUF_SYSCLKREQ4RFCLKBUF4 BIT(4)
+
+#define AB8500_SYSCLKREQ5RFCLKBUF_SYSCLKREQ5RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ5RFCLKBUF_SYSCLKREQ5RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ5RFCLKBUF_SYSCLKREQ5RFCLKBUF4 BIT(4)
+
+#define AB8500_SYSCLKREQ6RFCLKBUF_SYSCLKREQ6RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ6RFCLKBUF_SYSCLKREQ6RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ6RFCLKBUF_SYSCLKREQ6RFCLKBUF4 BIT(4)
+
+#define AB8500_SYSCLKREQ7RFCLKBUF_SYSCLKREQ7RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ7RFCLKBUF_SYSCLKREQ7RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ7RFCLKBUF_SYSCLKREQ7RFCLKBUF4 BIT(4)
+
+#define AB8500_SYSCLKREQ8RFCLKBUF_SYSCLKREQ8RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ8RFCLKBUF_SYSCLKREQ8RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ8RFCLKBUF_SYSCLKREQ8RFCLKBUF4 BIT(4)
+
+#define AB8500_DITHERCLKCTRL_VARMDITHERENA BIT(0)
+#define AB8500_DITHERCLKCTRL_VSMPS3DITHERENA BIT(1)
+#define AB8500_DITHERCLKCTRL_VSMPS1DITHERENA BIT(2)
+#define AB8500_DITHERCLKCTRL_VSMPS2DITHERENA BIT(3)
+#define AB8500_DITHERCLKCTRL_VMODDITHERENA BIT(4)
+#define AB8500_DITHERCLKCTRL_VAPEDITHERENA BIT(5)
+#define AB8500_DITHERCLKCTRL_DITHERDEL_MASK 0xC0
+#define AB8500_DITHERCLKCTRL_DITHERDEL_SHIFT 6
+
+#define AB8500_SWATCTRL_UPDATERF BIT(0)
+#define AB8500_SWATCTRL_SWATENABLE BIT(1)
+#define AB8500_SWATCTRL_RFOFFTIMER_MASK 0x1C
+#define AB8500_SWATCTRL_RFOFFTIMER_SHIFT 2
+#define AB8500_SWATCTRL_SWATBIT5 BIT(6)
+
+#define AB8500_HIQCLKCTRL_SYSCLKREQ1HIQENAVALID BIT(0)
+#define AB8500_HIQCLKCTRL_SYSCLKREQ2HIQENAVALID BIT(1)
+#define AB8500_HIQCLKCTRL_SYSCLKREQ3HIQENAVALID BIT(2)
+#define AB8500_HIQCLKCTRL_SYSCLKREQ4HIQENAVALID BIT(3)
+#define AB8500_HIQCLKCTRL_SYSCLKREQ5HIQENAVALID BIT(4)
+#define AB8500_HIQCLKCTRL_SYSCLKREQ6HIQENAVALID BIT(5)
+#define AB8500_HIQCLKCTRL_SYSCLKREQ7HIQENAVALID BIT(6)
+#define AB8500_HIQCLKCTRL_SYSCLKREQ8HIQENAVALID BIT(7)
+
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ1VALID BIT(0)
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ2VALID BIT(1)
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ3VALID BIT(2)
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ4VALID BIT(3)
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ5VALID BIT(4)
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ6VALID BIT(5)
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ7VALID BIT(6)
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ8VALID BIT(7)
+
+#endif /* __AB8500_SYSCTRL_H */
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
new file mode 100644
index 000000000000..838c6b487cc5
--- /dev/null
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * License Terms: GNU General Public License v2
+ * Author: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
+ */
+#ifndef MFD_AB8500_H
+#define MFD_AB8500_H
+
+#include <linux/device.h>
+
+/*
+ * AB8500 bank addresses
+ */
+#define AB8500_SYS_CTRL1_BLOCK	0x1
+#define AB8500_SYS_CTRL2_BLOCK	0x2
+#define AB8500_REGU_CTRL1	0x3
+#define AB8500_REGU_CTRL2	0x4
+#define AB8500_USB		0x5
+#define AB8500_TVOUT		0x6
+#define AB8500_DBI		0x7
+#define AB8500_ECI_AV_ACC	0x8
+#define AB8500_RESERVED		0x9
+#define AB8500_GPADC		0xA
+#define AB8500_CHARGER		0xB
+#define AB8500_GAS_GAUGE	0xC
+#define AB8500_AUDIO		0xD
+#define AB8500_INTERRUPT	0xE
+#define AB8500_RTC		0xF
+#define AB8500_MISC		0x10
+#define AB8500_DEVELOPMENT	0x11
+#define AB8500_DEBUG		0x12
+#define AB8500_PROD_TEST	0x13
+#define AB8500_OTP_EMUL		0x15
+
+/*
+ * Interrupts
+ */
+
+#define AB8500_INT_MAIN_EXT_CH_NOT_OK	0
+#define AB8500_INT_UN_PLUG_TV_DET	1
+#define AB8500_INT_PLUG_TV_DET		2
+#define AB8500_INT_TEMP_WARM		3
+#define AB8500_INT_PON_KEY2DB_F		4
+#define AB8500_INT_PON_KEY2DB_R		5
+#define AB8500_INT_PON_KEY1DB_F		6
+#define AB8500_INT_PON_KEY1DB_R		7
+#define AB8500_INT_BATT_OVV		8
+#define AB8500_INT_MAIN_CH_UNPLUG_DET	10
+#define AB8500_INT_MAIN_CH_PLUG_DET	11
+#define AB8500_INT_USB_ID_DET_F		12
+#define AB8500_INT_USB_ID_DET_R		13
+#define AB8500_INT_VBUS_DET_F		14
+#define AB8500_INT_VBUS_DET_R		15
+#define AB8500_INT_VBUS_CH_DROP_END	16
+#define AB8500_INT_RTC_60S		17
+#define AB8500_INT_RTC_ALARM		18
+#define AB8500_INT_BAT_CTRL_INDB	20
+#define AB8500_INT_CH_WD_EXP		21
+#define AB8500_INT_VBUS_OVV		22
+#define AB8500_INT_MAIN_CH_DROP_END	23
+#define AB8500_INT_CCN_CONV_ACC		24
+#define AB8500_INT_INT_AUD		25
+#define AB8500_INT_CCEOC		26
+#define AB8500_INT_CC_INT_CALIB		27
+#define AB8500_INT_LOW_BAT_F		28
+#define AB8500_INT_LOW_BAT_R		29
+#define AB8500_INT_BUP_CHG_NOT_OK	30
+#define AB8500_INT_BUP_CHG_OK		31
+#define AB8500_INT_GP_HW_ADC_CONV_END	32
+#define AB8500_INT_ACC_DETECT_1DB_F	33
+#define AB8500_INT_ACC_DETECT_1DB_R	34
+#define AB8500_INT_ACC_DETECT_22DB_F	35
+#define AB8500_INT_ACC_DETECT_22DB_R	36
+#define AB8500_INT_ACC_DETECT_21DB_F	37
+#define AB8500_INT_ACC_DETECT_21DB_R	38
+#define AB8500_INT_GP_SW_ADC_CONV_END	39
+#define AB8500_INT_GPIO6R		40
+#define AB8500_INT_GPIO7R		41
+#define AB8500_INT_GPIO8R		42
+#define AB8500_INT_GPIO9R		43
+#define AB8500_INT_GPIO10R		44
+#define AB8500_INT_GPIO11R		45
+#define AB8500_INT_GPIO12R		46
+#define AB8500_INT_GPIO13R		47
+#define AB8500_INT_GPIO24R		48
+#define AB8500_INT_GPIO25R		49
+#define AB8500_INT_GPIO36R		50
+#define AB8500_INT_GPIO37R		51
+#define AB8500_INT_GPIO38R		52
+#define AB8500_INT_GPIO39R		53
+#define AB8500_INT_GPIO40R		54
+#define AB8500_INT_GPIO41R		55
+#define AB8500_INT_GPIO6F		56
+#define AB8500_INT_GPIO7F		57
+#define AB8500_INT_GPIO8F		58
+#define AB8500_INT_GPIO9F		59
+#define AB8500_INT_GPIO10F		60
+#define AB8500_INT_GPIO11F		61
+#define AB8500_INT_GPIO12F		62
+#define AB8500_INT_GPIO13F		63
+#define AB8500_INT_GPIO24F		64
+#define AB8500_INT_GPIO25F		65
+#define AB8500_INT_GPIO36F		66
+#define AB8500_INT_GPIO37F		67
+#define AB8500_INT_GPIO38F		68
+#define AB8500_INT_GPIO39F		69
+#define AB8500_INT_GPIO40F		70
+#define AB8500_INT_GPIO41F		71
+#define AB8500_INT_ADP_SOURCE_ERROR	72
+#define AB8500_INT_ADP_SINK_ERROR	73
+#define AB8500_INT_ADP_PROBE_PLUG	74
+#define AB8500_INT_ADP_PROBE_UNPLUG	75
+#define AB8500_INT_ADP_SENSE_OFF	76
+#define AB8500_INT_USB_PHY_POWER_ERR	78
+#define AB8500_INT_USB_LINK_STATUS	79
+#define AB8500_INT_BTEMP_LOW		80
+#define AB8500_INT_BTEMP_LOW_MEDIUM	81
+#define AB8500_INT_BTEMP_MEDIUM_HIGH	82
+#define AB8500_INT_BTEMP_HIGH		83
+#define AB8500_INT_USB_CHARGER_NOT_OK	89
+#define AB8500_INT_ID_WAKEUP_R		90
+#define AB8500_INT_ID_DET_R1R		92
+#define AB8500_INT_ID_DET_R2R		93
+#define AB8500_INT_ID_DET_R3R		94
+#define AB8500_INT_ID_DET_R4R		95
+#define AB8500_INT_ID_WAKEUP_F		96
+#define AB8500_INT_ID_DET_R1F		98
+#define AB8500_INT_ID_DET_R2F		99
+#define AB8500_INT_ID_DET_R3F		100
+#define AB8500_INT_ID_DET_R4F		101
+#define AB8500_INT_USB_CHG_DET_DONE	102
+#define AB8500_INT_USB_CH_TH_PROT_F	104
+#define AB8500_INT_USB_CH_TH_PROT_R    105
+#define AB8500_INT_MAIN_CH_TH_PROT_F   106
+#define AB8500_INT_MAIN_CH_TH_PROT_R	107
+#define AB8500_INT_USB_CHARGER_NOT_OKF	111
+
+#define AB8500_NR_IRQS			112
+#define AB8500_NUM_IRQ_REGS		14
+
+/**
+ * struct ab8500 - ab8500 internal structure
+ * @dev: parent device
+ * @lock: read/write operations lock
+ * @irq_lock: genirq bus lock
+ * @irq: irq line
+ * @chip_id: chip revision id
+ * @write: register write
+ * @read: register read
+ * @rx_buf: rx buf for SPI
+ * @tx_buf: tx buf for SPI
+ * @mask: cache of IRQ regs for bus lock
+ * @oldmask: cache of previous IRQ regs for bus lock
+ */
+struct ab8500 {
+	struct device	*dev;
+	struct mutex	lock;
+	struct mutex	irq_lock;
+
+	int		irq_base;
+	int		irq;
+	u8		chip_id;
+
+	int (*write) (struct ab8500 *a8500, u16 addr, u8 data);
+	int (*read) (struct ab8500 *a8500, u16 addr);
+
+	unsigned long	tx_buf[4];
+	unsigned long	rx_buf[4];
+
+	u8 mask[AB8500_NUM_IRQ_REGS];
+	u8 oldmask[AB8500_NUM_IRQ_REGS];
+};
+
+struct regulator_reg_init;
+struct regulator_init_data;
+struct ab8500_gpio_platform_data;
+
+/**
+ * struct ab8500_platform_data - AB8500 platform data
+ * @irq_base: start of AB8500 IRQs, AB8500_NR_IRQS will be used
+ * @init: board-specific initialization after detection of ab8500
+ * @num_regulator_reg_init: number of regulator init registers
+ * @regulator_reg_init: regulator init registers
+ * @num_regulator: number of regulators
+ * @regulator: machine-specific constraints for regulators
+ */
+struct ab8500_platform_data {
+	int irq_base;
+	void (*init) (struct ab8500 *);
+	int num_regulator_reg_init;
+	struct ab8500_regulator_reg_init *regulator_reg_init;
+	int num_regulator;
+	struct regulator_init_data *regulator;
+	struct ab8500_gpio_platform_data *gpio;
+};
+
+extern int __devinit ab8500_init(struct ab8500 *ab8500);
+extern int __devexit ab8500_exit(struct ab8500 *ab8500);
+
+#endif /* MFD_AB8500_H */
-- 
cgit v1.2.3


From 26cc3ab984cd00e95cb58ba5aaea4238ea56c700 Mon Sep 17 00:00:00 2001
From: Igor Grinberg <grinberg@compulab.co.il>
Date: Sun, 13 Nov 2011 11:49:50 +0200
Subject: mfd: Add power off functionality to TWL

TWL family of PMICs, used in master mode, have a power off
functionality. The resulting power off sequence shuts down all the SoC
supplies, LDOs, etc. The sequence is described in the datasheets
chapter "Power-Off Sequence".
Note, that board must be wired correctly for the power off to work as
expected.

Signed-off-by: Igor Grinberg <grinberg@compulab.co.il>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl4030-power.c | 42 ++++++++++++++++++++++++++++++++++++++++--
 include/linux/i2c/twl.h     |  2 ++
 2 files changed, 42 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index a764676f0922..d905f5171153 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -34,7 +34,8 @@
 static u8 twl4030_start_script_address = 0x2b;
 
 #define PWR_P1_SW_EVENTS	0x10
-#define PWR_DEVOFF	(1<<0)
+#define PWR_DEVOFF		(1 << 0)
+#define SEQ_OFFSYNC		(1 << 0)
 
 #define PHY_TO_OFF_PM_MASTER(p)		(p - 0x36)
 #define PHY_TO_OFF_PM_RECEIVER(p)	(p - 0x5b)
@@ -511,12 +512,27 @@ int twl4030_remove_script(u8 flags)
 	return err;
 }
 
+/*
+ * In master mode, start the power off sequence.
+ * After a successful execution, TWL shuts down the power to the SoC
+ * and all peripherals connected to it.
+ */
+void twl4030_power_off(void)
+{
+	int err;
+
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, PWR_DEVOFF,
+			       TWL4030_PM_MASTER_P1_SW_EVENTS);
+	if (err)
+		pr_err("TWL4030 Unable to power off\n");
+}
+
 void __init twl4030_power_init(struct twl4030_power_data *twl4030_scripts)
 {
 	int err = 0;
 	int i;
 	struct twl4030_resconfig *resconfig;
-	u8 address = twl4030_start_script_address;
+	u8 val, address = twl4030_start_script_address;
 
 	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
 			TWL4030_PM_MASTER_KEY_CFG1,
@@ -548,6 +564,28 @@ void __init twl4030_power_init(struct twl4030_power_data *twl4030_scripts)
 		}
 	}
 
+	/* Board has to be wired properly to use this feature */
+	if (twl4030_scripts->use_poweroff && !pm_power_off) {
+		/* Default for SEQ_OFFSYNC is set, lets ensure this */
+		err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &val,
+				      TWL4030_PM_MASTER_CFG_P123_TRANSITION);
+		if (err) {
+			pr_warning("TWL4030 Unable to read registers\n");
+
+		} else if (!(val & SEQ_OFFSYNC)) {
+			val |= SEQ_OFFSYNC;
+			err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, val,
+					TWL4030_PM_MASTER_CFG_P123_TRANSITION);
+			if (err) {
+				pr_err("TWL4030 Unable to setup SEQ_OFFSYNC\n");
+				goto relock;
+			}
+		}
+
+		pm_power_off = twl4030_power_off;
+	}
+
+relock:
 	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0,
 			TWL4030_PM_MASTER_PROTECT_KEY);
 	if (err)
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 114c0f6fc63d..78d3465251d6 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -652,10 +652,12 @@ struct twl4030_power_data {
 	unsigned num;
 	struct twl4030_resconfig *resource_config;
 #define TWL4030_RESCONFIG_UNDEF	((u8)-1)
+	bool use_poweroff;	/* Board is wired for TWL poweroff */
 };
 
 extern void twl4030_power_init(struct twl4030_power_data *triton2_scripts);
 extern int twl4030_remove_script(u8 flags);
+extern void twl4030_power_off(void);
 
 struct twl4030_codec_data {
 	unsigned int digimic_delay; /* in ms */
-- 
cgit v1.2.3


From f6dd2db940a1a0c6b9f7112109115c8243ba752b Mon Sep 17 00:00:00 2001
From: Donggeun Kim <dg77.kim@samsung.com>
Date: Wed, 14 Dec 2011 18:23:55 +0900
Subject: mfd: Add platform data and devices for MAX8997 LED control

MAX8997 device does not support LED control function of it.
To enable MAX8997-LED driver, platform data and devices for LED are updated.

Signed-off-by: Donggeun Kim <dg77.kim@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/max8997.c       |  3 ++-
 include/linux/mfd/max8997.h | 25 ++++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max8997.c b/drivers/mfd/max8997.c
index 5be53ae9b61c..cb83a7ab53e7 100644
--- a/drivers/mfd/max8997.c
+++ b/drivers/mfd/max8997.c
@@ -43,7 +43,8 @@ static struct mfd_cell max8997_devs[] = {
 	{ .name = "max8997-battery", },
 	{ .name = "max8997-haptic", },
 	{ .name = "max8997-muic", },
-	{ .name = "max8997-flash", },
+	{ .name = "max8997-led", .id = 1 },
+	{ .name = "max8997-led", .id = 2 },
 };
 
 int max8997_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest)
diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index 49d2a0bfd7fe..fff590521e50 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h
@@ -131,6 +131,28 @@ struct max8997_muic_platform_data {
 	int num_init_data;
 };
 
+enum max8997_led_mode {
+	MAX8997_NONE,
+	MAX8997_FLASH_MODE,
+	MAX8997_MOVIE_MODE,
+	MAX8997_FLASH_PIN_CONTROL_MODE,
+	MAX8997_MOVIE_PIN_CONTROL_MODE,
+};
+
+/**
+ *  struct max8997_led_platform_data
+ *  The number of LED devices for MAX8997 is two
+ *  @mode: LED mode for each LED device
+ *  @brightness: initial brightness for each LED device
+ *	range:
+ *	[0 - 31]: MAX8997_FLASH_MODE and MAX8997_FLASH_PIN_CONTROL_MODE
+ *	[0 - 15]: MAX8997_MOVIE_MODE and MAX8997_MOVIE_PIN_CONTROL_MODE
+ */
+struct max8997_led_platform_data {
+	enum max8997_led_mode mode[2];
+	u8 brightness[2];
+};
+
 struct max8997_platform_data {
 	/* IRQ */
 	int irq_base;
@@ -172,7 +194,8 @@ struct max8997_platform_data {
 
 	/* HAPTIC: Not implemented */
 	/* RTC: Not implemented */
-	/* Flash: Not implemented */
+	/* ---- LED ---- */
+	struct max8997_led_platform_data *led_pdata;
 };
 
 #endif /* __LINUX_MFD_MAX8998_H */
-- 
cgit v1.2.3


From 1a6e4b7415339e3b11a87cff0d701b8a2e55f062 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@st.com>
Date: Thu, 17 Nov 2011 11:02:20 +0530
Subject: mfd: Separate out STMPE controller and interface specific code

Few STMPE controller can have register interface over SPI or I2C. Current
implementation only supports I2C and all code is present in a single file
stmpe.c. It would be better to separate out I2C interface specific code from
controller specific code. Later SPI specific code can be added in a separate
file.

This patch separates out I2C and controller specific code into separate files,
making stmpe.c independent of I2C.

Signed-off-by: Viresh Kumar <viresh.kumar@st.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig       |  11 ++++
 drivers/mfd/Makefile      |   1 +
 drivers/mfd/stmpe-i2c.c   | 107 +++++++++++++++++++++++++++++++++++++
 drivers/mfd/stmpe.c       | 133 +++++++++++++++-------------------------------
 drivers/mfd/stmpe.h       |  33 ++++++++++++
 include/linux/mfd/stmpe.h |   7 ++-
 6 files changed, 200 insertions(+), 92 deletions(-)
 create mode 100644 drivers/mfd/stmpe-i2c.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 08a3e087bcea..7bc55819ab4a 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -279,6 +279,17 @@ config MFD_STMPE
 		Keypad: stmpe-keypad
 		Touchscreen: stmpe-ts
 
+menu "STMPE Interface Drivers"
+depends on MFD_STMPE
+
+config STMPE_I2C
+	bool "STMPE I2C Inteface"
+	depends on I2C
+	default y
+	help
+	  This is used to enable I2C interface of STMPE
+endmenu
+
 config MFD_TC3589X
 	bool "Support Toshiba TC35892 and variants"
 	depends on I2C=y && GENERIC_HARDIRQS
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index b2292eb75242..5eb90a70c1a5 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_MFD_DM355EVM_MSP)	+= dm355evm_msp.o
 obj-$(CONFIG_MFD_TI_SSP)	+= ti-ssp.o
 
 obj-$(CONFIG_MFD_STMPE)		+= stmpe.o
+obj-$(CONFIG_STMPE_I2C)		+= stmpe-i2c.o
 obj-$(CONFIG_MFD_TC3589X)	+= tc3589x.o
 obj-$(CONFIG_MFD_T7L66XB)	+= t7l66xb.o tmio_core.o
 obj-$(CONFIG_MFD_TC6387XB)	+= tc6387xb.o tmio_core.o
diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c
new file mode 100644
index 000000000000..0a4365902e36
--- /dev/null
+++ b/drivers/mfd/stmpe-i2c.c
@@ -0,0 +1,107 @@
+/*
+ * ST Microelectronics MFD: stmpe's i2c client specific driver
+ *
+ * Copyright (C) ST-Ericsson SA 2010
+ * Copyright (C) ST Microelectronics SA 2011
+ *
+ * License Terms: GNU General Public License, version 2
+ * Author: Rabin Vincent <rabin.vincent@stericsson.com> for ST-Ericsson
+ * Author: Viresh Kumar <viresh.kumar@st.com> for ST Microelectronics
+ */
+
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include "stmpe.h"
+
+static int i2c_reg_read(struct stmpe *stmpe, u8 reg)
+{
+	struct i2c_client *i2c = stmpe->client;
+
+	return i2c_smbus_read_byte_data(i2c, reg);
+}
+
+static int i2c_reg_write(struct stmpe *stmpe, u8 reg, u8 val)
+{
+	struct i2c_client *i2c = stmpe->client;
+
+	return i2c_smbus_write_byte_data(i2c, reg, val);
+}
+
+static int i2c_block_read(struct stmpe *stmpe, u8 reg, u8 length, u8 *values)
+{
+	struct i2c_client *i2c = stmpe->client;
+
+	return i2c_smbus_read_i2c_block_data(i2c, reg, length, values);
+}
+
+static int i2c_block_write(struct stmpe *stmpe, u8 reg, u8 length,
+		const u8 *values)
+{
+	struct i2c_client *i2c = stmpe->client;
+
+	return i2c_smbus_write_i2c_block_data(i2c, reg, length, values);
+}
+
+static struct stmpe_client_info i2c_ci = {
+	.read_byte = i2c_reg_read,
+	.write_byte = i2c_reg_write,
+	.read_block = i2c_block_read,
+	.write_block = i2c_block_write,
+};
+
+static int __devinit
+stmpe_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
+{
+	i2c_ci.data = (void *)id;
+	i2c_ci.irq = i2c->irq;
+	i2c_ci.client = i2c;
+	i2c_ci.dev = &i2c->dev;
+
+	return stmpe_probe(&i2c_ci, id->driver_data);
+}
+
+static int __devexit stmpe_i2c_remove(struct i2c_client *i2c)
+{
+	struct stmpe *stmpe = dev_get_drvdata(&i2c->dev);
+
+	return stmpe_remove(stmpe);
+}
+
+static const struct i2c_device_id stmpe_i2c_id[] = {
+	{ "stmpe811", STMPE811 },
+	{ "stmpe1601", STMPE1601 },
+	{ "stmpe2401", STMPE2401 },
+	{ "stmpe2403", STMPE2403 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, stmpe_id);
+
+static struct i2c_driver stmpe_i2c_driver = {
+	.driver.name	= "stmpe-i2c",
+	.driver.owner	= THIS_MODULE,
+#ifdef CONFIG_PM
+	.driver.pm	= &stmpe_dev_pm_ops,
+#endif
+	.probe		= stmpe_i2c_probe,
+	.remove		= __devexit_p(stmpe_i2c_remove),
+	.id_table	= stmpe_i2c_id,
+};
+
+static int __init stmpe_init(void)
+{
+	return i2c_add_driver(&stmpe_i2c_driver);
+}
+subsys_initcall(stmpe_init);
+
+static void __exit stmpe_exit(void)
+{
+	i2c_del_driver(&stmpe_i2c_driver);
+}
+module_exit(stmpe_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("STMPE MFD I2C Interface Driver");
+MODULE_AUTHOR("Rabin Vincent <rabin.vincent@stericsson.com>");
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 39efa629a19d..83bacde6a7cb 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -1,4 +1,6 @@
 /*
+ * ST Microelectronics MFD: stmpe's driver
+ *
  * Copyright (C) ST-Ericsson SA 2010
  *
  * License Terms: GNU General Public License, version 2
@@ -7,13 +9,11 @@
 
 #include <linux/gpio.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/pm.h>
 #include <linux/slab.h>
-#include <linux/i2c.h>
 #include <linux/mfd/core.h>
-#include <linux/mfd/stmpe.h>
 #include "stmpe.h"
 
 static int __stmpe_enable(struct stmpe *stmpe, unsigned int blocks)
@@ -30,10 +30,9 @@ static int __stmpe_reg_read(struct stmpe *stmpe, u8 reg)
 {
 	int ret;
 
-	ret = i2c_smbus_read_byte_data(stmpe->i2c, reg);
+	ret = stmpe->ci->read_byte(stmpe, reg);
 	if (ret < 0)
-		dev_err(stmpe->dev, "failed to read reg %#x: %d\n",
-			reg, ret);
+		dev_err(stmpe->dev, "failed to read reg %#x: %d\n", reg, ret);
 
 	dev_vdbg(stmpe->dev, "rd: reg %#x => data %#x\n", reg, ret);
 
@@ -46,10 +45,9 @@ static int __stmpe_reg_write(struct stmpe *stmpe, u8 reg, u8 val)
 
 	dev_vdbg(stmpe->dev, "wr: reg %#x <= %#x\n", reg, val);
 
-	ret = i2c_smbus_write_byte_data(stmpe->i2c, reg, val);
+	ret = stmpe->ci->write_byte(stmpe, reg, val);
 	if (ret < 0)
-		dev_err(stmpe->dev, "failed to write reg %#x: %d\n",
-			reg, ret);
+		dev_err(stmpe->dev, "failed to write reg %#x: %d\n", reg, ret);
 
 	return ret;
 }
@@ -73,10 +71,9 @@ static int __stmpe_block_read(struct stmpe *stmpe, u8 reg, u8 length,
 {
 	int ret;
 
-	ret = i2c_smbus_read_i2c_block_data(stmpe->i2c, reg, length, values);
+	ret = stmpe->ci->read_block(stmpe, reg, length, values);
 	if (ret < 0)
-		dev_err(stmpe->dev, "failed to read regs %#x: %d\n",
-			reg, ret);
+		dev_err(stmpe->dev, "failed to read regs %#x: %d\n", reg, ret);
 
 	dev_vdbg(stmpe->dev, "rd: reg %#x (%d) => ret %#x\n", reg, length, ret);
 	stmpe_dump_bytes("stmpe rd: ", values, length);
@@ -92,11 +89,9 @@ static int __stmpe_block_write(struct stmpe *stmpe, u8 reg, u8 length,
 	dev_vdbg(stmpe->dev, "wr: regs %#x (%d)\n", reg, length);
 	stmpe_dump_bytes("stmpe wr: ", values, length);
 
-	ret = i2c_smbus_write_i2c_block_data(stmpe->i2c, reg, length,
-					     values);
+	ret = stmpe->ci->write_block(stmpe, reg, length, values);
 	if (ret < 0)
-		dev_err(stmpe->dev, "failed to write regs %#x: %d\n",
-			reg, ret);
+		dev_err(stmpe->dev, "failed to write regs %#x: %d\n", reg, ret);
 
 	return ret;
 }
@@ -874,34 +869,10 @@ static int __devinit stmpe_devices_init(struct stmpe *stmpe)
 	return ret;
 }
 
-#ifdef CONFIG_PM
-static int stmpe_suspend(struct device *dev)
-{
-	struct i2c_client *i2c = to_i2c_client(dev);
-	struct stmpe *stmpe = i2c_get_clientdata(i2c);
-
-	if (device_may_wakeup(&i2c->dev))
-		enable_irq_wake(stmpe->irq);
-
-	return 0;
-}
-
-static int stmpe_resume(struct device *dev)
+/* Called from client specific probe routines */
+int stmpe_probe(struct stmpe_client_info *ci, int partnum)
 {
-	struct i2c_client *i2c = to_i2c_client(dev);
-	struct stmpe *stmpe = i2c_get_clientdata(i2c);
-
-	if (device_may_wakeup(&i2c->dev))
-		disable_irq_wake(stmpe->irq);
-
-	return 0;
-}
-#endif
-
-static int __devinit stmpe_probe(struct i2c_client *i2c,
-				 const struct i2c_device_id *id)
-{
-	struct stmpe_platform_data *pdata = i2c->dev.platform_data;
+	struct stmpe_platform_data *pdata = dev_get_platdata(ci->dev);
 	struct stmpe *stmpe;
 	int ret;
 
@@ -915,18 +886,19 @@ static int __devinit stmpe_probe(struct i2c_client *i2c,
 	mutex_init(&stmpe->irq_lock);
 	mutex_init(&stmpe->lock);
 
-	stmpe->dev = &i2c->dev;
-	stmpe->i2c = i2c;
-
+	stmpe->dev = ci->dev;
+	stmpe->client = ci->client;
 	stmpe->pdata = pdata;
 	stmpe->irq_base = pdata->irq_base;
-
-	stmpe->partnum = id->driver_data;
-	stmpe->variant = stmpe_variant_info[stmpe->partnum];
+	stmpe->ci = ci;
+	stmpe->partnum = partnum;
+	stmpe->variant = stmpe_variant_info[partnum];
 	stmpe->regs = stmpe->variant->regs;
 	stmpe->num_gpios = stmpe->variant->num_gpios;
+	dev_set_drvdata(stmpe->dev, stmpe);
 
-	i2c_set_clientdata(i2c, stmpe);
+	if (ci->init)
+		ci->init(stmpe);
 
 	if (pdata->irq_over_gpio) {
 		ret = gpio_request_one(pdata->irq_gpio, GPIOF_DIR_IN, "stmpe");
@@ -938,7 +910,7 @@ static int __devinit stmpe_probe(struct i2c_client *i2c,
 
 		stmpe->irq = gpio_to_irq(pdata->irq_gpio);
 	} else {
-		stmpe->irq = i2c->irq;
+		stmpe->irq = ci->irq;
 	}
 
 	ret = stmpe_chip_init(stmpe);
@@ -950,8 +922,7 @@ static int __devinit stmpe_probe(struct i2c_client *i2c,
 		goto free_gpio;
 
 	ret = request_threaded_irq(stmpe->irq, NULL, stmpe_irq,
-				   pdata->irq_trigger | IRQF_ONESHOT,
-				   "stmpe", stmpe);
+			pdata->irq_trigger | IRQF_ONESHOT, "stmpe", stmpe);
 	if (ret) {
 		dev_err(stmpe->dev, "failed to request IRQ: %d\n", ret);
 		goto out_removeirq;
@@ -978,10 +949,8 @@ out_free:
 	return ret;
 }
 
-static int __devexit stmpe_remove(struct i2c_client *client)
+int stmpe_remove(struct stmpe *stmpe)
 {
-	struct stmpe *stmpe = i2c_get_clientdata(client);
-
 	mfd_remove_devices(stmpe->dev);
 
 	free_irq(stmpe->irq, stmpe);
@@ -995,45 +964,29 @@ static int __devexit stmpe_remove(struct i2c_client *client)
 	return 0;
 }
 
-static const struct i2c_device_id stmpe_id[] = {
-	{ "stmpe811", STMPE811 },
-	{ "stmpe1601", STMPE1601 },
-	{ "stmpe2401", STMPE2401 },
-	{ "stmpe2403", STMPE2403 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, stmpe_id);
-
 #ifdef CONFIG_PM
-static const struct dev_pm_ops stmpe_dev_pm_ops = {
-	.suspend	= stmpe_suspend,
-	.resume		= stmpe_resume,
-};
-#endif
+static int stmpe_suspend(struct device *dev)
+{
+	struct stmpe *stmpe = dev_get_drvdata(dev);
 
-static struct i2c_driver stmpe_driver = {
-	.driver.name	= "stmpe",
-	.driver.owner	= THIS_MODULE,
-#ifdef CONFIG_PM
-	.driver.pm	= &stmpe_dev_pm_ops,
-#endif
-	.probe		= stmpe_probe,
-	.remove		= __devexit_p(stmpe_remove),
-	.id_table	= stmpe_id,
-};
+	if (device_may_wakeup(dev))
+		enable_irq_wake(stmpe->irq);
 
-static int __init stmpe_init(void)
-{
-	return i2c_add_driver(&stmpe_driver);
+	return 0;
 }
-subsys_initcall(stmpe_init);
 
-static void __exit stmpe_exit(void)
+static int stmpe_resume(struct device *dev)
 {
-	i2c_del_driver(&stmpe_driver);
+	struct stmpe *stmpe = dev_get_drvdata(dev);
+
+	if (device_may_wakeup(dev))
+		disable_irq_wake(stmpe->irq);
+
+	return 0;
 }
-module_exit(stmpe_exit);
 
-MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("STMPE MFD core driver");
-MODULE_AUTHOR("Rabin Vincent <rabin.vincent@stericsson.com>");
+const struct dev_pm_ops stmpe_dev_pm_ops = {
+	.suspend	= stmpe_suspend,
+	.resume		= stmpe_resume,
+};
+#endif
diff --git a/drivers/mfd/stmpe.h b/drivers/mfd/stmpe.h
index e4ee38956583..18d89a68ce40 100644
--- a/drivers/mfd/stmpe.h
+++ b/drivers/mfd/stmpe.h
@@ -8,6 +8,14 @@
 #ifndef __STMPE_H
 #define __STMPE_H
 
+#include <linux/device.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/stmpe.h>
+#include <linux/printk.h>
+#include <linux/types.h>
+
+extern const struct dev_pm_ops stmpe_dev_pm_ops;
+
 #ifdef STMPE_DUMP_BYTES
 static inline void stmpe_dump_bytes(const char *str, const void *buf,
 				    size_t len)
@@ -67,6 +75,31 @@ struct stmpe_variant_info {
 	int (*enable_autosleep)(struct stmpe *stmpe, int autosleep_timeout);
 };
 
+/**
+ * struct stmpe_client_info - i2c or spi specific routines/info
+ * @data: client specific data
+ * @read_byte: read single byte
+ * @write_byte: write single byte
+ * @read_block: read block or multiple bytes
+ * @write_block: write block or multiple bytes
+ * @init: client init routine, called during probe
+ */
+struct stmpe_client_info {
+	void *data;
+	int irq;
+	void *client;
+	struct device *dev;
+	int (*read_byte)(struct stmpe *stmpe, u8 reg);
+	int (*write_byte)(struct stmpe *stmpe, u8 reg, u8 val);
+	int (*read_block)(struct stmpe *stmpe, u8 reg, u8 len, u8 *values);
+	int (*write_block)(struct stmpe *stmpe, u8 reg, u8 len,
+			const u8 *values);
+	void (*init)(struct stmpe *stmpe);
+};
+
+int stmpe_probe(struct stmpe_client_info *ci, int partnum);
+int stmpe_remove(struct stmpe *stmpe);
+
 #define STMPE_ICR_LSB_HIGH	(1 << 2)
 #define STMPE_ICR_LSB_EDGE	(1 << 1)
 #define STMPE_ICR_LSB_GIM	(1 << 0)
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index 270d6613aadf..babc6b2857d3 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -50,13 +50,15 @@ enum {
 
 
 struct stmpe_variant_info;
+struct stmpe_client_info;
 
 /**
  * struct stmpe - STMPE MFD structure
  * @lock: lock protecting I/O operations
  * @irq_lock: IRQ bus lock
  * @dev: device, mostly for dev_dbg()
- * @i2c: i2c client
+ * @client: client - i2c or spi
+ * @ci: client specific information
  * @partnum: part number
  * @variant: the detected STMPE model number
  * @regs: list of addresses of registers which are at different addresses on
@@ -72,7 +74,8 @@ struct stmpe {
 	struct mutex lock;
 	struct mutex irq_lock;
 	struct device *dev;
-	struct i2c_client *i2c;
+	void *client;
+	struct stmpe_client_info *ci;
 	enum stmpe_partnum partnum;
 	struct stmpe_variant_info *variant;
 	const u8 *regs;
-- 
cgit v1.2.3


From 1cda2394e95415f1469ab8eaffd081395e112551 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@st.com>
Date: Thu, 17 Nov 2011 11:02:22 +0530
Subject: mfd: Add support for stmpe variant 610

STMPE610 is very much like STMPE811, except the number of gpio pins, which is 8
in 811 and 6 in 610. This patch adds support for variant 610. STMPE610 will
share most of the code with STMPE811.

Signed-off-by: Viresh Kumar <viresh.kumar@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/stmpe-i2c.c   |  1 +
 drivers/mfd/stmpe-spi.c   |  1 +
 drivers/mfd/stmpe.c       | 20 ++++++++++++++++++--
 include/linux/mfd/stmpe.h |  1 +
 4 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c
index 0a4365902e36..b11d33b1c892 100644
--- a/drivers/mfd/stmpe-i2c.c
+++ b/drivers/mfd/stmpe-i2c.c
@@ -71,6 +71,7 @@ static int __devexit stmpe_i2c_remove(struct i2c_client *i2c)
 }
 
 static const struct i2c_device_id stmpe_i2c_id[] = {
+	{ "stmpe610", STMPE610 },
 	{ "stmpe811", STMPE811 },
 	{ "stmpe1601", STMPE1601 },
 	{ "stmpe2401", STMPE2401 },
diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c
index 53efce4fe294..46963a5d569f 100644
--- a/drivers/mfd/stmpe-spi.c
+++ b/drivers/mfd/stmpe-spi.c
@@ -109,6 +109,7 @@ static int __devexit stmpe_spi_remove(struct spi_device *spi)
 }
 
 static const struct spi_device_id stmpe_spi_id[] = {
+	{ "stmpe610", STMPE610 },
 	{ "stmpe811", STMPE811 },
 	{ "stmpe1601", STMPE1601 },
 	{ "stmpe2401", STMPE2401 },
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 83bacde6a7cb..67ff3dc5bb45 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -321,7 +321,7 @@ static struct mfd_cell stmpe_keypad_cell = {
 };
 
 /*
- * Touchscreen (STMPE811)
+ * Touchscreen (STMPE811 or STMPE610)
  */
 
 static struct resource stmpe_ts_resources[] = {
@@ -346,7 +346,7 @@ static struct mfd_cell stmpe_ts_cell = {
 };
 
 /*
- * STMPE811
+ * STMPE811 or STMPE610
  */
 
 static const u8 stmpe811_regs[] = {
@@ -417,6 +417,21 @@ static struct stmpe_variant_info stmpe811 = {
 	.get_altfunc	= stmpe811_get_altfunc,
 };
 
+/* Similar to 811, except number of gpios */
+static struct stmpe_variant_info stmpe610 = {
+	.name		= "stmpe610",
+	.id_val		= 0x0811,
+	.id_mask	= 0xffff,
+	.num_gpios	= 6,
+	.af_bits	= 1,
+	.regs		= stmpe811_regs,
+	.blocks		= stmpe811_blocks,
+	.num_blocks	= ARRAY_SIZE(stmpe811_blocks),
+	.num_irqs	= STMPE811_NR_INTERNAL_IRQS,
+	.enable		= stmpe811_enable,
+	.get_altfunc	= stmpe811_get_altfunc,
+};
+
 /*
  * STMPE1601
  */
@@ -651,6 +666,7 @@ static struct stmpe_variant_info stmpe2403 = {
 };
 
 static struct stmpe_variant_info *stmpe_variant_info[] = {
+	[STMPE610]	= &stmpe610,
 	[STMPE811]	= &stmpe811,
 	[STMPE1601]	= &stmpe1601,
 	[STMPE2401]	= &stmpe2401,
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index babc6b2857d3..342005afd347 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -20,6 +20,7 @@ enum stmpe_block {
 };
 
 enum stmpe_partnum {
+	STMPE610,
 	STMPE811,
 	STMPE1601,
 	STMPE2401,
-- 
cgit v1.2.3


From 7f7f4ea15ef4645f3888310a7a761fc2c4f689c9 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@st.com>
Date: Thu, 17 Nov 2011 11:02:23 +0530
Subject: mfd: Add support for stmpe variant 801

STMPE801 is a GPIO expander. Registers for 801 are much different from other
variants. This patch adds support for STMPE801 in stmpe mfd driver.

Signed-off-by: Bhupesh Sharma <bhupesh.sharma@st.com>
Signed-off-by: Pratyush Anand <pratyush.anand@st.com>
Signed-off-by: Viresh Kumar <viresh.kumar@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/stmpe-i2c.c   |  1 +
 drivers/mfd/stmpe-spi.c   |  1 +
 drivers/mfd/stmpe.c       | 97 ++++++++++++++++++++++++++++++++++++++++-------
 drivers/mfd/stmpe.h       | 19 ++++++++++
 include/linux/mfd/stmpe.h |  1 +
 5 files changed, 106 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c
index b11d33b1c892..373f423b1181 100644
--- a/drivers/mfd/stmpe-i2c.c
+++ b/drivers/mfd/stmpe-i2c.c
@@ -72,6 +72,7 @@ static int __devexit stmpe_i2c_remove(struct i2c_client *i2c)
 
 static const struct i2c_device_id stmpe_i2c_id[] = {
 	{ "stmpe610", STMPE610 },
+	{ "stmpe801", STMPE801 },
 	{ "stmpe811", STMPE811 },
 	{ "stmpe1601", STMPE1601 },
 	{ "stmpe2401", STMPE2401 },
diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c
index 46963a5d569f..b58c43c7ea93 100644
--- a/drivers/mfd/stmpe-spi.c
+++ b/drivers/mfd/stmpe-spi.c
@@ -110,6 +110,7 @@ static int __devexit stmpe_spi_remove(struct spi_device *spi)
 
 static const struct spi_device_id stmpe_spi_id[] = {
 	{ "stmpe610", STMPE610 },
+	{ "stmpe801", STMPE801 },
 	{ "stmpe811", STMPE811 },
 	{ "stmpe1601", STMPE1601 },
 	{ "stmpe2401", STMPE2401 },
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 67ff3dc5bb45..fc2c6afb31e1 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -241,12 +241,14 @@ int stmpe_set_altfunc(struct stmpe *stmpe, u32 pins, enum stmpe_block block)
 	u8 regaddr = stmpe->regs[STMPE_IDX_GPAFR_U_MSB];
 	int af_bits = variant->af_bits;
 	int numregs = DIV_ROUND_UP(stmpe->num_gpios * af_bits, 8);
-	int afperreg = 8 / af_bits;
 	int mask = (1 << af_bits) - 1;
 	u8 regs[numregs];
-	int af;
-	int ret;
+	int af, afperreg, ret;
+
+	if (!variant->get_altfunc)
+		return 0;
 
+	afperreg = 8 / af_bits;
 	mutex_lock(&stmpe->lock);
 
 	ret = __stmpe_enable(stmpe, STMPE_BLOCK_GPIO);
@@ -320,6 +322,50 @@ static struct mfd_cell stmpe_keypad_cell = {
 	.num_resources	= ARRAY_SIZE(stmpe_keypad_resources),
 };
 
+/*
+ * STMPE801
+ */
+static const u8 stmpe801_regs[] = {
+	[STMPE_IDX_CHIP_ID]	= STMPE801_REG_CHIP_ID,
+	[STMPE_IDX_ICR_LSB]	= STMPE801_REG_SYS_CTRL,
+	[STMPE_IDX_GPMR_LSB]	= STMPE801_REG_GPIO_MP_STA,
+	[STMPE_IDX_GPSR_LSB]	= STMPE801_REG_GPIO_SET_PIN,
+	[STMPE_IDX_GPCR_LSB]	= STMPE801_REG_GPIO_SET_PIN,
+	[STMPE_IDX_GPDR_LSB]	= STMPE801_REG_GPIO_DIR,
+	[STMPE_IDX_IEGPIOR_LSB] = STMPE801_REG_GPIO_INT_EN,
+	[STMPE_IDX_ISGPIOR_MSB] = STMPE801_REG_GPIO_INT_STA,
+
+};
+
+static struct stmpe_variant_block stmpe801_blocks[] = {
+	{
+		.cell	= &stmpe_gpio_cell,
+		.irq	= 0,
+		.block	= STMPE_BLOCK_GPIO,
+	},
+};
+
+static int stmpe801_enable(struct stmpe *stmpe, unsigned int blocks,
+			   bool enable)
+{
+	if (blocks & STMPE_BLOCK_GPIO)
+		return 0;
+	else
+		return -EINVAL;
+}
+
+static struct stmpe_variant_info stmpe801 = {
+	.name		= "stmpe801",
+	.id_val		= STMPE801_ID,
+	.id_mask	= 0xffff,
+	.num_gpios	= 8,
+	.regs		= stmpe801_regs,
+	.blocks		= stmpe801_blocks,
+	.num_blocks	= ARRAY_SIZE(stmpe801_blocks),
+	.num_irqs	= STMPE801_NR_INTERNAL_IRQS,
+	.enable		= stmpe801_enable,
+};
+
 /*
  * Touchscreen (STMPE811 or STMPE610)
  */
@@ -667,6 +713,7 @@ static struct stmpe_variant_info stmpe2403 = {
 
 static struct stmpe_variant_info *stmpe_variant_info[] = {
 	[STMPE610]	= &stmpe610,
+	[STMPE801]	= &stmpe801,
 	[STMPE811]	= &stmpe811,
 	[STMPE1601]	= &stmpe1601,
 	[STMPE2401]	= &stmpe2401,
@@ -683,6 +730,11 @@ static irqreturn_t stmpe_irq(int irq, void *data)
 	int ret;
 	int i;
 
+	if (variant->id_val == STMPE801_ID) {
+		handle_nested_irq(stmpe->irq_base);
+		return IRQ_HANDLED;
+	}
+
 	ret = stmpe_block_read(stmpe, israddr, num, isr);
 	if (ret < 0)
 		return IRQ_NONE;
@@ -769,14 +821,17 @@ static struct irq_chip stmpe_irq_chip = {
 
 static int __devinit stmpe_irq_init(struct stmpe *stmpe)
 {
+	struct irq_chip *chip = NULL;
 	int num_irqs = stmpe->variant->num_irqs;
 	int base = stmpe->irq_base;
 	int irq;
 
+	if (stmpe->variant->id_val != STMPE801_ID)
+		chip = &stmpe_irq_chip;
+
 	for (irq = base; irq < base + num_irqs; irq++) {
 		irq_set_chip_data(irq, stmpe);
-		irq_set_chip_and_handler(irq, &stmpe_irq_chip,
-					 handle_edge_irq);
+		irq_set_chip_and_handler(irq, chip, handle_edge_irq);
 		irq_set_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, IRQF_VALID);
@@ -808,7 +863,7 @@ static int __devinit stmpe_chip_init(struct stmpe *stmpe)
 	unsigned int irq_trigger = stmpe->pdata->irq_trigger;
 	int autosleep_timeout = stmpe->pdata->autosleep_timeout;
 	struct stmpe_variant_info *variant = stmpe->variant;
-	u8 icr = STMPE_ICR_LSB_GIM;
+	u8 icr;
 	unsigned int id;
 	u8 data[2];
 	int ret;
@@ -831,16 +886,32 @@ static int __devinit stmpe_chip_init(struct stmpe *stmpe)
 	if (ret)
 		return ret;
 
-	if (irq_trigger == IRQF_TRIGGER_FALLING ||
-	    irq_trigger == IRQF_TRIGGER_RISING)
-		icr |= STMPE_ICR_LSB_EDGE;
+	if (id == STMPE801_ID)
+		icr = STMPE801_REG_SYS_CTRL_INT_EN;
+	else
+		icr = STMPE_ICR_LSB_GIM;
+
+	/* STMPE801 doesn't support Edge interrupts */
+	if (id != STMPE801_ID) {
+		if (irq_trigger == IRQF_TRIGGER_FALLING ||
+				irq_trigger == IRQF_TRIGGER_RISING)
+			icr |= STMPE_ICR_LSB_EDGE;
+	}
 
 	if (irq_trigger == IRQF_TRIGGER_RISING ||
-	    irq_trigger == IRQF_TRIGGER_HIGH)
-		icr |= STMPE_ICR_LSB_HIGH;
+			irq_trigger == IRQF_TRIGGER_HIGH) {
+		if (id == STMPE801_ID)
+			icr |= STMPE801_REG_SYS_CTRL_INT_HI;
+		else
+			icr |= STMPE_ICR_LSB_HIGH;
+	}
 
-	if (stmpe->pdata->irq_invert_polarity)
-		icr ^= STMPE_ICR_LSB_HIGH;
+	if (stmpe->pdata->irq_invert_polarity) {
+		if (id == STMPE801_ID)
+			icr ^= STMPE801_REG_SYS_CTRL_INT_HI;
+		else
+			icr ^= STMPE_ICR_LSB_HIGH;
+	}
 
 	if (stmpe->pdata->autosleep) {
 		ret = stmpe_autosleep(stmpe, autosleep_timeout);
diff --git a/drivers/mfd/stmpe.h b/drivers/mfd/stmpe.h
index a73f4c1085f2..7b8e13f5b764 100644
--- a/drivers/mfd/stmpe.h
+++ b/drivers/mfd/stmpe.h
@@ -104,6 +104,25 @@ int stmpe_remove(struct stmpe *stmpe);
 #define STMPE_ICR_LSB_EDGE	(1 << 1)
 #define STMPE_ICR_LSB_GIM	(1 << 0)
 
+/*
+ * STMPE801
+ */
+#define STMPE801_ID			0x0108
+#define STMPE801_NR_INTERNAL_IRQS	1
+
+#define STMPE801_REG_CHIP_ID		0x00
+#define STMPE801_REG_VERSION_ID		0x02
+#define STMPE801_REG_SYS_CTRL		0x04
+#define STMPE801_REG_GPIO_INT_EN	0x08
+#define STMPE801_REG_GPIO_INT_STA	0x09
+#define STMPE801_REG_GPIO_MP_STA	0x10
+#define STMPE801_REG_GPIO_SET_PIN	0x11
+#define STMPE801_REG_GPIO_DIR		0x12
+
+#define STMPE801_REG_SYS_CTRL_RESET	(1 << 7)
+#define STMPE801_REG_SYS_CTRL_INT_EN	(1 << 2)
+#define STMPE801_REG_SYS_CTRL_INT_HI	(1 << 0)
+
 /*
  * STMPE811
  */
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index 342005afd347..ca1d7a347600 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -21,6 +21,7 @@ enum stmpe_block {
 
 enum stmpe_partnum {
 	STMPE610,
+	STMPE801,
 	STMPE811,
 	STMPE1601,
 	STMPE2401,
-- 
cgit v1.2.3


From 0f5f70783eddde2bd277ae521fa04226cb1e249d Mon Sep 17 00:00:00 2001
From: Sangbeom Kim <sbkim73@samsung.com>
Date: Fri, 23 Dec 2011 17:28:08 +0900
Subject: mfd: Add S5M core driver

S5M series are pmic including mutiple functional devices.
It can support PMIC, RTC, Battery charger, codec.
This patch implement core driver for s5m series.

Signed-off-by: Sangbeom Kim <sbkim73@samsung.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/s5m-core.c               | 176 +++++++++++++++++
 include/linux/mfd/s5m87xx/s5m-core.h | 373 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/s5m87xx/s5m-pmic.h | 100 ++++++++++
 include/linux/mfd/s5m87xx/s5m-rtc.h  |  84 ++++++++
 4 files changed, 733 insertions(+)
 create mode 100644 drivers/mfd/s5m-core.c
 create mode 100644 include/linux/mfd/s5m87xx/s5m-core.h
 create mode 100644 include/linux/mfd/s5m87xx/s5m-pmic.h
 create mode 100644 include/linux/mfd/s5m87xx/s5m-rtc.h

(limited to 'include/linux')

diff --git a/drivers/mfd/s5m-core.c b/drivers/mfd/s5m-core.c
new file mode 100644
index 000000000000..e075c113eec6
--- /dev/null
+++ b/drivers/mfd/s5m-core.c
@@ -0,0 +1,176 @@
+/*
+ * s5m87xx.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd
+ *              http://www.samsung.com
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
+#include <linux/mutex.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/s5m87xx/s5m-core.h>
+#include <linux/mfd/s5m87xx/s5m-pmic.h>
+#include <linux/mfd/s5m87xx/s5m-rtc.h>
+#include <linux/regmap.h>
+
+static struct mfd_cell s5m87xx_devs[] = {
+	{
+		.name = "s5m8767-pmic",
+	}, {
+		.name = "s5m-rtc",
+	},
+};
+
+int s5m_reg_read(struct s5m87xx_dev *s5m87xx, u8 reg, void *dest)
+{
+	return regmap_read(s5m87xx->regmap, reg, dest);
+}
+EXPORT_SYMBOL_GPL(s5m_reg_read);
+
+int s5m_bulk_read(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf)
+{
+	return regmap_bulk_read(s5m87xx->regmap, reg, buf, count);;
+}
+EXPORT_SYMBOL_GPL(s5m_bulk_read);
+
+int s5m_reg_write(struct s5m87xx_dev *s5m87xx, u8 reg, u8 value)
+{
+	return regmap_write(s5m87xx->regmap, reg, value);
+}
+EXPORT_SYMBOL_GPL(s5m_reg_write);
+
+int s5m_bulk_write(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf)
+{
+	return regmap_raw_write(s5m87xx->regmap, reg, buf, count * sizeof(u16));
+}
+EXPORT_SYMBOL_GPL(s5m_bulk_write);
+
+int s5m_reg_update(struct s5m87xx_dev *s5m87xx, u8 reg, u8 val, u8 mask)
+{
+	return regmap_update_bits(s5m87xx->regmap, reg, mask, val);
+}
+EXPORT_SYMBOL_GPL(s5m_reg_update);
+
+static struct regmap_config s5m_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
+
+static int s5m87xx_i2c_probe(struct i2c_client *i2c,
+			    const struct i2c_device_id *id)
+{
+	struct s5m_platform_data *pdata = i2c->dev.platform_data;
+	struct s5m87xx_dev *s5m87xx;
+	int ret = 0;
+	int error;
+
+	s5m87xx = kzalloc(sizeof(struct s5m87xx_dev), GFP_KERNEL);
+	if (s5m87xx == NULL)
+		return -ENOMEM;
+
+	i2c_set_clientdata(i2c, s5m87xx);
+	s5m87xx->dev = &i2c->dev;
+	s5m87xx->i2c = i2c;
+	s5m87xx->irq = i2c->irq;
+	s5m87xx->type = id->driver_data;
+
+	if (pdata) {
+		s5m87xx->device_type = pdata->device_type;
+		s5m87xx->ono = pdata->ono;
+		s5m87xx->irq_base = pdata->irq_base;
+		s5m87xx->wakeup = pdata->wakeup;
+	}
+
+	s5m87xx->regmap = regmap_init_i2c(i2c, &s5m_regmap_config);
+	if (IS_ERR(s5m87xx->regmap)) {
+		error = PTR_ERR(s5m87xx->regmap);
+		dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
+			error);
+		goto err;
+	}
+
+	s5m87xx->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR);
+	i2c_set_clientdata(s5m87xx->rtc, s5m87xx);
+
+	if (pdata->cfg_pmic_irq)
+		pdata->cfg_pmic_irq();
+
+	s5m_irq_init(s5m87xx);
+
+	pm_runtime_set_active(s5m87xx->dev);
+
+	ret = mfd_add_devices(s5m87xx->dev, -1,
+				s5m87xx_devs, ARRAY_SIZE(s5m87xx_devs),
+				NULL, 0);
+
+	if (ret < 0)
+		goto err;
+
+	return ret;
+
+err:
+	mfd_remove_devices(s5m87xx->dev);
+	s5m_irq_exit(s5m87xx);
+	i2c_unregister_device(s5m87xx->rtc);
+	regmap_exit(s5m87xx->regmap);
+	kfree(s5m87xx);
+	return ret;
+}
+
+static int s5m87xx_i2c_remove(struct i2c_client *i2c)
+{
+	struct s5m87xx_dev *s5m87xx = i2c_get_clientdata(i2c);
+
+	mfd_remove_devices(s5m87xx->dev);
+	s5m_irq_exit(s5m87xx);
+	i2c_unregister_device(s5m87xx->rtc);
+	regmap_exit(s5m87xx->regmap);
+	kfree(s5m87xx);
+	return 0;
+}
+
+static const struct i2c_device_id s5m87xx_i2c_id[] = {
+	{ "s5m87xx", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, s5m87xx_i2c_id);
+
+static struct i2c_driver s5m87xx_i2c_driver = {
+	.driver = {
+		   .name = "s5m87xx",
+		   .owner = THIS_MODULE,
+	},
+	.probe = s5m87xx_i2c_probe,
+	.remove = s5m87xx_i2c_remove,
+	.id_table = s5m87xx_i2c_id,
+};
+
+static int __init s5m87xx_i2c_init(void)
+{
+	return i2c_add_driver(&s5m87xx_i2c_driver);
+}
+
+subsys_initcall(s5m87xx_i2c_init);
+
+static void __exit s5m87xx_i2c_exit(void)
+{
+	i2c_del_driver(&s5m87xx_i2c_driver);
+}
+module_exit(s5m87xx_i2c_exit);
+
+MODULE_AUTHOR("Sangbeom Kim <sbkim73@samsung.com>");
+MODULE_DESCRIPTION("Core support for the S5M MFD");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/s5m87xx/s5m-core.h b/include/linux/mfd/s5m87xx/s5m-core.h
new file mode 100644
index 000000000000..a7480b57f92d
--- /dev/null
+++ b/include/linux/mfd/s5m87xx/s5m-core.h
@@ -0,0 +1,373 @@
+/*
+ * s5m-core.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd
+ *              http://www.samsung.com
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __LINUX_MFD_S5M_CORE_H
+#define __LINUX_MFD_S5M_CORE_H
+
+#define NUM_IRQ_REGS	4
+
+enum s5m_device_type {
+	S5M8751X,
+	S5M8763X,
+	S5M8767X,
+};
+
+/* S5M8767 registers */
+enum s5m8767_reg {
+	S5M8767_REG_ID,
+	S5M8767_REG_INT1,
+	S5M8767_REG_INT2,
+	S5M8767_REG_INT3,
+	S5M8767_REG_INT1M,
+	S5M8767_REG_INT2M,
+	S5M8767_REG_INT3M,
+	S5M8767_REG_STATUS1,
+	S5M8767_REG_STATUS2,
+	S5M8767_REG_STATUS3,
+	S5M8767_REG_CTRL1,
+	S5M8767_REG_CTRL2,
+	S5M8767_REG_LOWBAT1,
+	S5M8767_REG_LOWBAT2,
+	S5M8767_REG_BUCHG,
+	S5M8767_REG_DVSRAMP,
+	S5M8767_REG_DVSTIMER2 = 0x10,
+	S5M8767_REG_DVSTIMER3,
+	S5M8767_REG_DVSTIMER4,
+	S5M8767_REG_LDO1,
+	S5M8767_REG_LDO2,
+	S5M8767_REG_LDO3,
+	S5M8767_REG_LDO4,
+	S5M8767_REG_LDO5,
+	S5M8767_REG_LDO6,
+	S5M8767_REG_LDO7,
+	S5M8767_REG_LDO8,
+	S5M8767_REG_LDO9,
+	S5M8767_REG_LDO10,
+	S5M8767_REG_LDO11,
+	S5M8767_REG_LDO12,
+	S5M8767_REG_LDO13,
+	S5M8767_REG_LDO14 = 0x20,
+	S5M8767_REG_LDO15,
+	S5M8767_REG_LDO16,
+	S5M8767_REG_LDO17,
+	S5M8767_REG_LDO18,
+	S5M8767_REG_LDO19,
+	S5M8767_REG_LDO20,
+	S5M8767_REG_LDO21,
+	S5M8767_REG_LDO22,
+	S5M8767_REG_LDO23,
+	S5M8767_REG_LDO24,
+	S5M8767_REG_LDO25,
+	S5M8767_REG_LDO26,
+	S5M8767_REG_LDO27,
+	S5M8767_REG_LDO28,
+	S5M8767_REG_UVLO = 0x31,
+	S5M8767_REG_BUCK1CTRL1,
+	S5M8767_REG_BUCK1CTRL2,
+	S5M8767_REG_BUCK2CTRL,
+	S5M8767_REG_BUCK2DVS1,
+	S5M8767_REG_BUCK2DVS2,
+	S5M8767_REG_BUCK2DVS3,
+	S5M8767_REG_BUCK2DVS4,
+	S5M8767_REG_BUCK2DVS5,
+	S5M8767_REG_BUCK2DVS6,
+	S5M8767_REG_BUCK2DVS7,
+	S5M8767_REG_BUCK2DVS8,
+	S5M8767_REG_BUCK3CTRL,
+	S5M8767_REG_BUCK3DVS1,
+	S5M8767_REG_BUCK3DVS2,
+	S5M8767_REG_BUCK3DVS3,
+	S5M8767_REG_BUCK3DVS4,
+	S5M8767_REG_BUCK3DVS5,
+	S5M8767_REG_BUCK3DVS6,
+	S5M8767_REG_BUCK3DVS7,
+	S5M8767_REG_BUCK3DVS8,
+	S5M8767_REG_BUCK4CTRL,
+	S5M8767_REG_BUCK4DVS1,
+	S5M8767_REG_BUCK4DVS2,
+	S5M8767_REG_BUCK4DVS3,
+	S5M8767_REG_BUCK4DVS4,
+	S5M8767_REG_BUCK4DVS5,
+	S5M8767_REG_BUCK4DVS6,
+	S5M8767_REG_BUCK4DVS7,
+	S5M8767_REG_BUCK4DVS8,
+	S5M8767_REG_BUCK5CTRL1,
+	S5M8767_REG_BUCK5CTRL2,
+	S5M8767_REG_BUCK5CTRL3,
+	S5M8767_REG_BUCK5CTRL4,
+	S5M8767_REG_BUCK5CTRL5,
+	S5M8767_REG_BUCK6CTRL1,
+	S5M8767_REG_BUCK6CTRL2,
+	S5M8767_REG_BUCK7CTRL1,
+	S5M8767_REG_BUCK7CTRL2,
+	S5M8767_REG_BUCK8CTRL1,
+	S5M8767_REG_BUCK8CTRL2,
+	S5M8767_REG_BUCK9CTRL1,
+	S5M8767_REG_BUCK9CTRL2,
+	S5M8767_REG_LDO1CTRL,
+	S5M8767_REG_LDO2_1CTRL,
+	S5M8767_REG_LDO2_2CTRL,
+	S5M8767_REG_LDO2_3CTRL,
+	S5M8767_REG_LDO2_4CTRL,
+	S5M8767_REG_LDO3CTRL,
+	S5M8767_REG_LDO4CTRL,
+	S5M8767_REG_LDO5CTRL,
+	S5M8767_REG_LDO6CTRL,
+	S5M8767_REG_LDO7CTRL,
+	S5M8767_REG_LDO8CTRL,
+	S5M8767_REG_LDO9CTRL,
+	S5M8767_REG_LDO10CTRL,
+	S5M8767_REG_LDO11CTRL,
+	S5M8767_REG_LDO12CTRL,
+	S5M8767_REG_LDO13CTRL,
+	S5M8767_REG_LDO14CTRL,
+	S5M8767_REG_LDO15CTRL,
+	S5M8767_REG_LDO16CTRL,
+	S5M8767_REG_LDO17CTRL,
+	S5M8767_REG_LDO18CTRL,
+	S5M8767_REG_LDO19CTRL,
+	S5M8767_REG_LDO20CTRL,
+	S5M8767_REG_LDO21CTRL,
+	S5M8767_REG_LDO22CTRL,
+	S5M8767_REG_LDO23CTRL,
+	S5M8767_REG_LDO24CTRL,
+	S5M8767_REG_LDO25CTRL,
+	S5M8767_REG_LDO26CTRL,
+	S5M8767_REG_LDO27CTRL,
+	S5M8767_REG_LDO28CTRL,
+};
+
+/* S5M8763 registers */
+enum s5m8763_reg {
+	S5M8763_REG_IRQ1,
+	S5M8763_REG_IRQ2,
+	S5M8763_REG_IRQ3,
+	S5M8763_REG_IRQ4,
+	S5M8763_REG_IRQM1,
+	S5M8763_REG_IRQM2,
+	S5M8763_REG_IRQM3,
+	S5M8763_REG_IRQM4,
+	S5M8763_REG_STATUS1,
+	S5M8763_REG_STATUS2,
+	S5M8763_REG_STATUSM1,
+	S5M8763_REG_STATUSM2,
+	S5M8763_REG_CHGR1,
+	S5M8763_REG_CHGR2,
+	S5M8763_REG_LDO_ACTIVE_DISCHARGE1,
+	S5M8763_REG_LDO_ACTIVE_DISCHARGE2,
+	S5M8763_REG_BUCK_ACTIVE_DISCHARGE3,
+	S5M8763_REG_ONOFF1,
+	S5M8763_REG_ONOFF2,
+	S5M8763_REG_ONOFF3,
+	S5M8763_REG_ONOFF4,
+	S5M8763_REG_BUCK1_VOLTAGE1,
+	S5M8763_REG_BUCK1_VOLTAGE2,
+	S5M8763_REG_BUCK1_VOLTAGE3,
+	S5M8763_REG_BUCK1_VOLTAGE4,
+	S5M8763_REG_BUCK2_VOLTAGE1,
+	S5M8763_REG_BUCK2_VOLTAGE2,
+	S5M8763_REG_BUCK3,
+	S5M8763_REG_BUCK4,
+	S5M8763_REG_LDO1_LDO2,
+	S5M8763_REG_LDO3,
+	S5M8763_REG_LDO4,
+	S5M8763_REG_LDO5,
+	S5M8763_REG_LDO6,
+	S5M8763_REG_LDO7,
+	S5M8763_REG_LDO7_LDO8,
+	S5M8763_REG_LDO9_LDO10,
+	S5M8763_REG_LDO11,
+	S5M8763_REG_LDO12,
+	S5M8763_REG_LDO13,
+	S5M8763_REG_LDO14,
+	S5M8763_REG_LDO15,
+	S5M8763_REG_LDO16,
+	S5M8763_REG_BKCHR,
+	S5M8763_REG_LBCNFG1,
+	S5M8763_REG_LBCNFG2,
+};
+
+enum s5m8767_irq {
+	S5M8767_IRQ_PWRR,
+	S5M8767_IRQ_PWRF,
+	S5M8767_IRQ_PWR1S,
+	S5M8767_IRQ_JIGR,
+	S5M8767_IRQ_JIGF,
+	S5M8767_IRQ_LOWBAT2,
+	S5M8767_IRQ_LOWBAT1,
+
+	S5M8767_IRQ_MRB,
+	S5M8767_IRQ_DVSOK2,
+	S5M8767_IRQ_DVSOK3,
+	S5M8767_IRQ_DVSOK4,
+
+	S5M8767_IRQ_RTC60S,
+	S5M8767_IRQ_RTCA1,
+	S5M8767_IRQ_RTCA2,
+	S5M8767_IRQ_SMPL,
+	S5M8767_IRQ_RTC1S,
+	S5M8767_IRQ_WTSR,
+
+	S5M8767_IRQ_NR,
+};
+
+#define S5M8767_IRQ_PWRR_MASK		(1 << 0)
+#define S5M8767_IRQ_PWRF_MASK		(1 << 1)
+#define S5M8767_IRQ_PWR1S_MASK		(1 << 3)
+#define S5M8767_IRQ_JIGR_MASK		(1 << 4)
+#define S5M8767_IRQ_JIGF_MASK		(1 << 5)
+#define S5M8767_IRQ_LOWBAT2_MASK	(1 << 6)
+#define S5M8767_IRQ_LOWBAT1_MASK	(1 << 7)
+
+#define S5M8767_IRQ_MRB_MASK		(1 << 2)
+#define S5M8767_IRQ_DVSOK2_MASK		(1 << 3)
+#define S5M8767_IRQ_DVSOK3_MASK		(1 << 4)
+#define S5M8767_IRQ_DVSOK4_MASK		(1 << 5)
+
+#define S5M8767_IRQ_RTC60S_MASK		(1 << 0)
+#define S5M8767_IRQ_RTCA1_MASK		(1 << 1)
+#define S5M8767_IRQ_RTCA2_MASK		(1 << 2)
+#define S5M8767_IRQ_SMPL_MASK		(1 << 3)
+#define S5M8767_IRQ_RTC1S_MASK		(1 << 4)
+#define S5M8767_IRQ_WTSR_MASK		(1 << 5)
+
+enum s5m8763_irq {
+	S5M8763_IRQ_DCINF,
+	S5M8763_IRQ_DCINR,
+	S5M8763_IRQ_JIGF,
+	S5M8763_IRQ_JIGR,
+	S5M8763_IRQ_PWRONF,
+	S5M8763_IRQ_PWRONR,
+
+	S5M8763_IRQ_WTSREVNT,
+	S5M8763_IRQ_SMPLEVNT,
+	S5M8763_IRQ_ALARM1,
+	S5M8763_IRQ_ALARM0,
+
+	S5M8763_IRQ_ONKEY1S,
+	S5M8763_IRQ_TOPOFFR,
+	S5M8763_IRQ_DCINOVPR,
+	S5M8763_IRQ_CHGRSTF,
+	S5M8763_IRQ_DONER,
+	S5M8763_IRQ_CHGFAULT,
+
+	S5M8763_IRQ_LOBAT1,
+	S5M8763_IRQ_LOBAT2,
+
+	S5M8763_IRQ_NR,
+};
+
+#define S5M8763_IRQ_DCINF_MASK		(1 << 2)
+#define S5M8763_IRQ_DCINR_MASK		(1 << 3)
+#define S5M8763_IRQ_JIGF_MASK		(1 << 4)
+#define S5M8763_IRQ_JIGR_MASK		(1 << 5)
+#define S5M8763_IRQ_PWRONF_MASK		(1 << 6)
+#define S5M8763_IRQ_PWRONR_MASK		(1 << 7)
+
+#define S5M8763_IRQ_WTSREVNT_MASK	(1 << 0)
+#define S5M8763_IRQ_SMPLEVNT_MASK	(1 << 1)
+#define S5M8763_IRQ_ALARM1_MASK		(1 << 2)
+#define S5M8763_IRQ_ALARM0_MASK		(1 << 3)
+
+#define S5M8763_IRQ_ONKEY1S_MASK	(1 << 0)
+#define S5M8763_IRQ_TOPOFFR_MASK	(1 << 2)
+#define S5M8763_IRQ_DCINOVPR_MASK	(1 << 3)
+#define S5M8763_IRQ_CHGRSTF_MASK	(1 << 4)
+#define S5M8763_IRQ_DONER_MASK		(1 << 5)
+#define S5M8763_IRQ_CHGFAULT_MASK	(1 << 7)
+
+#define S5M8763_IRQ_LOBAT1_MASK		(1 << 0)
+#define S5M8763_IRQ_LOBAT2_MASK		(1 << 1)
+
+#define S5M8763_ENRAMP                  (1 << 4)
+
+/**
+ * struct s5m87xx_dev - s5m87xx master device for sub-drivers
+ * @dev: master device of the chip (can be used to access platform data)
+ * @i2c: i2c client private data for regulator
+ * @rtc: i2c client private data for rtc
+ * @iolock: mutex for serializing io access
+ * @irqlock: mutex for buslock
+ * @irq_base: base IRQ number for s5m87xx, required for IRQs
+ * @irq: generic IRQ number for s5m87xx
+ * @ono: power onoff IRQ number for s5m87xx
+ * @irq_masks_cur: currently active value
+ * @irq_masks_cache: cached hardware value
+ * @type: indicate which s5m87xx "variant" is used
+ */
+struct s5m87xx_dev {
+	struct device *dev;
+	struct regmap *regmap;
+	struct i2c_client *i2c;
+	struct i2c_client *rtc;
+	struct mutex iolock;
+	struct mutex irqlock;
+
+	int device_type;
+	int irq_base;
+	int irq;
+	int ono;
+	u8 irq_masks_cur[NUM_IRQ_REGS];
+	u8 irq_masks_cache[NUM_IRQ_REGS];
+	int type;
+	bool wakeup;
+};
+
+int s5m_irq_init(struct s5m87xx_dev *s5m87xx);
+void s5m_irq_exit(struct s5m87xx_dev *s5m87xx);
+int s5m_irq_resume(struct s5m87xx_dev *s5m87xx);
+
+extern int s5m_reg_read(struct s5m87xx_dev *s5m87xx, u8 reg, void *dest);
+extern int s5m_bulk_read(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf);
+extern int s5m_reg_write(struct s5m87xx_dev *s5m87xx, u8 reg, u8 value);
+extern int s5m_bulk_write(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf);
+extern int s5m_reg_update(struct s5m87xx_dev *s5m87xx, u8 reg, u8 val, u8 mask);
+
+struct s5m_platform_data {
+	struct s5m_regulator_data	*regulators;
+	int				device_type;
+	int				num_regulators;
+
+	int				irq_base;
+	int 				(*cfg_pmic_irq)(void);
+
+	int				ono;
+	bool				wakeup;
+	bool				buck_voltage_lock;
+
+	int				buck_gpios[3];
+	int				buck2_voltage[8];
+	bool				buck2_gpiodvs;
+	int				buck3_voltage[8];
+	bool				buck3_gpiodvs;
+	int				buck4_voltage[8];
+	bool				buck4_gpiodvs;
+
+	int				buck_set1;
+	int				buck_set2;
+	int				buck_set3;
+	int				buck2_enable;
+	int				buck3_enable;
+	int				buck4_enable;
+	int				buck_default_idx;
+	int				buck2_default_idx;
+	int				buck3_default_idx;
+	int				buck4_default_idx;
+
+	int                             buck_ramp_delay;
+	bool                            buck2_ramp_enable;
+	bool                            buck3_ramp_enable;
+	bool                            buck4_ramp_enable;
+};
+
+#endif /*  __LINUX_MFD_S5M_CORE_H */
diff --git a/include/linux/mfd/s5m87xx/s5m-pmic.h b/include/linux/mfd/s5m87xx/s5m-pmic.h
new file mode 100644
index 000000000000..a72a5d27e62e
--- /dev/null
+++ b/include/linux/mfd/s5m87xx/s5m-pmic.h
@@ -0,0 +1,100 @@
+/* s5m87xx.h
+ *
+ * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#ifndef __LINUX_MFD_S5M_PMIC_H
+#define __LINUX_MFD_S5M_PMIC_H
+
+#include <linux/regulator/machine.h>
+
+/* S5M8767 regulator ids */
+enum s5m8767_regulators {
+	S5M8767_LDO1,
+	S5M8767_LDO2,
+	S5M8767_LDO3,
+	S5M8767_LDO4,
+	S5M8767_LDO5,
+	S5M8767_LDO6,
+	S5M8767_LDO7,
+	S5M8767_LDO8,
+	S5M8767_LDO9,
+	S5M8767_LDO10,
+	S5M8767_LDO11,
+	S5M8767_LDO12,
+	S5M8767_LDO13,
+	S5M8767_LDO14,
+	S5M8767_LDO15,
+	S5M8767_LDO16,
+	S5M8767_LDO17,
+	S5M8767_LDO18,
+	S5M8767_LDO19,
+	S5M8767_LDO20,
+	S5M8767_LDO21,
+	S5M8767_LDO22,
+	S5M8767_LDO23,
+	S5M8767_LDO24,
+	S5M8767_LDO25,
+	S5M8767_LDO26,
+	S5M8767_LDO27,
+	S5M8767_LDO28,
+	S5M8767_BUCK1,
+	S5M8767_BUCK2,
+	S5M8767_BUCK3,
+	S5M8767_BUCK4,
+	S5M8767_BUCK5,
+	S5M8767_BUCK6,
+	S5M8767_BUCK7,
+	S5M8767_BUCK8,
+	S5M8767_BUCK9,
+	S5M8767_AP_EN32KHZ,
+	S5M8767_CP_EN32KHZ,
+
+	S5M8767_REG_MAX,
+};
+
+/* S5M8763 regulator ids */
+enum s5m8763_regulators {
+	S5M8763_LDO1,
+	S5M8763_LDO2,
+	S5M8763_LDO3,
+	S5M8763_LDO4,
+	S5M8763_LDO5,
+	S5M8763_LDO6,
+	S5M8763_LDO7,
+	S5M8763_LDO8,
+	S5M8763_LDO9,
+	S5M8763_LDO10,
+	S5M8763_LDO11,
+	S5M8763_LDO12,
+	S5M8763_LDO13,
+	S5M8763_LDO14,
+	S5M8763_LDO15,
+	S5M8763_LDO16,
+	S5M8763_BUCK1,
+	S5M8763_BUCK2,
+	S5M8763_BUCK3,
+	S5M8763_BUCK4,
+	S5M8763_AP_EN32KHZ,
+	S5M8763_CP_EN32KHZ,
+	S5M8763_ENCHGVI,
+	S5M8763_ESAFEUSB1,
+	S5M8763_ESAFEUSB2,
+};
+
+/**
+ * s5m87xx_regulator_data - regulator data
+ * @id: regulator id
+ * @initdata: regulator init data (contraints, supplies, ...)
+ */
+struct s5m_regulator_data {
+	int				id;
+	struct regulator_init_data	*initdata;
+};
+
+#endif /*  __LINUX_MFD_S5M_PMIC_H */
diff --git a/include/linux/mfd/s5m87xx/s5m-rtc.h b/include/linux/mfd/s5m87xx/s5m-rtc.h
new file mode 100644
index 000000000000..6ce8da264cec
--- /dev/null
+++ b/include/linux/mfd/s5m87xx/s5m-rtc.h
@@ -0,0 +1,84 @@
+/*
+ * s5m-rtc.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd
+ *              http://www.samsung.com
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __LINUX_MFD_S5M_RTC_H
+#define __LINUX_MFD_S5M_RTC_H
+
+enum s5m87xx_rtc_reg {
+	S5M87XX_RTC_SEC,
+	S5M87XX_RTC_MIN,
+	S5M87XX_RTC_HOUR,
+	S5M87XX_RTC_WEEKDAY,
+	S5M87XX_RTC_DATE,
+	S5M87XX_RTC_MONTH,
+	S5M87XX_RTC_YEAR1,
+	S5M87XX_RTC_YEAR2,
+	S5M87XX_ALARM0_SEC,
+	S5M87XX_ALARM0_MIN,
+	S5M87XX_ALARM0_HOUR,
+	S5M87XX_ALARM0_WEEKDAY,
+	S5M87XX_ALARM0_DATE,
+	S5M87XX_ALARM0_MONTH,
+	S5M87XX_ALARM0_YEAR1,
+	S5M87XX_ALARM0_YEAR2,
+	S5M87XX_ALARM1_SEC,
+	S5M87XX_ALARM1_MIN,
+	S5M87XX_ALARM1_HOUR,
+	S5M87XX_ALARM1_WEEKDAY,
+	S5M87XX_ALARM1_DATE,
+	S5M87XX_ALARM1_MONTH,
+	S5M87XX_ALARM1_YEAR1,
+	S5M87XX_ALARM1_YEAR2,
+	S5M87XX_ALARM0_CONF,
+	S5M87XX_ALARM1_CONF,
+	S5M87XX_RTC_STATUS,
+	S5M87XX_WTSR_SMPL_CNTL,
+	S5M87XX_RTC_UDR_CON,
+};
+
+#define RTC_I2C_ADDR		(0x0C >> 1)
+
+#define HOUR_12			(1 << 7)
+#define HOUR_AMPM		(1 << 6)
+#define HOUR_PM			(1 << 5)
+#define ALARM0_STATUS		(1 << 1)
+#define ALARM1_STATUS		(1 << 2)
+#define UPDATE_AD		(1 << 0)
+
+/* RTC Control Register */
+#define BCD_EN_SHIFT		0
+#define BCD_EN_MASK		(1 << BCD_EN_SHIFT)
+#define MODEL24_SHIFT		1
+#define MODEL24_MASK		(1 << MODEL24_SHIFT)
+/* RTC Update Register1 */
+#define RTC_UDR_SHIFT		0
+#define RTC_UDR_MASK		(1 << RTC_UDR_SHIFT)
+/* RTC Hour register */
+#define HOUR_PM_SHIFT		6
+#define HOUR_PM_MASK		(1 << HOUR_PM_SHIFT)
+/* RTC Alarm Enable */
+#define ALARM_ENABLE_SHIFT	7
+#define ALARM_ENABLE_MASK	(1 << ALARM_ENABLE_SHIFT)
+
+enum {
+	RTC_SEC = 0,
+	RTC_MIN,
+	RTC_HOUR,
+	RTC_WEEKDAY,
+	RTC_DATE,
+	RTC_MONTH,
+	RTC_YEAR1,
+	RTC_YEAR2,
+};
+
+#endif /*  __LINUX_MFD_S5M_RTC_H */
-- 
cgit v1.2.3


From ba74e80ebaf8209cb553eb2195b26302270cfa42 Mon Sep 17 00:00:00 2001
From: Kevin Liu <kliu5@marvell.com>
Date: Wed, 4 Jan 2012 15:14:24 +0800
Subject: mfd: Add pm ops to max8925

Signed-off-by: Kevin Liu <kliu5@marvell.com>
Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/max8925-i2c.c   | 27 +++++++++++++++++++++++++++
 include/linux/mfd/max8925.h |  2 ++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/max8925-i2c.c b/drivers/mfd/max8925-i2c.c
index 0219115e00c7..d9e4b36edee9 100644
--- a/drivers/mfd/max8925-i2c.c
+++ b/drivers/mfd/max8925-i2c.c
@@ -161,6 +161,8 @@ static int __devinit max8925_probe(struct i2c_client *client,
 	chip->adc = i2c_new_dummy(chip->i2c->adapter, ADC_I2C_ADDR);
 	i2c_set_clientdata(chip->adc, chip);
 
+	device_init_wakeup(&client->dev, 1);
+
 	max8925_device_init(chip, pdata);
 
 	return 0;
@@ -177,10 +179,35 @@ static int __devexit max8925_remove(struct i2c_client *client)
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int max8925_suspend(struct device *dev)
+{
+	struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+	struct max8925_chip *chip = i2c_get_clientdata(client);
+
+	if (device_may_wakeup(dev) && chip->wakeup_flag)
+		enable_irq_wake(chip->core_irq);
+	return 0;
+}
+
+static int max8925_resume(struct device *dev)
+{
+	struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+	struct max8925_chip *chip = i2c_get_clientdata(client);
+
+	if (device_may_wakeup(dev) && chip->wakeup_flag)
+		disable_irq_wake(chip->core_irq);
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(max8925_pm_ops, max8925_suspend, max8925_resume);
+
 static struct i2c_driver max8925_driver = {
 	.driver	= {
 		.name	= "max8925",
 		.owner	= THIS_MODULE,
+		.pm     = &max8925_pm_ops,
 	},
 	.probe		= max8925_probe,
 	.remove		= __devexit_p(max8925_remove),
diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index 5259dfe8c585..daaba00f0bc5 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h
@@ -206,6 +206,8 @@ struct max8925_chip {
 	int			irq_base;
 	int			core_irq;
 	int			tsc_irq;
+
+	unsigned int            wakeup_flag;
 };
 
 struct max8925_backlight_pdata {
-- 
cgit v1.2.3


From 17ab594fa5f6c766a3daf304d2e6b643f863c24c Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Fri, 18 Nov 2011 11:10:10 -0800
Subject: ahci: platform support for suspend/resume

Add platform hooks for custom suspend() and resume() functions. The
generic suspend/resume code in drivers/ata/ahci_platform.c is adapted
from the PCI version in drivers/ata/ahci.c.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/ahci_platform.c   | 68 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/ahci_platform.h |  2 ++
 2 files changed, 70 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index 43b875810d1b..48be4e189163 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -202,6 +202,71 @@ static int __devexit ahci_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int ahci_suspend(struct device *dev)
+{
+	struct ahci_platform_data *pdata = dev_get_platdata(dev);
+	struct ata_host *host = dev_get_drvdata(dev);
+	struct ahci_host_priv *hpriv = host->private_data;
+	void __iomem *mmio = hpriv->mmio;
+	u32 ctl;
+	int rc;
+
+	if (hpriv->flags & AHCI_HFLAG_NO_SUSPEND) {
+		dev_err(dev, "firmware update required for suspend/resume\n");
+		return -EIO;
+	}
+
+	/*
+	 * AHCI spec rev1.1 section 8.3.3:
+	 * Software must disable interrupts prior to requesting a
+	 * transition of the HBA to D3 state.
+	 */
+	ctl = readl(mmio + HOST_CTL);
+	ctl &= ~HOST_IRQ_EN;
+	writel(ctl, mmio + HOST_CTL);
+	readl(mmio + HOST_CTL); /* flush */
+
+	rc = ata_host_suspend(host, PMSG_SUSPEND);
+	if (rc)
+		return rc;
+
+	if (pdata && pdata->suspend)
+		return pdata->suspend(dev);
+	return 0;
+}
+
+static int ahci_resume(struct device *dev)
+{
+	struct ahci_platform_data *pdata = dev_get_platdata(dev);
+	struct ata_host *host = dev_get_drvdata(dev);
+	int rc;
+
+	if (pdata && pdata->resume) {
+		rc = pdata->resume(dev);
+		if (rc)
+			return rc;
+	}
+
+	if (dev->power.power_state.event == PM_EVENT_SUSPEND) {
+		rc = ahci_reset_controller(host);
+		if (rc)
+			return rc;
+
+		ahci_init_controller(host);
+	}
+
+	ata_host_resume(host);
+
+	return 0;
+}
+
+static struct dev_pm_ops ahci_pm_ops = {
+	.suspend		= &ahci_suspend,
+	.resume			= &ahci_resume,
+};
+#endif
+
 static const struct of_device_id ahci_of_match[] = {
 	{ .compatible = "calxeda,hb-ahci", },
 	{},
@@ -214,6 +279,9 @@ static struct platform_driver ahci_driver = {
 		.name = "ahci",
 		.owner = THIS_MODULE,
 		.of_match_table = ahci_of_match,
+#ifdef CONFIG_PM
+		.pm = &ahci_pm_ops,
+#endif
 	},
 	.id_table	= ahci_devtype,
 };
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index be3d9a77d6ed..73a25005d88a 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -23,6 +23,8 @@ struct ata_port_info;
 struct ahci_platform_data {
 	int (*init)(struct device *dev, void __iomem *addr);
 	void (*exit)(struct device *dev);
+	int (*suspend)(struct device *dev);
+	int (*resume)(struct device *dev);
 	const struct ata_port_info *ata_port_info;
 	unsigned int force_port_map;
 	unsigned int mask_port_map;
-- 
cgit v1.2.3


From f78a26f55b2438c439609fc90b473f7f08f5b697 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 14 Dec 2011 01:01:05 +0900
Subject: sh: pfc: Variable bitfield width config register support

Add support for variable config reg hardware by adding
the macro PINMUX_CFG_REG_VAR(). The width of each bitfield
needs to be passed to the macro, and the correct space must
be consumed by each bitfield in the enum table following the
macro. Data registers still need to have fixed bitfields.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/pfc.c       | 44 +++++++++++++++++++++++++++++++++-----------
 include/linux/sh_pfc.h |  9 ++++++++-
 2 files changed, 41 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/pfc.c b/drivers/sh/pfc.c
index 5481d19518f9..f975f4a33439 100644
--- a/drivers/sh/pfc.c
+++ b/drivers/sh/pfc.c
@@ -174,10 +174,19 @@ static void config_reg_helper(struct pinmux_info *gpioc,
 			      unsigned long *maskp,
 			      unsigned long *posp)
 {
+	int k;
+
 	*mapped_regp = pfc_phys_to_virt(gpioc, crp->reg);
 
-	*maskp = (1 << crp->field_width) - 1;
-	*posp = crp->reg_width - ((in_pos + 1) * crp->field_width);
+	if (crp->field_width) {
+		*maskp = (1 << crp->field_width) - 1;
+		*posp = crp->reg_width - ((in_pos + 1) * crp->field_width);
+	} else {
+		*maskp = (1 << crp->var_field_width[in_pos]) - 1;
+		*posp = crp->reg_width;
+		for (k = 0; k <= in_pos; k++)
+			*posp -= crp->var_field_width[k];
+	}
 }
 
 static int read_config_reg(struct pinmux_info *gpioc,
@@ -303,8 +312,8 @@ static int get_config_reg(struct pinmux_info *gpioc, pinmux_enum_t enum_id,
 			  unsigned long **cntp)
 {
 	struct pinmux_cfg_reg *config_reg;
-	unsigned long r_width, f_width;
-	int k, n;
+	unsigned long r_width, f_width, curr_width, ncomb;
+	int k, m, n, pos, bit_pos;
 
 	k = 0;
 	while (1) {
@@ -315,14 +324,27 @@ static int get_config_reg(struct pinmux_info *gpioc, pinmux_enum_t enum_id,
 
 		if (!r_width)
 			break;
-		for (n = 0; n < (r_width / f_width) * (1 << f_width); n++) {
-			if (config_reg->enum_ids[n] == enum_id) {
-				*crp = config_reg;
-				*fieldp = n / (1 << f_width);
-				*valuep = n % (1 << f_width);
-				*cntp = &config_reg->cnt[n / (1 << f_width)];
-				return 0;
+
+		pos = 0;
+		m = 0;
+		for (bit_pos = 0; bit_pos < r_width; bit_pos += curr_width) {
+			if (f_width)
+				curr_width = f_width;
+			else
+				curr_width = config_reg->var_field_width[m];
+
+			ncomb = 1 << curr_width;
+			for (n = 0; n < ncomb; n++) {
+				if (config_reg->enum_ids[pos + n] == enum_id) {
+					*crp = config_reg;
+					*fieldp = m;
+					*valuep = n;
+					*cntp = &config_reg->cnt[m];
+					return 0;
+				}
 			}
+			pos += ncomb;
+			m++;
 		}
 		k++;
 	}
diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
index 91666a58529d..84538c42d64a 100644
--- a/include/linux/sh_pfc.h
+++ b/include/linux/sh_pfc.h
@@ -45,12 +45,19 @@ struct pinmux_cfg_reg {
 	unsigned long reg, reg_width, field_width;
 	unsigned long *cnt;
 	pinmux_enum_t *enum_ids;
+	unsigned long *var_field_width;
 };
 
 #define PINMUX_CFG_REG(name, r, r_width, f_width) \
 	.reg = r, .reg_width = r_width, .field_width = f_width,		\
 	.cnt = (unsigned long [r_width / f_width]) {}, \
-	.enum_ids = (pinmux_enum_t [(r_width / f_width) * (1 << f_width)]) \
+	.enum_ids = (pinmux_enum_t [(r_width / f_width) * (1 << f_width)])
+
+#define PINMUX_CFG_REG_VAR(name, r, r_width, var_fw0, var_fwn...) \
+	.reg = r, .reg_width = r_width,	\
+	.cnt = (unsigned long [r_width]) {}, \
+	.var_field_width = (unsigned long [r_width]) { var_fw0, var_fwn, 0 }, \
+	.enum_ids = (pinmux_enum_t [])
 
 struct pinmux_data_reg {
 	unsigned long reg, reg_width, reg_shadow;
-- 
cgit v1.2.3


From e499ada829cf769ac6f16627cd9f09b855a7fd6d Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 14 Dec 2011 01:01:14 +0900
Subject: sh: pfc: Unlock register support

Add PFC support for a 32-bit unlock register. Needed to
drive the r8a7779 PFC that comes with a funky PMMR register.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/pfc.c       | 22 ++++++++++------------
 include/linux/sh_pfc.h |  2 ++
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/pfc.c b/drivers/sh/pfc.c
index f975f4a33439..522c6c46d1be 100644
--- a/drivers/sh/pfc.c
+++ b/drivers/sh/pfc.c
@@ -210,7 +210,7 @@ static void write_config_reg(struct pinmux_info *gpioc,
 			     unsigned long field, unsigned long value)
 {
 	void __iomem *mapped_reg;
-	unsigned long mask, pos;
+	unsigned long mask, pos, data;
 
 	config_reg_helper(gpioc, crp, field, &mapped_reg, &mask, &pos);
 
@@ -221,17 +221,15 @@ static void write_config_reg(struct pinmux_info *gpioc,
 	mask = ~(mask << pos);
 	value = value << pos;
 
-	switch (crp->reg_width) {
-	case 8:
-		iowrite8((ioread8(mapped_reg) & mask) | value, mapped_reg);
-		break;
-	case 16:
-		iowrite16((ioread16(mapped_reg) & mask) | value, mapped_reg);
-		break;
-	case 32:
-		iowrite32((ioread32(mapped_reg) & mask) | value, mapped_reg);
-		break;
-	}
+	data = gpio_read_raw_reg(mapped_reg, crp->reg_width);
+	data &= mask;
+	data |= value;
+
+	if (gpioc->unlock_reg)
+		gpio_write_raw_reg(pfc_phys_to_virt(gpioc, gpioc->unlock_reg),
+				   32, ~data);
+
+	gpio_write_raw_reg(mapped_reg, crp->reg_width, data);
 }
 
 static int setup_data_reg(struct pinmux_info *gpioc, unsigned gpio)
diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
index 84538c42d64a..5c15aed9c4b2 100644
--- a/include/linux/sh_pfc.h
+++ b/include/linux/sh_pfc.h
@@ -116,6 +116,8 @@ struct pinmux_info {
 	unsigned int num_resources;
 	struct pfc_window *window;
 
+	unsigned long unlock_reg;
+
 	struct gpio_chip chip;
 };
 
-- 
cgit v1.2.3


From 0048278552e9752fd578c3d8deee756987c10873 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 22 Dec 2011 14:52:21 +0100
Subject: jbd: Remove j_barrier mutex

j_barrier mutex is used for serializing different journal lock operations.  The
problem with it is that e.g. FIFREEZE ioctl results in process leaving kernel
with j_barrier mutex held which makes lockdep freak out. Also hibernation code
wants to freeze filesystem but it cannot do so because it then cannot hibernate
the system because of mutex being locked.

So we remove j_barrier mutex and use direct wait on j_barrier_count instead.
Since locking journal is a rare operation we don't have to care about fairness
or such things.

CC: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/jbd/journal.c     |  1 -
 fs/jbd/transaction.c | 38 ++++++++++++++++++++++----------------
 include/linux/jbd.h  |  4 ----
 3 files changed, 22 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index fea8dd661d2b..1656dc2e6a08 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -721,7 +721,6 @@ static journal_t * journal_init_common (void)
 	init_waitqueue_head(&journal->j_wait_checkpoint);
 	init_waitqueue_head(&journal->j_wait_commit);
 	init_waitqueue_head(&journal->j_wait_updates);
-	mutex_init(&journal->j_barrier);
 	mutex_init(&journal->j_checkpoint_mutex);
 	spin_lock_init(&journal->j_revoke_lock);
 	spin_lock_init(&journal->j_list_lock);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 7e59c6e66f9b..7fce94b04bc3 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -426,17 +426,34 @@ int journal_restart(handle_t *handle, int nblocks)
  * void journal_lock_updates () - establish a transaction barrier.
  * @journal:  Journal to establish a barrier on.
  *
- * This locks out any further updates from being started, and blocks
- * until all existing updates have completed, returning only once the
- * journal is in a quiescent state with no updates running.
- *
- * The journal lock should not be held on entry.
+ * This locks out any further updates from being started, and blocks until all
+ * existing updates have completed, returning only once the journal is in a
+ * quiescent state with no updates running.
+ *
+ * We do not use simple mutex for synchronization as there are syscalls which
+ * want to return with filesystem locked and that trips up lockdep. Also
+ * hibernate needs to lock filesystem but locked mutex then blocks hibernation.
+ * Since locking filesystem is rare operation, we use simple counter and
+ * waitqueue for locking.
  */
 void journal_lock_updates(journal_t *journal)
 {
 	DEFINE_WAIT(wait);
 
+wait:
+	/* Wait for previous locked operation to finish */
+	wait_event(journal->j_wait_transaction_locked,
+		   journal->j_barrier_count == 0);
+
 	spin_lock(&journal->j_state_lock);
+	/*
+	 * Check reliably under the lock whether we are the ones winning the race
+	 * and locking the journal
+	 */
+	if (journal->j_barrier_count > 0) {
+		spin_unlock(&journal->j_state_lock);
+		goto wait;
+	}
 	++journal->j_barrier_count;
 
 	/* Wait until there are no running updates */
@@ -460,14 +477,6 @@ void journal_lock_updates(journal_t *journal)
 		spin_lock(&journal->j_state_lock);
 	}
 	spin_unlock(&journal->j_state_lock);
-
-	/*
-	 * We have now established a barrier against other normal updates, but
-	 * we also need to barrier against other journal_lock_updates() calls
-	 * to make sure that we serialise special journal-locked operations
-	 * too.
-	 */
-	mutex_lock(&journal->j_barrier);
 }
 
 /**
@@ -475,14 +484,11 @@ void journal_lock_updates(journal_t *journal)
  * @journal:  Journal to release the barrier on.
  *
  * Release a transaction barrier obtained with journal_lock_updates().
- *
- * Should be called without the journal lock held.
  */
 void journal_unlock_updates (journal_t *journal)
 {
 	J_ASSERT(journal->j_barrier_count != 0);
 
-	mutex_unlock(&journal->j_barrier);
 	spin_lock(&journal->j_state_lock);
 	--journal->j_barrier_count;
 	spin_unlock(&journal->j_state_lock);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 492cc12244fd..d211732b9e99 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -497,7 +497,6 @@ struct transaction_s
  * @j_format_version: Version of the superblock format
  * @j_state_lock: Protect the various scalars in the journal
  * @j_barrier_count:  Number of processes waiting to create a barrier lock
- * @j_barrier: The barrier lock itself
  * @j_running_transaction: The current running transaction..
  * @j_committing_transaction: the transaction we are pushing to disk
  * @j_checkpoint_transactions: a linked circular list of all transactions
@@ -580,9 +579,6 @@ struct journal_s
 	 */
 	int			j_barrier_count;
 
-	/* The barrier lock itself */
-	struct mutex		j_barrier;
-
 	/*
 	 * Transactions: The current running transaction...
 	 * [j_state_lock] [caller holding open handle]
-- 
cgit v1.2.3


From c1257b4798d48b73ad1a9ca359504cd49caefa0d Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Wed, 2 Nov 2011 13:34:42 -0700
Subject: mtd: nand: add Macronix manufacturer

Macronix is produing SLC NAND MX30LF1208AA, so add their manufacturer
code to the manufacturer lists.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_ids.c | 1 +
 include/linux/mtd/nand.h    | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index 00cf1b0d6053..56c688dafe92 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -176,6 +176,7 @@ struct nand_manufacturers nand_manuf_ids[] = {
 	{NAND_MFR_HYNIX, "Hynix"},
 	{NAND_MFR_MICRON, "Micron"},
 	{NAND_MFR_AMD, "AMD"},
+	{NAND_MFR_MACRONIX, "Macronix"},
 	{0x0, "Unknown"}
 };
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 904131bab501..63b5a8b6dfbd 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -555,6 +555,7 @@ struct nand_chip {
 #define NAND_MFR_HYNIX		0xad
 #define NAND_MFR_MICRON		0x2c
 #define NAND_MFR_AMD		0x01
+#define NAND_MFR_MACRONIX	0xc2
 
 /**
  * struct nand_flash_dev - NAND Flash Device ID Structure
-- 
cgit v1.2.3


From 8e987465a137d4824710e02550f06aa891c9b865 Mon Sep 17 00:00:00 2001
From: Aaron Sierra <asierra@xes-inc.com>
Date: Mon, 14 Nov 2011 18:44:34 -0600
Subject: mtd: cfi: Allow per-mapping CFI device endianness

This patch allows each CFI device map to use its own endianness. The
globally defined CFI endianness (CONFIG_MTD_CFI_NOSWAP,
CONFIG_MTD_CFI_BE_BYTE_SWAP or CONFIG_MTD_CFI_LE_BYTE_SWAP) becomes the
default value which can be overridden by a driver for a particular device.

Signed-off-by: Aaron Sierra <asierra@xes-inc.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_cmdset_0020.c |  5 ++-
 include/linux/mtd/cfi.h             | 16 ++++----
 include/linux/mtd/cfi_endian.h      | 76 ++++++++++++++-----------------------
 include/linux/mtd/map.h             |  1 +
 4 files changed, 41 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c
index 179814a95f3a..666c52f8bf8d 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c
@@ -139,8 +139,9 @@ struct mtd_info *cfi_cmdset_0020(struct map_info *map, int primary)
 		}
 
 		/* Do some byteswapping if necessary */
-		extp->FeatureSupport = cfi32_to_cpu(extp->FeatureSupport);
-		extp->BlkStatusRegMask = cfi32_to_cpu(extp->BlkStatusRegMask);
+		extp->FeatureSupport = cfi32_to_cpu(map, extp->FeatureSupport);
+		extp->BlkStatusRegMask = cfi32_to_cpu(map,
+						extp->BlkStatusRegMask);
 
 #ifdef DEBUG_CFI_FEATURES
 		/* Tell the user about it in lots of lovely detail */
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index d24925492972..d5d2ec6494bb 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -354,10 +354,10 @@ static inline map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cf
 		onecmd = cmd;
 		break;
 	case 2:
-		onecmd = cpu_to_cfi16(cmd);
+		onecmd = cpu_to_cfi16(map, cmd);
 		break;
 	case 4:
-		onecmd = cpu_to_cfi32(cmd);
+		onecmd = cpu_to_cfi32(map, cmd);
 		break;
 	}
 
@@ -437,10 +437,10 @@ static inline unsigned long cfi_merge_status(map_word val, struct map_info *map,
 	case 1:
 		break;
 	case 2:
-		res = cfi16_to_cpu(res);
+		res = cfi16_to_cpu(map, res);
 		break;
 	case 4:
-		res = cfi32_to_cpu(res);
+		res = cfi32_to_cpu(map, res);
 		break;
 	default: BUG();
 	}
@@ -480,12 +480,12 @@ static inline uint8_t cfi_read_query(struct map_info *map, uint32_t addr)
 	if (map_bankwidth_is_1(map)) {
 		return val.x[0];
 	} else if (map_bankwidth_is_2(map)) {
-		return cfi16_to_cpu(val.x[0]);
+		return cfi16_to_cpu(map, val.x[0]);
 	} else {
 		/* No point in a 64-bit byteswap since that would just be
 		   swapping the responses from different chips, and we are
 		   only interested in one chip (a representative sample) */
-		return cfi32_to_cpu(val.x[0]);
+		return cfi32_to_cpu(map, val.x[0]);
 	}
 }
 
@@ -496,12 +496,12 @@ static inline uint16_t cfi_read_query16(struct map_info *map, uint32_t addr)
 	if (map_bankwidth_is_1(map)) {
 		return val.x[0] & 0xff;
 	} else if (map_bankwidth_is_2(map)) {
-		return cfi16_to_cpu(val.x[0]);
+		return cfi16_to_cpu(map, val.x[0]);
 	} else {
 		/* No point in a 64-bit byteswap since that would just be
 		   swapping the responses from different chips, and we are
 		   only interested in one chip (a representative sample) */
-		return cfi32_to_cpu(val.x[0]);
+		return cfi32_to_cpu(map, val.x[0]);
 	}
 }
 
diff --git a/include/linux/mtd/cfi_endian.h b/include/linux/mtd/cfi_endian.h
index 51cc3f5917a8..b97a625071f8 100644
--- a/include/linux/mtd/cfi_endian.h
+++ b/include/linux/mtd/cfi_endian.h
@@ -19,53 +19,35 @@
 
 #include <asm/byteorder.h>
 
-#ifndef CONFIG_MTD_CFI_ADV_OPTIONS
-
-#define CFI_HOST_ENDIAN
-
-#else
-
-#ifdef CONFIG_MTD_CFI_NOSWAP
-#define CFI_HOST_ENDIAN
-#endif
-
-#ifdef CONFIG_MTD_CFI_LE_BYTE_SWAP
-#define CFI_LITTLE_ENDIAN
-#endif
-
-#ifdef CONFIG_MTD_CFI_BE_BYTE_SWAP
-#define CFI_BIG_ENDIAN
-#endif
-
-#endif
-
-#if defined(CFI_LITTLE_ENDIAN)
-#define cpu_to_cfi8(x) (x)
-#define cfi8_to_cpu(x) (x)
-#define cpu_to_cfi16(x) cpu_to_le16(x)
-#define cpu_to_cfi32(x) cpu_to_le32(x)
-#define cpu_to_cfi64(x) cpu_to_le64(x)
-#define cfi16_to_cpu(x) le16_to_cpu(x)
-#define cfi32_to_cpu(x) le32_to_cpu(x)
-#define cfi64_to_cpu(x) le64_to_cpu(x)
-#elif defined (CFI_BIG_ENDIAN)
-#define cpu_to_cfi8(x) (x)
-#define cfi8_to_cpu(x) (x)
-#define cpu_to_cfi16(x) cpu_to_be16(x)
-#define cpu_to_cfi32(x) cpu_to_be32(x)
-#define cpu_to_cfi64(x) cpu_to_be64(x)
-#define cfi16_to_cpu(x) be16_to_cpu(x)
-#define cfi32_to_cpu(x) be32_to_cpu(x)
-#define cfi64_to_cpu(x) be64_to_cpu(x)
-#elif defined (CFI_HOST_ENDIAN)
-#define cpu_to_cfi8(x) (x)
-#define cfi8_to_cpu(x) (x)
-#define cpu_to_cfi16(x) (x)
-#define cpu_to_cfi32(x) (x)
-#define cpu_to_cfi64(x) (x)
-#define cfi16_to_cpu(x) (x)
-#define cfi32_to_cpu(x) (x)
-#define cfi64_to_cpu(x) (x)
+#define CFI_HOST_ENDIAN 1
+#define CFI_LITTLE_ENDIAN 2
+#define CFI_BIG_ENDIAN 3
+
+#if !defined(CONFIG_MTD_CFI_ADV_OPTIONS) || defined(CONFIG_MTD_CFI_NOSWAP)
+#define CFI_DEFAULT_ENDIAN CFI_HOST_ENDIAN
+#elif defined(CONFIG_MTD_CFI_LE_BYTE_SWAP)
+#define CFI_DEFAULT_ENDIAN CFI_LITTLE_ENDIAN
+#elif defined(CONFIG_MTD_CFI_BE_BYTE_SWAP)
+#define CFI_DEFAULT_ENDIAN CFI_BIG_ENDIAN
 #else
 #error No CFI endianness defined
 #endif
+
+#define cfi_default(s) ((s)?:CFI_DEFAULT_ENDIAN)
+#define cfi_be(s) (cfi_default(s) == CFI_BIG_ENDIAN)
+#define cfi_le(s) (cfi_default(s) == CFI_LITTLE_ENDIAN)
+#define cfi_host(s) (cfi_default(s) == CFI_HOST_ENDIAN)
+
+#define cpu_to_cfi8(map, x) (x)
+#define cfi8_to_cpu(map, x) (x)
+#define cpu_to_cfi16(map, x) _cpu_to_cfi(16, (map)->swap, (x))
+#define cpu_to_cfi32(map, x) _cpu_to_cfi(32, (map)->swap, (x))
+#define cpu_to_cfi64(map, x) _cpu_to_cfi(64, (map)->swap, (x))
+#define cfi16_to_cpu(map, x) _cfi_to_cpu(16, (map)->swap, (x))
+#define cfi32_to_cpu(map, x) _cfi_to_cpu(32, (map)->swap, (x))
+#define cfi64_to_cpu(map, x) _cfi_to_cpu(64, (map)->swap, (x))
+
+#define _cpu_to_cfi(w, s, x) (cfi_host(s)?(x):_swap_to_cfi(w, s, x))
+#define _cfi_to_cpu(w, s, x) (cfi_host(s)?(x):_swap_to_cpu(w, s, x))
+#define _swap_to_cfi(w, s, x) (cfi_be(s)?cpu_to_be##w(x):cpu_to_le##w(x))
+#define _swap_to_cpu(w, s, x) (cfi_be(s)?be##w##_to_cpu(x):le##w##_to_cpu(x))
diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index a9e6ba46865e..1132410f14c6 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -214,6 +214,7 @@ struct map_info {
 	void __iomem *virt;
 	void *cached;
 
+	int swap; /* this mapping's byte-swapping requirement */
 	int bankwidth; /* in octets. This isn't necessarily the width
 		       of actual bus cycles -- it's the repeat interval
 		      in bytes, before you are talking to the first chip again.
-- 
cgit v1.2.3


From 529688fed64a7759323cbd170754c61aad0dd48b Mon Sep 17 00:00:00 2001
From: Jonas Gorski <jonas.gorski@gmail.com>
Date: Mon, 5 Dec 2011 16:08:09 +0100
Subject: mtd: maps: physmap: allow partition parsers for physmap_flash_data

Arch setup code might want to use their own partition parsers, but still
use the generic physmap flash driver.

Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
Acked-by: Florian Fainelli <florian@openwrt.org>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/physmap.c  | 5 ++++-
 include/linux/mtd/physmap.h | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index 66e8200079c2..1f749d58ae6f 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c
@@ -85,6 +85,7 @@ static int physmap_flash_probe(struct platform_device *dev)
 	struct physmap_flash_data *physmap_data;
 	struct physmap_flash_info *info;
 	const char **probe_type;
+	const char **part_types;
 	int err = 0;
 	int i;
 	int devices_found = 0;
@@ -171,7 +172,9 @@ static int physmap_flash_probe(struct platform_device *dev)
 	if (err)
 		goto err_out;
 
-	mtd_device_parse_register(info->cmtd, part_probe_types, 0,
+	part_types = physmap_data->part_probe_types ? : part_probe_types;
+
+	mtd_device_parse_register(info->cmtd, part_types, 0,
 				  physmap_data->parts, physmap_data->nr_parts);
 	return 0;
 
diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h
index 04e018160e2b..d2887e76b7f6 100644
--- a/include/linux/mtd/physmap.h
+++ b/include/linux/mtd/physmap.h
@@ -30,6 +30,7 @@ struct physmap_flash_data {
 	unsigned int		pfow_base;
 	char                    *probe_type;
 	struct mtd_partition	*parts;
+	const char		**part_probe_types;
 };
 
 #endif /* __LINUX_MTD_PHYSMAP__ */
-- 
cgit v1.2.3


From 4a42243886b87cd28a39b192161767c2af851a55 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 18:28:01 +0200
Subject: mtd: map.h: fix arm cross-build failure

This patch fixes the following build failure:
In file included from include/linux/mtd/qinfo.h:4:0,
                 from include/linux/mtd/pfow.h:7,
                 from drivers/mtd/lpddr/lpddr_cmds.c:27:
include/linux/mtd/map.h: In function 'inline_map_read':
include/linux/mtd/map.h:409:3: error: implicit declaration of function 'BUILD_BUG_ON' [-Werror=implicit-function-declaration]

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/map.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 1132410f14c6..94e924e2ecd5 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -26,7 +26,7 @@
 #include <linux/list.h>
 #include <linux/string.h>
 #include <linux/bug.h>
-
+#include <linux/kernel.h>
 
 #include <asm/unaligned.h>
 #include <asm/system.h>
-- 
cgit v1.2.3


From 7e1f0dc0551b99acb5e8fa161a7ac401994d57d8 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 15:25:39 +0200
Subject: mtd: introduce mtd_erase interface

This patch is part of a patch-set which changes the MTD interface
from 'mtd->func()' form to 'mtd_func()' form. We need this because
we want to add common code to to all drivers in the mtd core level,
which is impossible with the current interface when MTD clients
call driver functions like 'read()' or 'write()' directly.

At this point we just introduce a new inline wrapper function, but
later some of them are expected to gain more code. E.g., the input
parameters check should be moved to the wrappers rather than be
duplicated at many drivers.

This particular patch introduced the 'mtd_erase()' interface. The
following patches add all the other interfaces one by one.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/ftl.c                   |  2 +-
 drivers/mtd/inftlmount.c            |  4 ++--
 drivers/mtd/mtdblock.c              |  2 +-
 drivers/mtd/mtdchar.c               |  2 +-
 drivers/mtd/mtdconcat.c             |  2 +-
 drivers/mtd/mtdoops.c               |  2 +-
 drivers/mtd/mtdpart.c               |  2 +-
 drivers/mtd/mtdswap.c               |  2 +-
 drivers/mtd/nftlmount.c             |  2 +-
 drivers/mtd/rfd_ftl.c               |  2 +-
 drivers/mtd/sm_ftl.c                |  2 +-
 drivers/mtd/tests/mtd_oobtest.c     |  2 +-
 drivers/mtd/tests/mtd_pagetest.c    |  2 +-
 drivers/mtd/tests/mtd_speedtest.c   |  4 ++--
 drivers/mtd/tests/mtd_stresstest.c  |  2 +-
 drivers/mtd/tests/mtd_subpagetest.c |  2 +-
 drivers/mtd/tests/mtd_torturetest.c |  2 +-
 drivers/mtd/ubi/io.c                |  2 +-
 drivers/staging/spectra/lld_mtd.c   |  2 +-
 fs/jffs2/erase.c                    |  2 +-
 fs/logfs/dev_mtd.c                  |  2 +-
 include/linux/mtd/mtd.h             | 19 ++++++++++++++-----
 22 files changed, 37 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index c7382bb686c6..a982889277c8 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -355,7 +355,7 @@ static int erase_xfer(partition_t *part,
     erase->len = 1 << part->header.EraseUnitSize;
     erase->priv = (u_long)part;
 
-    ret = part->mbd.mtd->erase(part->mbd.mtd, erase);
+    ret = mtd_erase(part->mbd.mtd, erase);
 
     if (!ret)
 	    xfer->EraseCount++;
diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c
index 2ff601f816ce..0d946f10a682 100644
--- a/drivers/mtd/inftlmount.c
+++ b/drivers/mtd/inftlmount.c
@@ -220,7 +220,7 @@ static int find_boot_record(struct INFTLrecord *inftl)
 				 */
 				instr->addr = ip->Reserved0 * inftl->EraseSize;
 				instr->len = inftl->EraseSize;
-				mtd->erase(mtd, instr);
+				mtd_erase(mtd, instr);
 			}
 			if ((ip->lastUnit - ip->firstUnit + 1) < ip->virtualUnits) {
 				printk(KERN_WARNING "INFTL: Media Header "
@@ -393,7 +393,7 @@ int INFTL_formatblock(struct INFTLrecord *inftl, int block)
 	   mark only the failed block in the bbt. */
 	for (physblock = 0; physblock < inftl->EraseSize;
 	     physblock += instr->len, instr->addr += instr->len) {
-		mtd->erase(inftl->mbd.mtd, instr);
+		mtd_erase(inftl->mbd.mtd, instr);
 
 		if (instr->state == MTD_ERASE_FAILED) {
 			printk(KERN_WARNING "INFTL: error while formatting block %d\n",
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index 7c1dc908a174..9b01cb0266e4 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -85,7 +85,7 @@ static int erase_write (struct mtd_info *mtd, unsigned long pos,
 	set_current_state(TASK_INTERRUPTIBLE);
 	add_wait_queue(&wait_q, &wait);
 
-	ret = mtd->erase(mtd, &erase);
+	ret = mtd_erase(mtd, &erase);
 	if (ret) {
 		set_current_state(TASK_RUNNING);
 		remove_wait_queue(&wait_q, &wait);
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 00423cc85807..41d64ff4c252 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -731,7 +731,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 			  wq_head is no longer there when the
 			  callback routine tries to wake us up.
 			*/
-			ret = mtd->erase(mtd, erase);
+			ret = mtd_erase(mtd, erase);
 			if (!ret) {
 				set_current_state(TASK_UNINTERRUPTIBLE);
 				add_wait_queue(&waitq, &wait);
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 6df4d4d4eb92..76123bd49314 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -379,7 +379,7 @@ static int concat_dev_erase(struct mtd_info *mtd, struct erase_info *erase)
 	 * FIXME: Allow INTERRUPTIBLE. Which means
 	 * not having the wait_queue head on the stack.
 	 */
-	err = mtd->erase(mtd, erase);
+	err = mtd_erase(mtd, erase);
 	if (!err) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		add_wait_queue(&waitq, &wait);
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index f3cdce9a85a6..9b2d86323169 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -112,7 +112,7 @@ static int mtdoops_erase_block(struct mtdoops_context *cxt, int offset)
 	set_current_state(TASK_INTERRUPTIBLE);
 	add_wait_queue(&wait_q, &wait);
 
-	ret = mtd->erase(mtd, &erase);
+	ret = mtd_erase(mtd, &erase);
 	if (ret) {
 		set_current_state(TASK_RUNNING);
 		remove_wait_queue(&wait_q, &wait);
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index a0bd2de4752b..d318fee28595 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -257,7 +257,7 @@ static int part_erase(struct mtd_info *mtd, struct erase_info *instr)
 	if (instr->addr >= mtd->size)
 		return -EINVAL;
 	instr->addr += part->offset;
-	ret = part->master->erase(part->master, instr);
+	ret = mtd_erase(part->master, instr);
 	if (ret) {
 		if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
 			instr->fail_addr -= part->offset;
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index bd9590c723e4..4e12875a916c 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -567,7 +567,7 @@ retry:
 	erase.len	= mtd->erasesize;
 	erase.priv	= (u_long)&wq;
 
-	ret = mtd->erase(mtd, &erase);
+	ret = mtd_erase(mtd, &erase);
 	if (ret) {
 		if (retries++ < MTDSWAP_ERASE_RETRIES) {
 			dev_warn(d->dev,
diff --git a/drivers/mtd/nftlmount.c b/drivers/mtd/nftlmount.c
index ac4092591aea..9164a56fb5c0 100644
--- a/drivers/mtd/nftlmount.c
+++ b/drivers/mtd/nftlmount.c
@@ -326,7 +326,7 @@ int NFTL_formatblock(struct NFTLrecord *nftl, int block)
 	instr->mtd = nftl->mbd.mtd;
 	instr->addr = block * nftl->EraseSize;
 	instr->len = nftl->EraseSize;
-	mtd->erase(mtd, instr);
+	mtd_erase(mtd, instr);
 
 	if (instr->state == MTD_ERASE_FAILED) {
 		printk("Error while formatting block %d\n", block);
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index 73ae217a4252..39de8727a524 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -342,7 +342,7 @@ static int erase_block(struct partition *part, int block)
 	part->blocks[block].state = BLOCK_ERASING;
 	part->blocks[block].free_sectors = 0;
 
-	rc = part->mbd.mtd->erase(part->mbd.mtd, erase);
+	rc = mtd_erase(part->mbd.mtd, erase);
 
 	if (rc) {
 		printk(KERN_ERR PREFIX "erase of region %llx,%llx on '%s' "
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index 1c9f307ae0a1..2f1acb1ab5e8 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -479,7 +479,7 @@ static int sm_erase_block(struct sm_ftl *ftl, int zone_num, uint16_t block,
 		return -EIO;
 	}
 
-	if (mtd->erase(mtd, &erase)) {
+	if (mtd_erase(mtd, &erase)) {
 		sm_printk("erase of block %d in zone %d failed",
 							block, zone_num);
 		goto error;
diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c
index 933f7e5f32d3..7d52854c16dd 100644
--- a/drivers/mtd/tests/mtd_oobtest.c
+++ b/drivers/mtd/tests/mtd_oobtest.c
@@ -78,7 +78,7 @@ static int erase_eraseblock(int ebnum)
 	ei.addr = addr;
 	ei.len  = mtd->erasesize;
 
-	err = mtd->erase(mtd, &ei);
+	err = mtd_erase(mtd, &ei);
 	if (err) {
 		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
 		return err;
diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c
index afafb6935fd0..271819fabb55 100644
--- a/drivers/mtd/tests/mtd_pagetest.c
+++ b/drivers/mtd/tests/mtd_pagetest.c
@@ -77,7 +77,7 @@ static int erase_eraseblock(int ebnum)
 	ei.addr = addr;
 	ei.len  = mtd->erasesize;
 
-	err = mtd->erase(mtd, &ei);
+	err = mtd_erase(mtd, &ei);
 	if (err) {
 		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
 		return err;
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
index 493b367bdd35..f67a65e21043 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/mtd_speedtest.c
@@ -79,7 +79,7 @@ static int erase_eraseblock(int ebnum)
 	ei.addr = addr;
 	ei.len  = mtd->erasesize;
 
-	err = mtd->erase(mtd, &ei);
+	err = mtd_erase(mtd, &ei);
 	if (err) {
 		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
 		return err;
@@ -105,7 +105,7 @@ static int multiblock_erase(int ebnum, int blocks)
 	ei.addr = addr;
 	ei.len  = mtd->erasesize * blocks;
 
-	err = mtd->erase(mtd, &ei);
+	err = mtd_erase(mtd, &ei);
 	if (err) {
 		printk(PRINT_PREF "error %d while erasing EB %d, blocks %d\n",
 		       err, ebnum, blocks);
diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c
index 811642fea6b4..a204a9f90524 100644
--- a/drivers/mtd/tests/mtd_stresstest.c
+++ b/drivers/mtd/tests/mtd_stresstest.c
@@ -112,7 +112,7 @@ static int erase_eraseblock(int ebnum)
 	ei.addr = addr;
 	ei.len  = mtd->erasesize;
 
-	err = mtd->erase(mtd, &ei);
+	err = mtd_erase(mtd, &ei);
 	if (unlikely(err)) {
 		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
 		return err;
diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c
index 1a05bfac4eee..16d0c05024d7 100644
--- a/drivers/mtd/tests/mtd_subpagetest.c
+++ b/drivers/mtd/tests/mtd_subpagetest.c
@@ -80,7 +80,7 @@ static int erase_eraseblock(int ebnum)
 	ei.addr = addr;
 	ei.len  = mtd->erasesize;
 
-	err = mtd->erase(mtd, &ei);
+	err = mtd_erase(mtd, &ei);
 	if (err) {
 		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
 		return err;
diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c
index 03ab649a6964..102c79b7ac66 100644
--- a/drivers/mtd/tests/mtd_torturetest.c
+++ b/drivers/mtd/tests/mtd_torturetest.c
@@ -105,7 +105,7 @@ static inline int erase_eraseblock(int ebnum)
 	ei.addr = addr;
 	ei.len  = mtd->erasesize;
 
-	err = mtd->erase(mtd, &ei);
+	err = mtd_erase(mtd, &ei);
 	if (err) {
 		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
 		return err;
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index f20b6f22f240..b6c8959e6c7e 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -361,7 +361,7 @@ retry:
 	ei.callback = erase_callback;
 	ei.priv     = (unsigned long)&wq;
 
-	err = ubi->mtd->erase(ubi->mtd, &ei);
+	err = mtd_erase(ubi->mtd, &ei);
 	if (err) {
 		if (retries++ < UBI_IO_RETRIES) {
 			dbg_io("error %d while erasing PEB %d, retry",
diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c
index a9c309a167c2..d638fafab649 100644
--- a/drivers/staging/spectra/lld_mtd.c
+++ b/drivers/staging/spectra/lld_mtd.c
@@ -188,7 +188,7 @@ u16 mtd_Erase_Block(u32 block_add)
 	erase.len = spectra_mtd->erasesize;
 	erase.priv = (unsigned long)&comp;
 
-	ret = spectra_mtd->erase(spectra_mtd, &erase);
+	ret = mtd_erase(spectra_mtd, &erase);
 	if (!ret) {
 		wait_for_completion(&comp);
 		if (erase.state != MTD_ERASE_DONE)
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index e513f1913c15..540e8eca1b49 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -74,7 +74,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
 	((struct erase_priv_struct *)instr->priv)->jeb = jeb;
 	((struct erase_priv_struct *)instr->priv)->c = c;
 
-	ret = c->mtd->erase(c->mtd, instr);
+	ret = mtd_erase(c->mtd, instr);
 	if (!ret)
 		return;
 
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index eb423ebcf538..046362894352 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -105,7 +105,7 @@ static int logfs_mtd_erase(struct super_block *sb, loff_t ofs, size_t len,
 	ei.len = len;
 	ei.callback = logfs_erase_callback;
 	ei.priv = (long)&complete;
-	ret = mtd->erase(mtd, &ei);
+	ret = mtd_erase(mtd, &ei);
 	if (ret)
 		return -EIO;
 
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 9f5b312af783..201bad557047 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -171,11 +171,8 @@ struct mtd_info {
 	struct mtd_erase_region_info *eraseregions;
 
 	/*
-	 * Erase is an asynchronous operation.  Device drivers are supposed
-	 * to call instr->callback() whenever the operation completes, even
-	 * if it completes with a failure.
-	 * Callers are supposed to pass a callback function and wait for it
-	 * to be called before writing to the block.
+	 * Do not call via these pointers, use corresponding mtd_*()
+	 * wrappers instead.
 	 */
 	int (*erase) (struct mtd_info *mtd, struct erase_info *instr);
 
@@ -274,6 +271,18 @@ struct mtd_info {
 	void (*put_device) (struct mtd_info *mtd);
 };
 
+/*
+ * Erase is an asynchronous operation.  Device drivers are supposed
+ * to call instr->callback() whenever the operation completes, even
+ * if it completes with a failure.
+ * Callers are supposed to pass a callback function and wait for it
+ * to be called before writing to the block.
+ */
+static inline int mtd_erase(struct mtd_info *mtd, struct erase_info *instr)
+{
+	return mtd->erase(mtd, instr);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From d35ea200c0fb5315f16fb2599a4bafd9c1a7b386 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 17:00:37 +0200
Subject: mtd: introduce mtd_point interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdpart.c   |  4 ++--
 fs/jffs2/erase.c        |  4 ++--
 fs/jffs2/readinode.c    |  4 ++--
 fs/jffs2/scan.c         |  4 ++--
 include/linux/mtd/mtd.h | 14 ++++++++++----
 5 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index d318fee28595..5b664722e5b0 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -89,8 +89,8 @@ static int part_point(struct mtd_info *mtd, loff_t from, size_t len,
 		len = 0;
 	else if (from + len > mtd->size)
 		len = mtd->size - from;
-	return part->master->point (part->master, from + part->offset,
-				    len, retlen, virt, phys);
+	return mtd_point(part->master, from + part->offset, len, retlen,
+			 virt, phys);
 }
 
 static void part_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 540e8eca1b49..53f8794fda6a 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -340,8 +340,8 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
 	if (c->mtd->point) {
 		unsigned long *wordebuf;
 
-		ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size,
-				    &retlen, &ebuf, NULL);
+		ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen,
+				&ebuf, NULL);
 		if (ret) {
 			D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
 			goto do_flash_read;
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index ee57bac1ba6d..dde61effeda2 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -63,8 +63,8 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
 	/* TODO: instead, incapsulate point() stuff to jffs2_flash_read(),
 	 * adding and jffs2_flash_read_end() interface. */
 	if (c->mtd->point) {
-		err = c->mtd->point(c->mtd, ofs, len, &retlen,
-				    (void **)&buffer, NULL);
+		err = mtd_point(c->mtd, ofs, len, &retlen, (void **)&buffer,
+				NULL);
 		if (!err && retlen < len) {
 			JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
 			c->mtd->unpoint(c->mtd, ofs, retlen);
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 28107ca136e4..53e05c8e5b69 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -97,8 +97,8 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
 	size_t pointlen, try_size;
 
 	if (c->mtd->point) {
-		ret = c->mtd->point(c->mtd, 0, c->mtd->size, &pointlen,
-				    (void **)&flashbuf, NULL);
+		ret = mtd_point(c->mtd, 0, c->mtd->size, &pointlen,
+				(void **)&flashbuf, NULL);
 		if (!ret && pointlen < c->mtd->size) {
 			/* Don't muck about if it won't let us point to the whole flash */
 			D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen));
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 201bad557047..ca7bfdaf7a6f 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -175,11 +175,8 @@ struct mtd_info {
 	 * wrappers instead.
 	 */
 	int (*erase) (struct mtd_info *mtd, struct erase_info *instr);
-
-	/* This stuff for eXecute-In-Place */
-	/* phys is optional and may be set to NULL */
 	int (*point) (struct mtd_info *mtd, loff_t from, size_t len,
-			size_t *retlen, void **virt, resource_size_t *phys);
+		      size_t *retlen, void **virt, resource_size_t *phys);
 
 	/* We probably shouldn't allow XIP if the unpoint isn't a NULL */
 	void (*unpoint) (struct mtd_info *mtd, loff_t from, size_t len);
@@ -283,6 +280,15 @@ static inline int mtd_erase(struct mtd_info *mtd, struct erase_info *instr)
 	return mtd->erase(mtd, instr);
 }
 
+/*
+ * This stuff for eXecute-In-Place. phys is optional and may be set to NULL.
+ */
+static inline int mtd_point(struct mtd_info *mtd, loff_t from, size_t len,
+			    size_t *retlen, void **virt, resource_size_t *phys)
+{
+	return mtd->point(mtd, from, len, retlen, virt, phys);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 7219778ad9c18cc2c05c7fca0abe026afbc19dfb Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 17:05:52 +0200
Subject: mtd: introduce mtd_unpoint interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdpart.c   | 2 +-
 fs/jffs2/erase.c        | 4 ++--
 fs/jffs2/readinode.c    | 6 +++---
 fs/jffs2/scan.c         | 4 ++--
 include/linux/mtd/mtd.h | 8 ++++++--
 5 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 5b664722e5b0..b09624a5497c 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -97,7 +97,7 @@ static void part_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
 {
 	struct mtd_part *part = PART(mtd);
 
-	part->master->unpoint(part->master, from + part->offset, len);
+	mtd_unpoint(part->master, from + part->offset, len);
 }
 
 static unsigned long part_get_unmapped_area(struct mtd_info *mtd,
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 53f8794fda6a..ffdf4fca9c54 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -349,7 +349,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
 		if (retlen < c->sector_size) {
 			/* Don't muck about if it won't let us point to the whole erase sector */
 			D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen));
-			c->mtd->unpoint(c->mtd, jeb->offset, retlen);
+			mtd_unpoint(c->mtd, jeb->offset, retlen);
 			goto do_flash_read;
 		}
 		wordebuf = ebuf-sizeof(*wordebuf);
@@ -358,7 +358,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
 		   if (*++wordebuf != ~0)
 			   break;
 		} while(--retlen);
-		c->mtd->unpoint(c->mtd, jeb->offset, c->sector_size);
+		mtd_unpoint(c->mtd, jeb->offset, c->sector_size);
 		if (retlen) {
 			printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n",
 			       *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf));
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index dde61effeda2..fca2f84e1add 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -67,7 +67,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
 				NULL);
 		if (!err && retlen < len) {
 			JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
-			c->mtd->unpoint(c->mtd, ofs, retlen);
+			mtd_unpoint(c->mtd, ofs, retlen);
 		} else if (err)
 			JFFS2_WARNING("MTD point failed: error code %d.\n", err);
 		else
@@ -101,7 +101,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
 		kfree(buffer);
 #ifndef __ECOS
 	else
-		c->mtd->unpoint(c->mtd, ofs, len);
+		mtd_unpoint(c->mtd, ofs, len);
 #endif
 
 	if (crc != tn->data_crc) {
@@ -137,7 +137,7 @@ free_out:
 		kfree(buffer);
 #ifndef __ECOS
 	else
-		c->mtd->unpoint(c->mtd, ofs, len);
+		mtd_unpoint(c->mtd, ofs, len);
 #endif
 	return err;
 }
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 53e05c8e5b69..72f3960f44a9 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -102,7 +102,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
 		if (!ret && pointlen < c->mtd->size) {
 			/* Don't muck about if it won't let us point to the whole flash */
 			D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen));
-			c->mtd->unpoint(c->mtd, 0, pointlen);
+			mtd_unpoint(c->mtd, 0, pointlen);
 			flashbuf = NULL;
 		}
 		if (ret)
@@ -273,7 +273,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
 		kfree(flashbuf);
 #ifndef __ECOS
 	else
-		c->mtd->unpoint(c->mtd, 0, c->mtd->size);
+		mtd_unpoint(c->mtd, 0, c->mtd->size);
 #endif
 	kfree(s);
 	return ret;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index ca7bfdaf7a6f..a7d22b7fcb4c 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -177,8 +177,6 @@ struct mtd_info {
 	int (*erase) (struct mtd_info *mtd, struct erase_info *instr);
 	int (*point) (struct mtd_info *mtd, loff_t from, size_t len,
 		      size_t *retlen, void **virt, resource_size_t *phys);
-
-	/* We probably shouldn't allow XIP if the unpoint isn't a NULL */
 	void (*unpoint) (struct mtd_info *mtd, loff_t from, size_t len);
 
 	/* Allow NOMMU mmap() to directly map the device (if not NULL)
@@ -289,6 +287,12 @@ static inline int mtd_point(struct mtd_info *mtd, loff_t from, size_t len,
 	return mtd->point(mtd, from, len, retlen, virt, phys);
 }
 
+/* We probably shouldn't allow XIP if the unpoint isn't a NULL */
+static inline void mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
+{
+	return mtd->unpoint(mtd, from, len);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 04c601bfa4cb29c968dcb66e44c799c9c01d8675 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 17:10:15 +0200
Subject: mtd: introduce mtd_get_unmapped_area interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   |  2 +-
 drivers/mtd/mtdconcat.c |  4 ++--
 drivers/mtd/mtdpart.c   |  3 +--
 fs/romfs/mmap-nommu.c   |  2 +-
 include/linux/mtd/mtd.h | 18 +++++++++++++-----
 5 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 41d64ff4c252..c51f04a00afb 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -1135,7 +1135,7 @@ static unsigned long mtdchar_get_unmapped_area(struct file *file,
 		if (offset > mtd->size - len)
 			return (unsigned long) -EINVAL;
 
-		return mtd->get_unmapped_area(mtd, len, offset, flags);
+		return mtd_get_unmapped_area(mtd, len, offset, flags);
 	}
 
 	/* can't map directly */
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 76123bd49314..b3895cf20bb2 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -726,8 +726,8 @@ static unsigned long concat_get_unmapped_area(struct mtd_info *mtd,
 			return (unsigned long) -EINVAL;
 
 		if (subdev->get_unmapped_area)
-			return subdev->get_unmapped_area(subdev, len, offset,
-							 flags);
+			return mtd_get_unmapped_area(subdev, len, offset,
+						     flags);
 
 		break;
 	}
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index b09624a5497c..55a9cb544fc1 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -108,8 +108,7 @@ static unsigned long part_get_unmapped_area(struct mtd_info *mtd,
 	struct mtd_part *part = PART(mtd);
 
 	offset += part->offset;
-	return part->master->get_unmapped_area(part->master, len, offset,
-					       flags);
+	return mtd_get_unmapped_area(part->master, len, offset, flags);
 }
 
 static int part_read_oob(struct mtd_info *mtd, loff_t from,
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c
index eed99428f104..d5168e8e7dcb 100644
--- a/fs/romfs/mmap-nommu.c
+++ b/fs/romfs/mmap-nommu.c
@@ -53,7 +53,7 @@ static unsigned long romfs_get_unmapped_area(struct file *file,
 		if (offset > mtd->size - len)
 			return (unsigned long) -EINVAL;
 
-		return mtd->get_unmapped_area(mtd, len, offset, flags);
+		return mtd_get_unmapped_area(mtd, len, offset, flags);
 	}
 
 cant_map_directly:
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index a7d22b7fcb4c..f38e8276b408 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -178,11 +178,6 @@ struct mtd_info {
 	int (*point) (struct mtd_info *mtd, loff_t from, size_t len,
 		      size_t *retlen, void **virt, resource_size_t *phys);
 	void (*unpoint) (struct mtd_info *mtd, loff_t from, size_t len);
-
-	/* Allow NOMMU mmap() to directly map the device (if not NULL)
-	 * - return the address to which the offset maps
-	 * - return -ENOSYS to indicate refusal to do the mapping
-	 */
 	unsigned long (*get_unmapped_area) (struct mtd_info *mtd,
 					    unsigned long len,
 					    unsigned long offset,
@@ -293,6 +288,19 @@ static inline void mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
 	return mtd->unpoint(mtd, from, len);
 }
 
+/*
+ * Allow NOMMU mmap() to directly map the device (if not NULL)
+ * - return the address to which the offset maps
+ * - return -ENOSYS to indicate refusal to do the mapping
+ */
+static inline unsigned long mtd_get_unmapped_area(struct mtd_info *mtd,
+						  unsigned long len,
+						  unsigned long offset,
+						  unsigned long flags)
+{
+	return mtd->get_unmapped_area(mtd, len, offset, flags);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 329ad399a9b3adf52c90637b21ca029fcf7f8795 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 17:30:16 +0200
Subject: mtd: introduce mtd_read interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 arch/arm/mach-davinci/board-da850-evm.c   |  2 +-
 arch/cris/arch-v32/drivers/axisflashmap.c |  4 +--
 drivers/mtd/afs.c                         |  4 +--
 drivers/mtd/ar7part.c                     | 15 ++++++-----
 drivers/mtd/bcm63xxpart.c                 | 12 ++++-----
 drivers/mtd/ftl.c                         | 41 ++++++++++++++++---------------
 drivers/mtd/inftlcore.c                   | 19 ++++++++------
 drivers/mtd/inftlmount.c                  | 10 ++++----
 drivers/mtd/mtdblock.c                    |  8 +++---
 drivers/mtd/mtdblock_ro.c                 |  2 +-
 drivers/mtd/mtdchar.c                     |  2 +-
 drivers/mtd/mtdconcat.c                   |  2 +-
 drivers/mtd/mtdoops.c                     |  4 +--
 drivers/mtd/mtdpart.c                     |  3 +--
 drivers/mtd/mtdswap.c                     |  4 +--
 drivers/mtd/nand/diskonchip.c             |  4 +--
 drivers/mtd/nand/nand_bbt.c               |  6 ++---
 drivers/mtd/nftlcore.c                    | 17 ++++++++-----
 drivers/mtd/nftlmount.c                   |  6 ++---
 drivers/mtd/redboot.c                     |  4 +--
 drivers/mtd/rfd_ftl.c                     | 24 +++++++++---------
 drivers/mtd/ssfdc.c                       |  6 ++---
 drivers/mtd/tests/mtd_pagetest.c          | 28 ++++++++++-----------
 drivers/mtd/tests/mtd_readtest.c          |  2 +-
 drivers/mtd/tests/mtd_speedtest.c         |  8 +++---
 drivers/mtd/tests/mtd_stresstest.c        |  2 +-
 drivers/mtd/tests/mtd_subpagetest.c       |  8 +++---
 drivers/mtd/tests/mtd_torturetest.c       |  2 +-
 drivers/mtd/ubi/debug.c                   |  2 +-
 drivers/mtd/ubi/io.c                      |  6 ++---
 drivers/staging/spectra/lld_mtd.c         |  8 +++---
 fs/jffs2/erase.c                          |  2 +-
 fs/jffs2/wbuf.c                           |  9 ++++---
 fs/logfs/dev_mtd.c                        |  2 +-
 include/linux/mtd/mtd.h                   |  9 ++++++-
 35 files changed, 152 insertions(+), 135 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index 6659a90dbcad..8b079f9d6924 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -127,7 +127,7 @@ static void da850_evm_m25p80_notify_add(struct mtd_info *mtd)
 	size_t retlen;
 
 	if (!strcmp(mtd->name, "MAC-Address")) {
-		mtd->read(mtd, 0, ETH_ALEN, &retlen, mac_addr);
+		mtd_read(mtd, 0, ETH_ALEN, &retlen, mac_addr);
 		if (retlen == ETH_ALEN)
 			pr_info("Read MAC addr from SPI Flash: %pM\n",
 				mac_addr);
diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c
index a2bde3744622..011bddbf073f 100644
--- a/arch/cris/arch-v32/drivers/axisflashmap.c
+++ b/arch/cris/arch-v32/drivers/axisflashmap.c
@@ -413,8 +413,8 @@ static int __init init_axis_flash(void)
 		} while (blockstat && ptable_sector);
 #endif
 		if (ptable_sector) {
-			main_mtd->read(main_mtd, ptable_sector, PAGESIZE,
-				&len, page);
+			mtd_read(main_mtd, ptable_sector, PAGESIZE, &len,
+				 page);
 			ptable_head = &((struct partitiontable *) page)->head;
 		}
 
diff --git a/drivers/mtd/afs.c b/drivers/mtd/afs.c
index 89a02f6f65dc..5a3942bf109c 100644
--- a/drivers/mtd/afs.c
+++ b/drivers/mtd/afs.c
@@ -75,7 +75,7 @@ afs_read_footer(struct mtd_info *mtd, u_int *img_start, u_int *iis_start,
 	size_t sz;
 	int ret;
 
-	ret = mtd->read(mtd, ptr, sizeof(fs), &sz, (u_char *) &fs);
+	ret = mtd_read(mtd, ptr, sizeof(fs), &sz, (u_char *)&fs);
 	if (ret >= 0 && sz != sizeof(fs))
 		ret = -EINVAL;
 
@@ -132,7 +132,7 @@ afs_read_iis(struct mtd_info *mtd, struct image_info_struct *iis, u_int ptr)
 	int ret, i;
 
 	memset(iis, 0, sizeof(*iis));
-	ret = mtd->read(mtd, ptr, sizeof(*iis), &sz, (u_char *) iis);
+	ret = mtd_read(mtd, ptr, sizeof(*iis), &sz, (u_char *)iis);
 	if (ret < 0)
 		goto failed;
 
diff --git a/drivers/mtd/ar7part.c b/drivers/mtd/ar7part.c
index f40ea4547554..945393129952 100644
--- a/drivers/mtd/ar7part.c
+++ b/drivers/mtd/ar7part.c
@@ -73,8 +73,8 @@ static int create_mtd_partitions(struct mtd_info *master,
 
 	do { /* Try 10 blocks starting from master->erasesize */
 		offset = pre_size;
-		master->read(master, offset,
-			     sizeof(header), &len, (uint8_t *)&header);
+		mtd_read(master, offset, sizeof(header), &len,
+			 (uint8_t *)&header);
 		if (!strncmp((char *)&header, "TIENV0.8", 8))
 			ar7_parts[1].offset = pre_size;
 		if (header.checksum == LOADER_MAGIC1)
@@ -95,16 +95,16 @@ static int create_mtd_partitions(struct mtd_info *master,
 	case LOADER_MAGIC1:
 		while (header.length) {
 			offset += sizeof(header) + header.length;
-			master->read(master, offset, sizeof(header),
-				     &len, (uint8_t *)&header);
+			mtd_read(master, offset, sizeof(header), &len,
+				 (uint8_t *)&header);
 		}
 		root_offset = offset + sizeof(header) + 4;
 		break;
 	case LOADER_MAGIC2:
 		while (header.length) {
 			offset += sizeof(header) + header.length;
-			master->read(master, offset, sizeof(header),
-				     &len, (uint8_t *)&header);
+			mtd_read(master, offset, sizeof(header), &len,
+				 (uint8_t *)&header);
 		}
 		root_offset = offset + sizeof(header) + 4 + 0xff;
 		root_offset &= ~(uint32_t)0xff;
@@ -114,8 +114,7 @@ static int create_mtd_partitions(struct mtd_info *master,
 		break;
 	}
 
-	master->read(master, root_offset,
-		sizeof(header), &len, (u8 *)&header);
+	mtd_read(master, root_offset, sizeof(header), &len, (u8 *)&header);
 	if (header.checksum != SQUASHFS_MAGIC) {
 		root_offset += master->erasesize - 1;
 		root_offset &= ~(master->erasesize - 1);
diff --git a/drivers/mtd/bcm63xxpart.c b/drivers/mtd/bcm63xxpart.c
index 9ee8bc426e93..608321ee056e 100644
--- a/drivers/mtd/bcm63xxpart.c
+++ b/drivers/mtd/bcm63xxpart.c
@@ -48,8 +48,8 @@ static int bcm63xx_detect_cfe(struct mtd_info *master)
 	int ret;
 	size_t retlen;
 
-	ret = master->read(master, BCM963XX_CFE_VERSION_OFFSET, 5, &retlen,
-			   (void *)buf);
+	ret = mtd_read(master, BCM963XX_CFE_VERSION_OFFSET, 5, &retlen,
+		       (void *)buf);
 	buf[retlen] = 0;
 
 	if (ret)
@@ -59,8 +59,8 @@ static int bcm63xx_detect_cfe(struct mtd_info *master)
 		return 0;
 
 	/* very old CFE's do not have the cfe-v string, so check for magic */
-	ret = master->read(master, BCM63XX_CFE_MAGIC_OFFSET, 8, &retlen,
-			   (void *)buf);
+	ret = mtd_read(master, BCM63XX_CFE_MAGIC_OFFSET, 8, &retlen,
+		       (void *)buf);
 	buf[retlen] = 0;
 
 	return strncmp("CFE1CFE1", buf, 8);
@@ -95,8 +95,8 @@ static int bcm63xx_parse_cfe_partitions(struct mtd_info *master,
 		return -ENOMEM;
 
 	/* Get the tag */
-	ret = master->read(master, cfelen, sizeof(struct bcm_tag), &retlen,
-			   (void *)buf);
+	ret = mtd_read(master, cfelen, sizeof(struct bcm_tag), &retlen,
+		       (void *)buf);
 
 	if (retlen != sizeof(struct bcm_tag)) {
 		vfree(buf);
diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index a982889277c8..12fd7ebd3fd8 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -168,8 +168,8 @@ static int scan_header(partition_t *part)
 	 (offset + sizeof(header)) < max_offset;
 	 offset += part->mbd.mtd->erasesize ? : 0x2000) {
 
-	err = part->mbd.mtd->read(part->mbd.mtd, offset, sizeof(header), &ret,
-			      (unsigned char *)&header);
+	err = mtd_read(part->mbd.mtd, offset, sizeof(header), &ret,
+                       (unsigned char *)&header);
 
 	if (err)
 	    return err;
@@ -224,8 +224,8 @@ static int build_maps(partition_t *part)
     for (i = 0; i < le16_to_cpu(part->header.NumEraseUnits); i++) {
 	offset = ((i + le16_to_cpu(part->header.FirstPhysicalEUN))
 		      << part->header.EraseUnitSize);
-	ret = part->mbd.mtd->read(part->mbd.mtd, offset, sizeof(header), &retval,
-			      (unsigned char *)&header);
+	ret = mtd_read(part->mbd.mtd, offset, sizeof(header), &retval,
+                       (unsigned char *)&header);
 
 	if (ret)
 	    goto out_XferInfo;
@@ -289,9 +289,9 @@ static int build_maps(partition_t *part)
 	part->EUNInfo[i].Deleted = 0;
 	offset = part->EUNInfo[i].Offset + le32_to_cpu(header.BAMOffset);
 
-	ret = part->mbd.mtd->read(part->mbd.mtd, offset,
-			      part->BlocksPerUnit * sizeof(uint32_t), &retval,
-			      (unsigned char *)part->bam_cache);
+	ret = mtd_read(part->mbd.mtd, offset,
+                       part->BlocksPerUnit * sizeof(uint32_t), &retval,
+                       (unsigned char *)part->bam_cache);
 
 	if (ret)
 		goto out_bam_cache;
@@ -485,9 +485,9 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit,
 
 	offset = eun->Offset + le32_to_cpu(part->header.BAMOffset);
 
-	ret = part->mbd.mtd->read(part->mbd.mtd, offset,
-			      part->BlocksPerUnit * sizeof(uint32_t),
-			      &retlen, (u_char *) (part->bam_cache));
+	ret = mtd_read(part->mbd.mtd, offset,
+                       part->BlocksPerUnit * sizeof(uint32_t), &retlen,
+                       (u_char *)(part->bam_cache));
 
 	/* mark the cache bad, in case we get an error later */
 	part->bam_index = 0xffff;
@@ -523,8 +523,8 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit,
 	    break;
 	case BLOCK_DATA:
 	case BLOCK_REPLACEMENT:
-	    ret = part->mbd.mtd->read(part->mbd.mtd, src, SECTOR_SIZE,
-                        &retlen, (u_char *) buf);
+	    ret = mtd_read(part->mbd.mtd, src, SECTOR_SIZE, &retlen,
+                           (u_char *)buf);
 	    if (ret) {
 		printk(KERN_WARNING "ftl: Error reading old xfer unit in copy_erase_unit\n");
 		return ret;
@@ -747,10 +747,11 @@ static uint32_t find_free(partition_t *part)
 	/* Invalidate cache */
 	part->bam_index = 0xffff;
 
-	ret = part->mbd.mtd->read(part->mbd.mtd,
-		       part->EUNInfo[eun].Offset + le32_to_cpu(part->header.BAMOffset),
-		       part->BlocksPerUnit * sizeof(uint32_t),
-		       &retlen, (u_char *) (part->bam_cache));
+	ret = mtd_read(part->mbd.mtd,
+                       part->EUNInfo[eun].Offset + le32_to_cpu(part->header.BAMOffset),
+                       part->BlocksPerUnit * sizeof(uint32_t),
+                       &retlen,
+                       (u_char *)(part->bam_cache));
 
 	if (ret) {
 	    printk(KERN_WARNING"ftl: Error reading BAM in find_free\n");
@@ -810,8 +811,8 @@ static int ftl_read(partition_t *part, caddr_t buffer,
 	else {
 	    offset = (part->EUNInfo[log_addr / bsize].Offset
 			  + (log_addr % bsize));
-	    ret = part->mbd.mtd->read(part->mbd.mtd, offset, SECTOR_SIZE,
-			   &retlen, (u_char *) buffer);
+	    ret = mtd_read(part->mbd.mtd, offset, SECTOR_SIZE, &retlen,
+                           (u_char *)buffer);
 
 	    if (ret) {
 		printk(KERN_WARNING "Error reading MTD device in ftl_read()\n");
@@ -849,8 +850,8 @@ static int set_bam_entry(partition_t *part, uint32_t log_addr,
 		  le32_to_cpu(part->header.BAMOffset));
 
 #ifdef PSYCHO_DEBUG
-    ret = part->mbd.mtd->read(part->mbd.mtd, offset, sizeof(uint32_t),
-                        &retlen, (u_char *)&old_addr);
+    ret = mtd_read(part->mbd.mtd, offset, sizeof(uint32_t), &retlen,
+                   (u_char *)&old_addr);
     if (ret) {
 	printk(KERN_WARNING"ftl: Error reading old_addr in set_bam_entry: %d\n",ret);
 	return ret;
diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index dd034efd1875..0b038bed7b9c 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -343,14 +343,17 @@ static u16 INFTL_foldchain(struct INFTLrecord *inftl, unsigned thisVUC, unsigned
 		if (BlockMap[block] == BLOCK_NIL)
 			continue;
 
-		ret = mtd->read(mtd, (inftl->EraseSize * BlockMap[block]) +
-				(block * SECTORSIZE), SECTORSIZE, &retlen,
-				movebuf);
+		ret = mtd_read(mtd,
+			       (inftl->EraseSize * BlockMap[block]) + (block * SECTORSIZE),
+			       SECTORSIZE,
+			       &retlen,
+			       movebuf);
 		if (ret < 0 && !mtd_is_bitflip(ret)) {
-			ret = mtd->read(mtd,
-					(inftl->EraseSize * BlockMap[block]) +
-					(block * SECTORSIZE), SECTORSIZE,
-					&retlen, movebuf);
+			ret = mtd_read(mtd,
+				       (inftl->EraseSize * BlockMap[block]) + (block * SECTORSIZE),
+				       SECTORSIZE,
+				       &retlen,
+				       movebuf);
 			if (ret != -EIO)
 				pr_debug("INFTL: error went away on retry?\n");
 		}
@@ -914,7 +917,7 @@ foundit:
 	} else {
 		size_t retlen;
 		loff_t ptr = (thisEUN * inftl->EraseSize) + blockofs;
-		int ret = mtd->read(mtd, ptr, SECTORSIZE, &retlen, buffer);
+		int ret = mtd_read(mtd, ptr, SECTORSIZE, &retlen, buffer);
 
 		/* Handle corrected bit flips gracefully */
 		if (ret < 0 && !mtd_is_bitflip(ret))
diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c
index 0d946f10a682..9bfbca5d88d6 100644
--- a/drivers/mtd/inftlmount.c
+++ b/drivers/mtd/inftlmount.c
@@ -73,8 +73,8 @@ static int find_boot_record(struct INFTLrecord *inftl)
 		 * Check for BNAND header first. Then whinge if it's found
 		 * but later checks fail.
 		 */
-		ret = mtd->read(mtd, block * inftl->EraseSize,
-				SECTORSIZE, &retlen, buf);
+		ret = mtd_read(mtd, block * inftl->EraseSize, SECTORSIZE,
+			       &retlen, buf);
 		/* We ignore ret in case the ECC of the MediaHeader is invalid
 		   (which is apparently acceptable) */
 		if (retlen != SECTORSIZE) {
@@ -118,8 +118,8 @@ static int find_boot_record(struct INFTLrecord *inftl)
 		memcpy(mh, buf, sizeof(struct INFTLMediaHeader));
 
 		/* Read the spare media header at offset 4096 */
-		mtd->read(mtd, block * inftl->EraseSize + 4096,
-			  SECTORSIZE, &retlen, buf);
+		mtd_read(mtd, block * inftl->EraseSize + 4096, SECTORSIZE,
+			 &retlen, buf);
 		if (retlen != SECTORSIZE) {
 			printk(KERN_WARNING "INFTL: Unable to read spare "
 			       "Media Header\n");
@@ -342,7 +342,7 @@ static int check_free_sectors(struct INFTLrecord *inftl, unsigned int address,
 	int i;
 
 	for (i = 0; i < len; i += SECTORSIZE) {
-		if (mtd->read(mtd, address, SECTORSIZE, &retlen, buf))
+		if (mtd_read(mtd, address, SECTORSIZE, &retlen, buf))
 			return -1;
 		if (memcmpb(buf, 0xff, SECTORSIZE) != 0)
 			return -1;
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index 9b01cb0266e4..b0644d2d2a6e 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -184,8 +184,8 @@ static int do_cached_write (struct mtdblk_dev *mtdblk, unsigned long pos,
 			    mtdblk->cache_offset != sect_start) {
 				/* fill the cache with the current sector */
 				mtdblk->cache_state = STATE_EMPTY;
-				ret = mtd->read(mtd, sect_start, sect_size,
-						&retlen, mtdblk->cache_data);
+				ret = mtd_read(mtd, sect_start, sect_size,
+					       &retlen, mtdblk->cache_data);
 				if (ret)
 					return ret;
 				if (retlen != sect_size)
@@ -222,7 +222,7 @@ static int do_cached_read (struct mtdblk_dev *mtdblk, unsigned long pos,
 			mtd->name, pos, len);
 
 	if (!sect_size)
-		return mtd->read(mtd, pos, len, &retlen, buf);
+		return mtd_read(mtd, pos, len, &retlen, buf);
 
 	while (len > 0) {
 		unsigned long sect_start = (pos/sect_size)*sect_size;
@@ -241,7 +241,7 @@ static int do_cached_read (struct mtdblk_dev *mtdblk, unsigned long pos,
 		    mtdblk->cache_offset == sect_start) {
 			memcpy (buf, mtdblk->cache_data + offset, size);
 		} else {
-			ret = mtd->read(mtd, pos, size, &retlen, buf);
+			ret = mtd_read(mtd, pos, size, &retlen, buf);
 			if (ret)
 				return ret;
 			if (retlen != size)
diff --git a/drivers/mtd/mtdblock_ro.c b/drivers/mtd/mtdblock_ro.c
index 0470a6e86309..f5737b1153fa 100644
--- a/drivers/mtd/mtdblock_ro.c
+++ b/drivers/mtd/mtdblock_ro.c
@@ -30,7 +30,7 @@ static int mtdblock_readsect(struct mtd_blktrans_dev *dev,
 {
 	size_t retlen;
 
-	if (dev->mtd->read(dev->mtd, (block * 512), 512, &retlen, buf))
+	if (mtd_read(dev->mtd, (block * 512), 512, &retlen, buf))
 		return 1;
 	return 0;
 }
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index c51f04a00afb..c7f484687fa3 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -232,7 +232,7 @@ static ssize_t mtdchar_read(struct file *file, char __user *buf, size_t count,
 			break;
 		}
 		default:
-			ret = mtd->read(mtd, *ppos, len, &retlen, kbuf);
+			ret = mtd_read(mtd, *ppos, len, &retlen, kbuf);
 		}
 		/* Nand returns -EBADMSG on ECC errors, but it returns
 		 * the data. For our userspace tools it is important
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index b3895cf20bb2..45460349fd12 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -91,7 +91,7 @@ concat_read(struct mtd_info *mtd, loff_t from, size_t len,
 			/* Entire transaction goes into this subdev */
 			size = len;
 
-		err = subdev->read(subdev, from, size, &retsize, buf);
+		err = mtd_read(subdev, from, size, &retsize, buf);
 
 		/* Save information about bitflips! */
 		if (unlikely(err)) {
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 9b2d86323169..23629ad08507 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -258,8 +258,8 @@ static void find_next_position(struct mtdoops_context *cxt)
 			continue;
 		/* Assume the page is used */
 		mark_page_used(cxt, page);
-		ret = mtd->read(mtd, page * record_size, MTDOOPS_HEADER_SIZE,
-				&retlen, (u_char *) &count[0]);
+		ret = mtd_read(mtd, page * record_size, MTDOOPS_HEADER_SIZE,
+			       &retlen, (u_char *)&count[0]);
 		if (retlen != MTDOOPS_HEADER_SIZE ||
 				(ret < 0 && !mtd_is_bitflip(ret))) {
 			printk(KERN_ERR "mtdoops: read failure at %ld (%td of %d read), err %d\n",
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 55a9cb544fc1..59cd7974bc50 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -70,8 +70,7 @@ static int part_read(struct mtd_info *mtd, loff_t from, size_t len,
 		len = 0;
 	else if (from + len > mtd->size)
 		len = mtd->size - from;
-	res = part->master->read(part->master, from + part->offset,
-				   len, retlen, buf);
+	res = mtd_read(part->master, from + part->offset, len, retlen, buf);
 	if (unlikely(res)) {
 		if (mtd_is_bitflip(res))
 			mtd->ecc_stats.corrected += part->master->ecc_stats.corrected - stats.corrected;
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 4e12875a916c..b3282d2aa8f8 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -736,7 +736,7 @@ static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock,
 	retries = 0;
 
 retry:
-	ret = mtd->read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf);
+	ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf);
 
 	if (ret < 0 && !mtd_is_bitflip(ret)) {
 		oldeb = d->eb_data + oldblock / d->pages_per_eblk;
@@ -1161,7 +1161,7 @@ static int mtdswap_readsect(struct mtd_blktrans_dev *dev,
 	retries = 0;
 
 retry:
-	ret = mtd->read(mtd, readpos, PAGE_SIZE, &retlen, buf);
+	ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf);
 
 	d->mtd_read_count++;
 	if (mtd_is_bitflip(ret)) {
diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c
index 5780dbab6113..df921e7a496c 100644
--- a/drivers/mtd/nand/diskonchip.c
+++ b/drivers/mtd/nand/diskonchip.c
@@ -1072,7 +1072,7 @@ static int __init find_media_headers(struct mtd_info *mtd, u_char *buf, const ch
 	size_t retlen;
 
 	for (offs = 0; offs < mtd->size; offs += mtd->erasesize) {
-		ret = mtd->read(mtd, offs, mtd->writesize, &retlen, buf);
+		ret = mtd_read(mtd, offs, mtd->writesize, &retlen, buf);
 		if (retlen != mtd->writesize)
 			continue;
 		if (ret) {
@@ -1097,7 +1097,7 @@ static int __init find_media_headers(struct mtd_info *mtd, u_char *buf, const ch
 	/* Only one mediaheader was found.  We want buf to contain a
 	   mediaheader on return, so we'll have to re-read the one we found. */
 	offs = doc->mh0_page << this->page_shift;
-	ret = mtd->read(mtd, offs, mtd->writesize, &retlen, buf);
+	ret = mtd_read(mtd, offs, mtd->writesize, &retlen, buf);
 	if (retlen != mtd->writesize) {
 		/* Insanity.  Give up. */
 		printk(KERN_ERR "Read DiskOnChip Media Header once, but can't reread it???\n");
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 69148ae3bf58..1bcd6bc6798c 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -201,7 +201,7 @@ static int read_bbt(struct mtd_info *mtd, uint8_t *buf, int page, int num,
 			from += marker_len;
 			marker_len = 0;
 		}
-		res = mtd->read(mtd, from, len, &retlen, buf);
+		res = mtd_read(mtd, from, len, &retlen, buf);
 		if (res < 0) {
 			if (mtd_is_eccerr(res)) {
 				pr_info("nand_bbt: ECC error in BBT at "
@@ -298,7 +298,7 @@ static int scan_read_raw_data(struct mtd_info *mtd, uint8_t *buf, loff_t offs,
 	if (td->options & NAND_BBT_VERSION)
 		len++;
 
-	return mtd->read(mtd, offs, len, &retlen, buf);
+	return mtd_read(mtd, offs, len, &retlen, buf);
 }
 
 /* Scan read raw data from flash */
@@ -756,7 +756,7 @@ static int write_bbt(struct mtd_info *mtd, uint8_t *buf,
 			/* Make it block aligned */
 			to &= ~((loff_t)((1 << this->bbt_erase_shift) - 1));
 			len = 1 << this->bbt_erase_shift;
-			res = mtd->read(mtd, to, len, &retlen, buf);
+			res = mtd_read(mtd, to, len, &retlen, buf);
 			if (res < 0) {
 				if (retlen != len) {
 					pr_info("nand_bbt: error reading block "
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index cda77b562ad4..1a9d9c1d3a74 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -423,12 +423,17 @@ static u16 NFTL_foldchain (struct NFTLrecord *nftl, unsigned thisVUC, unsigned p
 		if (BlockMap[block] == BLOCK_NIL)
 			continue;
 
-		ret = mtd->read(mtd, (nftl->EraseSize * BlockMap[block]) + (block * 512),
-				512, &retlen, movebuf);
+		ret = mtd_read(mtd,
+			       (nftl->EraseSize * BlockMap[block]) + (block * 512),
+			       512,
+			       &retlen,
+			       movebuf);
 		if (ret < 0 && !mtd_is_bitflip(ret)) {
-			ret = mtd->read(mtd, (nftl->EraseSize * BlockMap[block])
-					+ (block * 512), 512, &retlen,
-					movebuf);
+			ret = mtd_read(mtd,
+				       (nftl->EraseSize * BlockMap[block]) + (block * 512),
+				       512,
+				       &retlen,
+				       movebuf);
 			if (ret != -EIO)
 				printk("Error went away on retry.\n");
 		}
@@ -771,7 +776,7 @@ static int nftl_readblock(struct mtd_blktrans_dev *mbd, unsigned long block,
 	} else {
 		loff_t ptr = (lastgoodEUN * nftl->EraseSize) + blockofs;
 		size_t retlen;
-		int res = mtd->read(mtd, ptr, 512, &retlen, buffer);
+		int res = mtd_read(mtd, ptr, 512, &retlen, buffer);
 
 		if (res < 0 && !mtd_is_bitflip(res))
 			return -EIO;
diff --git a/drivers/mtd/nftlmount.c b/drivers/mtd/nftlmount.c
index 9164a56fb5c0..b068dc8a3666 100644
--- a/drivers/mtd/nftlmount.c
+++ b/drivers/mtd/nftlmount.c
@@ -63,8 +63,8 @@ static int find_boot_record(struct NFTLrecord *nftl)
 
 		/* Check for ANAND header first. Then can whinge if it's found but later
 		   checks fail */
-		ret = mtd->read(mtd, block * nftl->EraseSize, SECTORSIZE,
-				&retlen, buf);
+		ret = mtd_read(mtd, block * nftl->EraseSize, SECTORSIZE,
+			       &retlen, buf);
 		/* We ignore ret in case the ECC of the MediaHeader is invalid
 		   (which is apparently acceptable) */
 		if (retlen != SECTORSIZE) {
@@ -274,7 +274,7 @@ static int check_free_sectors(struct NFTLrecord *nftl, unsigned int address, int
 	int i;
 
 	for (i = 0; i < len; i += SECTORSIZE) {
-		if (mtd->read(mtd, address, SECTORSIZE, &retlen, buf))
+		if (mtd_read(mtd, address, SECTORSIZE, &retlen, buf))
 			return -1;
 		if (memcmpb(buf, 0xff, SECTORSIZE) != 0)
 			return -1;
diff --git a/drivers/mtd/redboot.c b/drivers/mtd/redboot.c
index e366b1d84ead..623d9b86d0d9 100644
--- a/drivers/mtd/redboot.c
+++ b/drivers/mtd/redboot.c
@@ -104,8 +104,8 @@ static int parse_redboot_partitions(struct mtd_info *master,
 	printk(KERN_NOTICE "Searching for RedBoot partition table in %s at offset 0x%lx\n",
 	       master->name, offset);
 
-	ret = master->read(master, offset,
-			   master->erasesize, &retlen, (void *)buf);
+	ret = mtd_read(master, offset, master->erasesize, &retlen,
+		       (void *)buf);
 
 	if (ret)
 		goto out;
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index 39de8727a524..d9fe2d0533d9 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -200,9 +200,9 @@ static int scan_header(struct partition *part)
 		part->sector_map[i] = -1;
 
 	for (i=0, blocks_found=0; i<part->total_blocks; i++) {
-		rc = part->mbd.mtd->read(part->mbd.mtd,
-				i * part->block_size, part->header_size,
-				&retlen, (u_char*)part->header_cache);
+		rc = mtd_read(part->mbd.mtd, i * part->block_size,
+			      part->header_size, &retlen,
+			      (u_char *)part->header_cache);
 
 		if (!rc && retlen != part->header_size)
 			rc = -EIO;
@@ -250,8 +250,8 @@ static int rfd_ftl_readsect(struct mtd_blktrans_dev *dev, u_long sector, char *b
 
 	addr = part->sector_map[sector];
 	if (addr != -1) {
-		rc = part->mbd.mtd->read(part->mbd.mtd, addr, SECTOR_SIZE,
-						&retlen, (u_char*)buf);
+		rc = mtd_read(part->mbd.mtd, addr, SECTOR_SIZE, &retlen,
+			      (u_char *)buf);
 		if (!rc && retlen != SECTOR_SIZE)
 			rc = -EIO;
 
@@ -372,9 +372,8 @@ static int move_block_contents(struct partition *part, int block_no, u_long *old
 	if (!map)
 		goto err2;
 
-	rc = part->mbd.mtd->read(part->mbd.mtd,
-		part->blocks[block_no].offset, part->header_size,
-		&retlen, (u_char*)map);
+	rc = mtd_read(part->mbd.mtd, part->blocks[block_no].offset,
+		      part->header_size, &retlen, (u_char *)map);
 
 	if (!rc && retlen != part->header_size)
 		rc = -EIO;
@@ -413,8 +412,8 @@ static int move_block_contents(struct partition *part, int block_no, u_long *old
 			}
 			continue;
 		}
-		rc = part->mbd.mtd->read(part->mbd.mtd, addr,
-			SECTOR_SIZE, &retlen, sector_data);
+		rc = mtd_read(part->mbd.mtd, addr, SECTOR_SIZE, &retlen,
+			      sector_data);
 
 		if (!rc && retlen != SECTOR_SIZE)
 			rc = -EIO;
@@ -563,8 +562,9 @@ static int find_writable_block(struct partition *part, u_long *old_sector)
 		}
 	}
 
-	rc = part->mbd.mtd->read(part->mbd.mtd, part->blocks[block].offset,
-		part->header_size, &retlen, (u_char*)part->header_cache);
+	rc = mtd_read(part->mbd.mtd, part->blocks[block].offset,
+		      part->header_size, &retlen,
+		      (u_char *)part->header_cache);
 
 	if (!rc && retlen != part->header_size)
 		rc = -EIO;
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index 976e3d28b962..293e22a5710f 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -123,8 +123,8 @@ static int get_valid_cis_sector(struct mtd_info *mtd)
 	 */
 	for (k = 0, offset = 0; k < 4; k++, offset += mtd->erasesize) {
 		if (!mtd->block_isbad(mtd, offset)) {
-			ret = mtd->read(mtd, offset, SECTOR_SIZE, &retlen,
-				sect_buf);
+			ret = mtd_read(mtd, offset, SECTOR_SIZE, &retlen,
+				       sect_buf);
 
 			/* CIS pattern match on the sector buffer */
 			if (ret < 0 || retlen != SECTOR_SIZE) {
@@ -156,7 +156,7 @@ static int read_physical_sector(struct mtd_info *mtd, uint8_t *sect_buf,
 	size_t retlen;
 	loff_t offset = (loff_t)sect_no << SECTOR_SHIFT;
 
-	ret = mtd->read(mtd, offset, SECTOR_SIZE, &retlen, sect_buf);
+	ret = mtd_read(mtd, offset, SECTOR_SIZE, &retlen, sect_buf);
 	if (ret < 0 || retlen != SECTOR_SIZE)
 		return -1;
 
diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c
index 271819fabb55..6d62e24a03ed 100644
--- a/drivers/mtd/tests/mtd_pagetest.c
+++ b/drivers/mtd/tests/mtd_pagetest.c
@@ -127,7 +127,7 @@ static int verify_eraseblock(int ebnum)
 	set_random_data(writebuf, mtd->erasesize);
 	for (j = 0; j < pgcnt - 1; ++j, addr += pgsize) {
 		/* Do a read to set the internal dataRAMs to different data */
-		err = mtd->read(mtd, addr0, bufsize, &read, twopages);
+		err = mtd_read(mtd, addr0, bufsize, &read, twopages);
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
@@ -135,7 +135,7 @@ static int verify_eraseblock(int ebnum)
 			       (long long)addr0);
 			return err;
 		}
-		err = mtd->read(mtd, addrn - bufsize, bufsize, &read, twopages);
+		err = mtd_read(mtd, addrn - bufsize, bufsize, &read, twopages);
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
@@ -145,7 +145,7 @@ static int verify_eraseblock(int ebnum)
 		}
 		memset(twopages, 0, bufsize);
 		read = 0;
-		err = mtd->read(mtd, addr, bufsize, &read, twopages);
+		err = mtd_read(mtd, addr, bufsize, &read, twopages);
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
@@ -163,7 +163,7 @@ static int verify_eraseblock(int ebnum)
 	if (addr <= addrn - pgsize - pgsize && !bbt[ebnum + 1]) {
 		unsigned long oldnext = next;
 		/* Do a read to set the internal dataRAMs to different data */
-		err = mtd->read(mtd, addr0, bufsize, &read, twopages);
+		err = mtd_read(mtd, addr0, bufsize, &read, twopages);
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
@@ -171,7 +171,7 @@ static int verify_eraseblock(int ebnum)
 			       (long long)addr0);
 			return err;
 		}
-		err = mtd->read(mtd, addrn - bufsize, bufsize, &read, twopages);
+		err = mtd_read(mtd, addrn - bufsize, bufsize, &read, twopages);
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
@@ -181,7 +181,7 @@ static int verify_eraseblock(int ebnum)
 		}
 		memset(twopages, 0, bufsize);
 		read = 0;
-		err = mtd->read(mtd, addr, bufsize, &read, twopages);
+		err = mtd_read(mtd, addr, bufsize, &read, twopages);
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
@@ -230,7 +230,7 @@ static int crosstest(void)
 	/* Read 2nd-to-last page to pp1 */
 	read = 0;
 	addr = addrn - pgsize - pgsize;
-	err = mtd->read(mtd, addr, pgsize, &read, pp1);
+	err = mtd_read(mtd, addr, pgsize, &read, pp1);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
@@ -243,7 +243,7 @@ static int crosstest(void)
 	/* Read 3rd-to-last page to pp1 */
 	read = 0;
 	addr = addrn - pgsize - pgsize - pgsize;
-	err = mtd->read(mtd, addr, pgsize, &read, pp1);
+	err = mtd_read(mtd, addr, pgsize, &read, pp1);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
@@ -257,7 +257,7 @@ static int crosstest(void)
 	read = 0;
 	addr = addr0;
 	printk(PRINT_PREF "reading page at %#llx\n", (long long)addr);
-	err = mtd->read(mtd, addr, pgsize, &read, pp2);
+	err = mtd_read(mtd, addr, pgsize, &read, pp2);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
@@ -271,7 +271,7 @@ static int crosstest(void)
 	read = 0;
 	addr = addrn - pgsize;
 	printk(PRINT_PREF "reading page at %#llx\n", (long long)addr);
-	err = mtd->read(mtd, addr, pgsize, &read, pp3);
+	err = mtd_read(mtd, addr, pgsize, &read, pp3);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
@@ -285,7 +285,7 @@ static int crosstest(void)
 	read = 0;
 	addr = addr0;
 	printk(PRINT_PREF "reading page at %#llx\n", (long long)addr);
-	err = mtd->read(mtd, addr, pgsize, &read, pp4);
+	err = mtd_read(mtd, addr, pgsize, &read, pp4);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
@@ -344,7 +344,7 @@ static int erasecrosstest(void)
 
 	printk(PRINT_PREF "reading 1st page of block %d\n", ebnum);
 	memset(readbuf, 0, pgsize);
-	err = mtd->read(mtd, addr0, pgsize, &read, readbuf);
+	err = mtd_read(mtd, addr0, pgsize, &read, readbuf);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
@@ -382,7 +382,7 @@ static int erasecrosstest(void)
 
 	printk(PRINT_PREF "reading 1st page of block %d\n", ebnum);
 	memset(readbuf, 0, pgsize);
-	err = mtd->read(mtd, addr0, pgsize, &read, readbuf);
+	err = mtd_read(mtd, addr0, pgsize, &read, readbuf);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
@@ -438,7 +438,7 @@ static int erasetest(void)
 		return err;
 
 	printk(PRINT_PREF "reading 1st page of block %d\n", ebnum);
-	err = mtd->read(mtd, addr0, pgsize, &read, twopages);
+	err = mtd_read(mtd, addr0, pgsize, &read, twopages);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
diff --git a/drivers/mtd/tests/mtd_readtest.c b/drivers/mtd/tests/mtd_readtest.c
index 550fe51225a7..0c58d2976c76 100644
--- a/drivers/mtd/tests/mtd_readtest.c
+++ b/drivers/mtd/tests/mtd_readtest.c
@@ -52,7 +52,7 @@ static int read_eraseblock_by_page(int ebnum)
 
 	for (i = 0; i < pgcnt; i++) {
 		memset(buf, 0 , pgcnt);
-		ret = mtd->read(mtd, addr, pgsize, &read, buf);
+		ret = mtd_read(mtd, addr, pgsize, &read, buf);
 		if (ret == -EUCLEAN)
 			ret = 0;
 		if (ret || read != pgsize) {
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
index f67a65e21043..3c9529bd0a62 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/mtd_speedtest.c
@@ -214,7 +214,7 @@ static int read_eraseblock(int ebnum)
 	int err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 
-	err = mtd->read(mtd, addr, mtd->erasesize, &read, iobuf);
+	err = mtd_read(mtd, addr, mtd->erasesize, &read, iobuf);
 	/* Ignore corrected ECC errors */
 	if (mtd_is_bitflip(err))
 		err = 0;
@@ -235,7 +235,7 @@ static int read_eraseblock_by_page(int ebnum)
 	void *buf = iobuf;
 
 	for (i = 0; i < pgcnt; i++) {
-		err = mtd->read(mtd, addr, pgsize, &read, buf);
+		err = mtd_read(mtd, addr, pgsize, &read, buf);
 		/* Ignore corrected ECC errors */
 		if (mtd_is_bitflip(err))
 			err = 0;
@@ -261,7 +261,7 @@ static int read_eraseblock_by_2pages(int ebnum)
 	void *buf = iobuf;
 
 	for (i = 0; i < n; i++) {
-		err = mtd->read(mtd, addr, sz, &read, buf);
+		err = mtd_read(mtd, addr, sz, &read, buf);
 		/* Ignore corrected ECC errors */
 		if (mtd_is_bitflip(err))
 			err = 0;
@@ -276,7 +276,7 @@ static int read_eraseblock_by_2pages(int ebnum)
 		buf += sz;
 	}
 	if (pgcnt % 2) {
-		err = mtd->read(mtd, addr, pgsize, &read, buf);
+		err = mtd_read(mtd, addr, pgsize, &read, buf);
 		/* Ignore corrected ECC errors */
 		if (mtd_is_bitflip(err))
 			err = 0;
diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c
index a204a9f90524..83a843723880 100644
--- a/drivers/mtd/tests/mtd_stresstest.c
+++ b/drivers/mtd/tests/mtd_stresstest.c
@@ -153,7 +153,7 @@ static int do_read(void)
 			len = mtd->erasesize - offs;
 	}
 	addr = eb * mtd->erasesize + offs;
-	err = mtd->read(mtd, addr, len, &read, readbuf);
+	err = mtd_read(mtd, addr, len, &read, readbuf);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (unlikely(err || read != len)) {
diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c
index 16d0c05024d7..d81f89a19daa 100644
--- a/drivers/mtd/tests/mtd_subpagetest.c
+++ b/drivers/mtd/tests/mtd_subpagetest.c
@@ -196,7 +196,7 @@ static int verify_eraseblock(int ebnum)
 	set_random_data(writebuf, subpgsize);
 	clear_data(readbuf, subpgsize);
 	read = 0;
-	err = mtd->read(mtd, addr, subpgsize, &read, readbuf);
+	err = mtd_read(mtd, addr, subpgsize, &read, readbuf);
 	if (unlikely(err || read != subpgsize)) {
 		if (mtd_is_bitflip(err) && read == subpgsize) {
 			printk(PRINT_PREF "ECC correction at %#llx\n",
@@ -224,7 +224,7 @@ static int verify_eraseblock(int ebnum)
 	set_random_data(writebuf, subpgsize);
 	clear_data(readbuf, subpgsize);
 	read = 0;
-	err = mtd->read(mtd, addr, subpgsize, &read, readbuf);
+	err = mtd_read(mtd, addr, subpgsize, &read, readbuf);
 	if (unlikely(err || read != subpgsize)) {
 		if (mtd_is_bitflip(err) && read == subpgsize) {
 			printk(PRINT_PREF "ECC correction at %#llx\n",
@@ -262,7 +262,7 @@ static int verify_eraseblock2(int ebnum)
 		set_random_data(writebuf, subpgsize * k);
 		clear_data(readbuf, subpgsize * k);
 		read = 0;
-		err = mtd->read(mtd, addr, subpgsize * k, &read, readbuf);
+		err = mtd_read(mtd, addr, subpgsize * k, &read, readbuf);
 		if (unlikely(err || read != subpgsize * k)) {
 			if (mtd_is_bitflip(err) && read == subpgsize * k) {
 				printk(PRINT_PREF "ECC correction at %#llx\n",
@@ -296,7 +296,7 @@ static int verify_eraseblock_ff(int ebnum)
 	for (j = 0; j < mtd->erasesize / subpgsize; ++j) {
 		clear_data(readbuf, subpgsize);
 		read = 0;
-		err = mtd->read(mtd, addr, subpgsize, &read, readbuf);
+		err = mtd_read(mtd, addr, subpgsize, &read, readbuf);
 		if (unlikely(err || read != subpgsize)) {
 			if (mtd_is_bitflip(err) && read == subpgsize) {
 				printk(PRINT_PREF "ECC correction at %#llx\n",
diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c
index 102c79b7ac66..ecc68bf3f3f2 100644
--- a/drivers/mtd/tests/mtd_torturetest.c
+++ b/drivers/mtd/tests/mtd_torturetest.c
@@ -137,7 +137,7 @@ static inline int check_eraseblock(int ebnum, unsigned char *buf)
 	}
 
 retry:
-	err = mtd->read(mtd, addr, len, &read, check_buf);
+	err = mtd_read(mtd, addr, len, &read, check_buf);
 	if (mtd_is_bitflip(err))
 		printk(PRINT_PREF "single bit flip occurred at EB %d "
 		       "MTD reported that it was fixed.\n", ebnum);
diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
index ab80c0debac8..e2cdebf40840 100644
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -216,7 +216,7 @@ void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len)
 	buf = vmalloc(len);
 	if (!buf)
 		return;
-	err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
+	err = mtd_read(ubi->mtd, addr, len, &read, buf);
 	if (err && err != -EUCLEAN) {
 		ubi_err("error %d while reading %d bytes from PEB %d:%d, "
 			"read %zd bytes", err, len, pnum, offset, read);
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index b6c8959e6c7e..433382951d3d 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -170,7 +170,7 @@ int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset,
 
 	addr = (loff_t)pnum * ubi->peb_size + offset;
 retry:
-	err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
+	err = mtd_read(ubi->mtd, addr, len, &read, buf);
 	if (err) {
 		const char *errstr = mtd_is_eccerr(err) ? " (ECC error)" : "";
 
@@ -1357,7 +1357,7 @@ int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum,
 		return 0;
 	}
 
-	err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf1);
+	err = mtd_read(ubi->mtd, addr, len, &read, buf1);
 	if (err && !mtd_is_bitflip(err))
 		goto out_free;
 
@@ -1421,7 +1421,7 @@ int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len)
 		return 0;
 	}
 
-	err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
+	err = mtd_read(ubi->mtd, addr, len, &read, buf);
 	if (err && !mtd_is_bitflip(err)) {
 		ubi_err("error %d while reading %d bytes from PEB %d:%d, "
 			"read %zd bytes", err, len, pnum, offset, read);
diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c
index d638fafab649..eccd08d0e009 100644
--- a/drivers/staging/spectra/lld_mtd.c
+++ b/drivers/staging/spectra/lld_mtd.c
@@ -283,9 +283,11 @@ u16 mtd_Read_Page_Main(u8 *read_data, u32 Block,
 
 
 	while (PageCount) {
-		ret = spectra_mtd->read(spectra_mtd,
-					(Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
-					DeviceInfo.wPageDataSize, &retlen, read_data);
+		ret = mtd_read(spectra_mtd,
+			       (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
+			       DeviceInfo.wPageDataSize,
+			       &retlen,
+			       read_data);
 		if (ret) {
 			printk(KERN_ERR "%s failed %d\n", __func__, ret);
 			return FAIL;
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index ffdf4fca9c54..c59d642cade2 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -381,7 +381,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
 
 		*bad_offset = ofs;
 
-		ret = c->mtd->read(c->mtd, ofs, readlen, &retlen, ebuf);
+		ret = mtd_read(c->mtd, ofs, readlen, &retlen, ebuf);
 		if (ret) {
 			printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret);
 			ret = -EIO;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index b09e51d2f81f..a24d3d21b63d 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -228,7 +228,7 @@ static int jffs2_verify_write(struct jffs2_sb_info *c, unsigned char *buf,
 	size_t retlen;
 	char *eccstr;
 
-	ret = c->mtd->read(c->mtd, ofs, c->wbuf_pagesize, &retlen, c->wbuf_verify);
+	ret = mtd_read(c->mtd, ofs, c->wbuf_pagesize, &retlen, c->wbuf_verify);
 	if (ret && ret != -EUCLEAN && ret != -EBADMSG) {
 		printk(KERN_WARNING "jffs2_verify_write(): Read back of page at %08x failed: %d\n", c->wbuf_ofs, ret);
 		return ret;
@@ -337,7 +337,8 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
 		}
 
 		/* Do the read... */
-		ret = c->mtd->read(c->mtd, start, c->wbuf_ofs - start, &retlen, buf);
+		ret = mtd_read(c->mtd, start, c->wbuf_ofs - start, &retlen,
+			       buf);
 
 		/* ECC recovered ? */
 		if ((ret == -EUCLEAN || ret == -EBADMSG) &&
@@ -948,11 +949,11 @@ int jffs2_flash_read(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *re
 	int	ret;
 
 	if (!jffs2_is_writebuffered(c))
-		return c->mtd->read(c->mtd, ofs, len, retlen, buf);
+		return mtd_read(c->mtd, ofs, len, retlen, buf);
 
 	/* Read flash */
 	down_read(&c->wbuf_sem);
-	ret = c->mtd->read(c->mtd, ofs, len, retlen, buf);
+	ret = mtd_read(c->mtd, ofs, len, retlen, buf);
 
 	if ( (ret == -EBADMSG || ret == -EUCLEAN) && (*retlen == len) ) {
 		if (ret == -EBADMSG)
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 046362894352..3ee64351685f 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -20,7 +20,7 @@ static int logfs_mtd_read(struct super_block *sb, loff_t ofs, size_t len,
 	size_t retlen;
 	int ret;
 
-	ret = mtd->read(mtd, ofs, len, &retlen, buf);
+	ret = mtd_read(mtd, ofs, len, &retlen, buf);
 	BUG_ON(ret == -EINVAL);
 	if (ret)
 		return ret;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index f38e8276b408..56478eb4bbc0 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -182,6 +182,8 @@ struct mtd_info {
 					    unsigned long len,
 					    unsigned long offset,
 					    unsigned long flags);
+	int (*read) (struct mtd_info *mtd, loff_t from, size_t len,
+		     size_t *retlen, u_char *buf);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
@@ -189,7 +191,6 @@ struct mtd_info {
 	struct backing_dev_info *backing_dev_info;
 
 
-	int (*read) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*write) (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf);
 
 	/* In blackbox flight recorder like scenarios we want to make successful
@@ -301,6 +302,12 @@ static inline unsigned long mtd_get_unmapped_area(struct mtd_info *mtd,
 	return mtd->get_unmapped_area(mtd, len, offset, flags);
 }
 
+static inline int mtd_read(struct mtd_info *mtd, loff_t from, size_t len,
+			   size_t *retlen, u_char *buf)
+{
+	return mtd->read(mtd, from, len, retlen, buf);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From eda95cbf75193808f62948fb0142ba0901d8bee2 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 17:35:41 +0200
Subject: mtd: introduce mtd_write interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_cmdset_0020.c |  8 +++++---
 drivers/mtd/ftl.c                   | 35 ++++++++++++++++++-----------------
 drivers/mtd/mtdblock.c              |  4 ++--
 drivers/mtd/mtdblock_ro.c           |  2 +-
 drivers/mtd/mtdchar.c               |  2 +-
 drivers/mtd/mtdconcat.c             |  2 +-
 drivers/mtd/mtdcore.c               |  3 ++-
 drivers/mtd/mtdoops.c               |  4 ++--
 drivers/mtd/mtdpart.c               |  3 +--
 drivers/mtd/mtdswap.c               |  2 +-
 drivers/mtd/rfd_ftl.c               | 17 ++++++++---------
 drivers/mtd/tests/mtd_pagetest.c    |  8 ++++----
 drivers/mtd/tests/mtd_speedtest.c   |  8 ++++----
 drivers/mtd/tests/mtd_stresstest.c  |  2 +-
 drivers/mtd/tests/mtd_subpagetest.c |  6 +++---
 drivers/mtd/tests/mtd_torturetest.c |  2 +-
 drivers/mtd/ubi/io.c                |  7 +++----
 drivers/staging/spectra/lld_mtd.c   |  8 +++++---
 fs/jffs2/wbuf.c                     | 18 +++++++++---------
 fs/jffs2/writev.c                   |  5 +++--
 fs/logfs/dev_mtd.c                  |  2 +-
 include/linux/mtd/mtd.h             |  9 ++++++++-
 22 files changed, 84 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c
index 666c52f8bf8d..85e80180b65b 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c
@@ -699,7 +699,8 @@ cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs,
 				continue;
 			}
 			memcpy(buffer+buflen, elem_base, ECCBUF_SIZE-buflen);
-			ret = mtd->write(mtd, to, ECCBUF_SIZE, &thislen, buffer);
+			ret = mtd_write(mtd, to, ECCBUF_SIZE, &thislen,
+					buffer);
 			totlen += thislen;
 			if (ret || thislen != ECCBUF_SIZE)
 				goto write_error;
@@ -708,7 +709,8 @@ cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs,
 			to += ECCBUF_SIZE;
 		}
 		if (ECCBUF_DIV(elem_len)) { /* write clean aligned data */
-			ret = mtd->write(mtd, to, ECCBUF_DIV(elem_len), &thislen, elem_base);
+			ret = mtd_write(mtd, to, ECCBUF_DIV(elem_len),
+					&thislen, elem_base);
 			totlen += thislen;
 			if (ret || thislen != ECCBUF_DIV(elem_len))
 				goto write_error;
@@ -722,7 +724,7 @@ cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs,
 	}
 	if (buflen) { /* flush last page, even if not full */
 		/* This is sometimes intended behaviour, really */
-		ret = mtd->write(mtd, to, buflen, &thislen, buffer);
+		ret = mtd_write(mtd, to, buflen, &thislen, buffer);
 		totlen += thislen;
 		if (ret || thislen != ECCBUF_SIZE)
 			goto write_error;
diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index 12fd7ebd3fd8..d591b1d0a6c1 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -422,8 +422,8 @@ static int prepare_xfer(partition_t *part, int i)
     header.LogicalEUN = cpu_to_le16(0xffff);
     header.EraseCount = cpu_to_le32(xfer->EraseCount);
 
-    ret = part->mbd.mtd->write(part->mbd.mtd, xfer->Offset, sizeof(header),
-			   &retlen, (u_char *)&header);
+    ret = mtd_write(part->mbd.mtd, xfer->Offset, sizeof(header), &retlen,
+                    (u_char *)&header);
 
     if (ret) {
 	return ret;
@@ -438,8 +438,8 @@ static int prepare_xfer(partition_t *part, int i)
 
     for (i = 0; i < nbam; i++, offset += sizeof(uint32_t)) {
 
-	ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(uint32_t),
-			       &retlen, (u_char *)&ctl);
+	ret = mtd_write(part->mbd.mtd, offset, sizeof(uint32_t), &retlen,
+                        (u_char *)&ctl);
 
 	if (ret)
 	    return ret;
@@ -503,8 +503,8 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit,
     offset = xfer->Offset + 20; /* Bad! */
     unit = cpu_to_le16(0x7fff);
 
-    ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(uint16_t),
-			   &retlen, (u_char *) &unit);
+    ret = mtd_write(part->mbd.mtd, offset, sizeof(uint16_t), &retlen,
+                    (u_char *)&unit);
 
     if (ret) {
 	printk( KERN_WARNING "ftl: Failed to write back to BAM cache in copy_erase_unit()!\n");
@@ -531,8 +531,8 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit,
             }
 
 
-	    ret = part->mbd.mtd->write(part->mbd.mtd, dest, SECTOR_SIZE,
-                        &retlen, (u_char *) buf);
+	    ret = mtd_write(part->mbd.mtd, dest, SECTOR_SIZE, &retlen,
+                            (u_char *)buf);
 	    if (ret)  {
 		printk(KERN_WARNING "ftl: Error writing new xfer unit in copy_erase_unit\n");
 		return ret;
@@ -550,9 +550,11 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit,
     }
 
     /* Write the BAM to the transfer unit */
-    ret = part->mbd.mtd->write(part->mbd.mtd, xfer->Offset + le32_to_cpu(part->header.BAMOffset),
-                    part->BlocksPerUnit * sizeof(int32_t), &retlen,
-		    (u_char *)part->bam_cache);
+    ret = mtd_write(part->mbd.mtd,
+                    xfer->Offset + le32_to_cpu(part->header.BAMOffset),
+                    part->BlocksPerUnit * sizeof(int32_t),
+                    &retlen,
+                    (u_char *)part->bam_cache);
     if (ret) {
 	printk( KERN_WARNING "ftl: Error writing BAM in copy_erase_unit\n");
 	return ret;
@@ -560,8 +562,8 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit,
 
 
     /* All clear? Then update the LogicalEUN again */
-    ret = part->mbd.mtd->write(part->mbd.mtd, xfer->Offset + 20, sizeof(uint16_t),
-			   &retlen, (u_char *)&srcunitswap);
+    ret = mtd_write(part->mbd.mtd, xfer->Offset + 20, sizeof(uint16_t),
+                    &retlen, (u_char *)&srcunitswap);
 
     if (ret) {
 	printk(KERN_WARNING "ftl: Error writing new LogicalEUN in copy_erase_unit\n");
@@ -887,8 +889,8 @@ static int set_bam_entry(partition_t *part, uint32_t log_addr,
 #endif
 	part->bam_cache[blk] = le_virt_addr;
     }
-    ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(uint32_t),
-                            &retlen, (u_char *)&le_virt_addr);
+    ret = mtd_write(part->mbd.mtd, offset, sizeof(uint32_t), &retlen,
+                    (u_char *)&le_virt_addr);
 
     if (ret) {
 	printk(KERN_NOTICE "ftl_cs: set_bam_entry() failed!\n");
@@ -947,8 +949,7 @@ static int ftl_write(partition_t *part, caddr_t buffer,
 	part->EUNInfo[part->bam_index].Deleted++;
 	offset = (part->EUNInfo[part->bam_index].Offset +
 		      blk * SECTOR_SIZE);
-	ret = part->mbd.mtd->write(part->mbd.mtd, offset, SECTOR_SIZE, &retlen,
-                                     buffer);
+	ret = mtd_write(part->mbd.mtd, offset, SECTOR_SIZE, &retlen, buffer);
 
 	if (ret) {
 	    printk(KERN_NOTICE "ftl_cs: block write failed!\n");
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index b0644d2d2a6e..ac7f1f1faa2d 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -102,7 +102,7 @@ static int erase_write (struct mtd_info *mtd, unsigned long pos,
 	 * Next, write the data to flash.
 	 */
 
-	ret = mtd->write(mtd, pos, len, &retlen, buf);
+	ret = mtd_write(mtd, pos, len, &retlen, buf);
 	if (ret)
 		return ret;
 	if (retlen != len)
@@ -152,7 +152,7 @@ static int do_cached_write (struct mtdblk_dev *mtdblk, unsigned long pos,
 		mtd->name, pos, len);
 
 	if (!sect_size)
-		return mtd->write(mtd, pos, len, &retlen, buf);
+		return mtd_write(mtd, pos, len, &retlen, buf);
 
 	while (len > 0) {
 		unsigned long sect_start = (pos/sect_size)*sect_size;
diff --git a/drivers/mtd/mtdblock_ro.c b/drivers/mtd/mtdblock_ro.c
index f5737b1153fa..92759a9d2985 100644
--- a/drivers/mtd/mtdblock_ro.c
+++ b/drivers/mtd/mtdblock_ro.c
@@ -40,7 +40,7 @@ static int mtdblock_writesect(struct mtd_blktrans_dev *dev,
 {
 	size_t retlen;
 
-	if (dev->mtd->write(dev->mtd, (block * 512), 512, &retlen, buf))
+	if (mtd_write(dev->mtd, (block * 512), 512, &retlen, buf))
 		return 1;
 	return 0;
 }
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index c7f484687fa3..922da31d2c6b 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -331,7 +331,7 @@ static ssize_t mtdchar_write(struct file *file, const char __user *buf, size_t c
 		}
 
 		default:
-			ret = (*(mtd->write))(mtd, *ppos, len, &retlen, kbuf);
+			ret = mtd_write(mtd, *ppos, len, &retlen, kbuf);
 		}
 		if (!ret) {
 			*ppos += retlen;
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 45460349fd12..45215501c4c7 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -148,7 +148,7 @@ concat_write(struct mtd_info *mtd, loff_t to, size_t len,
 		if (!(subdev->flags & MTD_WRITEABLE))
 			err = -EROFS;
 		else
-			err = subdev->write(subdev, to, size, &retsize, buf);
+			err = mtd_write(subdev, to, size, &retsize, buf);
 
 		if (err)
 			break;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index b01993ea260e..e36191ab47c3 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -699,7 +699,8 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 		for (i=0; i<count; i++) {
 			if (!vecs[i].iov_len)
 				continue;
-			ret = mtd->write(mtd, to, vecs[i].iov_len, &thislen, vecs[i].iov_base);
+			ret = mtd_write(mtd, to, vecs[i].iov_len, &thislen,
+					vecs[i].iov_base);
 			totlen += thislen;
 			if (ret || thislen != vecs[i].iov_len)
 				break;
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 23629ad08507..9c9d58617c98 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -225,8 +225,8 @@ static void mtdoops_write(struct mtdoops_context *cxt, int panic)
 		ret = mtd->panic_write(mtd, cxt->nextpage * record_size,
 					record_size, &retlen, cxt->oops_buf);
 	else
-		ret = mtd->write(mtd, cxt->nextpage * record_size,
-					record_size, &retlen, cxt->oops_buf);
+		ret = mtd_write(mtd, cxt->nextpage * record_size,
+				record_size, &retlen, cxt->oops_buf);
 
 	if (retlen != record_size || ret < 0)
 		printk(KERN_ERR "mtdoops: write failure at %ld (%td of %ld written), error %d\n",
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 59cd7974bc50..96574a036567 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -188,8 +188,7 @@ static int part_write(struct mtd_info *mtd, loff_t to, size_t len,
 		len = 0;
 	else if (to + len > mtd->size)
 		len = mtd->size - to;
-	return part->master->write(part->master, to + part->offset,
-				    len, retlen, buf);
+	return mtd_write(part->master, to + part->offset, len, retlen, buf);
 }
 
 static int part_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index b3282d2aa8f8..6ff823e29c0c 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -689,7 +689,7 @@ retry:
 		return ret;
 
 	writepos = (loff_t)*bp << PAGE_SHIFT;
-	ret =  mtd->write(mtd, writepos, PAGE_SIZE, &retlen, buf);
+	ret =  mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf);
 	if (ret == -EIO || mtd_is_eccerr(ret)) {
 		d->curr_write_pos--;
 		eb->active_count--;
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index d9fe2d0533d9..c594bb7abfa3 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -304,9 +304,8 @@ static void erase_callback(struct erase_info *erase)
 	part->blocks[i].used_sectors = 0;
 	part->blocks[i].erases++;
 
-	rc = part->mbd.mtd->write(part->mbd.mtd,
-		part->blocks[i].offset, sizeof(magic), &retlen,
-		(u_char*)&magic);
+	rc = mtd_write(part->mbd.mtd, part->blocks[i].offset, sizeof(magic),
+		       &retlen, (u_char *)&magic);
 
 	if (!rc && retlen != sizeof(magic))
 		rc = -EIO;
@@ -595,8 +594,8 @@ static int mark_sector_deleted(struct partition *part, u_long old_addr)
 
 	addr = part->blocks[block].offset +
 			(HEADER_MAP_OFFSET + offset) * sizeof(u16);
-	rc = part->mbd.mtd->write(part->mbd.mtd, addr,
-		sizeof(del), &retlen, (u_char*)&del);
+	rc = mtd_write(part->mbd.mtd, addr, sizeof(del), &retlen,
+		       (u_char *)&del);
 
 	if (!rc && retlen != sizeof(del))
 		rc = -EIO;
@@ -668,8 +667,8 @@ static int do_writesect(struct mtd_blktrans_dev *dev, u_long sector, char *buf,
 
 	addr = (i + part->header_sectors_per_block) * SECTOR_SIZE +
 		block->offset;
-	rc = part->mbd.mtd->write(part->mbd.mtd,
-		addr, SECTOR_SIZE, &retlen, (u_char*)buf);
+	rc = mtd_write(part->mbd.mtd, addr, SECTOR_SIZE, &retlen,
+		       (u_char *)buf);
 
 	if (!rc && retlen != SECTOR_SIZE)
 		rc = -EIO;
@@ -688,8 +687,8 @@ static int do_writesect(struct mtd_blktrans_dev *dev, u_long sector, char *buf,
 	part->header_cache[i + HEADER_MAP_OFFSET] = entry;
 
 	addr = block->offset + (HEADER_MAP_OFFSET + i) * sizeof(u16);
-	rc = part->mbd.mtd->write(part->mbd.mtd, addr,
-			sizeof(entry), &retlen, (u_char*)&entry);
+	rc = mtd_write(part->mbd.mtd, addr, sizeof(entry), &retlen,
+		       (u_char *)&entry);
 
 	if (!rc && retlen != sizeof(entry))
 		rc = -EIO;
diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c
index 6d62e24a03ed..83da97e54f97 100644
--- a/drivers/mtd/tests/mtd_pagetest.c
+++ b/drivers/mtd/tests/mtd_pagetest.c
@@ -100,7 +100,7 @@ static int write_eraseblock(int ebnum)
 
 	set_random_data(writebuf, mtd->erasesize);
 	cond_resched();
-	err = mtd->write(mtd, addr, mtd->erasesize, &written, writebuf);
+	err = mtd_write(mtd, addr, mtd->erasesize, &written, writebuf);
 	if (err || written != mtd->erasesize)
 		printk(PRINT_PREF "error: write failed at %#llx\n",
 		       (long long)addr);
@@ -335,7 +335,7 @@ static int erasecrosstest(void)
 	printk(PRINT_PREF "writing 1st page of block %d\n", ebnum);
 	set_random_data(writebuf, pgsize);
 	strcpy(writebuf, "There is no data like this!");
-	err = mtd->write(mtd, addr0, pgsize, &written, writebuf);
+	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
 	if (err || written != pgsize) {
 		printk(PRINT_PREF "error: write failed at %#llx\n",
 		       (long long)addr0);
@@ -368,7 +368,7 @@ static int erasecrosstest(void)
 	printk(PRINT_PREF "writing 1st page of block %d\n", ebnum);
 	set_random_data(writebuf, pgsize);
 	strcpy(writebuf, "There is no data like this!");
-	err = mtd->write(mtd, addr0, pgsize, &written, writebuf);
+	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
 	if (err || written != pgsize) {
 		printk(PRINT_PREF "error: write failed at %#llx\n",
 		       (long long)addr0);
@@ -425,7 +425,7 @@ static int erasetest(void)
 
 	printk(PRINT_PREF "writing 1st page of block %d\n", ebnum);
 	set_random_data(writebuf, pgsize);
-	err = mtd->write(mtd, addr0, pgsize, &written, writebuf);
+	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
 	if (err || written != pgsize) {
 		printk(PRINT_PREF "error: write failed at %#llx\n",
 		       (long long)addr0);
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
index 3c9529bd0a62..c7b18e189082 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/mtd_speedtest.c
@@ -143,7 +143,7 @@ static int write_eraseblock(int ebnum)
 	int err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 
-	err = mtd->write(mtd, addr, mtd->erasesize, &written, iobuf);
+	err = mtd_write(mtd, addr, mtd->erasesize, &written, iobuf);
 	if (err || written != mtd->erasesize) {
 		printk(PRINT_PREF "error: write failed at %#llx\n", addr);
 		if (!err)
@@ -161,7 +161,7 @@ static int write_eraseblock_by_page(int ebnum)
 	void *buf = iobuf;
 
 	for (i = 0; i < pgcnt; i++) {
-		err = mtd->write(mtd, addr, pgsize, &written, buf);
+		err = mtd_write(mtd, addr, pgsize, &written, buf);
 		if (err || written != pgsize) {
 			printk(PRINT_PREF "error: write failed at %#llx\n",
 			       addr);
@@ -184,7 +184,7 @@ static int write_eraseblock_by_2pages(int ebnum)
 	void *buf = iobuf;
 
 	for (i = 0; i < n; i++) {
-		err = mtd->write(mtd, addr, sz, &written, buf);
+		err = mtd_write(mtd, addr, sz, &written, buf);
 		if (err || written != sz) {
 			printk(PRINT_PREF "error: write failed at %#llx\n",
 			       addr);
@@ -196,7 +196,7 @@ static int write_eraseblock_by_2pages(int ebnum)
 		buf += sz;
 	}
 	if (pgcnt % 2) {
-		err = mtd->write(mtd, addr, pgsize, &written, buf);
+		err = mtd_write(mtd, addr, pgsize, &written, buf);
 		if (err || written != pgsize) {
 			printk(PRINT_PREF "error: write failed at %#llx\n",
 			       addr);
diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c
index 83a843723880..f8aac4b7e59a 100644
--- a/drivers/mtd/tests/mtd_stresstest.c
+++ b/drivers/mtd/tests/mtd_stresstest.c
@@ -192,7 +192,7 @@ static int do_write(void)
 		}
 	}
 	addr = eb * mtd->erasesize + offs;
-	err = mtd->write(mtd, addr, len, &written, writebuf);
+	err = mtd_write(mtd, addr, len, &written, writebuf);
 	if (unlikely(err || written != len)) {
 		printk(PRINT_PREF "error: write failed at 0x%llx\n",
 		       (long long)addr);
diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c
index d81f89a19daa..b90c01036b49 100644
--- a/drivers/mtd/tests/mtd_subpagetest.c
+++ b/drivers/mtd/tests/mtd_subpagetest.c
@@ -120,7 +120,7 @@ static int write_eraseblock(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 
 	set_random_data(writebuf, subpgsize);
-	err = mtd->write(mtd, addr, subpgsize, &written, writebuf);
+	err = mtd_write(mtd, addr, subpgsize, &written, writebuf);
 	if (unlikely(err || written != subpgsize)) {
 		printk(PRINT_PREF "error: write failed at %#llx\n",
 		       (long long)addr);
@@ -134,7 +134,7 @@ static int write_eraseblock(int ebnum)
 	addr += subpgsize;
 
 	set_random_data(writebuf, subpgsize);
-	err = mtd->write(mtd, addr, subpgsize, &written, writebuf);
+	err = mtd_write(mtd, addr, subpgsize, &written, writebuf);
 	if (unlikely(err || written != subpgsize)) {
 		printk(PRINT_PREF "error: write failed at %#llx\n",
 		       (long long)addr);
@@ -158,7 +158,7 @@ static int write_eraseblock2(int ebnum)
 		if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize)
 			break;
 		set_random_data(writebuf, subpgsize * k);
-		err = mtd->write(mtd, addr, subpgsize * k, &written, writebuf);
+		err = mtd_write(mtd, addr, subpgsize * k, &written, writebuf);
 		if (unlikely(err || written != subpgsize * k)) {
 			printk(PRINT_PREF "error: write failed at %#llx\n",
 			       (long long)addr);
diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c
index ecc68bf3f3f2..dd34a519fa7a 100644
--- a/drivers/mtd/tests/mtd_torturetest.c
+++ b/drivers/mtd/tests/mtd_torturetest.c
@@ -189,7 +189,7 @@ static inline int write_pattern(int ebnum, void *buf)
 		addr = (ebnum + 1) * mtd->erasesize - pgcnt * pgsize;
 		len = pgcnt * pgsize;
 	}
-	err = mtd->write(mtd, addr, len, &written, buf);
+	err = mtd_write(mtd, addr, len, &written, buf);
 	if (err) {
 		printk(PRINT_PREF "error %d while writing EB %d, written %zd"
 		      " bytes\n", err, ebnum, written);
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 433382951d3d..8d832fc9e9e4 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -289,7 +289,7 @@ int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset,
 	}
 
 	addr = (loff_t)pnum * ubi->peb_size + offset;
-	err = ubi->mtd->write(ubi->mtd, addr, len, &written, buf);
+	err = mtd_write(ubi->mtd, addr, len, &written, buf);
 	if (err) {
 		ubi_err("error %d while writing %d bytes to PEB %d:%d, written "
 			"%zd bytes", err, len, pnum, offset, written);
@@ -525,11 +525,10 @@ static int nor_erase_prepare(struct ubi_device *ubi, int pnum)
 	 * the header comment in scan.c for more information).
 	 */
 	addr = (loff_t)pnum * ubi->peb_size;
-	err = ubi->mtd->write(ubi->mtd, addr, 4, &written, (void *)&data);
+	err = mtd_write(ubi->mtd, addr, 4, &written, (void *)&data);
 	if (!err) {
 		addr += ubi->vid_hdr_aloffset;
-		err = ubi->mtd->write(ubi->mtd, addr, 4, &written,
-				      (void *)&data);
+		err = mtd_write(ubi->mtd, addr, 4, &written, (void *)&data);
 		if (!err)
 			return 0;
 	}
diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c
index eccd08d0e009..2eb032131960 100644
--- a/drivers/staging/spectra/lld_mtd.c
+++ b/drivers/staging/spectra/lld_mtd.c
@@ -233,9 +233,11 @@ u16 mtd_Write_Page_Main(u8 *write_data, u32 Block,
 
 
 	while (PageCount) {
-		ret = spectra_mtd->write(spectra_mtd,
-					 (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
-					 DeviceInfo.wPageDataSize, &retlen, write_data);
+		ret = mtd_write(spectra_mtd,
+				(Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
+				DeviceInfo.wPageDataSize,
+				&retlen,
+				write_data);
 		if (ret) {
 			printk(KERN_ERR "%s failed %d\n", __func__, ret);
 			return FAIL;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index a24d3d21b63d..3ea2f8db9358 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -414,13 +414,12 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
 		if (breakme++ == 20) {
 			printk(KERN_NOTICE "Faking write error at 0x%08x\n", ofs);
 			breakme = 0;
-			c->mtd->write(c->mtd, ofs, towrite, &retlen,
-				      brokenbuf);
+			mtd_write(c->mtd, ofs, towrite, &retlen, brokenbuf);
 			ret = -EIO;
 		} else
 #endif
-			ret = c->mtd->write(c->mtd, ofs, towrite, &retlen,
-					    rewrite_buf);
+			ret = mtd_write(c->mtd, ofs, towrite, &retlen,
+					rewrite_buf);
 
 		if (ret || retlen != towrite || jffs2_verify_write(c, rewrite_buf, ofs)) {
 			/* Argh. We tried. Really we did. */
@@ -620,13 +619,14 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
 	if (breakme++ == 20) {
 		printk(KERN_NOTICE "Faking write error at 0x%08x\n", c->wbuf_ofs);
 		breakme = 0;
-		c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen,
-			      brokenbuf);
+		mtd_write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen,
+			  brokenbuf);
 		ret = -EIO;
 	} else
 #endif
 
-		ret = c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf);
+		ret = mtd_write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize,
+				&retlen, c->wbuf);
 
 	if (ret) {
 		printk(KERN_WARNING "jffs2_flush_wbuf(): Write failed with %d\n", ret);
@@ -862,8 +862,8 @@ int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs,
 		v += wbuf_retlen;
 
 		if (vlen >= c->wbuf_pagesize) {
-			ret = c->mtd->write(c->mtd, outvec_to, PAGE_DIV(vlen),
-					    &wbuf_retlen, v);
+			ret = mtd_write(c->mtd, outvec_to, PAGE_DIV(vlen),
+					&wbuf_retlen, v);
 			if (ret < 0 || wbuf_retlen != PAGE_DIV(vlen))
 				goto outfile;
 
diff --git a/fs/jffs2/writev.c b/fs/jffs2/writev.c
index b9276b11bac6..b05710fd552a 100644
--- a/fs/jffs2/writev.c
+++ b/fs/jffs2/writev.c
@@ -26,7 +26,8 @@ static inline int mtd_fake_writev(struct mtd_info *mtd, const struct kvec *vecs,
 	for (i=0; i<count; i++) {
 		if (!vecs[i].iov_len)
 			continue;
-		ret = mtd->write(mtd, to, vecs[i].iov_len, &thislen, vecs[i].iov_base);
+		ret = mtd_write(mtd, to, vecs[i].iov_len, &thislen,
+				vecs[i].iov_base);
 		totlen += thislen;
 		if (ret || thislen != vecs[i].iov_len)
 			break;
@@ -61,7 +62,7 @@ int jffs2_flash_direct_write(struct jffs2_sb_info *c, loff_t ofs, size_t len,
 			size_t *retlen, const u_char *buf)
 {
 	int ret;
-	ret = c->mtd->write(c->mtd, ofs, len, retlen, buf);
+	ret = mtd_write(c->mtd, ofs, len, retlen, buf);
 
 	if (jffs2_sum_active()) {
 		struct kvec vecs[1];
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 3ee64351685f..1842440d6564 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -49,7 +49,7 @@ static int loffs_mtd_write(struct super_block *sb, loff_t ofs, size_t len,
 	BUG_ON(len > PAGE_CACHE_SIZE);
 	page_start = ofs & PAGE_CACHE_MASK;
 	page_end = PAGE_CACHE_ALIGN(ofs + len) - 1;
-	ret = mtd->write(mtd, ofs, len, &retlen, buf);
+	ret = mtd_write(mtd, ofs, len, &retlen, buf);
 	if (ret || (retlen != len))
 		return -EIO;
 
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 56478eb4bbc0..1da7f4a6ef88 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -184,6 +184,8 @@ struct mtd_info {
 					    unsigned long flags);
 	int (*read) (struct mtd_info *mtd, loff_t from, size_t len,
 		     size_t *retlen, u_char *buf);
+	int (*write) (struct mtd_info *mtd, loff_t to, size_t len,
+		      size_t *retlen, const u_char *buf);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
@@ -191,7 +193,6 @@ struct mtd_info {
 	struct backing_dev_info *backing_dev_info;
 
 
-	int (*write) (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf);
 
 	/* In blackbox flight recorder like scenarios we want to make successful
 	   writes in interrupt context. panic_write() is only intended to be
@@ -308,6 +309,12 @@ static inline int mtd_read(struct mtd_info *mtd, loff_t from, size_t len,
 	return mtd->read(mtd, from, len, retlen, buf);
 }
 
+static inline int mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
+			    size_t *retlen, const u_char *buf)
+{
+	return mtd->write(mtd, to, len, retlen, buf);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 7ae79d7ff1769a3e9c47076b46e4eaa11204a2ee Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:03:17 +0200
Subject: mtd: introduce mtd_panic_write interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdoops.c   |  4 ++--
 drivers/mtd/mtdpart.c   |  4 ++--
 include/linux/mtd/mtd.h | 25 +++++++++++++++----------
 3 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 9c9d58617c98..7be2018ffbcc 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -222,8 +222,8 @@ static void mtdoops_write(struct mtdoops_context *cxt, int panic)
 	hdr[1] = MTDOOPS_KERNMSG_MAGIC;
 
 	if (panic)
-		ret = mtd->panic_write(mtd, cxt->nextpage * record_size,
-					record_size, &retlen, cxt->oops_buf);
+		ret = mtd_panic_write(mtd, cxt->nextpage * record_size,
+				      record_size, &retlen, cxt->oops_buf);
 	else
 		ret = mtd_write(mtd, cxt->nextpage * record_size,
 				record_size, &retlen, cxt->oops_buf);
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 96574a036567..9ed58f7d7466 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -201,8 +201,8 @@ static int part_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
 		len = 0;
 	else if (to + len > mtd->size)
 		len = mtd->size - to;
-	return part->master->panic_write(part->master, to + part->offset,
-				    len, retlen, buf);
+	return mtd_panic_write(part->master, to + part->offset, len, retlen,
+			       buf);
 }
 
 static int part_write_oob(struct mtd_info *mtd, loff_t to,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 1da7f4a6ef88..2fb83cd3d264 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -186,6 +186,8 @@ struct mtd_info {
 		     size_t *retlen, u_char *buf);
 	int (*write) (struct mtd_info *mtd, loff_t to, size_t len,
 		      size_t *retlen, const u_char *buf);
+	int (*panic_write) (struct mtd_info *mtd, loff_t to, size_t len,
+			    size_t *retlen, const u_char *buf);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
@@ -193,16 +195,6 @@ struct mtd_info {
 	struct backing_dev_info *backing_dev_info;
 
 
-
-	/* In blackbox flight recorder like scenarios we want to make successful
-	   writes in interrupt context. panic_write() is only intended to be
-	   called when its known the kernel is about to panic and we need the
-	   write to succeed. Since the kernel is not going to be running for much
-	   longer, this function can break locks and delay to ensure the write
-	   succeeds (but not sleep). */
-
-	int (*panic_write) (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf);
-
 	int (*read_oob) (struct mtd_info *mtd, loff_t from,
 			 struct mtd_oob_ops *ops);
 	int (*write_oob) (struct mtd_info *mtd, loff_t to,
@@ -315,6 +307,19 @@ static inline int mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
 	return mtd->write(mtd, to, len, retlen, buf);
 }
 
+/*
+ * In blackbox flight recorder like scenarios we want to make successful writes
+ * in interrupt context. panic_write() is only intended to be called when its
+ * known the kernel is about to panic and we need the write to succeed. Since
+ * the kernel is not going to be running for much longer, this function can
+ * break locks and delay to ensure the write succeeds (but not sleep).
+ */
+static inline int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
+				  size_t *retlen, const u_char *buf)
+{
+	return mtd->panic_write(mtd, to, len, retlen, buf);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From fd2819bbc92fc98bed5d612e4acbe16b6326f6bf Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:27:05 +0200
Subject: mtd: introduce mtd_read_oob interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/inftlcore.c           |  2 +-
 drivers/mtd/mtdchar.c             |  4 ++--
 drivers/mtd/mtdconcat.c           |  2 +-
 drivers/mtd/mtdpart.c             |  2 +-
 drivers/mtd/mtdswap.c             |  4 ++--
 drivers/mtd/nand/nand_bbt.c       |  6 +++---
 drivers/mtd/nftlcore.c            |  2 +-
 drivers/mtd/sm_ftl.c              |  2 +-
 drivers/mtd/ssfdc.c               |  2 +-
 drivers/mtd/tests/mtd_oobtest.c   | 14 +++++++-------
 drivers/mtd/tests/mtd_readtest.c  |  2 +-
 drivers/staging/spectra/lld_mtd.c | 12 ++++++------
 fs/jffs2/wbuf.c                   |  4 ++--
 include/linux/mtd/mtd.h           | 10 ++++++++--
 14 files changed, 37 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index 0b038bed7b9c..07646e1273e2 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -158,7 +158,7 @@ int inftl_read_oob(struct mtd_info *mtd, loff_t offs, size_t len,
 	ops.oobbuf = buf;
 	ops.datbuf = NULL;
 
-	res = mtd->read_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
+	res = mtd_read_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
 	*retlen = ops.oobretlen;
 	return res;
 }
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 922da31d2c6b..e74f570a7b93 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -227,7 +227,7 @@ static ssize_t mtdchar_read(struct file *file, char __user *buf, size_t count,
 			ops.oobbuf = NULL;
 			ops.len = len;
 
-			ret = mtd->read_oob(mtd, *ppos, &ops);
+			ret = mtd_read_oob(mtd, *ppos, &ops);
 			retlen = ops.retlen;
 			break;
 		}
@@ -471,7 +471,7 @@ static int mtdchar_readoob(struct file *file, struct mtd_info *mtd,
 		return -ENOMEM;
 
 	start &= ~((uint64_t)mtd->writesize - 1);
-	ret = mtd->read_oob(mtd, start, &ops);
+	ret = mtd_read_oob(mtd, start, &ops);
 
 	if (put_user(ops.oobretlen, retp))
 		ret = -EFAULT;
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 45215501c4c7..cf35642e5f49 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -273,7 +273,7 @@ concat_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops)
 		if (from + devops.len > subdev->size)
 			devops.len = subdev->size - from;
 
-		err = subdev->read_oob(subdev, from, &devops);
+		err = mtd_read_oob(subdev, from, &devops);
 		ops->retlen += devops.retlen;
 		ops->oobretlen += devops.oobretlen;
 
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 9ed58f7d7466..6fdc74ef19c1 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -138,7 +138,7 @@ static int part_read_oob(struct mtd_info *mtd, loff_t from,
 			return -EINVAL;
 	}
 
-	res = part->master->read_oob(part->master, from + part->offset, ops);
+	res = mtd_read_oob(part->master, from + part->offset, ops);
 	if (unlikely(res)) {
 		if (mtd_is_bitflip(res))
 			mtd->ecc_stats.corrected++;
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 6ff823e29c0c..0f0ab18d4405 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -312,7 +312,7 @@ static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb)
 static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from,
 			struct mtd_oob_ops *ops)
 {
-	int ret = d->mtd->read_oob(d->mtd, from, ops);
+	int ret = mtd_read_oob(d->mtd, from, ops);
 
 	if (mtd_is_bitflip(ret))
 		return ret;
@@ -955,7 +955,7 @@ static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
 
 		pos = base;
 		for (i = 0; i < mtd_pages; i++) {
-			ret = mtd->read_oob(mtd, pos, &ops);
+			ret = mtd_read_oob(mtd, pos, &ops);
 			if (ret)
 				goto error;
 
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 1bcd6bc6798c..fcab50e80b90 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -317,7 +317,7 @@ static int scan_read_raw_oob(struct mtd_info *mtd, uint8_t *buf, loff_t offs,
 		ops.len = min(len, (size_t)mtd->writesize);
 		ops.oobbuf = buf + ops.len;
 
-		res = mtd->read_oob(mtd, offs, &ops);
+		res = mtd_read_oob(mtd, offs, &ops);
 
 		if (res)
 			return res;
@@ -434,7 +434,7 @@ static int scan_block_fast(struct mtd_info *mtd, struct nand_bbt_descr *bd,
 		 * Read the full oob until read_oob is fixed to handle single
 		 * byte reads for 16 bit buswidth.
 		 */
-		ret = mtd->read_oob(mtd, offs, &ops);
+		ret = mtd_read_oob(mtd, offs, &ops);
 		/* Ignore ECC errors when checking for BBM */
 		if (ret && !mtd_is_bitflip_or_eccerr(ret))
 			return ret;
@@ -769,7 +769,7 @@ static int write_bbt(struct mtd_info *mtd, uint8_t *buf,
 			/* Read oob data */
 			ops.ooblen = (len >> this->page_shift) * mtd->oobsize;
 			ops.oobbuf = &buf[len];
-			res = mtd->read_oob(mtd, to + mtd->writesize, &ops);
+			res = mtd_read_oob(mtd, to + mtd->writesize, &ops);
 			if (res < 0 || ops.oobretlen != ops.ooblen)
 				goto outerr;
 
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index 1a9d9c1d3a74..7497f5efc26b 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -153,7 +153,7 @@ int nftl_read_oob(struct mtd_info *mtd, loff_t offs, size_t len,
 	ops.oobbuf = buf;
 	ops.datbuf = NULL;
 
-	res = mtd->read_oob(mtd, offs & ~mask, &ops);
+	res = mtd_read_oob(mtd, offs & ~mask, &ops);
 	*retlen = ops.oobretlen;
 	return res;
 }
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index 2f1acb1ab5e8..748aa4416691 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -278,7 +278,7 @@ again:
 
 	/* Unfortunately, oob read will _always_ succeed,
 		despite card removal..... */
-	ret = mtd->read_oob(mtd, sm_mkoffset(ftl, zone, block, boffset), &ops);
+	ret = mtd_read_oob(mtd, sm_mkoffset(ftl, zone, block, boffset), &ops);
 
 	/* Test for unknown errors */
 	if (ret != 0 && !mtd_is_bitflip_or_eccerr(ret)) {
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index 293e22a5710f..0e6881338357 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -175,7 +175,7 @@ static int read_raw_oob(struct mtd_info *mtd, loff_t offs, uint8_t *buf)
 	ops.oobbuf = buf;
 	ops.datbuf = NULL;
 
-	ret = mtd->read_oob(mtd, offs, &ops);
+	ret = mtd_read_oob(mtd, offs, &ops);
 	if (ret < 0 || ops.oobretlen != OOB_SIZE)
 		return -1;
 
diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c
index 7d52854c16dd..962d27a64e64 100644
--- a/drivers/mtd/tests/mtd_oobtest.c
+++ b/drivers/mtd/tests/mtd_oobtest.c
@@ -192,7 +192,7 @@ static int verify_eraseblock(int ebnum)
 		ops.ooboffs   = use_offset;
 		ops.datbuf    = NULL;
 		ops.oobbuf    = readbuf;
-		err = mtd->read_oob(mtd, addr, &ops);
+		err = mtd_read_oob(mtd, addr, &ops);
 		if (err || ops.oobretlen != use_len) {
 			printk(PRINT_PREF "error: readoob failed at %#llx\n",
 			       (long long)addr);
@@ -219,7 +219,7 @@ static int verify_eraseblock(int ebnum)
 			ops.ooboffs   = 0;
 			ops.datbuf    = NULL;
 			ops.oobbuf    = readbuf;
-			err = mtd->read_oob(mtd, addr, &ops);
+			err = mtd_read_oob(mtd, addr, &ops);
 			if (err || ops.oobretlen != mtd->ecclayout->oobavail) {
 				printk(PRINT_PREF "error: readoob failed at "
 				       "%#llx\n", (long long)addr);
@@ -284,7 +284,7 @@ static int verify_eraseblock_in_one_go(int ebnum)
 	ops.ooboffs   = 0;
 	ops.datbuf    = NULL;
 	ops.oobbuf    = readbuf;
-	err = mtd->read_oob(mtd, addr, &ops);
+	err = mtd_read_oob(mtd, addr, &ops);
 	if (err || ops.oobretlen != len) {
 		printk(PRINT_PREF "error: readoob failed at %#llx\n",
 		       (long long)addr);
@@ -544,7 +544,7 @@ static int __init mtd_oobtest_init(void)
 	ops.oobbuf    = readbuf;
 	printk(PRINT_PREF "attempting to start read past end of OOB\n");
 	printk(PRINT_PREF "an error is expected...\n");
-	err = mtd->read_oob(mtd, addr0, &ops);
+	err = mtd_read_oob(mtd, addr0, &ops);
 	if (err) {
 		printk(PRINT_PREF "error occurred as expected\n");
 		err = 0;
@@ -588,7 +588,7 @@ static int __init mtd_oobtest_init(void)
 		ops.oobbuf    = readbuf;
 		printk(PRINT_PREF "attempting to read past end of device\n");
 		printk(PRINT_PREF "an error is expected...\n");
-		err = mtd->read_oob(mtd, mtd->size - mtd->writesize, &ops);
+		err = mtd_read_oob(mtd, mtd->size - mtd->writesize, &ops);
 		if (err) {
 			printk(PRINT_PREF "error occurred as expected\n");
 			err = 0;
@@ -632,7 +632,7 @@ static int __init mtd_oobtest_init(void)
 		ops.oobbuf    = readbuf;
 		printk(PRINT_PREF "attempting to read past end of device\n");
 		printk(PRINT_PREF "an error is expected...\n");
-		err = mtd->read_oob(mtd, mtd->size - mtd->writesize, &ops);
+		err = mtd_read_oob(mtd, mtd->size - mtd->writesize, &ops);
 		if (err) {
 			printk(PRINT_PREF "error occurred as expected\n");
 			err = 0;
@@ -698,7 +698,7 @@ static int __init mtd_oobtest_init(void)
 		ops.ooboffs   = 0;
 		ops.datbuf    = NULL;
 		ops.oobbuf    = readbuf;
-		err = mtd->read_oob(mtd, addr, &ops);
+		err = mtd_read_oob(mtd, addr, &ops);
 		if (err)
 			goto out;
 		if (memcmp(readbuf, writebuf, mtd->ecclayout->oobavail * 2)) {
diff --git a/drivers/mtd/tests/mtd_readtest.c b/drivers/mtd/tests/mtd_readtest.c
index 0c58d2976c76..5eaeada84284 100644
--- a/drivers/mtd/tests/mtd_readtest.c
+++ b/drivers/mtd/tests/mtd_readtest.c
@@ -74,7 +74,7 @@ static int read_eraseblock_by_page(int ebnum)
 			ops.ooboffs   = 0;
 			ops.datbuf    = NULL;
 			ops.oobbuf    = oobbuf;
-			ret = mtd->read_oob(mtd, addr, &ops);
+			ret = mtd_read_oob(mtd, addr, &ops);
 			if ((ret && !mtd_is_bitflip(ret)) ||
 					ops.oobretlen != mtd->oobsize) {
 				printk(PRINT_PREF "error: read oob failed at "
diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c
index 2eb032131960..ed8e5f067087 100644
--- a/drivers/staging/spectra/lld_mtd.c
+++ b/drivers/staging/spectra/lld_mtd.c
@@ -351,9 +351,9 @@ u16 mtd_Read_Page_Main_Spare(u8 *read_data, u32 Block,
 		ops.ooblen = BTSIG_BYTES;
 		ops.ooboffs = 0;
 
-		ret = spectra_mtd->read_oob(spectra_mtd,
-					    (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
-					    &ops);
+		ret = mtd_read_oob(spectra_mtd,
+				   (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
+				   &ops);
 		if (ret) {
 			printk(KERN_ERR "%s failed %d\n", __func__, ret);
 			return FAIL;
@@ -484,9 +484,9 @@ u16 mtd_Read_Page_Spare(u8 *read_data, u32 Block,
 		ops.ooblen = BTSIG_BYTES;
 		ops.ooboffs = 0;
 
-		ret = spectra_mtd->read_oob(spectra_mtd,
-					    (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
-					    &ops);
+		ret = mtd_read_oob(spectra_mtd,
+				   (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
+				   &ops);
 		if (ret) {
 			printk(KERN_ERR "%s failed %d\n", __func__, ret);
 			return FAIL;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 3ea2f8db9358..efc0cb370306 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1032,7 +1032,7 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c,
 	ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
 	ops.datbuf = NULL;
 
-	ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops);
+	ret = mtd_read_oob(c->mtd, jeb->offset, &ops);
 	if (ret || ops.oobretlen != ops.ooblen) {
 		printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd"
 				" bytes, read %zd bytes, error %d\n",
@@ -1075,7 +1075,7 @@ int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c,
 	ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
 	ops.datbuf = NULL;
 
-	ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops);
+	ret = mtd_read_oob(c->mtd, jeb->offset, &ops);
 	if (ret || ops.oobretlen != ops.ooblen) {
 		printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd"
 				" bytes, read %zd bytes, error %d\n",
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 2fb83cd3d264..0db8d87ce451 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -188,6 +188,8 @@ struct mtd_info {
 		      size_t *retlen, const u_char *buf);
 	int (*panic_write) (struct mtd_info *mtd, loff_t to, size_t len,
 			    size_t *retlen, const u_char *buf);
+	int (*read_oob) (struct mtd_info *mtd, loff_t from,
+			 struct mtd_oob_ops *ops);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
@@ -195,8 +197,6 @@ struct mtd_info {
 	struct backing_dev_info *backing_dev_info;
 
 
-	int (*read_oob) (struct mtd_info *mtd, loff_t from,
-			 struct mtd_oob_ops *ops);
 	int (*write_oob) (struct mtd_info *mtd, loff_t to,
 			 struct mtd_oob_ops *ops);
 
@@ -320,6 +320,12 @@ static inline int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
 	return mtd->panic_write(mtd, to, len, retlen, buf);
 }
 
+static inline int mtd_read_oob(struct mtd_info *mtd, loff_t from,
+			       struct mtd_oob_ops *ops)
+{
+	return mtd->read_oob(mtd, from, ops);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From a2cc5ba075f9bc837d0b4d4ec7328dcefc11859d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:29:55 +0200
Subject: mtd: introduce mtd_write_oob interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/inftlcore.c           |  4 ++--
 drivers/mtd/mtdchar.c             |  6 +++---
 drivers/mtd/mtdconcat.c           |  2 +-
 drivers/mtd/mtdpart.c             |  2 +-
 drivers/mtd/mtdswap.c             |  4 ++--
 drivers/mtd/nand/nand_bbt.c       |  2 +-
 drivers/mtd/nand/sm_common.c      |  2 +-
 drivers/mtd/nftlcore.c            |  4 ++--
 drivers/mtd/sm_ftl.c              |  2 +-
 drivers/mtd/tests/mtd_oobtest.c   | 10 +++++-----
 drivers/staging/spectra/lld_mtd.c |  6 +++---
 fs/jffs2/wbuf.c                   |  2 +-
 include/linux/mtd/mtd.h           | 12 ++++++++----
 13 files changed, 31 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index 07646e1273e2..28646c95cfb8 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -178,7 +178,7 @@ int inftl_write_oob(struct mtd_info *mtd, loff_t offs, size_t len,
 	ops.oobbuf = buf;
 	ops.datbuf = NULL;
 
-	res = mtd->write_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
+	res = mtd_write_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
 	*retlen = ops.oobretlen;
 	return res;
 }
@@ -199,7 +199,7 @@ static int inftl_write(struct mtd_info *mtd, loff_t offs, size_t len,
 	ops.datbuf = buf;
 	ops.len = len;
 
-	res = mtd->write_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
+	res = mtd_write_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
 	*retlen = ops.retlen;
 	return res;
 }
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index e74f570a7b93..234e3d27143c 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -325,7 +325,7 @@ static ssize_t mtdchar_write(struct file *file, const char __user *buf, size_t c
 			ops.ooboffs = 0;
 			ops.len = len;
 
-			ret = mtd->write_oob(mtd, *ppos, &ops);
+			ret = mtd_write_oob(mtd, *ppos, &ops);
 			retlen = ops.retlen;
 			break;
 		}
@@ -426,7 +426,7 @@ static int mtdchar_writeoob(struct file *file, struct mtd_info *mtd,
 		return PTR_ERR(ops.oobbuf);
 
 	start &= ~((uint64_t)mtd->writesize - 1);
-	ret = mtd->write_oob(mtd, start, &ops);
+	ret = mtd_write_oob(mtd, start, &ops);
 
 	if (ops.oobretlen > 0xFFFFFFFFU)
 		ret = -EOVERFLOW;
@@ -609,7 +609,7 @@ static int mtdchar_write_ioctl(struct mtd_info *mtd,
 		ops.oobbuf = NULL;
 	}
 
-	ret = mtd->write_oob(mtd, (loff_t)req.start, &ops);
+	ret = mtd_write_oob(mtd, (loff_t)req.start, &ops);
 
 	kfree(ops.datbuf);
 	kfree(ops.oobbuf);
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index cf35642e5f49..3d9c1ffdbbbf 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -333,7 +333,7 @@ concat_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops)
 		if (to + devops.len > subdev->size)
 			devops.len = subdev->size - to;
 
-		err = subdev->write_oob(subdev, to, &devops);
+		err = mtd_write_oob(subdev, to, &devops);
 		ops->retlen += devops.oobretlen;
 		if (err)
 			return err;
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 6fdc74ef19c1..8a46cd2bb78f 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -217,7 +217,7 @@ static int part_write_oob(struct mtd_info *mtd, loff_t to,
 		return -EINVAL;
 	if (ops->datbuf && to + ops->len > mtd->size)
 		return -EINVAL;
-	return part->master->write_oob(part->master, to + part->offset, ops);
+	return mtd_write_oob(part->master, to + part->offset, ops);
 }
 
 static int part_write_user_prot_reg(struct mtd_info *mtd, loff_t from,
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 0f0ab18d4405..85797390e3dd 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -403,7 +403,7 @@ static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb,
 		offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize;
 	}
 
-	ret = d->mtd->write_oob(d->mtd, offset , &ops);
+	ret = mtd_write_oob(d->mtd, offset, &ops);
 
 	if (ret) {
 		dev_warn(d->dev, "Write OOB failed for block at %08llx "
@@ -946,7 +946,7 @@ static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
 			patt = mtdswap_test_patt(test + i);
 			memset(d->page_buf, patt, mtd->writesize);
 			memset(d->oob_buf, patt, mtd->ecclayout->oobavail);
-			ret = mtd->write_oob(mtd, pos, &ops);
+			ret = mtd_write_oob(mtd, pos, &ops);
 			if (ret)
 				goto error;
 
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index fcab50e80b90..20a112f591fe 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -350,7 +350,7 @@ static int scan_write_bbt(struct mtd_info *mtd, loff_t offs, size_t len,
 	ops.oobbuf = oob;
 	ops.len = len;
 
-	return mtd->write_oob(mtd, offs, &ops);
+	return mtd_write_oob(mtd, offs, &ops);
 }
 
 static u32 bbt_get_ver_offs(struct mtd_info *mtd, struct nand_bbt_descr *td)
diff --git a/drivers/mtd/nand/sm_common.c b/drivers/mtd/nand/sm_common.c
index 32ae5af7444f..774c3c266713 100644
--- a/drivers/mtd/nand/sm_common.c
+++ b/drivers/mtd/nand/sm_common.c
@@ -55,7 +55,7 @@ static int sm_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	ops.datbuf = NULL;
 
 
-	ret = mtd->write_oob(mtd, ofs, &ops);
+	ret = mtd_write_oob(mtd, ofs, &ops);
 	if (ret < 0 || ops.oobretlen != SM_OOB_SIZE) {
 		printk(KERN_NOTICE
 			"sm_common: can't mark sector at %i as bad\n",
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index 7497f5efc26b..8847e60ad167 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -174,7 +174,7 @@ int nftl_write_oob(struct mtd_info *mtd, loff_t offs, size_t len,
 	ops.oobbuf = buf;
 	ops.datbuf = NULL;
 
-	res = mtd->write_oob(mtd, offs & ~mask, &ops);
+	res = mtd_write_oob(mtd, offs & ~mask, &ops);
 	*retlen = ops.oobretlen;
 	return res;
 }
@@ -198,7 +198,7 @@ static int nftl_write(struct mtd_info *mtd, loff_t offs, size_t len,
 	ops.datbuf = buf;
 	ops.len = len;
 
-	res = mtd->write_oob(mtd, offs & ~mask, &ops);
+	res = mtd_write_oob(mtd, offs & ~mask, &ops);
 	*retlen = ops.retlen;
 	return res;
 }
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index 748aa4416691..4ec2af7fb845 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -343,7 +343,7 @@ static int sm_write_sector(struct sm_ftl *ftl,
 	ops.ooblen = SM_OOB_SIZE;
 	ops.oobbuf = (void *)oob;
 
-	ret = mtd->write_oob(mtd, sm_mkoffset(ftl, zone, block, boffset), &ops);
+	ret = mtd_write_oob(mtd, sm_mkoffset(ftl, zone, block, boffset), &ops);
 
 	/* Now we assume that hardware will catch write bitflip errors */
 	/* If you are paranoid, use CONFIG_MTD_NAND_VERIFY_WRITE */
diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c
index 962d27a64e64..81113885e086 100644
--- a/drivers/mtd/tests/mtd_oobtest.c
+++ b/drivers/mtd/tests/mtd_oobtest.c
@@ -139,7 +139,7 @@ static int write_eraseblock(int ebnum)
 		ops.ooboffs   = use_offset;
 		ops.datbuf    = NULL;
 		ops.oobbuf    = writebuf;
-		err = mtd->write_oob(mtd, addr, &ops);
+		err = mtd_write_oob(mtd, addr, &ops);
 		if (err || ops.oobretlen != use_len) {
 			printk(PRINT_PREF "error: writeoob failed at %#llx\n",
 			       (long long)addr);
@@ -524,7 +524,7 @@ static int __init mtd_oobtest_init(void)
 	ops.oobbuf    = writebuf;
 	printk(PRINT_PREF "attempting to start write past end of OOB\n");
 	printk(PRINT_PREF "an error is expected...\n");
-	err = mtd->write_oob(mtd, addr0, &ops);
+	err = mtd_write_oob(mtd, addr0, &ops);
 	if (err) {
 		printk(PRINT_PREF "error occurred as expected\n");
 		err = 0;
@@ -568,7 +568,7 @@ static int __init mtd_oobtest_init(void)
 		ops.oobbuf    = writebuf;
 		printk(PRINT_PREF "attempting to write past end of device\n");
 		printk(PRINT_PREF "an error is expected...\n");
-		err = mtd->write_oob(mtd, mtd->size - mtd->writesize, &ops);
+		err = mtd_write_oob(mtd, mtd->size - mtd->writesize, &ops);
 		if (err) {
 			printk(PRINT_PREF "error occurred as expected\n");
 			err = 0;
@@ -612,7 +612,7 @@ static int __init mtd_oobtest_init(void)
 		ops.oobbuf    = writebuf;
 		printk(PRINT_PREF "attempting to write past end of device\n");
 		printk(PRINT_PREF "an error is expected...\n");
-		err = mtd->write_oob(mtd, mtd->size - mtd->writesize, &ops);
+		err = mtd_write_oob(mtd, mtd->size - mtd->writesize, &ops);
 		if (err) {
 			printk(PRINT_PREF "error occurred as expected\n");
 			err = 0;
@@ -670,7 +670,7 @@ static int __init mtd_oobtest_init(void)
 			ops.ooboffs   = 0;
 			ops.datbuf    = NULL;
 			ops.oobbuf    = writebuf;
-			err = mtd->write_oob(mtd, addr, &ops);
+			err = mtd_write_oob(mtd, addr, &ops);
 			if (err)
 				goto out;
 			if (i % 256 == 0)
diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c
index ed8e5f067087..4aa48ddf979c 100644
--- a/drivers/staging/spectra/lld_mtd.c
+++ b/drivers/staging/spectra/lld_mtd.c
@@ -411,9 +411,9 @@ u16 mtd_Write_Page_Main_Spare(u8 *write_data, u32 Block,
 		ops.ooblen = BTSIG_BYTES;
 		ops.ooboffs = 0;
 
-		ret = spectra_mtd->write_oob(spectra_mtd,
-					     (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
-					     &ops);
+		ret = mtd_write_oob(spectra_mtd,
+				    (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
+				    &ops);
 		if (ret) {
 			printk(KERN_ERR "%s failed %d\n", __func__, ret);
 			return FAIL;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index efc0cb370306..eae5be483682 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1101,7 +1101,7 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c,
 	ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
 	ops.datbuf = NULL;
 
-	ret = c->mtd->write_oob(c->mtd, jeb->offset, &ops);
+	ret = mtd_write_oob(c->mtd, jeb->offset, &ops);
 	if (ret || ops.oobretlen != ops.ooblen) {
 		printk(KERN_ERR "cannot write OOB for EB at %08x, requested %zd"
 				" bytes, read %zd bytes, error %d\n",
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 0db8d87ce451..abbc96ad3b2c 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -190,16 +190,14 @@ struct mtd_info {
 			    size_t *retlen, const u_char *buf);
 	int (*read_oob) (struct mtd_info *mtd, loff_t from,
 			 struct mtd_oob_ops *ops);
+	int (*write_oob) (struct mtd_info *mtd, loff_t to,
+			  struct mtd_oob_ops *ops);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-
-	int (*write_oob) (struct mtd_info *mtd, loff_t to,
-			 struct mtd_oob_ops *ops);
-
 	/*
 	 * Methods to access the protection register area, present in some
 	 * flash devices. The user data is one time programmable but the
@@ -326,6 +324,12 @@ static inline int mtd_read_oob(struct mtd_info *mtd, loff_t from,
 	return mtd->read_oob(mtd, from, ops);
 }
 
+static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to,
+				struct mtd_oob_ops *ops)
+{
+	return mtd->write_oob(mtd, to, ops);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From a750b5ce5e1174ea68f66bf79962c479f7f23998 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:33:28 +0200
Subject: mtd: introduce mtd_get_fact_prot_info interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   |  2 +-
 drivers/mtd/mtdpart.c   |  2 +-
 include/linux/mtd/mtd.h | 19 +++++++++++++------
 3 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 234e3d27143c..4b1772feeafc 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -925,7 +925,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		switch (mfi->mode) {
 		case MTD_FILE_MODE_OTP_FACTORY:
 			if (mtd->get_fact_prot_info)
-				ret = mtd->get_fact_prot_info(mtd, buf, 4096);
+				ret = mtd_get_fact_prot_info(mtd, buf, 4096);
 			break;
 		case MTD_FILE_MODE_OTP_USER:
 			if (mtd->get_user_prot_info)
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 8a46cd2bb78f..6bed8bb3b15d 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -175,7 +175,7 @@ static int part_get_fact_prot_info(struct mtd_info *mtd, struct otp_info *buf,
 		size_t len)
 {
 	struct mtd_part *part = PART(mtd);
-	return part->master->get_fact_prot_info(part->master, buf, len);
+	return mtd_get_fact_prot_info(part->master, buf, len);
 }
 
 static int part_write(struct mtd_info *mtd, loff_t to, size_t len,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index abbc96ad3b2c..9a7a7f2d2296 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -192,18 +192,14 @@ struct mtd_info {
 			 struct mtd_oob_ops *ops);
 	int (*write_oob) (struct mtd_info *mtd, loff_t to,
 			  struct mtd_oob_ops *ops);
+	int (*get_fact_prot_info) (struct mtd_info *mtd, struct otp_info *buf,
+				   size_t len);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	/*
-	 * Methods to access the protection register area, present in some
-	 * flash devices. The user data is one time programmable but the
-	 * factory data is read only.
-	 */
-	int (*get_fact_prot_info) (struct mtd_info *mtd, struct otp_info *buf, size_t len);
 	int (*read_fact_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*get_user_prot_info) (struct mtd_info *mtd, struct otp_info *buf, size_t len);
 	int (*read_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
@@ -330,6 +326,17 @@ static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to,
 	return mtd->write_oob(mtd, to, ops);
 }
 
+/*
+ * Method to access the protection register area, present in some flash
+ * devices. The user data is one time programmable but the factory data is read
+ * only.
+ */
+static inline int mtd_get_fact_prot_info(struct mtd_info *mtd,
+					 struct otp_info *buf, size_t len)
+{
+	return mtd->get_fact_prot_info(mtd, buf, len);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From d264f72ae56245358025109d9d066d159589802d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:40:06 +0200
Subject: mtd: introduce mtd_read_fact_prot_reg interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   |  3 ++-
 drivers/mtd/mtdpart.c   |  3 +--
 include/linux/mtd/mtd.h | 10 +++++++++-
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 4b1772feeafc..6afb05469bbd 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -213,7 +213,8 @@ static ssize_t mtdchar_read(struct file *file, char __user *buf, size_t count,
 
 		switch (mfi->mode) {
 		case MTD_FILE_MODE_OTP_FACTORY:
-			ret = mtd->read_fact_prot_reg(mtd, *ppos, len, &retlen, kbuf);
+			ret = mtd_read_fact_prot_reg(mtd, *ppos, len,
+						     &retlen, kbuf);
 			break;
 		case MTD_FILE_MODE_OTP_USER:
 			ret = mtd->read_user_prot_reg(mtd, *ppos, len, &retlen, kbuf);
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 6bed8bb3b15d..4f2c9137cd49 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -167,8 +167,7 @@ static int part_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len, size_t *retlen, u_char *buf)
 {
 	struct mtd_part *part = PART(mtd);
-	return part->master->read_fact_prot_reg(part->master, from,
-					len, retlen, buf);
+	return mtd_read_fact_prot_reg(part->master, from, len, retlen, buf);
 }
 
 static int part_get_fact_prot_info(struct mtd_info *mtd, struct otp_info *buf,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 9a7a7f2d2296..d77a7f83270f 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -194,13 +194,14 @@ struct mtd_info {
 			  struct mtd_oob_ops *ops);
 	int (*get_fact_prot_info) (struct mtd_info *mtd, struct otp_info *buf,
 				   size_t len);
+	int (*read_fact_prot_reg) (struct mtd_info *mtd, loff_t from,
+				   size_t len, size_t *retlen, u_char *buf);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	int (*read_fact_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*get_user_prot_info) (struct mtd_info *mtd, struct otp_info *buf, size_t len);
 	int (*read_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*write_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
@@ -337,6 +338,13 @@ static inline int mtd_get_fact_prot_info(struct mtd_info *mtd,
 	return mtd->get_fact_prot_info(mtd, buf, len);
 }
 
+static inline int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
+					 size_t len, size_t *retlen,
+					 u_char *buf)
+{
+	return mtd->read_fact_prot_reg(mtd, from, len, retlen, buf);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 855e5d8cfebc21f45c9446a88b61e29d94c03781 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:45:11 +0200
Subject: mtd: introduce mtd_get_user_prot_info interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   |  2 +-
 drivers/mtd/mtdpart.c   |  2 +-
 include/linux/mtd/mtd.h | 10 +++++++++-
 3 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 6afb05469bbd..002a8b5428cc 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -930,7 +930,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 			break;
 		case MTD_FILE_MODE_OTP_USER:
 			if (mtd->get_user_prot_info)
-				ret = mtd->get_user_prot_info(mtd, buf, 4096);
+				ret = mtd_get_user_prot_info(mtd, buf, 4096);
 			break;
 		default:
 			break;
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 4f2c9137cd49..bf1ab56afb8e 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -160,7 +160,7 @@ static int part_get_user_prot_info(struct mtd_info *mtd,
 		struct otp_info *buf, size_t len)
 {
 	struct mtd_part *part = PART(mtd);
-	return part->master->get_user_prot_info(part->master, buf, len);
+	return mtd_get_user_prot_info(part->master, buf, len);
 }
 
 static int part_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index d77a7f83270f..ff0a3a18f397 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -196,13 +196,14 @@ struct mtd_info {
 				   size_t len);
 	int (*read_fact_prot_reg) (struct mtd_info *mtd, loff_t from,
 				   size_t len, size_t *retlen, u_char *buf);
+	int (*get_user_prot_info) (struct mtd_info *mtd, struct otp_info *buf,
+				   size_t len);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	int (*get_user_prot_info) (struct mtd_info *mtd, struct otp_info *buf, size_t len);
 	int (*read_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*write_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*lock_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len);
@@ -345,6 +346,13 @@ static inline int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
 	return mtd->read_fact_prot_reg(mtd, from, len, retlen, buf);
 }
 
+static inline int mtd_get_user_prot_info(struct mtd_info *mtd,
+					 struct otp_info *buf,
+					 size_t len)
+{
+	return mtd->get_user_prot_info(mtd, buf, len);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 4ea1cabb926f03a8dbd6e3f064538d9a290ee9fd Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:47:59 +0200
Subject: mtd: introduce mtd_read_user_prot_reg interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   |  3 ++-
 drivers/mtd/mtdpart.c   |  3 +--
 include/linux/mtd/mtd.h | 10 +++++++++-
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 002a8b5428cc..6aa3fb4a0292 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -217,7 +217,8 @@ static ssize_t mtdchar_read(struct file *file, char __user *buf, size_t count,
 						     &retlen, kbuf);
 			break;
 		case MTD_FILE_MODE_OTP_USER:
-			ret = mtd->read_user_prot_reg(mtd, *ppos, len, &retlen, kbuf);
+			ret = mtd_read_user_prot_reg(mtd, *ppos, len,
+						     &retlen, kbuf);
 			break;
 		case MTD_FILE_MODE_RAW:
 		{
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index bf1ab56afb8e..f018373ef3b4 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -152,8 +152,7 @@ static int part_read_user_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len, size_t *retlen, u_char *buf)
 {
 	struct mtd_part *part = PART(mtd);
-	return part->master->read_user_prot_reg(part->master, from,
-					len, retlen, buf);
+	return mtd_read_user_prot_reg(part->master, from, len, retlen, buf);
 }
 
 static int part_get_user_prot_info(struct mtd_info *mtd,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index ff0a3a18f397..855fb7fab697 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -198,13 +198,14 @@ struct mtd_info {
 				   size_t len, size_t *retlen, u_char *buf);
 	int (*get_user_prot_info) (struct mtd_info *mtd, struct otp_info *buf,
 				   size_t len);
+	int (*read_user_prot_reg) (struct mtd_info *mtd, loff_t from,
+				   size_t len, size_t *retlen, u_char *buf);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	int (*read_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*write_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*lock_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len);
 
@@ -353,6 +354,13 @@ static inline int mtd_get_user_prot_info(struct mtd_info *mtd,
 	return mtd->get_user_prot_info(mtd, buf, len);
 }
 
+static inline int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from,
+					 size_t len, size_t *retlen,
+					 u_char *buf)
+{
+	return mtd->read_user_prot_reg(mtd, from, len, retlen, buf);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 482b43adbb7b124316ec72c161b0d1655e759368 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:50:04 +0200
Subject: mtd: introduce mtd_write_user_prot_reg interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   |  3 ++-
 drivers/mtd/mtdpart.c   |  3 +--
 include/linux/mtd/mtd.h | 10 +++++++++-
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 6aa3fb4a0292..d8881707ca60 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -314,7 +314,8 @@ static ssize_t mtdchar_write(struct file *file, const char __user *buf, size_t c
 				ret = -EOPNOTSUPP;
 				break;
 			}
-			ret = mtd->write_user_prot_reg(mtd, *ppos, len, &retlen, kbuf);
+			ret = mtd_write_user_prot_reg(mtd, *ppos, len,
+						      &retlen, kbuf);
 			break;
 
 		case MTD_FILE_MODE_RAW:
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index f018373ef3b4..1e7b8d1693aa 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -222,8 +222,7 @@ static int part_write_user_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len, size_t *retlen, u_char *buf)
 {
 	struct mtd_part *part = PART(mtd);
-	return part->master->write_user_prot_reg(part->master, from,
-					len, retlen, buf);
+	return mtd_write_user_prot_reg(part->master, from, len, retlen, buf);
 }
 
 static int part_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 855fb7fab697..554960793e37 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -200,13 +200,14 @@ struct mtd_info {
 				   size_t len);
 	int (*read_user_prot_reg) (struct mtd_info *mtd, loff_t from,
 				   size_t len, size_t *retlen, u_char *buf);
+	int (*write_user_prot_reg) (struct mtd_info *mtd, loff_t to, size_t len,
+				    size_t *retlen, u_char *buf);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	int (*write_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*lock_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len);
 
 	/* kvec-based read/write methods.
@@ -361,6 +362,13 @@ static inline int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from,
 	return mtd->read_user_prot_reg(mtd, from, len, retlen, buf);
 }
 
+static inline int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to,
+					  size_t len, size_t *retlen,
+					  u_char *buf)
+{
+	return mtd->write_user_prot_reg(mtd, to, len, retlen, buf);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 4403dbfb4541d34e5db33db709094d57d09f7467 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:55:49 +0200
Subject: mtd: introduce mtd_lock_user_prot_reg interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   |  2 +-
 drivers/mtd/mtdpart.c   |  2 +-
 include/linux/mtd/mtd.h | 10 ++++++++--
 3 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index d8881707ca60..86308acb40e0 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -960,7 +960,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 			return -EFAULT;
 		if (!mtd->lock_user_prot_reg)
 			return -EOPNOTSUPP;
-		ret = mtd->lock_user_prot_reg(mtd, oinfo.start, oinfo.length);
+		ret = mtd_lock_user_prot_reg(mtd, oinfo.start, oinfo.length);
 		break;
 	}
 #endif
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 1e7b8d1693aa..0bb16d6ed08a 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -229,7 +229,7 @@ static int part_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len)
 {
 	struct mtd_part *part = PART(mtd);
-	return part->master->lock_user_prot_reg(part->master, from, len);
+	return mtd_lock_user_prot_reg(part->master, from, len);
 }
 
 static int part_writev(struct mtd_info *mtd, const struct kvec *vecs,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 554960793e37..b58e5e8746ec 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -202,14 +202,14 @@ struct mtd_info {
 				   size_t len, size_t *retlen, u_char *buf);
 	int (*write_user_prot_reg) (struct mtd_info *mtd, loff_t to, size_t len,
 				    size_t *retlen, u_char *buf);
+	int (*lock_user_prot_reg) (struct mtd_info *mtd, loff_t from,
+				   size_t len);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	int (*lock_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len);
-
 	/* kvec-based read/write methods.
 	   NB: The 'count' parameter is the number of _vectors_, each of
 	   which contains an (ofs, len) tuple.
@@ -369,6 +369,12 @@ static inline int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to,
 	return mtd->write_user_prot_reg(mtd, to, len, retlen, buf);
 }
 
+static inline int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
+					 size_t len)
+{
+	return mtd->lock_user_prot_reg(mtd, from, len);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From b0a31f7b2a668f00a8d0546dfeed65fac871b2da Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:59:12 +0200
Subject: mtd: introduce mtd_writev interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdconcat.c |  5 +++--
 drivers/mtd/mtdpart.c   |  4 ++--
 fs/jffs2/writev.c       |  2 +-
 include/linux/mtd/mtd.h | 18 ++++++++++++------
 4 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 3d9c1ffdbbbf..6fdae191e1ba 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -227,8 +227,9 @@ concat_writev(struct mtd_info *mtd, const struct kvec *vecs,
 		if (!(subdev->flags & MTD_WRITEABLE))
 			err = -EROFS;
 		else
-			err = subdev->writev(subdev, &vecs_copy[entry_low],
-				entry_high - entry_low + 1, to, &retsize);
+			err = mtd_writev(subdev, &vecs_copy[entry_low],
+					 entry_high - entry_low + 1, to,
+					 &retsize);
 
 		vecs_copy[entry_high].iov_len = old_iov_len - size;
 		vecs_copy[entry_high].iov_base += size;
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 0bb16d6ed08a..c0bfa88c82f3 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -238,8 +238,8 @@ static int part_writev(struct mtd_info *mtd, const struct kvec *vecs,
 	struct mtd_part *part = PART(mtd);
 	if (!(mtd->flags & MTD_WRITEABLE))
 		return -EROFS;
-	return part->master->writev(part->master, vecs, count,
-					to + part->offset, retlen);
+	return mtd_writev(part->master, vecs, count, to + part->offset,
+			  retlen);
 }
 
 static int part_erase(struct mtd_info *mtd, struct erase_info *instr)
diff --git a/fs/jffs2/writev.c b/fs/jffs2/writev.c
index b05710fd552a..d0ef068709ad 100644
--- a/fs/jffs2/writev.c
+++ b/fs/jffs2/writev.c
@@ -52,7 +52,7 @@ int jffs2_flash_direct_writev(struct jffs2_sb_info *c, const struct kvec *vecs,
 	}
 
 	if (c->mtd->writev)
-		return c->mtd->writev(c->mtd, vecs, count, to, retlen);
+		return mtd_writev(c->mtd, vecs, count, to, retlen);
 	else {
 		return mtd_fake_writev(c->mtd, vecs, count, to, retlen);
 	}
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index b58e5e8746ec..4129cb5c3de4 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -204,18 +204,14 @@ struct mtd_info {
 				    size_t *retlen, u_char *buf);
 	int (*lock_user_prot_reg) (struct mtd_info *mtd, loff_t from,
 				   size_t len);
+	int (*writev) (struct mtd_info *mtd, const struct kvec *vecs,
+			unsigned long count, loff_t to, size_t *retlen);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	/* kvec-based read/write methods.
-	   NB: The 'count' parameter is the number of _vectors_, each of
-	   which contains an (ofs, len) tuple.
-	*/
-	int (*writev) (struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen);
-
 	/* Sync */
 	void (*sync) (struct mtd_info *mtd);
 
@@ -375,6 +371,16 @@ static inline int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 	return mtd->lock_user_prot_reg(mtd, from, len);
 }
 
+/*
+ * kvec-based read/write method. NB: The 'count' parameter is the number of
+ * _vectors_, each of which contains an (ofs, len) tuple.
+ */
+static inline int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
+			     unsigned long count, loff_t to, size_t *retlen)
+{
+	return mtd->writev(mtd, vecs, count, to, retlen);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 85f2f2a809d658c15b574df02ede92090f45a1f2 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:03:12 +0200
Subject: mtd: introduce mtd_sync interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/ftl.c       | 2 +-
 drivers/mtd/mtdblock.c  | 4 ++--
 drivers/mtd/mtdchar.c   | 2 +-
 drivers/mtd/mtdconcat.c | 2 +-
 drivers/mtd/mtdpart.c   | 2 +-
 drivers/mtd/mtdswap.c   | 2 +-
 drivers/mtd/rfd_ftl.c   | 2 +-
 drivers/mtd/ubi/kapi.c  | 2 +-
 fs/jffs2/super.c        | 2 +-
 fs/logfs/dev_mtd.c      | 2 +-
 include/linux/mtd/mtd.h | 9 ++++++---
 11 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index d591b1d0a6c1..c9c90299c9e2 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -651,7 +651,7 @@ static int reclaim_block(partition_t *part)
 		pr_debug("ftl_cs: waiting for transfer "
 		      "unit to be prepared...\n");
 		if (part->mbd.mtd->sync)
-			part->mbd.mtd->sync(part->mbd.mtd);
+			mtd_sync(part->mbd.mtd);
 	    } else {
 		static int ne = 0;
 		if (++ne < 5)
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index ac7f1f1faa2d..496e1a6e8029 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -323,7 +323,7 @@ static int mtdblock_release(struct mtd_blktrans_dev *mbd)
 	if (!--mtdblk->count) {
 		/* It was the last usage. Free the cache */
 		if (mbd->mtd->sync)
-			mbd->mtd->sync(mbd->mtd);
+			mtd_sync(mbd->mtd);
 		vfree(mtdblk->cache_data);
 	}
 
@@ -343,7 +343,7 @@ static int mtdblock_flush(struct mtd_blktrans_dev *dev)
 	mutex_unlock(&mtdblk->cache_mutex);
 
 	if (dev->mtd->sync)
-		dev->mtd->sync(dev->mtd);
+		mtd_sync(dev->mtd);
 	return 0;
 }
 
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 86308acb40e0..b5722ecf19d3 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -155,7 +155,7 @@ static int mtdchar_close(struct inode *inode, struct file *file)
 
 	/* Only sync if opened RW */
 	if ((file->f_mode & FMODE_WRITE) && mtd->sync)
-		mtd->sync(mtd);
+		mtd_sync(mtd);
 
 	iput(mfi->ino);
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 6fdae191e1ba..cc2336edfe28 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -620,7 +620,7 @@ static void concat_sync(struct mtd_info *mtd)
 
 	for (i = 0; i < concat->num_subdev; i++) {
 		struct mtd_info *subdev = concat->subdev[i];
-		subdev->sync(subdev);
+		mtd_sync(subdev);
 	}
 }
 
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index c0bfa88c82f3..2b545052795e 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -301,7 +301,7 @@ static int part_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 static void part_sync(struct mtd_info *mtd)
 {
 	struct mtd_part *part = PART(mtd);
-	part->master->sync(part->master);
+	mtd_sync(part->master);
 }
 
 static int part_suspend(struct mtd_info *mtd)
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 85797390e3dd..cb794e761012 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -1048,7 +1048,7 @@ static int mtdswap_flush(struct mtd_blktrans_dev *dev)
 	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
 
 	if (d->mtd->sync)
-		d->mtd->sync(d->mtd);
+		mtd_sync(d->mtd);
 	return 0;
 }
 
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index c594bb7abfa3..5426d42cdea7 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -449,7 +449,7 @@ static int reclaim_block(struct partition *part, u_long *old_sector)
 
 	/* we have a race if sync doesn't exist */
 	if (part->mbd.mtd->sync)
-		part->mbd.mtd->sync(part->mbd.mtd);
+		mtd_sync(part->mbd.mtd);
 
 	score = 0x7fffffff; /* MAX_INT */
 	best_block = -1;
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index 1a35fc5e3b40..9f265cc1a0d3 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -715,7 +715,7 @@ int ubi_sync(int ubi_num)
 		return -ENODEV;
 
 	if (ubi->mtd->sync)
-		ubi->mtd->sync(ubi->mtd);
+		mtd_sync(ubi->mtd);
 
 	ubi_put_device(ubi);
 	return 0;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index e7e974454115..e78bf3cd1b73 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -337,7 +337,7 @@ static void jffs2_put_super (struct super_block *sb)
 	kfree(c->inocache_list);
 	jffs2_clear_xattr_subsystem(c);
 	if (c->mtd->sync)
-		c->mtd->sync(c->mtd);
+		mtd_sync(c->mtd);
 
 	D1(printk(KERN_DEBUG "jffs2_put_super returning\n"));
 }
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 1842440d6564..0ca7a07db6c1 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -120,7 +120,7 @@ static void logfs_mtd_sync(struct super_block *sb)
 	struct mtd_info *mtd = logfs_super(sb)->s_mtd;
 
 	if (mtd->sync)
-		mtd->sync(mtd);
+		mtd_sync(mtd);
 }
 
 static int logfs_mtd_readpage(void *_sb, struct page *page)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 4129cb5c3de4..47ea19c1e523 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -206,15 +206,13 @@ struct mtd_info {
 				   size_t len);
 	int (*writev) (struct mtd_info *mtd, const struct kvec *vecs,
 			unsigned long count, loff_t to, size_t *retlen);
+	void (*sync) (struct mtd_info *mtd);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	/* Sync */
-	void (*sync) (struct mtd_info *mtd);
-
 	/* Chip-supported device locking */
 	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
@@ -381,6 +379,11 @@ static inline int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 	return mtd->writev(mtd, vecs, count, to, retlen);
 }
 
+static inline void mtd_sync(struct mtd_info *mtd)
+{
+	mtd->sync(mtd);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 7799f9ac8d8ff2db14736950275249df442baeac Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:15:39 +0200
Subject: mtd: introduce mtd_lock interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/scb2_flash.c | 2 +-
 drivers/mtd/mtdchar.c         | 2 +-
 drivers/mtd/mtdconcat.c       | 2 +-
 drivers/mtd/mtdpart.c         | 2 +-
 include/linux/mtd/mtd.h       | 9 +++++++--
 5 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/maps/scb2_flash.c b/drivers/mtd/maps/scb2_flash.c
index d88c8426bb0f..01af34778de3 100644
--- a/drivers/mtd/maps/scb2_flash.c
+++ b/drivers/mtd/maps/scb2_flash.c
@@ -205,7 +205,7 @@ scb2_flash_remove(struct pci_dev *dev)
 
 	/* disable flash writes */
 	if (scb2_mtd->lock)
-		scb2_mtd->lock(scb2_mtd, 0, scb2_mtd->size);
+		mtd_lock(scb2_mtd, 0, scb2_mtd->size);
 
 	mtd_device_unregister(scb2_mtd);
 	map_destroy(scb2_mtd);
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index b5722ecf19d3..870f2cb415cb 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -824,7 +824,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (!mtd->lock)
 			ret = -EOPNOTSUPP;
 		else
-			ret = mtd->lock(mtd, einfo.start, einfo.length);
+			ret = mtd_lock(mtd, einfo.start, einfo.length);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index cc2336edfe28..97d6360986c8 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -556,7 +556,7 @@ static int concat_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 			size = len;
 
 		if (subdev->lock) {
-			err = subdev->lock(subdev, ofs, size);
+			err = mtd_lock(subdev, ofs, size);
 			if (err)
 				break;
 		} else
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 2b545052795e..a5e7a2103dcf 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -279,7 +279,7 @@ static int part_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	struct mtd_part *part = PART(mtd);
 	if ((len + ofs) > mtd->size)
 		return -EINVAL;
-	return part->master->lock(part->master, ofs + part->offset, len);
+	return mtd_lock(part->master, ofs + part->offset, len);
 }
 
 static int part_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 47ea19c1e523..167bac2e380e 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -207,14 +207,13 @@ struct mtd_info {
 	int (*writev) (struct mtd_info *mtd, const struct kvec *vecs,
 			unsigned long count, loff_t to, size_t *retlen);
 	void (*sync) (struct mtd_info *mtd);
+	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	/* Chip-supported device locking */
-	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 
@@ -384,6 +383,12 @@ static inline void mtd_sync(struct mtd_info *mtd)
 	mtd->sync(mtd);
 }
 
+/* Chip-supported device locking */
+static inline int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	return mtd->lock(mtd, ofs, len);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From b66005cd3e6f104e0a1b6492110c337269b53ec3 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:18:22 +0200
Subject: mtd: introduce mtd_unlock interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 2 +-
 drivers/mtd/mtdconcat.c | 2 +-
 drivers/mtd/mtdcore.c   | 2 +-
 drivers/mtd/mtdpart.c   | 2 +-
 include/linux/mtd/mtd.h | 7 ++++++-
 5 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 870f2cb415cb..fe09cd2a4540 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -838,7 +838,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (!mtd->unlock)
 			ret = -EOPNOTSUPP;
 		else
-			ret = mtd->unlock(mtd, einfo.start, einfo.length);
+			ret = mtd_unlock(mtd, einfo.start, einfo.length);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 97d6360986c8..272ebc01f95b 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -596,7 +596,7 @@ static int concat_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 			size = len;
 
 		if (subdev->unlock) {
-			err = subdev->unlock(subdev, ofs, size);
+			err = mtd_unlock(subdev, ofs, size);
 			if (err)
 				break;
 		} else
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index e36191ab47c3..4a2155748fa3 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -340,7 +340,7 @@ int add_mtd_device(struct mtd_info *mtd)
 	/* Some chips always power up locked. Unlock them now */
 	if ((mtd->flags & MTD_WRITEABLE)
 	    && (mtd->flags & MTD_POWERUP_LOCK) && mtd->unlock) {
-		if (mtd->unlock(mtd, 0, mtd->size))
+		if (mtd_unlock(mtd, 0, mtd->size))
 			printk(KERN_WARNING
 			       "%s: unlock failed, writes may not work\n",
 			       mtd->name);
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index a5e7a2103dcf..d65af3752331 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -287,7 +287,7 @@ static int part_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	struct mtd_part *part = PART(mtd);
 	if ((len + ofs) > mtd->size)
 		return -EINVAL;
-	return part->master->unlock(part->master, ofs + part->offset, len);
+	return mtd_unlock(part->master, ofs + part->offset, len);
 }
 
 static int part_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 167bac2e380e..f30c35886f7c 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -208,13 +208,13 @@ struct mtd_info {
 			unsigned long count, loff_t to, size_t *retlen);
 	void (*sync) (struct mtd_info *mtd);
 	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
+	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 
 	/* Power Management functions */
@@ -389,6 +389,11 @@ static inline int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	return mtd->lock(mtd, ofs, len);
 }
 
+static inline int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	return mtd->unlock(mtd, ofs, len);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From e95e9786455c11c8eac30d76e5289d4e40187f9a Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:21:16 +0200
Subject: mtd: introduce mtd_is_locked interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 2 +-
 drivers/mtd/mtdpart.c   | 2 +-
 include/linux/mtd/mtd.h | 8 ++++++--
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index fe09cd2a4540..6d598b23cf3a 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -852,7 +852,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (!mtd->is_locked)
 			ret = -EOPNOTSUPP;
 		else
-			ret = mtd->is_locked(mtd, einfo.start, einfo.length);
+			ret = mtd_is_locked(mtd, einfo.start, einfo.length);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index d65af3752331..ad487fcd423f 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -295,7 +295,7 @@ static int part_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	struct mtd_part *part = PART(mtd);
 	if ((len + ofs) > mtd->size)
 		return -EINVAL;
-	return part->master->is_locked(part->master, ofs + part->offset, len);
+	return mtd_is_locked(part->master, ofs + part->offset, len);
 }
 
 static void part_sync(struct mtd_info *mtd)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index f30c35886f7c..8b9901986c86 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -209,14 +209,13 @@ struct mtd_info {
 	void (*sync) (struct mtd_info *mtd);
 	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
+	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
-
 	/* Power Management functions */
 	int (*suspend) (struct mtd_info *mtd);
 	void (*resume) (struct mtd_info *mtd);
@@ -394,6 +393,11 @@ static inline int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	return mtd->unlock(mtd, ofs, len);
 }
 
+static inline int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	return mtd->is_locked(mtd, ofs, len);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 3fe4bae88460869a8e553397cd9057a4ee7ca341 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:25:16 +0200
Subject: mtd: introduce mtd_suspend interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/physmap.c        | 2 +-
 drivers/mtd/maps/pxa2xx-flash.c   | 2 +-
 drivers/mtd/maps/rbtx4939-flash.c | 2 +-
 drivers/mtd/maps/sa1100-flash.c   | 2 +-
 drivers/mtd/mtdconcat.c           | 2 +-
 drivers/mtd/mtdcore.c             | 2 +-
 drivers/mtd/mtdpart.c             | 2 +-
 drivers/mtd/nand/nomadik_nand.c   | 2 +-
 drivers/mtd/nand/pxa3xx_nand.c    | 2 +-
 include/linux/mtd/mtd.h           | 7 ++++++-
 10 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index 1f749d58ae6f..b7f0cd14ae2b 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c
@@ -191,7 +191,7 @@ static void physmap_flash_shutdown(struct platform_device *dev)
 
 	for (i = 0; i < MAX_RESOURCES && info->mtd[i]; i++)
 		if (info->mtd[i]->suspend && info->mtd[i]->resume)
-			if (info->mtd[i]->suspend(info->mtd[i]) == 0)
+			if (mtd_suspend(info->mtd[i]) == 0)
 				info->mtd[i]->resume(info->mtd[i]);
 }
 #else
diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
index 274e39914332..9cb427320c04 100644
--- a/drivers/mtd/maps/pxa2xx-flash.c
+++ b/drivers/mtd/maps/pxa2xx-flash.c
@@ -125,7 +125,7 @@ static void pxa2xx_flash_shutdown(struct platform_device *dev)
 {
 	struct pxa2xx_flash_info *info = platform_get_drvdata(dev);
 
-	if (info && info->mtd->suspend(info->mtd) == 0)
+	if (info && mtd_suspend(info->mtd) == 0)
 		info->mtd->resume(info->mtd);
 }
 #else
diff --git a/drivers/mtd/maps/rbtx4939-flash.c b/drivers/mtd/maps/rbtx4939-flash.c
index bb7d2042affa..5856aa2d99f7 100644
--- a/drivers/mtd/maps/rbtx4939-flash.c
+++ b/drivers/mtd/maps/rbtx4939-flash.c
@@ -120,7 +120,7 @@ static void rbtx4939_flash_shutdown(struct platform_device *dev)
 	struct rbtx4939_flash_info *info = platform_get_drvdata(dev);
 
 	if (info->mtd->suspend && info->mtd->resume)
-		if (info->mtd->suspend(info->mtd) == 0)
+		if (mtd_suspend(info->mtd) == 0)
 			info->mtd->resume(info->mtd);
 }
 #else
diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c
index ac3a290748cd..20944f054867 100644
--- a/drivers/mtd/maps/sa1100-flash.c
+++ b/drivers/mtd/maps/sa1100-flash.c
@@ -377,7 +377,7 @@ static int __exit sa1100_mtd_remove(struct platform_device *pdev)
 static void sa1100_mtd_shutdown(struct platform_device *dev)
 {
 	struct sa_info *info = platform_get_drvdata(dev);
-	if (info && info->mtd->suspend(info->mtd) == 0)
+	if (info && mtd_suspend(info->mtd) == 0)
 		info->mtd->resume(info->mtd);
 }
 #else
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 272ebc01f95b..36bb1c99925b 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -631,7 +631,7 @@ static int concat_suspend(struct mtd_info *mtd)
 
 	for (i = 0; i < concat->num_subdev; i++) {
 		struct mtd_info *subdev = concat->subdev[i];
-		if ((rc = subdev->suspend(subdev)) < 0)
+		if ((rc = mtd_suspend(subdev)) < 0)
 			return rc;
 	}
 	return rc;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 4a2155748fa3..0db455d31148 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -119,7 +119,7 @@ static int mtd_cls_suspend(struct device *dev, pm_message_t state)
 	struct mtd_info *mtd = dev_to_mtd(dev);
 
 	if (mtd && mtd->suspend)
-		return mtd->suspend(mtd);
+		return mtd_suspend(mtd);
 	else
 		return 0;
 }
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index ad487fcd423f..c5e556a92641 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -307,7 +307,7 @@ static void part_sync(struct mtd_info *mtd)
 static int part_suspend(struct mtd_info *mtd)
 {
 	struct mtd_part *part = PART(mtd);
-	return part->master->suspend(part->master);
+	return mtd_suspend(part->master);
 }
 
 static void part_resume(struct mtd_info *mtd)
diff --git a/drivers/mtd/nand/nomadik_nand.c b/drivers/mtd/nand/nomadik_nand.c
index 673dc6c68f9a..9461babdb308 100644
--- a/drivers/mtd/nand/nomadik_nand.c
+++ b/drivers/mtd/nand/nomadik_nand.c
@@ -201,7 +201,7 @@ static int nomadik_nand_suspend(struct device *dev)
 	struct nomadik_nand_host *host = dev_get_drvdata(dev);
 	int ret = 0;
 	if (host)
-		ret = host->mtd.suspend(&host->mtd);
+		ret = mtd_suspend(&host->mtd);
 	return ret;
 }
 
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 90d60169ae40..7a028cf1206e 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -1258,7 +1258,7 @@ static int pxa3xx_nand_suspend(struct platform_device *pdev, pm_message_t state)
 
 	for (cs = 0; cs < pdata->num_cs; cs++) {
 		mtd = info->host[cs]->mtd;
-		mtd->suspend(mtd);
+		mtd_suspend(mtd);
 	}
 
 	return 0;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 8b9901986c86..8e01bad44e25 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -210,6 +210,7 @@ struct mtd_info {
 	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
+	int (*suspend) (struct mtd_info *mtd);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
@@ -217,7 +218,6 @@ struct mtd_info {
 	struct backing_dev_info *backing_dev_info;
 
 	/* Power Management functions */
-	int (*suspend) (struct mtd_info *mtd);
 	void (*resume) (struct mtd_info *mtd);
 
 	/* Bad block management functions */
@@ -398,6 +398,11 @@ static inline int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	return mtd->is_locked(mtd, ofs, len);
 }
 
+static inline int mtd_suspend(struct mtd_info *mtd)
+{
+	return mtd->suspend(mtd);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From ead995f8d4da1e2f1ef40b0e5f4133fee38a3d3d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:31:25 +0200
Subject: mtd: introduce mtd_resume interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/physmap.c        | 2 +-
 drivers/mtd/maps/pxa2xx-flash.c   | 2 +-
 drivers/mtd/maps/rbtx4939-flash.c | 2 +-
 drivers/mtd/maps/sa1100-flash.c   | 2 +-
 drivers/mtd/mtdconcat.c           | 2 +-
 drivers/mtd/mtdcore.c             | 2 +-
 drivers/mtd/mtdpart.c             | 2 +-
 drivers/mtd/nand/nomadik_nand.c   | 2 +-
 drivers/mtd/nand/pxa3xx_nand.c    | 2 +-
 include/linux/mtd/mtd.h           | 9 ++++++---
 10 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index b7f0cd14ae2b..d94cc62186c1 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c
@@ -192,7 +192,7 @@ static void physmap_flash_shutdown(struct platform_device *dev)
 	for (i = 0; i < MAX_RESOURCES && info->mtd[i]; i++)
 		if (info->mtd[i]->suspend && info->mtd[i]->resume)
 			if (mtd_suspend(info->mtd[i]) == 0)
-				info->mtd[i]->resume(info->mtd[i]);
+				mtd_resume(info->mtd[i]);
 }
 #else
 #define physmap_flash_shutdown NULL
diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
index 9cb427320c04..436d121185b1 100644
--- a/drivers/mtd/maps/pxa2xx-flash.c
+++ b/drivers/mtd/maps/pxa2xx-flash.c
@@ -126,7 +126,7 @@ static void pxa2xx_flash_shutdown(struct platform_device *dev)
 	struct pxa2xx_flash_info *info = platform_get_drvdata(dev);
 
 	if (info && mtd_suspend(info->mtd) == 0)
-		info->mtd->resume(info->mtd);
+		mtd_resume(info->mtd);
 }
 #else
 #define pxa2xx_flash_shutdown NULL
diff --git a/drivers/mtd/maps/rbtx4939-flash.c b/drivers/mtd/maps/rbtx4939-flash.c
index 5856aa2d99f7..717628312040 100644
--- a/drivers/mtd/maps/rbtx4939-flash.c
+++ b/drivers/mtd/maps/rbtx4939-flash.c
@@ -121,7 +121,7 @@ static void rbtx4939_flash_shutdown(struct platform_device *dev)
 
 	if (info->mtd->suspend && info->mtd->resume)
 		if (mtd_suspend(info->mtd) == 0)
-			info->mtd->resume(info->mtd);
+			mtd_resume(info->mtd);
 }
 #else
 #define rbtx4939_flash_shutdown NULL
diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c
index 20944f054867..502821997707 100644
--- a/drivers/mtd/maps/sa1100-flash.c
+++ b/drivers/mtd/maps/sa1100-flash.c
@@ -378,7 +378,7 @@ static void sa1100_mtd_shutdown(struct platform_device *dev)
 {
 	struct sa_info *info = platform_get_drvdata(dev);
 	if (info && mtd_suspend(info->mtd) == 0)
-		info->mtd->resume(info->mtd);
+		mtd_resume(info->mtd);
 }
 #else
 #define sa1100_mtd_shutdown NULL
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 36bb1c99925b..4b7f825ce015 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -644,7 +644,7 @@ static void concat_resume(struct mtd_info *mtd)
 
 	for (i = 0; i < concat->num_subdev; i++) {
 		struct mtd_info *subdev = concat->subdev[i];
-		subdev->resume(subdev);
+		mtd_resume(subdev);
 	}
 }
 
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 0db455d31148..376fbfdb09aa 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -129,7 +129,7 @@ static int mtd_cls_resume(struct device *dev)
 	struct mtd_info *mtd = dev_to_mtd(dev);
 	
 	if (mtd && mtd->resume)
-		mtd->resume(mtd);
+		mtd_resume(mtd);
 	return 0;
 }
 
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index c5e556a92641..8610750852ac 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -313,7 +313,7 @@ static int part_suspend(struct mtd_info *mtd)
 static void part_resume(struct mtd_info *mtd)
 {
 	struct mtd_part *part = PART(mtd);
-	part->master->resume(part->master);
+	mtd_resume(part->master);
 }
 
 static int part_block_isbad(struct mtd_info *mtd, loff_t ofs)
diff --git a/drivers/mtd/nand/nomadik_nand.c b/drivers/mtd/nand/nomadik_nand.c
index 9461babdb308..a86aa812ca13 100644
--- a/drivers/mtd/nand/nomadik_nand.c
+++ b/drivers/mtd/nand/nomadik_nand.c
@@ -209,7 +209,7 @@ static int nomadik_nand_resume(struct device *dev)
 {
 	struct nomadik_nand_host *host = dev_get_drvdata(dev);
 	if (host)
-		host->mtd.resume(&host->mtd);
+		mtd_resume(&host->mtd);
 	return 0;
 }
 
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 7a028cf1206e..8544d6bf50a0 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -1291,7 +1291,7 @@ static int pxa3xx_nand_resume(struct platform_device *pdev)
 	nand_writel(info, NDSR, NDSR_MASK);
 	for (cs = 0; cs < pdata->num_cs; cs++) {
 		mtd = info->host[cs]->mtd;
-		mtd->resume(mtd);
+		mtd_resume(mtd);
 	}
 
 	return 0;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 8e01bad44e25..d6b4aa177505 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -211,15 +211,13 @@ struct mtd_info {
 	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*suspend) (struct mtd_info *mtd);
+	void (*resume) (struct mtd_info *mtd);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	/* Power Management functions */
-	void (*resume) (struct mtd_info *mtd);
-
 	/* Bad block management functions */
 	int (*block_isbad) (struct mtd_info *mtd, loff_t ofs);
 	int (*block_markbad) (struct mtd_info *mtd, loff_t ofs);
@@ -403,6 +401,11 @@ static inline int mtd_suspend(struct mtd_info *mtd)
 	return mtd->suspend(mtd);
 }
 
+static inline void mtd_resume(struct mtd_info *mtd)
+{
+	mtd->resume(mtd);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 7086c19d07429d697057587caf1e5e0345442d16 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:35:30 +0200
Subject: mtd: introduce mtd_block_isbad interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 arch/cris/arch-v32/drivers/axisflashmap.c | 3 +--
 drivers/mtd/inftlmount.c                  | 3 ++-
 drivers/mtd/mtdchar.c                     | 2 +-
 drivers/mtd/mtdconcat.c                   | 2 +-
 drivers/mtd/mtdoops.c                     | 4 ++--
 drivers/mtd/mtdpart.c                     | 5 ++---
 drivers/mtd/mtdswap.c                     | 4 ++--
 drivers/mtd/nftlmount.c                   | 3 ++-
 drivers/mtd/redboot.c                     | 4 ++--
 drivers/mtd/ssfdc.c                       | 4 ++--
 drivers/mtd/tests/mtd_oobtest.c           | 2 +-
 drivers/mtd/tests/mtd_pagetest.c          | 2 +-
 drivers/mtd/tests/mtd_readtest.c          | 2 +-
 drivers/mtd/tests/mtd_speedtest.c         | 2 +-
 drivers/mtd/tests/mtd_stresstest.c        | 2 +-
 drivers/mtd/tests/mtd_subpagetest.c       | 2 +-
 drivers/mtd/tests/mtd_torturetest.c       | 3 +--
 drivers/mtd/ubi/io.c                      | 2 +-
 fs/jffs2/scan.c                           | 2 +-
 fs/logfs/dev_mtd.c                        | 4 ++--
 include/linux/mtd/mtd.h                   | 7 ++++++-
 21 files changed, 34 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c
index 011bddbf073f..b34438e026be 100644
--- a/arch/cris/arch-v32/drivers/axisflashmap.c
+++ b/arch/cris/arch-v32/drivers/axisflashmap.c
@@ -404,8 +404,7 @@ static int __init init_axis_flash(void)
 		 */
 		int blockstat;
 		do {
-			blockstat = main_mtd->block_isbad(main_mtd,
-				ptable_sector);
+			blockstat = mtd_block_isbad(main_mtd, ptable_sector);
 			if (blockstat < 0)
 				ptable_sector = 0; /* read error */
 			else if (blockstat)
diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c
index 9bfbca5d88d6..38519401196b 100644
--- a/drivers/mtd/inftlmount.c
+++ b/drivers/mtd/inftlmount.c
@@ -306,7 +306,8 @@ static int find_boot_record(struct INFTLrecord *inftl)
 			/* If any of the physical eraseblocks are bad, don't
 			   use the unit. */
 			for (physblock = 0; physblock < inftl->EraseSize; physblock += inftl->mbd.mtd->erasesize) {
-				if (inftl->mbd.mtd->block_isbad(inftl->mbd.mtd, i * inftl->EraseSize + physblock))
+				if (mtd_block_isbad(inftl->mbd.mtd,
+						    i * inftl->EraseSize + physblock))
 					inftl->PUtable[i] = BLOCK_RESERVED;
 			}
 		}
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 6d598b23cf3a..a499bf7a8214 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -886,7 +886,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (!mtd->block_isbad)
 			ret = -EOPNOTSUPP;
 		else
-			return mtd->block_isbad(mtd, offs);
+			return mtd_block_isbad(mtd, offs);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 4b7f825ce015..d0db5e61d5af 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -667,7 +667,7 @@ static int concat_block_isbad(struct mtd_info *mtd, loff_t ofs)
 			continue;
 		}
 
-		res = subdev->block_isbad(subdev, ofs);
+		res = mtd_block_isbad(subdev, ofs);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 7be2018ffbcc..bc43d2f7272c 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -170,7 +170,7 @@ static void mtdoops_workfunc_erase(struct work_struct *work)
 	}
 
 	while (mtd->block_isbad) {
-		ret = mtd->block_isbad(mtd, cxt->nextpage * record_size);
+		ret = mtd_block_isbad(mtd, cxt->nextpage * record_size);
 		if (!ret)
 			break;
 		if (ret < 0) {
@@ -254,7 +254,7 @@ static void find_next_position(struct mtdoops_context *cxt)
 
 	for (page = 0; page < cxt->oops_pages; page++) {
 		if (mtd->block_isbad &&
-		    mtd->block_isbad(mtd, page * record_size))
+		    mtd_block_isbad(mtd, page * record_size))
 			continue;
 		/* Assume the page is used */
 		mark_page_used(cxt, page);
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 8610750852ac..0e7dfc79d337 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -322,7 +322,7 @@ static int part_block_isbad(struct mtd_info *mtd, loff_t ofs)
 	if (ofs >= mtd->size)
 		return -EINVAL;
 	ofs += part->offset;
-	return part->master->block_isbad(part->master, ofs);
+	return mtd_block_isbad(part->master, ofs);
 }
 
 static int part_block_markbad(struct mtd_info *mtd, loff_t ofs)
@@ -553,8 +553,7 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 		uint64_t offs = 0;
 
 		while (offs < slave->mtd.size) {
-			if (master->block_isbad(master,
-						offs + slave->offset))
+			if (mtd_block_isbad(master, offs + slave->offset))
 				slave->mtd.ecc_stats.badblocks++;
 			offs += slave->mtd.erasesize;
 		}
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index cb794e761012..87aa0a6323c3 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -343,7 +343,7 @@ static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
 	offset = mtdswap_eb_offset(d, eb);
 
 	/* Check first if the block is bad. */
-	if (d->mtd->block_isbad && d->mtd->block_isbad(d->mtd, offset))
+	if (d->mtd->block_isbad && mtd_block_isbad(d->mtd, offset))
 		return MTDSWAP_SCANNED_BAD;
 
 	ops.ooblen = 2 * d->mtd->ecclayout->oobavail;
@@ -1061,7 +1061,7 @@ static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
 
 	if (mtd->block_isbad)
 		for (offset = 0; offset < size; offset += mtd->erasesize)
-			if (mtd->block_isbad(mtd, offset))
+			if (mtd_block_isbad(mtd, offset))
 				badcnt++;
 
 	return badcnt;
diff --git a/drivers/mtd/nftlmount.c b/drivers/mtd/nftlmount.c
index b068dc8a3666..156af9f87961 100644
--- a/drivers/mtd/nftlmount.c
+++ b/drivers/mtd/nftlmount.c
@@ -242,7 +242,8 @@ The new DiskOnChip driver already scanned the bad block table.  Just query it.
 			if (buf[i & (SECTORSIZE - 1)] != 0xff)
 				nftl->ReplUnitTable[i] = BLOCK_RESERVED;
 #endif
-			if (nftl->mbd.mtd->block_isbad(nftl->mbd.mtd, i * nftl->EraseSize))
+			if (mtd_block_isbad(nftl->mbd.mtd,
+					    i * nftl->EraseSize))
 				nftl->ReplUnitTable[i] = BLOCK_RESERVED;
 		}
 
diff --git a/drivers/mtd/redboot.c b/drivers/mtd/redboot.c
index 623d9b86d0d9..09bb81ea3a7e 100644
--- a/drivers/mtd/redboot.c
+++ b/drivers/mtd/redboot.c
@@ -79,7 +79,7 @@ static int parse_redboot_partitions(struct mtd_info *master,
 	if ( directory < 0 ) {
 		offset = master->size + directory * master->erasesize;
 		while (master->block_isbad && 
-		       master->block_isbad(master, offset)) {
+		       mtd_block_isbad(master, offset)) {
 			if (!offset) {
 			nogood:
 				printk(KERN_NOTICE "Failed to find a non-bad block to check for RedBoot partition table\n");
@@ -90,7 +90,7 @@ static int parse_redboot_partitions(struct mtd_info *master,
 	} else {
 		offset = directory * master->erasesize;
 		while (master->block_isbad && 
-		       master->block_isbad(master, offset)) {
+		       mtd_block_isbad(master, offset)) {
 			offset += master->erasesize;
 			if (offset == master->size)
 				goto nogood;
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index 0e6881338357..ab2a52a039c3 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -122,7 +122,7 @@ static int get_valid_cis_sector(struct mtd_info *mtd)
 	 * is not SSFDC formatted
 	 */
 	for (k = 0, offset = 0; k < 4; k++, offset += mtd->erasesize) {
-		if (!mtd->block_isbad(mtd, offset)) {
+		if (mtd_block_isbad(mtd, offset)) {
 			ret = mtd_read(mtd, offset, SECTOR_SIZE, &retlen,
 				       sect_buf);
 
@@ -255,7 +255,7 @@ static int build_logical_block_map(struct ssfdcr_record *ssfdc)
 	for (phys_block = ssfdc->cis_block + 1; phys_block < ssfdc->map_len;
 			phys_block++) {
 		offset = (unsigned long)phys_block * ssfdc->erase_size;
-		if (mtd->block_isbad(mtd, offset))
+		if (mtd_block_isbad(mtd, offset))
 			continue;	/* skip bad blocks */
 
 		ret = read_raw_oob(mtd, offset, oob_buf);
diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c
index 81113885e086..ed9b62827f1b 100644
--- a/drivers/mtd/tests/mtd_oobtest.c
+++ b/drivers/mtd/tests/mtd_oobtest.c
@@ -329,7 +329,7 @@ static int is_block_bad(int ebnum)
 	int ret;
 	loff_t addr = ebnum * mtd->erasesize;
 
-	ret = mtd->block_isbad(mtd, addr);
+	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
 		printk(PRINT_PREF "block %d is bad\n", ebnum);
 	return ret;
diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c
index 83da97e54f97..8024eaf4c1ac 100644
--- a/drivers/mtd/tests/mtd_pagetest.c
+++ b/drivers/mtd/tests/mtd_pagetest.c
@@ -469,7 +469,7 @@ static int is_block_bad(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 	int ret;
 
-	ret = mtd->block_isbad(mtd, addr);
+	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
 		printk(PRINT_PREF "block %d is bad\n", ebnum);
 	return ret;
diff --git a/drivers/mtd/tests/mtd_readtest.c b/drivers/mtd/tests/mtd_readtest.c
index 5eaeada84284..ad5fd0df86ee 100644
--- a/drivers/mtd/tests/mtd_readtest.c
+++ b/drivers/mtd/tests/mtd_readtest.c
@@ -132,7 +132,7 @@ static int is_block_bad(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 	int ret;
 
-	ret = mtd->block_isbad(mtd, addr);
+	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
 		printk(PRINT_PREF "block %d is bad\n", ebnum);
 	return ret;
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
index c7b18e189082..ecb287847505 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/mtd_speedtest.c
@@ -296,7 +296,7 @@ static int is_block_bad(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 	int ret;
 
-	ret = mtd->block_isbad(mtd, addr);
+	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
 		printk(PRINT_PREF "block %d is bad\n", ebnum);
 	return ret;
diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c
index f8aac4b7e59a..4789c0ee3e9a 100644
--- a/drivers/mtd/tests/mtd_stresstest.c
+++ b/drivers/mtd/tests/mtd_stresstest.c
@@ -132,7 +132,7 @@ static int is_block_bad(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 	int ret;
 
-	ret = mtd->block_isbad(mtd, addr);
+	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
 		printk(PRINT_PREF "block %d is bad\n", ebnum);
 	return ret;
diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c
index b90c01036b49..4b873d49fe6a 100644
--- a/drivers/mtd/tests/mtd_subpagetest.c
+++ b/drivers/mtd/tests/mtd_subpagetest.c
@@ -344,7 +344,7 @@ static int is_block_bad(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 	int ret;
 
-	ret = mtd->block_isbad(mtd, addr);
+	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
 		printk(PRINT_PREF "block %d is bad\n", ebnum);
 	return ret;
diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c
index dd34a519fa7a..30c4ed9855ec 100644
--- a/drivers/mtd/tests/mtd_torturetest.c
+++ b/drivers/mtd/tests/mtd_torturetest.c
@@ -292,8 +292,7 @@ static int __init tort_init(void)
 	memset(&bad_ebs[0], 0, sizeof(int) * ebcnt);
 	if (mtd->block_isbad) {
 		for (i = eb; i < eb + ebcnt; i++) {
-			err = mtd->block_isbad(mtd,
-					       (loff_t)i * mtd->erasesize);
+			err = mtd_block_isbad(mtd, (loff_t)i * mtd->erasesize);
 
 			if (err < 0) {
 				printk(PRINT_PREF "block_isbad() returned %d "
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 8d832fc9e9e4..a1b683ad639e 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -634,7 +634,7 @@ int ubi_io_is_bad(const struct ubi_device *ubi, int pnum)
 	if (ubi->bad_allowed) {
 		int ret;
 
-		ret = mtd->block_isbad(mtd, (loff_t)pnum * ubi->peb_size);
+		ret = mtd_block_isbad(mtd, (loff_t)pnum * ubi->peb_size);
 		if (ret < 0)
 			ubi_err("error %d while checking if PEB %d is bad",
 				ret, pnum);
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 72f3960f44a9..83e1665e2574 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -455,7 +455,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
 	if (jffs2_cleanmarker_oob(c)) {
 		int ret;
 
-		if (c->mtd->block_isbad(c->mtd, jeb->offset))
+		if (mtd_block_isbad(c->mtd, jeb->offset))
 			return BLK_STATE_BADBLOCK;
 
 		ret = jffs2_check_nand_cleanmarker(c, jeb);
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 0ca7a07db6c1..136c7360a9b6 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -157,7 +157,7 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs)
 		return NULL;
 
 	*ofs = 0;
-	while (mtd->block_isbad(mtd, *ofs)) {
+	while (mtd_block_isbad(mtd, *ofs)) {
 		*ofs += mtd->erasesize;
 		if (*ofs >= mtd->size)
 			return NULL;
@@ -177,7 +177,7 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs)
 		return NULL;
 
 	*ofs = mtd->size - mtd->erasesize;
-	while (mtd->block_isbad(mtd, *ofs)) {
+	while (mtd_block_isbad(mtd, *ofs)) {
 		*ofs -= mtd->erasesize;
 		if (*ofs <= 0)
 			return NULL;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index d6b4aa177505..a307ad093a54 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -210,6 +210,7 @@ struct mtd_info {
 	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
+	int (*block_isbad) (struct mtd_info *mtd, loff_t ofs);
 	int (*suspend) (struct mtd_info *mtd);
 	void (*resume) (struct mtd_info *mtd);
 
@@ -219,7 +220,6 @@ struct mtd_info {
 	struct backing_dev_info *backing_dev_info;
 
 	/* Bad block management functions */
-	int (*block_isbad) (struct mtd_info *mtd, loff_t ofs);
 	int (*block_markbad) (struct mtd_info *mtd, loff_t ofs);
 
 	struct notifier_block reboot_notifier;  /* default mode before reboot */
@@ -406,6 +406,11 @@ static inline void mtd_resume(struct mtd_info *mtd)
 	mtd->resume(mtd);
 }
 
+static inline int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
+{
+	return mtd->block_isbad(mtd, ofs);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 5942ddbc500d1c9b75e571b656be97f65b26adfe Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:37:38 +0200
Subject: mtd: introduce mtd_block_markbad interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/inftlmount.c           | 2 +-
 drivers/mtd/mtdchar.c              | 2 +-
 drivers/mtd/mtdconcat.c            | 2 +-
 drivers/mtd/mtdoops.c              | 2 +-
 drivers/mtd/mtdpart.c              | 2 +-
 drivers/mtd/mtdswap.c              | 2 +-
 drivers/mtd/nand/nandsim.c         | 2 +-
 drivers/mtd/nftlmount.c            | 2 +-
 drivers/mtd/onenand/onenand_base.c | 2 +-
 drivers/mtd/ubi/io.c               | 2 +-
 fs/jffs2/wbuf.c                    | 2 +-
 include/linux/mtd/mtd.h            | 9 ++++++---
 12 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c
index 38519401196b..4adc0374fb6b 100644
--- a/drivers/mtd/inftlmount.c
+++ b/drivers/mtd/inftlmount.c
@@ -424,7 +424,7 @@ int INFTL_formatblock(struct INFTLrecord *inftl, int block)
 fail:
 	/* could not format, update the bad block table (caller is responsible
 	   for setting the PUtable to BLOCK_RESERVED on failure) */
-	inftl->mbd.mtd->block_markbad(inftl->mbd.mtd, instr->addr);
+	mtd_block_markbad(inftl->mbd.mtd, instr->addr);
 	return -1;
 }
 
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index a499bf7a8214..15a3f6224be4 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -899,7 +899,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (!mtd->block_markbad)
 			ret = -EOPNOTSUPP;
 		else
-			return mtd->block_markbad(mtd, offs);
+			return mtd_block_markbad(mtd, offs);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index d0db5e61d5af..f694b51e7856 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -693,7 +693,7 @@ static int concat_block_markbad(struct mtd_info *mtd, loff_t ofs)
 			continue;
 		}
 
-		err = subdev->block_markbad(subdev, ofs);
+		err = mtd_block_markbad(subdev, ofs);
 		if (!err)
 			mtd->ecc_stats.badblocks++;
 		break;
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index bc43d2f7272c..69532a34e563 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -200,7 +200,7 @@ badblock:
 	}
 
 	if (mtd->block_markbad && ret == -EIO) {
-		ret = mtd->block_markbad(mtd, cxt->nextpage * record_size);
+		ret = mtd_block_markbad(mtd, cxt->nextpage * record_size);
 		if (ret < 0) {
 			printk(KERN_ERR "mtdoops: block_markbad failed, aborting\n");
 			return;
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 0e7dfc79d337..a3d44c3416b4 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -335,7 +335,7 @@ static int part_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	if (ofs >= mtd->size)
 		return -EINVAL;
 	ofs += part->offset;
-	res = part->master->block_markbad(part->master, ofs);
+	res = mtd_block_markbad(part->master, ofs);
 	if (!res)
 		mtd->ecc_stats.badblocks++;
 	return res;
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 87aa0a6323c3..4441c08b082d 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -279,7 +279,7 @@ static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb)
 
 	offset = mtdswap_eb_offset(d, eb);
 	dev_warn(d->dev, "Marking bad block at %08llx\n", offset);
-	ret = d->mtd->block_markbad(d->mtd, offset);
+	ret = mtd_block_markbad(d->mtd, offset);
 
 	if (ret) {
 		dev_warn(d->dev, "Mark block bad failed for block at %08llx "
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 34c03be77301..261f478f8cc3 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -737,7 +737,7 @@ static int parse_badblocks(struct nandsim *ns, struct mtd_info *mtd)
 			return -EINVAL;
 		}
 		offset = erase_block_no * ns->geom.secsz;
-		if (mtd->block_markbad(mtd, offset)) {
+		if (mtd_block_markbad(mtd, offset)) {
 			NS_ERR("invalid badblocks.\n");
 			return -EINVAL;
 		}
diff --git a/drivers/mtd/nftlmount.c b/drivers/mtd/nftlmount.c
index 156af9f87961..51b9d6af307f 100644
--- a/drivers/mtd/nftlmount.c
+++ b/drivers/mtd/nftlmount.c
@@ -356,7 +356,7 @@ int NFTL_formatblock(struct NFTLrecord *nftl, int block)
 fail:
 	/* could not format, update the bad block table (caller is responsible
 	   for setting the ReplUnitTable to BLOCK_RESERVED on failure) */
-	nftl->mbd.mtd->block_markbad(nftl->mbd.mtd, instr->addr);
+	mtd_block_markbad(nftl->mbd.mtd, instr->addr);
 	return -1;
 }
 
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index a8394730b4b6..dd278a284136 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -2645,7 +2645,7 @@ static int onenand_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	}
 
 	onenand_get_device(mtd, FL_WRITING);
-	ret = this->block_markbad(mtd, ofs);
+	ret = mtd_block_markbad(mtd, ofs);
 	onenand_release_device(mtd);
 	return ret;
 }
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index a1b683ad639e..5cde4e5ca3e5 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -669,7 +669,7 @@ int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum)
 	if (!ubi->bad_allowed)
 		return 0;
 
-	err = mtd->block_markbad(mtd, (loff_t)pnum * ubi->peb_size);
+	err = mtd_block_markbad(mtd, (loff_t)pnum * ubi->peb_size);
 	if (err)
 		ubi_err("cannot mark PEB %d bad, error %d", pnum, err);
 	return err;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index eae5be483682..fd96b757433f 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1134,7 +1134,7 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock *
 		return 1; // What else can we do?
 
 	printk(KERN_WARNING "JFFS2: marking eraseblock at %08x\n as bad", bad_offset);
-	ret = c->mtd->block_markbad(c->mtd, bad_offset);
+	ret = mtd_block_markbad(c->mtd, bad_offset);
 
 	if (ret) {
 		D1(printk(KERN_WARNING "jffs2_write_nand_badblock(): Write failed for block at %08x: error %d\n", jeb->offset, ret));
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index a307ad093a54..64aa54fba2df 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -211,6 +211,7 @@ struct mtd_info {
 	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*block_isbad) (struct mtd_info *mtd, loff_t ofs);
+	int (*block_markbad) (struct mtd_info *mtd, loff_t ofs);
 	int (*suspend) (struct mtd_info *mtd);
 	void (*resume) (struct mtd_info *mtd);
 
@@ -219,9 +220,6 @@ struct mtd_info {
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	/* Bad block management functions */
-	int (*block_markbad) (struct mtd_info *mtd, loff_t ofs);
-
 	struct notifier_block reboot_notifier;  /* default mode before reboot */
 
 	/* ECC status information */
@@ -411,6 +409,11 @@ static inline int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
 	return mtd->block_isbad(mtd, ofs);
 }
 
+static inline int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs)
+{
+	return mtd->block_markbad(mtd, ofs);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From a88d2dc672192247a6f42c82d558db9bf9258bed Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 29 Dec 2011 11:06:10 +0200
Subject: mtd: move mtd->{get,put}_device functions up

Move the 'get_device()' and 'put_device()' functions up within
'struct mtd_info' to make them be close to other functions.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/mtd.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 64aa54fba2df..8ae37e9d45de 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -214,6 +214,12 @@ struct mtd_info {
 	int (*block_markbad) (struct mtd_info *mtd, loff_t ofs);
 	int (*suspend) (struct mtd_info *mtd);
 	void (*resume) (struct mtd_info *mtd);
+	/*
+	 * If the driver is something smart, like UBI, it may need to maintain
+	 * its own reference counting. The below functions are only for driver.
+	 */
+	int (*get_device) (struct mtd_info *mtd);
+	void (*put_device) (struct mtd_info *mtd);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
@@ -232,13 +238,6 @@ struct mtd_info {
 	struct module *owner;
 	struct device dev;
 	int usecount;
-
-	/* If the driver is something smart, like UBI, it may need to maintain
-	 * its own reference counting. The below functions are only for driver.
-	 * The driver may register its callbacks. These callbacks are not
-	 * supposed to be called by MTD users */
-	int (*get_device) (struct mtd_info *mtd);
-	void (*put_device) (struct mtd_info *mtd);
 };
 
 /*
-- 
cgit v1.2.3


From 9cf075f8656524abc44ad3ff2ec3834fe76f186f Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 28 Dec 2011 18:14:49 +0200
Subject: mtd: always initialize retlen to zero

Make sure that the retlen is set to 0 in case of error. This harmonizes
drivers - some set it to 0 in some error cases and do not write anything
in other error cases. Now we can do this consistently for all drivers.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/mtd.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 8ae37e9d45de..a09077aca45b 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -258,6 +258,7 @@ static inline int mtd_erase(struct mtd_info *mtd, struct erase_info *instr)
 static inline int mtd_point(struct mtd_info *mtd, loff_t from, size_t len,
 			    size_t *retlen, void **virt, resource_size_t *phys)
 {
+	*retlen = 0;
 	return mtd->point(mtd, from, len, retlen, virt, phys);
 }
 
@@ -289,6 +290,7 @@ static inline int mtd_read(struct mtd_info *mtd, loff_t from, size_t len,
 static inline int mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
 			    size_t *retlen, const u_char *buf)
 {
+	*retlen = 0;
 	return mtd->write(mtd, to, len, retlen, buf);
 }
 
@@ -302,18 +304,21 @@ static inline int mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
 static inline int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
 				  size_t *retlen, const u_char *buf)
 {
+	*retlen = 0;
 	return mtd->panic_write(mtd, to, len, retlen, buf);
 }
 
 static inline int mtd_read_oob(struct mtd_info *mtd, loff_t from,
 			       struct mtd_oob_ops *ops)
 {
+	ops->retlen = ops->oobretlen = 0;
 	return mtd->read_oob(mtd, from, ops);
 }
 
 static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to,
 				struct mtd_oob_ops *ops)
 {
+	ops->retlen = ops->oobretlen = 0;
 	return mtd->write_oob(mtd, to, ops);
 }
 
@@ -332,6 +337,7 @@ static inline int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
 					 size_t len, size_t *retlen,
 					 u_char *buf)
 {
+	*retlen = 0;
 	return mtd->read_fact_prot_reg(mtd, from, len, retlen, buf);
 }
 
@@ -346,6 +352,7 @@ static inline int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from,
 					 size_t len, size_t *retlen,
 					 u_char *buf)
 {
+	*retlen = 0;
 	return mtd->read_user_prot_reg(mtd, from, len, retlen, buf);
 }
 
@@ -353,6 +360,7 @@ static inline int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to,
 					  size_t len, size_t *retlen,
 					  u_char *buf)
 {
+	*retlen = 0;
 	return mtd->write_user_prot_reg(mtd, to, len, retlen, buf);
 }
 
@@ -369,6 +377,7 @@ static inline int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 static inline int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 			     unsigned long count, loff_t to, size_t *retlen)
 {
+	*retlen = 0;
 	return mtd->writev(mtd, vecs, count, to, retlen);
 }
 
-- 
cgit v1.2.3


From d5de20a9a1c5ad68c07e017d11f6dbb5e289750c Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 29 Dec 2011 18:00:29 +0200
Subject: mtd: kill dev_to_mtd helper

... since it is not needed because the generic 'dev_get_drvdata()' can be
used instead.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Acked-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdcore.c   | 25 +++++++++++++------------
 include/linux/mtd/mtd.h |  5 -----
 2 files changed, 13 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 376fbfdb09aa..8bea2d0fdb20 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -107,7 +107,8 @@ static LIST_HEAD(mtd_notifiers);
  */
 static void mtd_release(struct device *dev)
 {
-	dev_t index = MTD_DEVT(dev_to_mtd(dev)->index);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
+	dev_t index = MTD_DEVT(mtd->index);
 
 	/* remove /dev/mtdXro node if needed */
 	if (index)
@@ -116,7 +117,7 @@ static void mtd_release(struct device *dev)
 
 static int mtd_cls_suspend(struct device *dev, pm_message_t state)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	if (mtd && mtd->suspend)
 		return mtd_suspend(mtd);
@@ -126,7 +127,7 @@ static int mtd_cls_suspend(struct device *dev, pm_message_t state)
 
 static int mtd_cls_resume(struct device *dev)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 	
 	if (mtd && mtd->resume)
 		mtd_resume(mtd);
@@ -136,7 +137,7 @@ static int mtd_cls_resume(struct device *dev)
 static ssize_t mtd_type_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 	char *type;
 
 	switch (mtd->type) {
@@ -172,7 +173,7 @@ static DEVICE_ATTR(type, S_IRUGO, mtd_type_show, NULL);
 static ssize_t mtd_flags_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	return snprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)mtd->flags);
 
@@ -182,7 +183,7 @@ static DEVICE_ATTR(flags, S_IRUGO, mtd_flags_show, NULL);
 static ssize_t mtd_size_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	return snprintf(buf, PAGE_SIZE, "%llu\n",
 		(unsigned long long)mtd->size);
@@ -193,7 +194,7 @@ static DEVICE_ATTR(size, S_IRUGO, mtd_size_show, NULL);
 static ssize_t mtd_erasesize_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	return snprintf(buf, PAGE_SIZE, "%lu\n", (unsigned long)mtd->erasesize);
 
@@ -203,7 +204,7 @@ static DEVICE_ATTR(erasesize, S_IRUGO, mtd_erasesize_show, NULL);
 static ssize_t mtd_writesize_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	return snprintf(buf, PAGE_SIZE, "%lu\n", (unsigned long)mtd->writesize);
 
@@ -213,7 +214,7 @@ static DEVICE_ATTR(writesize, S_IRUGO, mtd_writesize_show, NULL);
 static ssize_t mtd_subpagesize_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 	unsigned int subpagesize = mtd->writesize >> mtd->subpage_sft;
 
 	return snprintf(buf, PAGE_SIZE, "%u\n", subpagesize);
@@ -224,7 +225,7 @@ static DEVICE_ATTR(subpagesize, S_IRUGO, mtd_subpagesize_show, NULL);
 static ssize_t mtd_oobsize_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	return snprintf(buf, PAGE_SIZE, "%lu\n", (unsigned long)mtd->oobsize);
 
@@ -234,7 +235,7 @@ static DEVICE_ATTR(oobsize, S_IRUGO, mtd_oobsize_show, NULL);
 static ssize_t mtd_numeraseregions_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	return snprintf(buf, PAGE_SIZE, "%u\n", mtd->numeraseregions);
 
@@ -245,7 +246,7 @@ static DEVICE_ATTR(numeraseregions, S_IRUGO, mtd_numeraseregions_show,
 static ssize_t mtd_name_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	return snprintf(buf, PAGE_SIZE, "%s\n", mtd->name);
 
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index a09077aca45b..cb33cc12e18f 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -422,11 +422,6 @@ static inline int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	return mtd->block_markbad(mtd, ofs);
 }
 
-static inline struct mtd_info *dev_to_mtd(struct device *dev)
-{
-	return dev ? dev_get_drvdata(dev) : NULL;
-}
-
 static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd)
 {
 	if (mtd->erasesize_shift)
-- 
cgit v1.2.3


From bac972777403f810d83062dd0d0303746e466ece Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 15:46:40 +0200
Subject: mtd: remove unused default_mtd_readv prototype

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/mtd.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index cb33cc12e18f..671c89289fc3 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -485,9 +485,6 @@ extern int unregister_mtd_user (struct mtd_notifier *old);
 int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 		       unsigned long count, loff_t to, size_t *retlen);
 
-int default_mtd_readv(struct mtd_info *mtd, struct kvec *vecs,
-		      unsigned long count, loff_t from, size_t *retlen);
-
 void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size);
 
 void mtd_erase_callback(struct erase_info *instr);
-- 
cgit v1.2.3


From 52b020317f65114eeba2ee2cfa70a51a286f1d8a Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 15:57:25 +0200
Subject: mtd: clean-up the default_mtd_writev function

1. Teach 'mtd_write()' function to return '-EROFS' if the write method
   is undefined, and remove the corresponding check from
   'default_mtd_writev()'.
2. Do not test 'retlen' for NULL - it cannot be NULL.
3. Few minor coding stile clean-ups.
4. Add a kerneldoc comment

Additionally, minor fixes to the kerneldoc comments of the neighbor function.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdcore.c   | 41 ++++++++++++++++++++++-------------------
 include/linux/mtd/mtd.h |  2 ++
 2 files changed, 24 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 8bea2d0fdb20..85a3f197e7f0 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -683,10 +683,17 @@ void __put_mtd_device(struct mtd_info *mtd)
 	module_put(mtd->owner);
 }
 
-/* default_mtd_writev - default mtd writev method for MTD devices that
- *			don't implement their own
+/*
+ * default_mtd_writev - the default writev method
+ * @mtd: mtd device description object pointer
+ * @vecs: the vectors to write
+ * @count: count of vectors in @vecs
+ * @to: the MTD device offset to write to
+ * @retlen: on exit contains the count of bytes written to the MTD device.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
  */
-
 int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 		       unsigned long count, loff_t to, size_t *retlen)
 {
@@ -694,28 +701,24 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 	size_t totlen = 0, thislen;
 	int ret = 0;
 
-	if(!mtd->write) {
-		ret = -EROFS;
-	} else {
-		for (i=0; i<count; i++) {
-			if (!vecs[i].iov_len)
-				continue;
-			ret = mtd_write(mtd, to, vecs[i].iov_len, &thislen,
-					vecs[i].iov_base);
-			totlen += thislen;
-			if (ret || thislen != vecs[i].iov_len)
-				break;
-			to += vecs[i].iov_len;
-		}
+	for (i = 0; i < count; i++) {
+		if (!vecs[i].iov_len)
+			continue;
+		ret = mtd_write(mtd, to, vecs[i].iov_len, &thislen,
+				vecs[i].iov_base);
+		totlen += thislen;
+		if (ret || thislen != vecs[i].iov_len)
+			break;
+		to += vecs[i].iov_len;
 	}
-	if (retlen)
-		*retlen = totlen;
+	*retlen = totlen;
 	return ret;
 }
 
 /**
  * mtd_kmalloc_up_to - allocate a contiguous buffer up to the specified size
- * @size: A pointer to the ideal or maximum size of the allocation. Points
+ * @mtd: mtd device description object pointer
+ * @size: a pointer to the ideal or maximum size of the allocation, points
  *        to the actual allocation size on success.
  *
  * This routine attempts to allocate a contiguous kernel buffer up to
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 671c89289fc3..f0dd5a305b89 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -291,6 +291,8 @@ static inline int mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
 			    size_t *retlen, const u_char *buf)
 {
 	*retlen = 0;
+	if (!mtd->write)
+		return -EROFS;
 	return mtd->write(mtd, to, len, retlen, buf);
 }
 
-- 
cgit v1.2.3


From fc002e3c320602d0e206f607aca0460540d7637a Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 28 Dec 2011 18:35:07 +0200
Subject: mtd: introduce mtd_has_oob helper

We are working in the direction of making sure that MTD clients to not
use 'mtd->func' pointers directly. In some places we want to know if
OOB operations are supported by an MTD device. Introduce 'mtd_has_oob()'
helper for these purposes.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 2 +-
 drivers/mtd/sm_ftl.c    | 4 ++--
 include/linux/mtd/mtd.h | 5 +++++
 3 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 83b0c82e9c94..c501eec17b38 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -1004,7 +1004,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 			break;
 
 		case MTD_FILE_MODE_RAW:
-			if (!mtd->read_oob || !mtd->write_oob)
+			if (!mtd_has_oob(mtd))
 				return -EOPNOTSUPP;
 			mfi->mode = arg;
 
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index 4ec2af7fb845..072ed5970e2f 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -645,8 +645,8 @@ int sm_get_media_info(struct sm_ftl *ftl, struct mtd_info *mtd)
 	if (!ftl->smallpagenand && mtd->oobsize < SM_OOB_SIZE)
 		return -ENODEV;
 
-	/* We use these functions for IO */
-	if (!mtd->read_oob || !mtd->write_oob)
+	/* We use OOB */
+	if (!mtd_has_oob(mtd))
 		return -ENODEV;
 
 	/* Find geometry information */
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index f0dd5a305b89..478701566ba7 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -454,6 +454,11 @@ static inline uint32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd)
 	return do_div(sz, mtd->writesize);
 }
 
+static inline int mtd_has_oob(const struct mtd_info *mtd)
+{
+	return mtd->read_oob && mtd->write_oob;
+}
+
 	/* Kernel-side ioctl definitions */
 
 struct mtd_partition;
-- 
cgit v1.2.3


From 10934478e44d9a5a7b16dadd89094fb608cf101e Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 28 Dec 2011 15:55:42 +0200
Subject: mtd: do use mtd->point directly

Remove direct usage of the "mtd->point" function pointer. Instead,
test the mtd_point() return code for '-EOPNOTSUPP'.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 fs/jffs2/erase.c        |  9 ++++-----
 fs/jffs2/readinode.c    | 18 ++++++++----------
 fs/jffs2/scan.c         |  2 +-
 include/linux/mtd/mtd.h |  2 ++
 4 files changed, 15 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index c59d642cade2..a01cdad6aad1 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -336,12 +336,11 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
 	uint32_t ofs;
 	size_t retlen;
 	int ret = -EIO;
+	unsigned long *wordebuf;
 
-	if (c->mtd->point) {
-		unsigned long *wordebuf;
-
-		ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen,
-				&ebuf, NULL);
+	ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen,
+			&ebuf, NULL);
+	if (ret != -EOPNOTSUPP) {
 		if (ret) {
 			D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
 			goto do_flash_read;
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index fca2f84e1add..3093ac4fb24c 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -62,17 +62,15 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
 #ifndef __ECOS
 	/* TODO: instead, incapsulate point() stuff to jffs2_flash_read(),
 	 * adding and jffs2_flash_read_end() interface. */
-	if (c->mtd->point) {
-		err = mtd_point(c->mtd, ofs, len, &retlen, (void **)&buffer,
-				NULL);
-		if (!err && retlen < len) {
-			JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
-			mtd_unpoint(c->mtd, ofs, retlen);
-		} else if (err)
+	err = mtd_point(c->mtd, ofs, len, &retlen, (void **)&buffer, NULL);
+	if (!err && retlen < len) {
+		JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
+		mtd_unpoint(c->mtd, ofs, retlen);
+	} else if (err) {
+		if (err != -EOPNOTSUPP)
 			JFFS2_WARNING("MTD point failed: error code %d.\n", err);
-		else
-			pointed = 1; /* succefully pointed to device */
-	}
+	} else
+		pointed = 1; /* succefully pointed to device */
 #endif
 
 	if (!pointed) {
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 83e1665e2574..f99464833bb2 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -105,7 +105,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
 			mtd_unpoint(c->mtd, 0, pointlen);
 			flashbuf = NULL;
 		}
-		if (ret)
+		if (ret && ret != -EOPNOTSUPP)
 			D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
 	}
 #endif
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 478701566ba7..b355a83e7cc2 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -259,6 +259,8 @@ static inline int mtd_point(struct mtd_info *mtd, loff_t from, size_t len,
 			    size_t *retlen, void **virt, resource_size_t *phys)
 {
 	*retlen = 0;
+	if (!mtd->point)
+		return -EOPNOTSUPP;
 	return mtd->point(mtd, from, len, retlen, virt, phys);
 }
 
-- 
cgit v1.2.3


From cd621274b0ec747db8dedbf857624c067f481976 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 14:31:57 +0200
Subject: mtd: do not use mtd->get_unmapped_area directly

Remove direct usage of mtd->get_unmapped_area. Instead, just call
'mtd_get_unmapped_area()' which will return -EOPNOTSUPP if the function
is not implemented and test for this error code.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 26 +++++++++++---------------
 drivers/mtd/mtdconcat.c |  6 +-----
 include/linux/mtd/mtd.h |  2 ++
 3 files changed, 14 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index c501eec17b38..55f0961103a7 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -1124,25 +1124,21 @@ static unsigned long mtdchar_get_unmapped_area(struct file *file,
 {
 	struct mtd_file_info *mfi = file->private_data;
 	struct mtd_info *mtd = mfi->mtd;
+	unsigned long offset;
+	int ret;
 
-	if (mtd->get_unmapped_area) {
-		unsigned long offset;
-
-		if (addr != 0)
-			return (unsigned long) -EINVAL;
-
-		if (len > mtd->size || pgoff >= (mtd->size >> PAGE_SHIFT))
-			return (unsigned long) -EINVAL;
+	if (addr != 0)
+		return (unsigned long) -EINVAL;
 
-		offset = pgoff << PAGE_SHIFT;
-		if (offset > mtd->size - len)
-			return (unsigned long) -EINVAL;
+	if (len > mtd->size || pgoff >= (mtd->size >> PAGE_SHIFT))
+		return (unsigned long) -EINVAL;
 
-		return mtd_get_unmapped_area(mtd, len, offset, flags);
-	}
+	offset = pgoff << PAGE_SHIFT;
+	if (offset > mtd->size - len)
+		return (unsigned long) -EINVAL;
 
-	/* can't map directly */
-	return (unsigned long) -ENOSYS;
+	ret = mtd_get_unmapped_area(mtd, len, offset, flags);
+	return ret == -EOPNOTSUPP ? -ENOSYS : ret;
 }
 #endif
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index f694b51e7856..9119f76f87ff 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -726,11 +726,7 @@ static unsigned long concat_get_unmapped_area(struct mtd_info *mtd,
 		if (offset + len > subdev->size)
 			return (unsigned long) -EINVAL;
 
-		if (subdev->get_unmapped_area)
-			return mtd_get_unmapped_area(subdev, len, offset,
-						     flags);
-
-		break;
+		return mtd_get_unmapped_area(subdev, len, offset, flags);
 	}
 
 	return (unsigned long) -ENOSYS;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index b355a83e7cc2..2c2a92247e5a 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -280,6 +280,8 @@ static inline unsigned long mtd_get_unmapped_area(struct mtd_info *mtd,
 						  unsigned long offset,
 						  unsigned long flags)
 {
+	if (!mtd->get_unmapped_area)
+		return -EOPNOTSUPP;
 	return mtd->get_unmapped_area(mtd, len, offset, flags);
 }
 
-- 
cgit v1.2.3


From 016c1291ce70a22f15f666441a4fd2f0b450375b Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 28 Dec 2011 17:27:18 +0200
Subject: mtd: mtdoops: do not use mtd->panic_write directly

Instead of checking if 'mtd->panic_write' is defined, call 'mtd_panic_write()'
and check the error code - '-EOPNOTSUPP' will be returned if the function is
not defined.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdoops.c   | 17 ++++++++---------
 include/linux/mtd/mtd.h |  2 ++
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 69532a34e563..c8540b8a7fc6 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -221,10 +221,14 @@ static void mtdoops_write(struct mtdoops_context *cxt, int panic)
 	hdr[0] = cxt->nextcount;
 	hdr[1] = MTDOOPS_KERNMSG_MAGIC;
 
-	if (panic)
+	if (panic) {
 		ret = mtd_panic_write(mtd, cxt->nextpage * record_size,
 				      record_size, &retlen, cxt->oops_buf);
-	else
+		if (ret == -EOPNOTSUPP) {
+			printk(KERN_ERR "mtdoops: Cannot write from panic without panic_write\n");
+			return;
+		}
+	} else
 		ret = mtd_write(mtd, cxt->nextpage * record_size,
 				record_size, &retlen, cxt->oops_buf);
 
@@ -330,13 +334,8 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper,
 	memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
 
 	/* Panics must be written immediately */
-	if (reason != KMSG_DUMP_OOPS) {
-		if (!cxt->mtd->panic_write)
-			printk(KERN_ERR "mtdoops: Cannot write from panic without panic_write\n");
-		else
-			mtdoops_write(cxt, 1);
-		return;
-	}
+	if (reason != KMSG_DUMP_OOPS)
+		mtdoops_write(cxt, 1);
 
 	/* For other cases, schedule work to write it "nicely" */
 	schedule_work(&cxt->work_write);
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 2c2a92247e5a..b72964049cdc 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -311,6 +311,8 @@ static inline int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
 				  size_t *retlen, const u_char *buf)
 {
 	*retlen = 0;
+	if (!mtd->panic_write)
+		return -EOPNOTSUPP;
 	return mtd->panic_write(mtd, to, len, retlen, buf);
 }
 
-- 
cgit v1.2.3


From dac2639f9833e858139d7e07f6ee45fb2191a9f2 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 28 Dec 2011 17:50:34 +0200
Subject: mtd: do not use mtd->read_oob directly

Instead of checking whether 'mtd->read_oob' is defined, just call
'mtd_read_oob()' and handle the '-EOPNOTSUPP' error which will be returned
if the function is undefined.

Additionally, make 'mtd_write_oob()' return '-EOPNOTSUPP' if the function
is undefined.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 9 ++-------
 include/linux/mtd/mtd.h | 4 ++++
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 55f0961103a7..287ff0d35848 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -452,13 +452,8 @@ static int mtdchar_readoob(struct file *file, struct mtd_info *mtd,
 	if (length > 4096)
 		return -EINVAL;
 
-	if (!mtd->read_oob)
-		ret = -EOPNOTSUPP;
-	else
-		ret = access_ok(VERIFY_WRITE, ptr,
-				length) ? 0 : -EFAULT;
-	if (ret)
-		return ret;
+	if (!access_ok(VERIFY_WRITE, ptr, length))
+		return -EFAULT;
 
 	ops.ooblen = length;
 	ops.ooboffs = start & (mtd->writesize - 1);
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index b72964049cdc..721a63ffeb96 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -320,6 +320,8 @@ static inline int mtd_read_oob(struct mtd_info *mtd, loff_t from,
 			       struct mtd_oob_ops *ops)
 {
 	ops->retlen = ops->oobretlen = 0;
+	if (!mtd->read_oob)
+		return -EOPNOTSUPP;
 	return mtd->read_oob(mtd, from, ops);
 }
 
@@ -327,6 +329,8 @@ static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to,
 				struct mtd_oob_ops *ops)
 {
 	ops->retlen = ops->oobretlen = 0;
+	if (!mtd->write_oob)
+		return -EOPNOTSUPP;
 	return mtd->write_oob(mtd, to, ops);
 }
 
-- 
cgit v1.2.3


From 87e858a97e8a7010aedc01db7cd31cc7c02b0b6a Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 28 Dec 2011 18:47:46 +0200
Subject: mtd: do not use mtd->get_*_prot_info directly

Instead, call 'mtd_get_*_prot_info()' and check for '-EOPNOTSUPP'. While
on it, fix the return code from '-EOPNOTSUPP' to '-EINVAL' for the case
when the mode parameter is invalid.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 8 +++-----
 include/linux/mtd/mtd.h | 4 ++++
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 287ff0d35848..49340dc1b107 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -919,17 +919,15 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		struct otp_info *buf = kmalloc(4096, GFP_KERNEL);
 		if (!buf)
 			return -ENOMEM;
-		ret = -EOPNOTSUPP;
 		switch (mfi->mode) {
 		case MTD_FILE_MODE_OTP_FACTORY:
-			if (mtd->get_fact_prot_info)
-				ret = mtd_get_fact_prot_info(mtd, buf, 4096);
+			ret = mtd_get_fact_prot_info(mtd, buf, 4096);
 			break;
 		case MTD_FILE_MODE_OTP_USER:
-			if (mtd->get_user_prot_info)
-				ret = mtd_get_user_prot_info(mtd, buf, 4096);
+			ret = mtd_get_user_prot_info(mtd, buf, 4096);
 			break;
 		default:
+			ret = -EINVAL;
 			break;
 		}
 		if (ret >= 0) {
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 721a63ffeb96..7122efdc6d99 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -342,6 +342,8 @@ static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to,
 static inline int mtd_get_fact_prot_info(struct mtd_info *mtd,
 					 struct otp_info *buf, size_t len)
 {
+	if (!mtd->get_fact_prot_info)
+		return -EOPNOTSUPP;
 	return mtd->get_fact_prot_info(mtd, buf, len);
 }
 
@@ -357,6 +359,8 @@ static inline int mtd_get_user_prot_info(struct mtd_info *mtd,
 					 struct otp_info *buf,
 					 size_t len)
 {
+	if (!mtd->get_user_prot_info)
+		return -EOPNOTSUPP;
 	return mtd->get_user_prot_info(mtd, buf, len);
 }
 
-- 
cgit v1.2.3


From b6de3d6cb63427178c4f1df88b81d1ceee637e6f Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 29 Dec 2011 10:06:32 +0200
Subject: mtd: do not use mtd->read_*_prot_reg directly

Instead, call 'mtd_read_*_prot_info()' and check for -EOPNOTSUPP.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 18 ++++++++++--------
 include/linux/mtd/mtd.h |  4 ++++
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 49340dc1b107..4e8e5fbc1e13 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -366,20 +366,22 @@ static void mtdchar_erase_callback (struct erase_info *instr)
 static int otp_select_filemode(struct mtd_file_info *mfi, int mode)
 {
 	struct mtd_info *mtd = mfi->mtd;
+	size_t retlen;
 	int ret = 0;
 
+	/*
+	 * Make a fake call to mtd_read_fact_prot_reg() to check if OTP
+	 * operations are supported.
+	 */
+	if (mtd_read_fact_prot_reg(mtd, -1, -1, &retlen, NULL) == -EOPNOTSUPP)
+		return -EOPNOTSUPP;
+
 	switch (mode) {
 	case MTD_OTP_FACTORY:
-		if (!mtd->read_fact_prot_reg)
-			ret = -EOPNOTSUPP;
-		else
-			mfi->mode = MTD_FILE_MODE_OTP_FACTORY;
+		mfi->mode = MTD_FILE_MODE_OTP_FACTORY;
 		break;
 	case MTD_OTP_USER:
-		if (!mtd->read_fact_prot_reg)
-			ret = -EOPNOTSUPP;
-		else
-			mfi->mode = MTD_FILE_MODE_OTP_USER;
+		mfi->mode = MTD_FILE_MODE_OTP_USER;
 		break;
 	default:
 		ret = -EINVAL;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 7122efdc6d99..e488cf910914 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -352,6 +352,8 @@ static inline int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
 					 u_char *buf)
 {
 	*retlen = 0;
+	if (!mtd->read_fact_prot_reg)
+		return -EOPNOTSUPP;
 	return mtd->read_fact_prot_reg(mtd, from, len, retlen, buf);
 }
 
@@ -369,6 +371,8 @@ static inline int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from,
 					 u_char *buf)
 {
 	*retlen = 0;
+	if (!mtd->read_user_prot_reg)
+		return -EOPNOTSUPP;
 	return mtd->read_user_prot_reg(mtd, from, len, retlen, buf);
 }
 
-- 
cgit v1.2.3


From 27c151a5e52efaa46d0938984f2ef591bdcb6d5b Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 29 Dec 2011 10:39:20 +0200
Subject: mtd: mtd->write_user_prot_reg directly

Instead, just call 'mtd_write_user_prot_reg()' and check the '-EOPNOTSUPP' return
code.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 4 ----
 include/linux/mtd/mtd.h | 2 ++
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 4e8e5fbc1e13..25bbbc3aa665 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -310,10 +310,6 @@ static ssize_t mtdchar_write(struct file *file, const char __user *buf, size_t c
 			ret = -EROFS;
 			break;
 		case MTD_FILE_MODE_OTP_USER:
-			if (!mtd->write_user_prot_reg) {
-				ret = -EOPNOTSUPP;
-				break;
-			}
 			ret = mtd_write_user_prot_reg(mtd, *ppos, len,
 						      &retlen, kbuf);
 			break;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index e488cf910914..7cd56d2b9419 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -381,6 +381,8 @@ static inline int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to,
 					  u_char *buf)
 {
 	*retlen = 0;
+	if (!mtd->write_user_prot_reg)
+		return -EOPNOTSUPP;
 	return mtd->write_user_prot_reg(mtd, to, len, retlen, buf);
 }
 
-- 
cgit v1.2.3


From e2936b2af5562c8c66060e2bc2ae2e209d0acd3d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 29 Dec 2011 10:45:04 +0200
Subject: mtd: do not use mtd->lock_user_prot_reg directly

Instead, check the -EOPNOTSUPP return code of 'mtd_lock_user_prot_reg()'.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 2 --
 include/linux/mtd/mtd.h | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 25bbbc3aa665..2020a169ed9c 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -949,8 +949,6 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 			return -EINVAL;
 		if (copy_from_user(&oinfo, argp, sizeof(oinfo)))
 			return -EFAULT;
-		if (!mtd->lock_user_prot_reg)
-			return -EOPNOTSUPP;
 		ret = mtd_lock_user_prot_reg(mtd, oinfo.start, oinfo.length);
 		break;
 	}
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 7cd56d2b9419..a994129ede55 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -389,6 +389,8 @@ static inline int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to,
 static inline int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 					 size_t len)
 {
+	if (!mtd->lock_user_prot_reg)
+		return -EOPNOTSUPP;
 	return mtd->lock_user_prot_reg(mtd, from, len);
 }
 
-- 
cgit v1.2.3


From 1dbebd32562b3c2caeca35960e5cb00bfcc12900 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 16:23:41 +0200
Subject: mtd: harmonize mtd_writev usage

This patch makes the 'mtd_writev()' function more usable and logical. We first
teach it to fall-back to the 'default_mtd_writev()' function if the MTD driver
does not define its own '->writev()' method. Then we make block2mtd and JFFS2
just 'mtd_writev()' instead of 'default_mtd_writev()' function. This means we
can now stop exporting 'default_mtd_writev()' and instead, export
'mtd_writev()'. This is much cleaner and more logical, as well as allows us to
get read of another direct 'mtd->writev' access in JFFS2.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/devices/block2mtd.c |  2 +-
 drivers/mtd/mtdcore.c           | 26 +++++++++++++++++++++++---
 fs/jffs2/writev.c               |  6 +-----
 include/linux/mtd/mtd.h         | 16 ++--------------
 4 files changed, 27 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index b78f23169d4e..c16f6b4e8938 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -288,7 +288,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size)
 	dev->mtd.flags = MTD_CAP_RAM;
 	dev->mtd.erase = block2mtd_erase;
 	dev->mtd.write = block2mtd_write;
-	dev->mtd.writev = default_mtd_writev;
+	dev->mtd.writev = mtd_writev;
 	dev->mtd.sync = block2mtd_sync;
 	dev->mtd.read = block2mtd_read;
 	dev->mtd.priv = dev;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 53a200f722b6..4d0f3e557bd1 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -696,8 +696,8 @@ EXPORT_SYMBOL_GPL(__put_mtd_device);
  * This function returns zero in case of success and a negative error code in
  * case of failure.
  */
-int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
-		       unsigned long count, loff_t to, size_t *retlen)
+static int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
+			      unsigned long count, loff_t to, size_t *retlen)
 {
 	unsigned long i;
 	size_t totlen = 0, thislen;
@@ -716,7 +716,27 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 	*retlen = totlen;
 	return ret;
 }
-EXPORT_SYMBOL_GPL(default_mtd_writev);
+
+/*
+ * mtd_writev - the vector-based MTD write method
+ * @mtd: mtd device description object pointer
+ * @vecs: the vectors to write
+ * @count: count of vectors in @vecs
+ * @to: the MTD device offset to write to
+ * @retlen: on exit contains the count of bytes written to the MTD device.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
+	       unsigned long count, loff_t to, size_t *retlen)
+{
+	*retlen = 0;
+	if (!mtd->writev)
+		return default_mtd_writev(mtd, vecs, count, to, retlen);
+	return mtd->writev(mtd, vecs, count, to, retlen);
+}
+EXPORT_SYMBOL_GPL(mtd_writev);
 
 /**
  * mtd_kmalloc_up_to - allocate a contiguous buffer up to the specified size
diff --git a/fs/jffs2/writev.c b/fs/jffs2/writev.c
index 8d704073f8b0..a1bda9dab3f8 100644
--- a/fs/jffs2/writev.c
+++ b/fs/jffs2/writev.c
@@ -26,11 +26,7 @@ int jffs2_flash_direct_writev(struct jffs2_sb_info *c, const struct kvec *vecs,
 		}
 	}
 
-	if (c->mtd->writev)
-		return mtd_writev(c->mtd, vecs, count, to, retlen);
-	else {
-		return default_mtd_writev(c->mtd, vecs, count, to, retlen);
-	}
+	return mtd_writev(c->mtd, vecs, count, to, retlen);
 }
 
 int jffs2_flash_direct_write(struct jffs2_sb_info *c, loff_t ofs, size_t len,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index a994129ede55..a58ecf4d1f80 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -394,16 +394,8 @@ static inline int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 	return mtd->lock_user_prot_reg(mtd, from, len);
 }
 
-/*
- * kvec-based read/write method. NB: The 'count' parameter is the number of
- * _vectors_, each of which contains an (ofs, len) tuple.
- */
-static inline int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
-			     unsigned long count, loff_t to, size_t *retlen)
-{
-	*retlen = 0;
-	return mtd->writev(mtd, vecs, count, to, retlen);
-}
+int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
+	       unsigned long count, loff_t to, size_t *retlen);
 
 static inline void mtd_sync(struct mtd_info *mtd)
 {
@@ -510,10 +502,6 @@ struct mtd_notifier {
 
 extern void register_mtd_user (struct mtd_notifier *new);
 extern int unregister_mtd_user (struct mtd_notifier *old);
-
-int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
-		       unsigned long count, loff_t to, size_t *retlen);
-
 void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size);
 
 void mtd_erase_callback(struct erase_info *instr);
-- 
cgit v1.2.3


From 327cf2922b4edf0439b219469722d2a502e37349 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 16:35:35 +0200
Subject: mtd: do not use mtd->sync directly

This patch teaches 'mtd_sync()' to do nothing when the MTD driver does
not have the '->sync()' method, which allows us to remove all direct
'mtd->sync' accesses.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/ftl.c       | 3 +--
 drivers/mtd/mtdblock.c  | 7 ++-----
 drivers/mtd/mtdchar.c   | 2 +-
 drivers/mtd/mtdswap.c   | 3 +--
 drivers/mtd/rfd_ftl.c   | 3 +--
 drivers/mtd/ubi/kapi.c  | 4 +---
 fs/jffs2/super.c        | 4 +---
 fs/logfs/dev_mtd.c      | 3 +--
 include/linux/mtd/mtd.h | 3 ++-
 9 files changed, 11 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index c9c90299c9e2..19d637266fcd 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -650,8 +650,7 @@ static int reclaim_block(partition_t *part)
 	    if (queued) {
 		pr_debug("ftl_cs: waiting for transfer "
 		      "unit to be prepared...\n");
-		if (part->mbd.mtd->sync)
-			mtd_sync(part->mbd.mtd);
+		mtd_sync(part->mbd.mtd);
 	    } else {
 		static int ne = 0;
 		if (++ne < 5)
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index 496e1a6e8029..af6591237b9b 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -322,8 +322,7 @@ static int mtdblock_release(struct mtd_blktrans_dev *mbd)
 
 	if (!--mtdblk->count) {
 		/* It was the last usage. Free the cache */
-		if (mbd->mtd->sync)
-			mtd_sync(mbd->mtd);
+		mtd_sync(mbd->mtd);
 		vfree(mtdblk->cache_data);
 	}
 
@@ -341,9 +340,7 @@ static int mtdblock_flush(struct mtd_blktrans_dev *dev)
 	mutex_lock(&mtdblk->cache_mutex);
 	write_cached_data(mtdblk);
 	mutex_unlock(&mtdblk->cache_mutex);
-
-	if (dev->mtd->sync)
-		mtd_sync(dev->mtd);
+	mtd_sync(dev->mtd);
 	return 0;
 }
 
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 2020a169ed9c..23a51104aeb5 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -154,7 +154,7 @@ static int mtdchar_close(struct inode *inode, struct file *file)
 	pr_debug("MTD_close\n");
 
 	/* Only sync if opened RW */
-	if ((file->f_mode & FMODE_WRITE) && mtd->sync)
+	if ((file->f_mode & FMODE_WRITE))
 		mtd_sync(mtd);
 
 	iput(mfi->ino);
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 4441c08b082d..fe4426c1c736 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -1047,8 +1047,7 @@ static int mtdswap_flush(struct mtd_blktrans_dev *dev)
 {
 	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
 
-	if (d->mtd->sync)
-		mtd_sync(d->mtd);
+	mtd_sync(d->mtd);
 	return 0;
 }
 
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index 5426d42cdea7..233b946e5d66 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -448,8 +448,7 @@ static int reclaim_block(struct partition *part, u_long *old_sector)
 	int rc;
 
 	/* we have a race if sync doesn't exist */
-	if (part->mbd.mtd->sync)
-		mtd_sync(part->mbd.mtd);
+	mtd_sync(part->mbd.mtd);
 
 	score = 0x7fffffff; /* MAX_INT */
 	best_block = -1;
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index 9f265cc1a0d3..9fdb35367fe0 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -714,9 +714,7 @@ int ubi_sync(int ubi_num)
 	if (!ubi)
 		return -ENODEV;
 
-	if (ubi->mtd->sync)
-		mtd_sync(ubi->mtd);
-
+	mtd_sync(ubi->mtd);
 	ubi_put_device(ubi);
 	return 0;
 }
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index e78bf3cd1b73..5863a369d929 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -336,9 +336,7 @@ static void jffs2_put_super (struct super_block *sb)
 	jffs2_flash_cleanup(c);
 	kfree(c->inocache_list);
 	jffs2_clear_xattr_subsystem(c);
-	if (c->mtd->sync)
-		mtd_sync(c->mtd);
-
+	mtd_sync(c->mtd);
 	D1(printk(KERN_DEBUG "jffs2_put_super returning\n"));
 }
 
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 136c7360a9b6..3f465882ee70 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -119,8 +119,7 @@ static void logfs_mtd_sync(struct super_block *sb)
 {
 	struct mtd_info *mtd = logfs_super(sb)->s_mtd;
 
-	if (mtd->sync)
-		mtd_sync(mtd);
+	mtd_sync(mtd);
 }
 
 static int logfs_mtd_readpage(void *_sb, struct page *page)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index a58ecf4d1f80..305f12b940f4 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -399,7 +399,8 @@ int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 
 static inline void mtd_sync(struct mtd_info *mtd)
 {
-	mtd->sync(mtd);
+	if (mtd->sync)
+		mtd->sync(mtd);
 }
 
 /* Chip-supported device locking */
-- 
cgit v1.2.3


From 381345652fca688aeaa967c231e5075cf68d05b6 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 17:00:35 +0200
Subject: mtd: do not use mtd->lock, unlock and is_locked directly

Instead, call the corresponding MTD API function which will return
'-EOPNOTSUPP' if the operation is not supported.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/scb2_flash.c |  3 +--
 drivers/mtd/mtdchar.c         | 15 +++------------
 drivers/mtd/mtdconcat.c       | 18 ++++++------------
 drivers/mtd/mtdcore.c         |  6 +++---
 include/linux/mtd/mtd.h       |  6 ++++++
 5 files changed, 19 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/maps/scb2_flash.c b/drivers/mtd/maps/scb2_flash.c
index 01af34778de3..934a72c80078 100644
--- a/drivers/mtd/maps/scb2_flash.c
+++ b/drivers/mtd/maps/scb2_flash.c
@@ -204,8 +204,7 @@ scb2_flash_remove(struct pci_dev *dev)
 		return;
 
 	/* disable flash writes */
-	if (scb2_mtd->lock)
-		mtd_lock(scb2_mtd, 0, scb2_mtd->size);
+	mtd_lock(scb2_mtd, 0, scb2_mtd->size);
 
 	mtd_device_unregister(scb2_mtd);
 	map_destroy(scb2_mtd);
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 23a51104aeb5..92da621b1425 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -814,10 +814,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (copy_from_user(&einfo, argp, sizeof(einfo)))
 			return -EFAULT;
 
-		if (!mtd->lock)
-			ret = -EOPNOTSUPP;
-		else
-			ret = mtd_lock(mtd, einfo.start, einfo.length);
+		ret = mtd_lock(mtd, einfo.start, einfo.length);
 		break;
 	}
 
@@ -828,10 +825,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (copy_from_user(&einfo, argp, sizeof(einfo)))
 			return -EFAULT;
 
-		if (!mtd->unlock)
-			ret = -EOPNOTSUPP;
-		else
-			ret = mtd_unlock(mtd, einfo.start, einfo.length);
+		ret = mtd_unlock(mtd, einfo.start, einfo.length);
 		break;
 	}
 
@@ -842,10 +836,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (copy_from_user(&einfo, argp, sizeof(einfo)))
 			return -EFAULT;
 
-		if (!mtd->is_locked)
-			ret = -EOPNOTSUPP;
-		else
-			ret = mtd_is_locked(mtd, einfo.start, einfo.length);
+		ret = mtd_is_locked(mtd, einfo.start, einfo.length);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 9119f76f87ff..aaafb5e18765 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -555,12 +555,9 @@ static int concat_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 		else
 			size = len;
 
-		if (subdev->lock) {
-			err = mtd_lock(subdev, ofs, size);
-			if (err)
-				break;
-		} else
-			err = -EOPNOTSUPP;
+		err = mtd_lock(subdev, ofs, size);
+		if (err)
+			break;
 
 		len -= size;
 		if (len == 0)
@@ -595,12 +592,9 @@ static int concat_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 		else
 			size = len;
 
-		if (subdev->unlock) {
-			err = mtd_unlock(subdev, ofs, size);
-			if (err)
-				break;
-		} else
-			err = -EOPNOTSUPP;
+		err = mtd_unlock(subdev, ofs, size);
+		if (err)
+			break;
 
 		len -= size;
 		if (len == 0)
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 4d0f3e557bd1..66494ee5355a 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -339,9 +339,9 @@ int add_mtd_device(struct mtd_info *mtd)
 	mtd->writesize_mask = (1 << mtd->writesize_shift) - 1;
 
 	/* Some chips always power up locked. Unlock them now */
-	if ((mtd->flags & MTD_WRITEABLE)
-	    && (mtd->flags & MTD_POWERUP_LOCK) && mtd->unlock) {
-		if (mtd_unlock(mtd, 0, mtd->size))
+	if ((mtd->flags & MTD_WRITEABLE) && (mtd->flags & MTD_POWERUP_LOCK)) {
+		error = mtd_unlock(mtd, 0, mtd->size);
+		if (error && error != -EOPNOTSUPP)
 			printk(KERN_WARNING
 			       "%s: unlock failed, writes may not work\n",
 			       mtd->name);
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 305f12b940f4..6c91ba59c229 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -406,16 +406,22 @@ static inline void mtd_sync(struct mtd_info *mtd)
 /* Chip-supported device locking */
 static inline int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
+	if (!mtd->lock)
+		return -EOPNOTSUPP;
 	return mtd->lock(mtd, ofs, len);
 }
 
 static inline int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
+	if (!mtd->unlock)
+		return -EOPNOTSUPP;
 	return mtd->unlock(mtd, ofs, len);
 }
 
 static inline int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
+	if (!mtd->is_locked)
+		return -EOPNOTSUPP;
 	return mtd->is_locked(mtd, ofs, len);
 }
 
-- 
cgit v1.2.3


From 079c985e7a6f4ce60f931cebfdd5ee3c38347e31 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 17:15:59 +0200
Subject: mtd: do not use mtd->suspend and mtd->resume directly

Just call the 'mtd_suspend()' and 'mtd_resume()' - they will do nothing
if the operation is not defined.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/physmap.c        | 5 ++---
 drivers/mtd/maps/rbtx4939-flash.c | 5 ++---
 drivers/mtd/mtdcore.c             | 5 +----
 include/linux/mtd/mtd.h           | 5 ++++-
 4 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index d94cc62186c1..abc562653b31 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c
@@ -190,9 +190,8 @@ static void physmap_flash_shutdown(struct platform_device *dev)
 	int i;
 
 	for (i = 0; i < MAX_RESOURCES && info->mtd[i]; i++)
-		if (info->mtd[i]->suspend && info->mtd[i]->resume)
-			if (mtd_suspend(info->mtd[i]) == 0)
-				mtd_resume(info->mtd[i]);
+		if (mtd_suspend(info->mtd[i]) == 0)
+			mtd_resume(info->mtd[i]);
 }
 #else
 #define physmap_flash_shutdown NULL
diff --git a/drivers/mtd/maps/rbtx4939-flash.c b/drivers/mtd/maps/rbtx4939-flash.c
index 717628312040..3da63fc6f16e 100644
--- a/drivers/mtd/maps/rbtx4939-flash.c
+++ b/drivers/mtd/maps/rbtx4939-flash.c
@@ -119,9 +119,8 @@ static void rbtx4939_flash_shutdown(struct platform_device *dev)
 {
 	struct rbtx4939_flash_info *info = platform_get_drvdata(dev);
 
-	if (info->mtd->suspend && info->mtd->resume)
-		if (mtd_suspend(info->mtd) == 0)
-			mtd_resume(info->mtd);
+	if (mtd_suspend(info->mtd) == 0)
+		mtd_resume(info->mtd);
 }
 #else
 #define rbtx4939_flash_shutdown NULL
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 66494ee5355a..6ae9ca01388b 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -119,10 +119,7 @@ static int mtd_cls_suspend(struct device *dev, pm_message_t state)
 {
 	struct mtd_info *mtd = dev_get_drvdata(dev);
 
-	if (mtd && mtd->suspend)
-		return mtd_suspend(mtd);
-	else
-		return 0;
+	return mtd_suspend(mtd);
 }
 
 static int mtd_cls_resume(struct device *dev)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 6c91ba59c229..089370758fc9 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -427,12 +427,15 @@ static inline int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 
 static inline int mtd_suspend(struct mtd_info *mtd)
 {
+	if (!mtd->suspend)
+		return -EOPNOTSUPP;
 	return mtd->suspend(mtd);
 }
 
 static inline void mtd_resume(struct mtd_info *mtd)
 {
-	mtd->resume(mtd);
+	if (mtd->resume)
+		mtd->resume(mtd);
 }
 
 static inline int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
-- 
cgit v1.2.3


From 8f461a730242c528ca221948edceca49266a3ffb Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Mon, 2 Jan 2012 13:48:54 +0200
Subject: mtd: introduce mtd_can_have_bb helper

This patch introduces new 'mtd_can_have_bb()' helper function which checks
whether the flash can have bad eraseblocks. Then it changes all the
direct 'mtd->block_isbad' use cases with 'mtd_can_have_bb()'.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c               | 5 +----
 drivers/mtd/mtdconcat.c             | 2 +-
 drivers/mtd/mtdoops.c               | 4 ++--
 drivers/mtd/mtdswap.c               | 4 ++--
 drivers/mtd/nftlcore.c              | 2 +-
 drivers/mtd/redboot.c               | 4 ++--
 drivers/mtd/tests/mtd_readtest.c    | 3 +--
 drivers/mtd/tests/mtd_speedtest.c   | 3 +--
 drivers/mtd/tests/mtd_stresstest.c  | 3 +--
 drivers/mtd/tests/mtd_torturetest.c | 2 +-
 drivers/mtd/ubi/build.c             | 2 +-
 include/linux/mtd/mtd.h             | 7 +++++++
 12 files changed, 21 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 92da621b1425..64efcbf087e9 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -867,10 +867,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 
 		if (copy_from_user(&offs, argp, sizeof(loff_t)))
 			return -EFAULT;
-		if (!mtd->block_isbad)
-			ret = -EOPNOTSUPP;
-		else
-			return mtd_block_isbad(mtd, offs);
+		return mtd_block_isbad(mtd, offs);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index aaafb5e18765..fbf3cb124a93 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -647,7 +647,7 @@ static int concat_block_isbad(struct mtd_info *mtd, loff_t ofs)
 	struct mtd_concat *concat = CONCAT(mtd);
 	int i, res = 0;
 
-	if (!concat->subdev[0]->block_isbad)
+	if (!mtd_can_have_bb(concat->subdev[0]))
 		return res;
 
 	if (ofs > mtd->size)
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index c8540b8a7fc6..a4c8f67560e0 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -169,7 +169,7 @@ static void mtdoops_workfunc_erase(struct work_struct *work)
 			cxt->nextpage = 0;
 	}
 
-	while (mtd->block_isbad) {
+	while (mtd_can_have_bb(mtd)) {
 		ret = mtd_block_isbad(mtd, cxt->nextpage * record_size);
 		if (!ret)
 			break;
@@ -257,7 +257,7 @@ static void find_next_position(struct mtdoops_context *cxt)
 	size_t retlen;
 
 	for (page = 0; page < cxt->oops_pages; page++) {
-		if (mtd->block_isbad &&
+		if (mtd_can_have_bb(mtd) &&
 		    mtd_block_isbad(mtd, page * record_size))
 			continue;
 		/* Assume the page is used */
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index fe4426c1c736..3fc8cb2756c0 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -343,7 +343,7 @@ static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
 	offset = mtdswap_eb_offset(d, eb);
 
 	/* Check first if the block is bad. */
-	if (d->mtd->block_isbad && mtd_block_isbad(d->mtd, offset))
+	if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset))
 		return MTDSWAP_SCANNED_BAD;
 
 	ops.ooblen = 2 * d->mtd->ecclayout->oobavail;
@@ -1058,7 +1058,7 @@ static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
 
 	badcnt = 0;
 
-	if (mtd->block_isbad)
+	if (mtd_can_have_bb(mtd))
 		for (offset = 0; offset < size; offset += mtd->erasesize)
 			if (mtd_block_isbad(mtd, offset))
 				badcnt++;
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index 8847e60ad167..a75382aff5f6 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -56,7 +56,7 @@ static void nftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 	if (memcmp(mtd->name, "DiskOnChip", 10))
 		return;
 
-	if (!mtd->block_isbad) {
+	if (!mtd_can_have_bb(mtd)) {
 		printk(KERN_ERR
 "NFTL no longer supports the old DiskOnChip drivers loaded via docprobe.\n"
 "Please use the new diskonchip driver under the NAND subsystem.\n");
diff --git a/drivers/mtd/redboot.c b/drivers/mtd/redboot.c
index 09bb81ea3a7e..48970c14beff 100644
--- a/drivers/mtd/redboot.c
+++ b/drivers/mtd/redboot.c
@@ -78,7 +78,7 @@ static int parse_redboot_partitions(struct mtd_info *master,
 
 	if ( directory < 0 ) {
 		offset = master->size + directory * master->erasesize;
-		while (master->block_isbad && 
+		while (mtd_can_have_bb(master) &&
 		       mtd_block_isbad(master, offset)) {
 			if (!offset) {
 			nogood:
@@ -89,7 +89,7 @@ static int parse_redboot_partitions(struct mtd_info *master,
 		}
 	} else {
 		offset = directory * master->erasesize;
-		while (master->block_isbad && 
+		while (mtd_can_have_bb(master) &&
 		       mtd_block_isbad(master, offset)) {
 			offset += master->erasesize;
 			if (offset == master->size)
diff --git a/drivers/mtd/tests/mtd_readtest.c b/drivers/mtd/tests/mtd_readtest.c
index 4228eb4e54c7..121aba189cec 100644
--- a/drivers/mtd/tests/mtd_readtest.c
+++ b/drivers/mtd/tests/mtd_readtest.c
@@ -148,8 +148,7 @@ static int scan_for_bad_eraseblocks(void)
 		return -ENOMEM;
 	}
 
-	/* NOR flash does not implement block_isbad */
-	if (mtd->block_isbad == NULL)
+	if (!mtd_can_have_bb(mtd))
 		return 0;
 
 	printk(PRINT_PREF "scanning for bad eraseblocks\n");
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
index 4d2ed5c0807d..2aec4f3b72be 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/mtd_speedtest.c
@@ -336,8 +336,7 @@ static int scan_for_bad_eraseblocks(void)
 		return -ENOMEM;
 	}
 
-	/* NOR flash does not implement block_isbad */
-	if (mtd->block_isbad == NULL)
+	if (!mtd_can_have_bb(mtd))
 		goto out;
 
 	printk(PRINT_PREF "scanning for bad eraseblocks\n");
diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c
index 399aa2bf220d..7b33f22d0b58 100644
--- a/drivers/mtd/tests/mtd_stresstest.c
+++ b/drivers/mtd/tests/mtd_stresstest.c
@@ -227,8 +227,7 @@ static int scan_for_bad_eraseblocks(void)
 		return -ENOMEM;
 	}
 
-	/* NOR flash does not implement block_isbad */
-	if (mtd->block_isbad == NULL)
+	if (!mtd_can_have_bb(mtd))
 		return 0;
 
 	printk(PRINT_PREF "scanning for bad eraseblocks\n");
diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c
index 557105f2ead3..b65861bc7b8e 100644
--- a/drivers/mtd/tests/mtd_torturetest.c
+++ b/drivers/mtd/tests/mtd_torturetest.c
@@ -290,7 +290,7 @@ static int __init tort_init(void)
 	 * Check if there is a bad eraseblock among those we are going to test.
 	 */
 	memset(&bad_ebs[0], 0, sizeof(int) * ebcnt);
-	if (mtd->block_isbad) {
+	if (mtd_can_have_bb(mtd)) {
 		for (i = eb; i < eb + ebcnt; i++) {
 			err = mtd_block_isbad(mtd, (loff_t)i * mtd->erasesize);
 
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 6c3fb5ab20f5..115749f20f9e 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -664,7 +664,7 @@ static int io_init(struct ubi_device *ubi)
 	ubi->peb_count  = mtd_div_by_eb(ubi->mtd->size, ubi->mtd);
 	ubi->flash_size = ubi->mtd->size;
 
-	if (ubi->mtd->block_isbad && ubi->mtd->block_markbad)
+	if (mtd_can_have_bb(ubi->mtd))
 		ubi->bad_allowed = 1;
 
 	if (ubi->mtd->type == MTD_NORFLASH) {
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 089370758fc9..7e35755f6931 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -440,6 +440,8 @@ static inline void mtd_resume(struct mtd_info *mtd)
 
 static inline int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
 {
+	if (!mtd->block_isbad)
+		return -EOPNOTSUPP;
 	return mtd->block_isbad(mtd, ofs);
 }
 
@@ -483,6 +485,11 @@ static inline int mtd_has_oob(const struct mtd_info *mtd)
 	return mtd->read_oob && mtd->write_oob;
 }
 
+static inline int mtd_can_have_bb(const struct mtd_info *mtd)
+{
+	return !!mtd->block_isbad;
+}
+
 	/* Kernel-side ioctl definitions */
 
 struct mtd_partition;
-- 
cgit v1.2.3


From 800ffd3496987e91f599a135060ef49731e045ac Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Mon, 2 Jan 2012 13:59:12 +0200
Subject: mtd: do not use mtd->block_markbad directly

Instead, use the new 'mtd_can_have_bb()', or just rely on 'mtd_block_markbad()'
return code, which will be -EOPNOTSUPP if bad blocks are not supported.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 5 +----
 drivers/mtd/mtdconcat.c | 2 +-
 drivers/mtd/mtdoops.c   | 2 +-
 drivers/mtd/mtdswap.c   | 2 +-
 fs/jffs2/wbuf.c         | 3 ---
 include/linux/mtd/mtd.h | 2 ++
 6 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 64efcbf087e9..50c6a1e7f675 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -877,10 +877,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 
 		if (copy_from_user(&offs, argp, sizeof(loff_t)))
 			return -EFAULT;
-		if (!mtd->block_markbad)
-			ret = -EOPNOTSUPP;
-		else
-			return mtd_block_markbad(mtd, offs);
+		return mtd_block_markbad(mtd, offs);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index fbf3cb124a93..1ed5103b219b 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -673,7 +673,7 @@ static int concat_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	struct mtd_concat *concat = CONCAT(mtd);
 	int i, err = -EINVAL;
 
-	if (!concat->subdev[0]->block_markbad)
+	if (!mtd_can_have_bb(concat->subdev[0]))
 		return 0;
 
 	if (ofs > mtd->size)
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index a4c8f67560e0..db8e8272d69b 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -199,7 +199,7 @@ badblock:
 		return;
 	}
 
-	if (mtd->block_markbad && ret == -EIO) {
+	if (mtd_can_have_bb(mtd) && ret == -EIO) {
 		ret = mtd_block_markbad(mtd, cxt->nextpage * record_size);
 		if (ret < 0) {
 			printk(KERN_ERR "mtdoops: block_markbad failed, aborting\n");
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 3fc8cb2756c0..c92f0f6bc130 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -274,7 +274,7 @@ static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb)
 	eb->root = NULL;
 
 	/* badblocks not supported */
-	if (!d->mtd->block_markbad)
+	if (!mtd_can_have_bb(d->mtd))
 		return 1;
 
 	offset = mtdswap_eb_offset(d, eb);
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index fd96b757433f..30e8f47e8a23 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1130,9 +1130,6 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock *
 	if( ++jeb->bad_count < MAX_ERASE_FAILURES)
 		return 0;
 
-	if (!c->mtd->block_markbad)
-		return 1; // What else can we do?
-
 	printk(KERN_WARNING "JFFS2: marking eraseblock at %08x\n as bad", bad_offset);
 	ret = mtd_block_markbad(c->mtd, bad_offset);
 
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 7e35755f6931..1a81fde8f333 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -447,6 +447,8 @@ static inline int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
 
 static inline int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs)
 {
+	if (!mtd->block_markbad)
+		return -EOPNOTSUPP;
 	return mtd->block_markbad(mtd, ofs);
 }
 
-- 
cgit v1.2.3


From 2429f7ac2ef429378536d87fcbbf6f424aa5b47f Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Mon, 9 Jan 2012 06:36:54 +0000
Subject: net: introduce netif_addr_lock_nested() and call if when appropriate

dev_uc_sync() and dev_mc_sync() are acquiring netif_addr_lock for
destination device of synchronization. Since netif_addr_lock is
already held at the time for source device, this triggers lockdep
deadlock warning.

There's no way this deadlock can happen so use spin_lock_nested() to
silence the warning.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  5 +++++
 net/core/dev_addr_lists.c | 12 ++++++------
 2 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a1d109590da4..d0522bb2d4a0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2450,6 +2450,11 @@ static inline void netif_addr_lock(struct net_device *dev)
 	spin_lock(&dev->addr_list_lock);
 }
 
+static inline void netif_addr_lock_nested(struct net_device *dev)
+{
+	spin_lock_nested(&dev->addr_list_lock, SINGLE_DEPTH_NESTING);
+}
+
 static inline void netif_addr_lock_bh(struct net_device *dev)
 {
 	spin_lock_bh(&dev->addr_list_lock);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index c34ce9f9c976..29c07fef9228 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -439,11 +439,11 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)
 	if (to->addr_len != from->addr_len)
 		return -EINVAL;
 
-	netif_addr_lock_bh(to);
+	netif_addr_lock_nested(to);
 	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
-	netif_addr_unlock_bh(to);
+	netif_addr_unlock(to);
 	return err;
 }
 EXPORT_SYMBOL(dev_uc_sync);
@@ -463,7 +463,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from)
 		return;
 
 	netif_addr_lock_bh(from);
-	netif_addr_lock(to);
+	netif_addr_lock_nested(to);
 	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
 	__dev_set_rx_mode(to);
 	netif_addr_unlock(to);
@@ -602,11 +602,11 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
 	if (to->addr_len != from->addr_len)
 		return -EINVAL;
 
-	netif_addr_lock_bh(to);
+	netif_addr_lock_nested(to);
 	err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
-	netif_addr_unlock_bh(to);
+	netif_addr_unlock(to);
 	return err;
 }
 EXPORT_SYMBOL(dev_mc_sync);
@@ -626,7 +626,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
 		return;
 
 	netif_addr_lock_bh(from);
-	netif_addr_lock(to);
+	netif_addr_lock_nested(to);
 	__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
 	__dev_set_rx_mode(to);
 	netif_addr_unlock(to);
-- 
cgit v1.2.3


From 3969eb3859e4fad4b32ca8f96d4ec8551c20704a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 9 Jan 2012 13:44:23 -0800
Subject: net: Fix build with INET disabled.

> net/core/sock.c: In function 'sk_update_clone':
> net/core/sock.c:1278:3: error: implicit declaration of function 'sock_update_memcg'

Reported-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/memcontrol.h | 2 --
 net/core/sock.c            | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9b296ea41bb8..f944591765eb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -390,7 +390,6 @@ enum {
 	OVER_LIMIT,
 };
 
-#ifdef CONFIG_INET
 struct sock;
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
 void sock_update_memcg(struct sock *sk);
@@ -403,6 +402,5 @@ static inline void sock_release_memcg(struct sock *sk)
 {
 }
 #endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
-#endif /* CONFIG_INET */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/net/core/sock.c b/net/core/sock.c
index c3ae73de0239..5c5af9988f94 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -112,6 +112,7 @@
 #include <linux/highmem.h>
 #include <linux/user_namespace.h>
 #include <linux/jump_label.h>
+#include <linux/memcontrol.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-- 
cgit v1.2.3


From 4dee6b64ee7cfef94b47733c6d9fef07f8051c7c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 9 Jan 2012 17:15:42 -0500
Subject: tracing/mm: Move include of trace/events/kmem.h out of header into
 slab.c

Including trace/events/*.h TRACE_EVENT() macro headers in other headers
can cause strange side effects if another trace/event/*.h header
includes that header.  Having trace/events/kmem.h inside slab_def.h
caused a compile error in sparc64 when changes were done to some header
files.  Moving the kmem.h trace header out of slab.h and into slab.c
fixes the problem.

Note, both slub.c and slob.c already include the trace/events/kmem.h
file. Only slab.c had it missing.

Link: http://lkml.kernel.org/r/20120105190405.1e3191fb5a43b2a0f1655e1f@canb.auug.org.au

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab_def.h | 2 --
 mm/slab.c                | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index d00e0bacda93..fbd1117fdfde 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -15,8 +15,6 @@
 #include <asm/cache.h>		/* kmalloc_sizes.h needs L1_CACHE_BYTES */
 #include <linux/compiler.h>
 
-#include <trace/events/kmem.h>
-
 /*
  * struct kmem_cache
  *
diff --git a/mm/slab.c b/mm/slab.c
index 83311c9aaf9d..2acfa0d90943 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -121,6 +121,8 @@
 #include	<asm/tlbflush.h>
 #include	<asm/page.h>
 
+#include <trace/events/kmem.h>
+
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *		  0 for faster, smaller code (especially in the critical paths).
-- 
cgit v1.2.3


From adc0e91ab142abe93f5b0d7980ada8a7676231fe Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 8 Jan 2012 16:49:21 -0500
Subject: vfs: new helper - d_make_root()

d_alloc_root() with iput() in case of allocation failure...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 17 +++++++++++++++++
 include/linux/dcache.h |  1 +
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index b209d73f9a98..3c6d3113a255 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1440,6 +1440,23 @@ struct dentry * d_alloc_root(struct inode * root_inode)
 }
 EXPORT_SYMBOL(d_alloc_root);
 
+struct dentry *d_make_root(struct inode *root_inode)
+{
+	struct dentry *res = NULL;
+
+	if (root_inode) {
+		static const struct qstr name = { .name = "/", .len = 1 };
+
+		res = __d_alloc(root_inode->i_sb, &name);
+		if (res)
+			d_instantiate(res, root_inode);
+		else
+			iput(root_inode);
+	}
+	return res;
+}
+EXPORT_SYMBOL(d_make_root);
+
 static struct dentry * __d_find_any_alias(struct inode *inode)
 {
 	struct dentry *alias;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index ed9f74f6c519..a47bda5f76db 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -249,6 +249,7 @@ extern int d_invalidate(struct dentry *);
 
 /* only used at mount-time */
 extern struct dentry * d_alloc_root(struct inode *);
+extern struct dentry * d_make_root(struct inode *);
 
 /* <clickety>-<click> the ramfs-type tree */
 extern void d_genocide(struct dentry *);
-- 
cgit v1.2.3


From eaf5f9073533cde21c7121c136f1c3f072d9cf59 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Tue, 10 Jan 2012 18:22:25 +0100
Subject: fix shrink_dcache_parent() livelock

Two (or more) concurrent calls of shrink_dcache_parent() on the same dentry may
cause shrink_dcache_parent() to loop forever.

Here's what appears to happen:

1 - CPU0: select_parent(P) finds C and puts it on dispose list, returns 1

2 - CPU1: select_parent(P) locks P->d_lock

3 - CPU0: shrink_dentry_list() locks C->d_lock
   dentry_kill(C) tries to lock P->d_lock but fails, unlocks C->d_lock

4 - CPU1: select_parent(P) locks C->d_lock,
         moves C from dispose list being processed on CPU0 to the new
dispose list, returns 1

5 - CPU0: shrink_dentry_list() finds dispose list empty, returns

6 - Goto 2 with CPU0 and CPU1 switched

Basically select_parent() steals the dentry from shrink_dentry_list() and thinks
it found a new one, causing shrink_dentry_list() to think it's making progress
and loop over and over.

One way to trigger this is to make udev calls stat() on the sysfs file while it
is going away.

Having a file in /lib/udev/rules.d/ with only this one rule seems to the trick:

ATTR{vendor}=="0x8086", ATTR{device}=="0x10ca", ENV{PCI_SLOT_NAME}="%k", ENV{MATCHADDR}="$attr{address}", RUN+="/bin/true"

Then execute the following loop:

while true; do
        echo -bond0 > /sys/class/net/bonding_masters
        echo +bond0 > /sys/class/net/bonding_masters
        echo -bond1 > /sys/class/net/bonding_masters
        echo +bond1 > /sys/class/net/bonding_masters
done

One fix would be to check all callers and prevent concurrent calls to
shrink_dcache_parent().  But I think a better solution is to stop the
stealing behavior.

This patch adds a new dentry flag that is set when the dentry is added to the
dispose list.  The flag is cleared in dentry_lru_del() in case the dentry gets a
new reference just before being pruned.

If the dentry has this flag, select_parent() will skip it and let
shrink_dentry_list() retry pruning it.  With select_parent() skipping those
dentries there will not be the appearance of progress (new dentries found) when
there is none, hence shrink_dcache_parent() will not loop forever.

Set the flag is also set in prune_dcache_sb() for consistency as suggested by
Linus.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 15 +++++++++++----
 include/linux/dcache.h |  1 +
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 3c6d3113a255..616fedff011a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -243,6 +243,7 @@ static void dentry_lru_add(struct dentry *dentry)
 static void __dentry_lru_del(struct dentry *dentry)
 {
 	list_del_init(&dentry->d_lru);
+	dentry->d_flags &= ~DCACHE_SHRINK_LIST;
 	dentry->d_sb->s_nr_dentry_unused--;
 	dentry_stat.nr_unused--;
 }
@@ -806,6 +807,7 @@ relock:
 			spin_unlock(&dentry->d_lock);
 		} else {
 			list_move_tail(&dentry->d_lru, &tmp);
+			dentry->d_flags |= DCACHE_SHRINK_LIST;
 			spin_unlock(&dentry->d_lock);
 			if (!--count)
 				break;
@@ -1097,14 +1099,19 @@ resume:
 
 		/*
 		 * move only zero ref count dentries to the dispose list.
+		 *
+		 * Those which are presently on the shrink list, being processed
+		 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
+		 * loop in shrink_dcache_parent() might not make any progress
+		 * and loop forever.
 		 */
-		if (!dentry->d_count) {
+		if (dentry->d_count) {
+			dentry_lru_del(dentry);
+		} else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
 			dentry_lru_move_list(dentry, dispose);
+			dentry->d_flags |= DCACHE_SHRINK_LIST;
 			found++;
-		} else {
-			dentry_lru_del(dentry);
 		}
-
 		/*
 		 * We can return to the caller if we have found some (this
 		 * ensures forward progress). We'll be coming back to find
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index a47bda5f76db..31f73220e7d7 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -203,6 +203,7 @@ struct dentry_operations {
 
 #define DCACHE_CANT_MOUNT	0x0100
 #define DCACHE_GENOCIDE		0x0200
+#define DCACHE_SHRINK_LIST	0x0400
 
 #define DCACHE_NFSFS_RENAMED	0x1000
      /* this dentry has been "silly renamed" and has to be deleted on the last
-- 
cgit v1.2.3


From 1edf223485c42c99655dcd001db1e46ad5e5d2d7 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 10 Jan 2012 15:06:57 -0800
Subject: mm/page-writeback.c: make determine_dirtyable_memory static again

The tracing ring-buffer used this function briefly, but not anymore.
Make it local to the writeback code again.

Also, move the function so that no forward declaration needs to be
reintroduced.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/writeback.h |   2 -
 mm/page-writeback.c       | 122 +++++++++++++++++++++++-----------------------
 2 files changed, 60 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a378c295851f..34a005515fef 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -138,8 +138,6 @@ extern int vm_highmem_is_dirtyable;
 extern int block_dump;
 extern int laptop_mode;
 
-extern unsigned long determine_dirtyable_memory(void);
-
 extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 8616ef3025a4..c081bf62202b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -129,6 +129,66 @@ unsigned long global_dirty_limit;
  */
 static struct prop_descriptor vm_completions;
 
+/*
+ * Work out the current dirty-memory clamping and background writeout
+ * thresholds.
+ *
+ * The main aim here is to lower them aggressively if there is a lot of mapped
+ * memory around.  To avoid stressing page reclaim with lots of unreclaimable
+ * pages.  It is better to clamp down on writers than to start swapping, and
+ * performing lots of scanning.
+ *
+ * We only allow 1/2 of the currently-unmapped memory to be dirtied.
+ *
+ * We don't permit the clamping level to fall below 5% - that is getting rather
+ * excessive.
+ *
+ * We make sure that the background writeout level is below the adjusted
+ * clamping level.
+ */
+static unsigned long highmem_dirtyable_memory(unsigned long total)
+{
+#ifdef CONFIG_HIGHMEM
+	int node;
+	unsigned long x = 0;
+
+	for_each_node_state(node, N_HIGH_MEMORY) {
+		struct zone *z =
+			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
+
+		x += zone_page_state(z, NR_FREE_PAGES) +
+		     zone_reclaimable_pages(z);
+	}
+	/*
+	 * Make sure that the number of highmem pages is never larger
+	 * than the number of the total dirtyable memory. This can only
+	 * occur in very strange VM situations but we want to make sure
+	 * that this does not occur.
+	 */
+	return min(x, total);
+#else
+	return 0;
+#endif
+}
+
+/**
+ * determine_dirtyable_memory - amount of memory that may be used
+ *
+ * Returns the numebr of pages that can currently be freed and used
+ * by the kernel for direct mappings.
+ */
+static unsigned long determine_dirtyable_memory(void)
+{
+	unsigned long x;
+
+	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
+
+	if (!vm_highmem_is_dirtyable)
+		x -= highmem_dirtyable_memory(x);
+
+	return x + 1;	/* Ensure that we never return 0 */
+}
+
 /*
  * couple the period to the dirty_ratio:
  *
@@ -196,7 +256,6 @@ int dirty_ratio_handler(struct ctl_table *table, int write,
 	return ret;
 }
 
-
 int dirty_bytes_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
@@ -291,67 +350,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 }
 EXPORT_SYMBOL(bdi_set_max_ratio);
 
-/*
- * Work out the current dirty-memory clamping and background writeout
- * thresholds.
- *
- * The main aim here is to lower them aggressively if there is a lot of mapped
- * memory around.  To avoid stressing page reclaim with lots of unreclaimable
- * pages.  It is better to clamp down on writers than to start swapping, and
- * performing lots of scanning.
- *
- * We only allow 1/2 of the currently-unmapped memory to be dirtied.
- *
- * We don't permit the clamping level to fall below 5% - that is getting rather
- * excessive.
- *
- * We make sure that the background writeout level is below the adjusted
- * clamping level.
- */
-
-static unsigned long highmem_dirtyable_memory(unsigned long total)
-{
-#ifdef CONFIG_HIGHMEM
-	int node;
-	unsigned long x = 0;
-
-	for_each_node_state(node, N_HIGH_MEMORY) {
-		struct zone *z =
-			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
-
-		x += zone_page_state(z, NR_FREE_PAGES) +
-		     zone_reclaimable_pages(z);
-	}
-	/*
-	 * Make sure that the number of highmem pages is never larger
-	 * than the number of the total dirtyable memory. This can only
-	 * occur in very strange VM situations but we want to make sure
-	 * that this does not occur.
-	 */
-	return min(x, total);
-#else
-	return 0;
-#endif
-}
-
-/**
- * determine_dirtyable_memory - amount of memory that may be used
- *
- * Returns the numebr of pages that can currently be freed and used
- * by the kernel for direct mappings.
- */
-unsigned long determine_dirtyable_memory(void)
-{
-	unsigned long x;
-
-	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
-
-	if (!vm_highmem_is_dirtyable)
-		x -= highmem_dirtyable_memory(x);
-
-	return x + 1;	/* Ensure that we never return 0 */
-}
-
 static unsigned long dirty_freerun_ceiling(unsigned long thresh,
 					   unsigned long bg_thresh)
 {
-- 
cgit v1.2.3


From cc59850ef940e4ee6a765d28b439b9bafe07cf63 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 10 Jan 2012 15:07:04 -0800
Subject: mm: add free_hot_cold_page_list() helper

This patch adds helper free_hot_cold_page_list() to free list of 0-order
pages.  It frees pages directly from list without temporary page-vector.
It also calls trace_mm_pagevec_free() to simulate pagevec_free()
behaviour.

bloat-o-meter:

add/remove: 1/1 grow/shrink: 1/3 up/down: 267/-295 (-28)
function                                     old     new   delta
free_hot_cold_page_list                        -     264    +264
get_page_from_freelist                      2129    2132      +3
__pagevec_free                               243     239      -4
split_free_page                              380     373      -7
release_pages                                606     510     -96
free_page_list                               188       -    -188

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h |  1 +
 mm/page_alloc.c     | 13 +++++++++++++
 mm/swap.c           | 14 +++-----------
 mm/vmscan.c         | 20 +-------------------
 4 files changed, 18 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 3a76faf6a3ee..656295865d58 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -358,6 +358,7 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
 extern void __free_pages(struct page *page, unsigned int order);
 extern void free_pages(unsigned long addr, unsigned int order);
 extern void free_hot_cold_page(struct page *page, int cold);
+extern void free_hot_cold_page_list(struct list_head *list, int cold);
 
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7990ca154d1b..cd0c95c6cc9e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1188,6 +1188,19 @@ out:
 	local_irq_restore(flags);
 }
 
+/*
+ * Free a list of 0-order pages
+ */
+void free_hot_cold_page_list(struct list_head *list, int cold)
+{
+	struct page *page, *next;
+
+	list_for_each_entry_safe(page, next, list, lru) {
+		trace_mm_pagevec_free(page, cold);
+		free_hot_cold_page(page, cold);
+	}
+}
+
 /*
  * split_page takes a non-compound higher-order page, and splits it into
  * n (1<<order) sub-pages: page[0..n]
diff --git a/mm/swap.c b/mm/swap.c
index a91caf754d9b..67a09a633a09 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -585,11 +585,10 @@ int lru_add_drain_all(void)
 void release_pages(struct page **pages, int nr, int cold)
 {
 	int i;
-	struct pagevec pages_to_free;
+	LIST_HEAD(pages_to_free);
 	struct zone *zone = NULL;
 	unsigned long uninitialized_var(flags);
 
-	pagevec_init(&pages_to_free, cold);
 	for (i = 0; i < nr; i++) {
 		struct page *page = pages[i];
 
@@ -620,19 +619,12 @@ void release_pages(struct page **pages, int nr, int cold)
 			del_page_from_lru(zone, page);
 		}
 
-		if (!pagevec_add(&pages_to_free, page)) {
-			if (zone) {
-				spin_unlock_irqrestore(&zone->lru_lock, flags);
-				zone = NULL;
-			}
-			__pagevec_free(&pages_to_free);
-			pagevec_reinit(&pages_to_free);
-  		}
+		list_add(&page->lru, &pages_to_free);
 	}
 	if (zone)
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
-	pagevec_free(&pages_to_free);
+	free_hot_cold_page_list(&pages_to_free, cold);
 }
 EXPORT_SYMBOL(release_pages);
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 753a2dc300b9..3d571df41c79 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -734,24 +734,6 @@ static enum page_references page_check_references(struct page *page,
 	return PAGEREF_RECLAIM;
 }
 
-static noinline_for_stack void free_page_list(struct list_head *free_pages)
-{
-	struct pagevec freed_pvec;
-	struct page *page, *tmp;
-
-	pagevec_init(&freed_pvec, 1);
-
-	list_for_each_entry_safe(page, tmp, free_pages, lru) {
-		list_del(&page->lru);
-		if (!pagevec_add(&freed_pvec, page)) {
-			__pagevec_free(&freed_pvec);
-			pagevec_reinit(&freed_pvec);
-		}
-	}
-
-	pagevec_free(&freed_pvec);
-}
-
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -1015,7 +997,7 @@ keep_lumpy:
 	if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc))
 		zone_set_flag(zone, ZONE_CONGESTED);
 
-	free_page_list(&free_pages);
+	free_hot_cold_page_list(&free_pages, 1);
 
 	list_splice(&ret_pages, page_list);
 	count_vm_events(PGACTIVATE, pgactivate);
-- 
cgit v1.2.3


From da066ad3570b88e7dee82e76a06ee9a7adffcf0d Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 10 Jan 2012 15:07:06 -0800
Subject: mm: remove unused pagevec_free

It not exported and now nobody uses it.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagevec.h |  7 -------
 mm/page_alloc.c         | 10 ----------
 2 files changed, 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index bab82f4c571c..ed17024d2ebe 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -21,7 +21,6 @@ struct pagevec {
 };
 
 void __pagevec_release(struct pagevec *pvec);
-void __pagevec_free(struct pagevec *pvec);
 void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
 void pagevec_strip(struct pagevec *pvec);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
@@ -67,12 +66,6 @@ static inline void pagevec_release(struct pagevec *pvec)
 		__pagevec_release(pvec);
 }
 
-static inline void pagevec_free(struct pagevec *pvec)
-{
-	if (pagevec_count(pvec))
-		__pagevec_free(pvec);
-}
-
 static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
 {
 	____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cd0c95c6cc9e..6c77efbca5bc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2319,16 +2319,6 @@ unsigned long get_zeroed_page(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(get_zeroed_page);
 
-void __pagevec_free(struct pagevec *pvec)
-{
-	int i = pagevec_count(pvec);
-
-	while (--i >= 0) {
-		trace_mm_pagevec_free(pvec->pages[i], pvec->cold);
-		free_hot_cold_page(pvec->pages[i], pvec->cold);
-	}
-}
-
 void __free_pages(struct page *page, unsigned int order)
 {
 	if (put_page_testzero(page)) {
-- 
cgit v1.2.3


From f90ac3982a78d36f894824636beeef13361d7c59 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 10 Jan 2012 15:07:15 -0800
Subject: mm: avoid livelock on !__GFP_FS allocations

Colin Cross reported;

  Under the following conditions, __alloc_pages_slowpath can loop forever:
  gfp_mask & __GFP_WAIT is true
  gfp_mask & __GFP_FS is false
  reclaim and compaction make no progress
  order <= PAGE_ALLOC_COSTLY_ORDER

  These conditions happen very often during suspend and resume,
  when pm_restrict_gfp_mask() effectively converts all GFP_KERNEL
  allocations into __GFP_WAIT.

  The oom killer is not run because gfp_mask & __GFP_FS is false,
  but should_alloc_retry will always return true when order is less
  than PAGE_ALLOC_COSTLY_ORDER.

In his fix, he avoided retrying the allocation if reclaim made no progress
and __GFP_FS was not set.  The problem is that this would result in
GFP_NOIO allocations failing that previously succeeded which would be very
unfortunate.

The big difference between GFP_NOIO and suspend converting GFP_KERNEL to
behave like GFP_NOIO is that normally flushers will be cleaning pages and
kswapd reclaims pages allowing GFP_NOIO to succeed after a short delay.
The same does not necessarily apply during suspend as the storage device
may be suspended.

This patch special cases the suspend case to fail the page allocation if
reclaim cannot make progress and adds some documentation on how
gfp_allowed_mask is currently used.  Failing allocations like this may
cause suspend to abort but that is better than a livelock.

[mgorman@suse.de: Rework fix to be suspend specific]
[rientjes@google.com: Move suspended device check to should_alloc_retry]
Reported-by: Colin Cross <ccross@android.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 16 ++++++++++++++++
 mm/page_alloc.c     | 30 ++++++++++++++++++++++--------
 mm/swapfile.c       |  6 +++---
 3 files changed, 41 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 656295865d58..91812df1351a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -368,9 +368,25 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
 void drain_all_pages(void);
 void drain_local_pages(void *dummy);
 
+/*
+ * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
+ * GFP flags are used before interrupts are enabled. Once interrupts are
+ * enabled, it is set to __GFP_BITS_MASK while the system is running. During
+ * hibernation, it is used by PM to avoid I/O during memory allocation while
+ * devices are suspended.
+ */
 extern gfp_t gfp_allowed_mask;
 
 extern void pm_restrict_gfp_mask(void);
 extern void pm_restore_gfp_mask(void);
 
+#ifdef CONFIG_PM_SLEEP
+extern bool pm_suspended_storage(void);
+#else
+static inline bool pm_suspended_storage(void)
+{
+	return false;
+}
+#endif /* CONFIG_PM_SLEEP */
+
 #endif /* __LINUX_GFP_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 671e6c94fed7..3cba4b67203f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -127,6 +127,13 @@ void pm_restrict_gfp_mask(void)
 	saved_gfp_mask = gfp_allowed_mask;
 	gfp_allowed_mask &= ~GFP_IOFS;
 }
+
+bool pm_suspended_storage(void)
+{
+	if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS)
+		return false;
+	return true;
+}
 #endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -1786,12 +1793,25 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 
 static inline int
 should_alloc_retry(gfp_t gfp_mask, unsigned int order,
+				unsigned long did_some_progress,
 				unsigned long pages_reclaimed)
 {
 	/* Do not loop if specifically requested */
 	if (gfp_mask & __GFP_NORETRY)
 		return 0;
 
+	/* Always retry if specifically requested */
+	if (gfp_mask & __GFP_NOFAIL)
+		return 1;
+
+	/*
+	 * Suspend converts GFP_KERNEL to __GFP_WAIT which can prevent reclaim
+	 * making forward progress without invoking OOM. Suspend also disables
+	 * storage devices so kswapd will not help. Bail if we are suspending.
+	 */
+	if (!did_some_progress && pm_suspended_storage())
+		return 0;
+
 	/*
 	 * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER
 	 * means __GFP_NOFAIL, but that may not be true in other
@@ -1810,13 +1830,6 @@ should_alloc_retry(gfp_t gfp_mask, unsigned int order,
 	if (gfp_mask & __GFP_REPEAT && pages_reclaimed < (1 << order))
 		return 1;
 
-	/*
-	 * Don't let big-order allocations loop unless the caller
-	 * explicitly requests that.
-	 */
-	if (gfp_mask & __GFP_NOFAIL)
-		return 1;
-
 	return 0;
 }
 
@@ -2209,7 +2222,8 @@ rebalance:
 
 	/* Check if we should retry the allocation */
 	pages_reclaimed += did_some_progress;
-	if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
+	if (should_alloc_retry(gfp_mask, order, did_some_progress,
+						pages_reclaimed)) {
 		/* Wait for some write requests to complete then retry */
 		wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
 		goto rebalance;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b1cd12060723..9520592d4231 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -667,10 +667,10 @@ int try_to_free_swap(struct page *page)
 	 * original page might be freed under memory pressure, then
 	 * later read back in from swap, now with the wrong data.
 	 *
-	 * Hibernation clears bits from gfp_allowed_mask to prevent
-	 * memory reclaim from writing to disk, so check that here.
+	 * Hibration suspends storage while it is writing the image
+	 * to disk so check that here.
 	 */
-	if (!(gfp_allowed_mask & __GFP_IO))
+	if (pm_suspended_storage())
 		return 0;
 
 	delete_from_swap_cache(page);
-- 
cgit v1.2.3


From 1399ff86f2a2bbacbbe68fa00c5f8c752b344723 Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 10 Jan 2012 15:07:25 -0800
Subject: kernel.h: add BUILD_BUG() macro

We can place this in definitions that we expect the compiler to remove by
dead code elimination.  If this assertion fails, we get a nice error
message at build time.

The GCC function attribute error("message") was added in version 4.3, so
we define a new macro __linktime_error(message) to expand to this for
GCC-4.3 and later.  This will give us an error diagnostic from the
compiler on the line that fails.  For other compilers
__linktime_error(message) expands to nothing, and we have to be content
with a link time error, but at least we will still get a build error.

BUILD_BUG() expands to the undefined function __build_bug_failed() and
will fail at link time if the compiler ever emits code for it.  On GCC-4.3
and later, attribute((error())) is used so that the failure will be noted
at compile time instead.

Signed-off-by: David Daney <david.daney@cavium.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: DM <dm.n9107@gmail.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc4.h |  1 +
 include/linux/compiler.h      |  4 +++-
 include/linux/kernel.h        | 16 ++++++++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index dfadc96e9d63..2f4079175afb 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -29,6 +29,7 @@
    the kernel context */
 #define __cold			__attribute__((__cold__))
 
+#define __linktime_error(message) __attribute__((__error__(message)))
 
 #if __GNUC_MINOR__ >= 5
 /*
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 320d6c94ff84..4a243546d142 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -293,7 +293,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #ifndef __compiletime_error
 # define __compiletime_error(message)
 #endif
-
+#ifndef __linktime_error
+# define __linktime_error(message)
+#endif
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e8b1597b5cf2..f48e8a528544 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -665,6 +665,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 #define BUILD_BUG_ON_ZERO(e) (0)
 #define BUILD_BUG_ON_NULL(e) ((void*)0)
 #define BUILD_BUG_ON(condition)
+#define BUILD_BUG() (0)
 #else /* __CHECKER__ */
 
 /* Force a compilation error if a constant expression is not a power of 2 */
@@ -703,6 +704,21 @@ extern int __build_bug_on_failed;
 		if (condition) __build_bug_on_failed = 1;	\
 	} while(0)
 #endif
+
+/**
+ * BUILD_BUG - break compile if used.
+ *
+ * If you have some code that you expect the compiler to eliminate at
+ * build time, you should use BUILD_BUG to detect if it is
+ * unexpectedly used.
+ */
+#define BUILD_BUG()						\
+	do {							\
+		extern void __build_bug_failed(void)		\
+			__linktime_error("BUILD_BUG failed");	\
+		__build_bug_failed();				\
+	} while (0)
+
 #endif	/* __CHECKER__ */
 
 /* Trap pasters of __FUNCTION__ at compile-time */
-- 
cgit v1.2.3


From c0a32fc5a2e470d0b02597b23ad79a317735253e Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Tue, 10 Jan 2012 15:07:28 -0800
Subject: mm: more intensive memory corruption debugging

With CONFIG_DEBUG_PAGEALLOC configured, the CPU will generate an exception
on access (read,write) to an unallocated page, which permits us to catch
code which corrupts memory.  However the kernel is trying to maximise
memory usage, hence there are usually few free pages in the system and
buggy code usually corrupts some crucial data.

This patch changes the buddy allocator to keep more free/protected pages
and to interlace free/protected and allocated pages to increase the
probability of catching corruption.

When the kernel is compiled with CONFIG_DEBUG_PAGEALLOC,
debug_guardpage_minorder defines the minimum order used by the page
allocator to grant a request.  The requested size will be returned with
the remaining pages used as guard pages.

The default value of debug_guardpage_minorder is zero: no change from
current behaviour.

[akpm@linux-foundation.org: tweak documentation, s/flg/flag/]
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt | 19 ++++++++++
 include/linux/mm.h                  | 17 +++++++++
 include/linux/page-debug-flags.h    |  4 +-
 mm/Kconfig.debug                    |  5 +++
 mm/page_alloc.c                     | 75 ++++++++++++++++++++++++++++++++++---
 5 files changed, 113 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 7b2e5c5eefa6..7ed7030e7722 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -623,6 +623,25 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	no_debug_objects
 			[KNL] Disable object debugging
 
+	debug_guardpage_minorder=
+			[KNL] When CONFIG_DEBUG_PAGEALLOC is set, this
+			parameter allows control of the order of pages that will
+			be intentionally kept free (and hence protected) by the
+			buddy allocator. Bigger value increase the probability
+			of catching random memory corruption, but reduce the
+			amount of memory for normal system use. The maximum
+			possible value is MAX_ORDER/2.  Setting this parameter
+			to 1 or 2 should be enough to identify most random
+			memory corruption problems caused by bugs in kernel or
+			driver code when a CPU writes to (or reads from) a
+			random memory location. Note that there exists a class
+			of memory corruptions problems caused by buggy H/W or
+			F/W or by drivers badly programing DMA (basically when
+			memory is written at bus level and the CPU MMU is
+			bypassed) which are not detectable by
+			CONFIG_DEBUG_PAGEALLOC, hence this option will not help
+			tracking down these problems.
+
 	debugpat	[X86] Enable PAT debugging
 
 	decnet.addr=	[HW,NET]
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5d9b4c9813bd..5568553a41fd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1618,5 +1618,22 @@ extern void copy_user_huge_page(struct page *dst, struct page *src,
 				unsigned int pages_per_huge_page);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+extern unsigned int _debug_guardpage_minorder;
+
+static inline unsigned int debug_guardpage_minorder(void)
+{
+	return _debug_guardpage_minorder;
+}
+
+static inline bool page_is_guard(struct page *page)
+{
+	return test_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
+}
+#else
+static inline unsigned int debug_guardpage_minorder(void) { return 0; }
+static inline bool page_is_guard(struct page *page) { return false; }
+#endif /* CONFIG_DEBUG_PAGEALLOC */
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/page-debug-flags.h b/include/linux/page-debug-flags.h
index b0638fd91e92..22691f614043 100644
--- a/include/linux/page-debug-flags.h
+++ b/include/linux/page-debug-flags.h
@@ -13,6 +13,7 @@
 
 enum page_debug_flags {
 	PAGE_DEBUG_FLAG_POISON,		/* Page is poisoned */
+	PAGE_DEBUG_FLAG_GUARD,
 };
 
 /*
@@ -21,7 +22,8 @@ enum page_debug_flags {
  */
 
 #ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
-#if !defined(CONFIG_PAGE_POISONING) \
+#if !defined(CONFIG_PAGE_POISONING) && \
+    !defined(CONFIG_PAGE_GUARD) \
 /* && !defined(CONFIG_PAGE_DEBUG_SOMETHING_ELSE) && ... */
 #error WANT_PAGE_DEBUG_FLAGS is turned on with no debug features!
 #endif
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 8b1a477162dc..4b2443254de2 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -4,6 +4,7 @@ config DEBUG_PAGEALLOC
 	depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC
 	depends on !KMEMCHECK
 	select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	select PAGE_GUARD if ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	---help---
 	  Unmap pages from the kernel linear mapping after free_pages().
 	  This results in a large slowdown, but helps to find certain types
@@ -22,3 +23,7 @@ config WANT_PAGE_DEBUG_FLAGS
 config PAGE_POISONING
 	bool
 	select WANT_PAGE_DEBUG_FLAGS
+
+config PAGE_GUARD
+	bool
+	select WANT_PAGE_DEBUG_FLAGS
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3cba4b67203f..93baebcc06f3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -57,6 +57,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
+#include <linux/page-debug-flags.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -388,6 +389,37 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
 		clear_highpage(page + i);
 }
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+unsigned int _debug_guardpage_minorder;
+
+static int __init debug_guardpage_minorder_setup(char *buf)
+{
+	unsigned long res;
+
+	if (kstrtoul(buf, 10, &res) < 0 ||  res > MAX_ORDER / 2) {
+		printk(KERN_ERR "Bad debug_guardpage_minorder value\n");
+		return 0;
+	}
+	_debug_guardpage_minorder = res;
+	printk(KERN_INFO "Setting debug_guardpage_minorder to %lu\n", res);
+	return 0;
+}
+__setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
+
+static inline void set_page_guard_flag(struct page *page)
+{
+	__set_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
+}
+
+static inline void clear_page_guard_flag(struct page *page)
+{
+	__clear_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
+}
+#else
+static inline void set_page_guard_flag(struct page *page) { }
+static inline void clear_page_guard_flag(struct page *page) { }
+#endif
+
 static inline void set_page_order(struct page *page, int order)
 {
 	set_page_private(page, order);
@@ -445,6 +477,11 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
 	if (page_zone_id(page) != page_zone_id(buddy))
 		return 0;
 
+	if (page_is_guard(buddy) && page_order(buddy) == order) {
+		VM_BUG_ON(page_count(buddy) != 0);
+		return 1;
+	}
+
 	if (PageBuddy(buddy) && page_order(buddy) == order) {
 		VM_BUG_ON(page_count(buddy) != 0);
 		return 1;
@@ -501,11 +538,19 @@ static inline void __free_one_page(struct page *page,
 		buddy = page + (buddy_idx - page_idx);
 		if (!page_is_buddy(page, buddy, order))
 			break;
-
-		/* Our buddy is free, merge with it and move up one order. */
-		list_del(&buddy->lru);
-		zone->free_area[order].nr_free--;
-		rmv_page_order(buddy);
+		/*
+		 * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
+		 * merge with it and move up one order.
+		 */
+		if (page_is_guard(buddy)) {
+			clear_page_guard_flag(buddy);
+			set_page_private(page, 0);
+			__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
+		} else {
+			list_del(&buddy->lru);
+			zone->free_area[order].nr_free--;
+			rmv_page_order(buddy);
+		}
 		combined_idx = buddy_idx & page_idx;
 		page = page + (combined_idx - page_idx);
 		page_idx = combined_idx;
@@ -731,6 +776,23 @@ static inline void expand(struct zone *zone, struct page *page,
 		high--;
 		size >>= 1;
 		VM_BUG_ON(bad_range(zone, &page[size]));
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+		if (high < debug_guardpage_minorder()) {
+			/*
+			 * Mark as guard pages (or page), that will allow to
+			 * merge back to allocator when buddy will be freed.
+			 * Corresponding page table entries will not be touched,
+			 * pages will stay not present in virtual address space
+			 */
+			INIT_LIST_HEAD(&page[size].lru);
+			set_page_guard_flag(&page[size]);
+			set_page_private(&page[size], high);
+			/* Guard pages are not available for any usage */
+			__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << high));
+			continue;
+		}
+#endif
 		list_add(&page[size].lru, &area->free_list[migratetype]);
 		area->nr_free++;
 		set_page_order(&page[size], high);
@@ -1754,7 +1816,8 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 {
 	unsigned int filter = SHOW_MEM_FILTER_NODES;
 
-	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
+	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
+	    debug_guardpage_minorder() > 0)
 		return;
 
 	/*
-- 
cgit v1.2.3


From f6d7e0cb3ecc248e98fa11d83253f6174bd7e085 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 10 Jan 2012 15:07:38 -0800
Subject: mm, debug: test for online nid when allocating on single node

Calling alloc_pages_exact_node() means the allocation only passes the
zonelist of a single node into the page allocator.  If that node isn't
online, it's zonelist may never have been initialized causing a strange
oops that may not immediately be clear.

I recently debugged an issue where node 0 wasn't online and an allocator
was passing 0 to alloc_pages_exact_node() and it resulted in a NULL
pointer on zonelist->_zoneref.  If CONFIG_DEBUG_VM is enabled, though, it
would be nice to catch this a bit earlier.

Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 91812df1351a..66f172fdf5fe 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -313,7 +313,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 static inline struct page *alloc_pages_exact_node(int nid, gfp_t gfp_mask,
 						unsigned int order)
 {
-	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
+	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid));
 
 	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
 }
-- 
cgit v1.2.3


From ab8fabd46f811d5153d8a0cd2fac9a0d41fb593d Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Tue, 10 Jan 2012 15:07:42 -0800
Subject: mm: exclude reserved pages from dirtyable memory

Per-zone dirty limits try to distribute page cache pages allocated for
writing across zones in proportion to the individual zone sizes, to reduce
the likelihood of reclaim having to write back individual pages from the
LRU lists in order to make progress.

This patch:

The amount of dirtyable pages should not include the full number of free
pages: there is a number of reserved pages that the page allocator and
kswapd always try to keep free.

The closer (reclaimable pages - dirty pages) is to the number of reserved
pages, the more likely it becomes for reclaim to run into dirty pages:

       +----------+ ---
       |   anon   |  |
       +----------+  |
       |          |  |
       |          |  -- dirty limit new    -- flusher new
       |   file   |  |                     |
       |          |  |                     |
       |          |  -- dirty limit old    -- flusher old
       |          |                        |
       +----------+                       --- reclaim
       | reserved |
       +----------+
       |  kernel  |
       +----------+

This patch introduces a per-zone dirty reserve that takes both the lowmem
reserve as well as the high watermark of the zone into account, and a
global sum of those per-zone values that is subtracted from the global
amount of dirtyable pages.  The lowmem reserve is unavailable to page
cache allocations and kswapd tries to keep the high watermark free.  We
don't want to end up in a situation where reclaim has to clean pages in
order to balance zones.

Not treating reserved pages as dirtyable on a global level is only a
conceptual fix.  In reality, dirty pages are not distributed equally
across zones and reclaim runs into dirty pages on a regular basis.

But it is important to get this right before tackling the problem on a
per-zone level, where the distance between reclaim and the dirty pages is
mostly much smaller in absolute numbers.

[akpm@linux-foundation.org: fix highmem build]
Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Chris Mason <chris.mason@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  6 ++++++
 include/linux/swap.h   |  1 +
 mm/page-writeback.c    |  5 +++--
 mm/page_alloc.c        | 19 +++++++++++++++++++
 4 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3ac040f19369..ca6ca92418a6 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -317,6 +317,12 @@ struct zone {
 	 */
 	unsigned long		lowmem_reserve[MAX_NR_ZONES];
 
+	/*
+	 * This is a per-zone reserve of pages that should not be
+	 * considered dirtyable memory.
+	 */
+	unsigned long		dirty_balance_reserve;
+
 #ifdef CONFIG_NUMA
 	int node;
 	/*
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1e22e126d2ac..06061a7f8e69 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -207,6 +207,7 @@ struct swap_list_t {
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
+extern unsigned long dirty_balance_reserve;
 extern unsigned int nr_free_buffer_pages(void);
 extern unsigned int nr_free_pagecache_pages(void);
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c081bf62202b..9ab6de82d8e6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -157,7 +157,7 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
 			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
 
 		x += zone_page_state(z, NR_FREE_PAGES) +
-		     zone_reclaimable_pages(z);
+		     zone_reclaimable_pages(z) - z->dirty_balance_reserve;
 	}
 	/*
 	 * Make sure that the number of highmem pages is never larger
@@ -181,7 +181,8 @@ static unsigned long determine_dirtyable_memory(void)
 {
 	unsigned long x;
 
-	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
+	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages() -
+	    dirty_balance_reserve;
 
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 93baebcc06f3..2cb9eb71e282 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -97,6 +97,14 @@ EXPORT_SYMBOL(node_states);
 
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
+/*
+ * When calculating the number of globally allowed dirty pages, there
+ * is a certain number of per-zone reserves that should not be
+ * considered dirtyable memory.  This is the sum of those reserves
+ * over all existing zones that contribute dirtyable memory.
+ */
+unsigned long dirty_balance_reserve __read_mostly;
+
 int percpu_pagelist_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 
@@ -4822,8 +4830,19 @@ static void calculate_totalreserve_pages(void)
 			if (max > zone->present_pages)
 				max = zone->present_pages;
 			reserve_pages += max;
+			/*
+			 * Lowmem reserves are not available to
+			 * GFP_HIGHUSER page cache allocations and
+			 * kswapd tries to balance zones to their high
+			 * watermark.  As a result, neither should be
+			 * regarded as dirtyable memory, to prevent a
+			 * situation where reclaim has to clean pages
+			 * in order to balance the zones.
+			 */
+			zone->dirty_balance_reserve = max;
 		}
 	}
+	dirty_balance_reserve = reserve_pages;
 	totalreserve_pages = reserve_pages;
 }
 
-- 
cgit v1.2.3


From a756cf5908530e8b40bdf569eb48b40139e8d7fd Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Tue, 10 Jan 2012 15:07:49 -0800
Subject: mm: try to distribute dirty pages fairly across zones

The maximum number of dirty pages that exist in the system at any time is
determined by a number of pages considered dirtyable and a user-configured
percentage of those, or an absolute number in bytes.

This number of dirtyable pages is the sum of memory provided by all the
zones in the system minus their lowmem reserves and high watermarks, so
that the system can retain a healthy number of free pages without having
to reclaim dirty pages.

But there is a flaw in that we have a zoned page allocator which does not
care about the global state but rather the state of individual memory
zones.  And right now there is nothing that prevents one zone from filling
up with dirty pages while other zones are spared, which frequently leads
to situations where kswapd, in order to restore the watermark of free
pages, does indeed have to write pages from that zone's LRU list.  This
can interfere so badly with IO from the flusher threads that major
filesystems (btrfs, xfs, ext4) mostly ignore write requests from reclaim
already, taking away the VM's only possibility to keep such a zone
balanced, aside from hoping the flushers will soon clean pages from that
zone.

Enter per-zone dirty limits.  They are to a zone's dirtyable memory what
the global limit is to the global amount of dirtyable memory, and try to
make sure that no single zone receives more than its fair share of the
globally allowed dirty pages in the first place.  As the number of pages
considered dirtyable excludes the zones' lowmem reserves and high
watermarks, the maximum number of dirty pages in a zone is such that the
zone can always be balanced without requiring page cleaning.

As this is a placement decision in the page allocator and pages are
dirtied only after the allocation, this patch allows allocators to pass
__GFP_WRITE when they know in advance that the page will be written to and
become dirty soon.  The page allocator will then attempt to allocate from
the first zone of the zonelist - which on NUMA is determined by the task's
NUMA memory policy - that has not exceeded its dirty limit.

At first glance, it would appear that the diversion to lower zones can
increase pressure on them, but this is not the case.  With a full high
zone, allocations will be diverted to lower zones eventually, so it is
more of a shift in timing of the lower zone allocations.  Workloads that
previously could fit their dirty pages completely in the higher zone may
be forced to allocate from lower zones, but the amount of pages that
"spill over" are limited themselves by the lower zones' dirty constraints,
and thus unlikely to become a problem.

For now, the problem of unfair dirty page distribution remains for NUMA
configurations where the zones allowed for allocation are in sum not big
enough to trigger the global dirty limits, wake up the flusher threads and
remedy the situation.  Because of this, an allocation that could not
succeed on any of the considered zones is allowed to ignore the dirty
limits before going into direct reclaim or even failing the allocation,
until a future patch changes the global dirty throttling and flusher
thread activation so that they take individual zone states into account.

			Test results

15M DMA + 3246M DMA32 + 504 Normal = 3765M memory
40% dirty ratio
16G USB thumb drive
10 runs of dd if=/dev/zero of=disk/zeroes bs=32k count=$((10 << 15))

		seconds			nr_vmscan_write
		        (stddev)	       min|     median|        max
xfs
vanilla:	 549.747( 3.492)	     0.000|      0.000|      0.000
patched:	 550.996( 3.802)	     0.000|      0.000|      0.000

fuse-ntfs
vanilla:	1183.094(53.178)	 54349.000|  59341.000|  65163.000
patched:	 558.049(17.914)	     0.000|      0.000|     43.000

btrfs
vanilla:	 573.679(14.015)	156657.000| 460178.000| 606926.000
patched:	 563.365(11.368)	     0.000|      0.000|   1362.000

ext4
vanilla:	 561.197(15.782)	     0.000|2725438.000|4143837.000
patched:	 568.806(17.496)	     0.000|      0.000|      0.000

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Tested-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Chris Mason <chris.mason@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h       |  4 ++-
 include/linux/writeback.h |  1 +
 mm/page-writeback.c       | 82 +++++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c           | 29 +++++++++++++++++
 4 files changed, 115 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 66f172fdf5fe..581e74b7df95 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -36,6 +36,7 @@ struct vm_area_struct;
 #endif
 #define ___GFP_NO_KSWAPD	0x400000u
 #define ___GFP_OTHER_NODE	0x800000u
+#define ___GFP_WRITE		0x1000000u
 
 /*
  * GFP bitmasks..
@@ -85,6 +86,7 @@ struct vm_area_struct;
 
 #define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
+#define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
 
 /*
  * This may seem redundant, but it's a way of annotating false positives vs.
@@ -92,7 +94,7 @@ struct vm_area_struct;
  */
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
 
-#define __GFP_BITS_SHIFT 24	/* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 25	/* Room for N __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* This equals 0, but use constants in case they ever change */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 34a005515fef..6dff47304971 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -124,6 +124,7 @@ void laptop_mode_timer_fn(unsigned long data);
 static inline void laptop_sync_completion(void) { }
 #endif
 void throttle_vm_writeout(gfp_t gfp_mask);
+bool zone_dirty_ok(struct zone *zone);
 
 extern unsigned long global_dirty_limit;
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 433fa990fe8b..5cdd4f2b0c9d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -147,6 +147,24 @@ static struct prop_descriptor vm_completions;
  * clamping level.
  */
 
+/*
+ * In a memory zone, there is a certain amount of pages we consider
+ * available for the page cache, which is essentially the number of
+ * free and reclaimable pages, minus some zone reserves to protect
+ * lowmem and the ability to uphold the zone's watermarks without
+ * requiring writeback.
+ *
+ * This number of dirtyable pages is the base value of which the
+ * user-configurable dirty ratio is the effictive number of pages that
+ * are allowed to be actually dirtied.  Per individual zone, or
+ * globally by using the sum of dirtyable pages over all zones.
+ *
+ * Because the user is allowed to specify the dirty limit globally as
+ * absolute number of bytes, calculating the per-zone dirty limit can
+ * require translating the configured limit into a percentage of
+ * global dirtyable memory first.
+ */
+
 static unsigned long highmem_dirtyable_memory(unsigned long total)
 {
 #ifdef CONFIG_HIGHMEM
@@ -232,6 +250,70 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
 	trace_global_dirty_state(background, dirty);
 }
 
+/**
+ * zone_dirtyable_memory - number of dirtyable pages in a zone
+ * @zone: the zone
+ *
+ * Returns the zone's number of pages potentially available for dirty
+ * page cache.  This is the base value for the per-zone dirty limits.
+ */
+static unsigned long zone_dirtyable_memory(struct zone *zone)
+{
+	/*
+	 * The effective global number of dirtyable pages may exclude
+	 * highmem as a big-picture measure to keep the ratio between
+	 * dirty memory and lowmem reasonable.
+	 *
+	 * But this function is purely about the individual zone and a
+	 * highmem zone can hold its share of dirty pages, so we don't
+	 * care about vm_highmem_is_dirtyable here.
+	 */
+	return zone_page_state(zone, NR_FREE_PAGES) +
+	       zone_reclaimable_pages(zone) -
+	       zone->dirty_balance_reserve;
+}
+
+/**
+ * zone_dirty_limit - maximum number of dirty pages allowed in a zone
+ * @zone: the zone
+ *
+ * Returns the maximum number of dirty pages allowed in a zone, based
+ * on the zone's dirtyable memory.
+ */
+static unsigned long zone_dirty_limit(struct zone *zone)
+{
+	unsigned long zone_memory = zone_dirtyable_memory(zone);
+	struct task_struct *tsk = current;
+	unsigned long dirty;
+
+	if (vm_dirty_bytes)
+		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
+			zone_memory / global_dirtyable_memory();
+	else
+		dirty = vm_dirty_ratio * zone_memory / 100;
+
+	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
+		dirty += dirty / 4;
+
+	return dirty;
+}
+
+/**
+ * zone_dirty_ok - tells whether a zone is within its dirty limits
+ * @zone: the zone to check
+ *
+ * Returns %true when the dirty pages in @zone are within the zone's
+ * dirty limit, %false if the limit is exceeded.
+ */
+bool zone_dirty_ok(struct zone *zone)
+{
+	unsigned long limit = zone_dirty_limit(zone);
+
+	return zone_page_state(zone, NR_FILE_DIRTY) +
+	       zone_page_state(zone, NR_UNSTABLE_NFS) +
+	       zone_page_state(zone, NR_WRITEBACK) <= limit;
+}
+
 /*
  * couple the period to the dirty_ratio:
  *
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2cb9eb71e282..4f95bcf0f2b1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1735,6 +1735,35 @@ zonelist_scan:
 		if ((alloc_flags & ALLOC_CPUSET) &&
 			!cpuset_zone_allowed_softwall(zone, gfp_mask))
 				continue;
+		/*
+		 * When allocating a page cache page for writing, we
+		 * want to get it from a zone that is within its dirty
+		 * limit, such that no single zone holds more than its
+		 * proportional share of globally allowed dirty pages.
+		 * The dirty limits take into account the zone's
+		 * lowmem reserves and high watermark so that kswapd
+		 * should be able to balance it without having to
+		 * write pages from its LRU list.
+		 *
+		 * This may look like it could increase pressure on
+		 * lower zones by failing allocations in higher zones
+		 * before they are full.  But the pages that do spill
+		 * over are limited as the lower zones are protected
+		 * by this very same mechanism.  It should not become
+		 * a practical burden to them.
+		 *
+		 * XXX: For now, allow allocations to potentially
+		 * exceed the per-zone dirty limit in the slowpath
+		 * (ALLOC_WMARK_LOW unset) before going into reclaim,
+		 * which is important when on a NUMA setup the allowed
+		 * zones are together not big enough to reach the
+		 * global limit.  The proper fix for these situations
+		 * will require awareness of zones in the
+		 * dirty-throttling and the flusher threads.
+		 */
+		if ((alloc_flags & ALLOC_WMARK_LOW) &&
+		    (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone))
+			goto this_zone_full;
 
 		BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
 		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
-- 
cgit v1.2.3


From 948f017b093a9baac23855fcd920d3a970b71bb6 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Tue, 10 Jan 2012 15:08:05 -0800
Subject: mremap: enforce rmap src/dst vma ordering in case of vma_merge()
 succeeding in copy_vma()

migrate was doing an rmap_walk with speculative lock-less access on
pagetables.  That could lead it to not serializing properly against mremap
PT locks.  But a second problem remains in the order of vmas in the
same_anon_vma list used by the rmap_walk.

If vma_merge succeeds in copy_vma, the src vma could be placed after the
dst vma in the same_anon_vma list.  That could still lead to migrate
missing some pte.

This patch adds an anon_vma_moveto_tail() function to force the dst vma at
the end of the list before mremap starts to solve the problem.

If the mremap is very large and there are a lots of parents or childs
sharing the anon_vma root lock, this should still scale better than taking
the anon_vma root lock around every pte copy practically for the whole
duration of mremap.

Update: Hugh noticed special care is needed in the error path where
move_page_tables goes in the reverse direction, a second
anon_vma_moveto_tail() call is needed in the error path.

This program exercises the anon_vma_moveto_tail:

===

int main()
{
	static struct timeval oldstamp, newstamp;
	long diffsec;
	char *p, *p2, *p3, *p4;
	if (posix_memalign((void **)&p, 2*1024*1024, SIZE))
		perror("memalign"), exit(1);
	if (posix_memalign((void **)&p2, 2*1024*1024, SIZE))
		perror("memalign"), exit(1);
	if (posix_memalign((void **)&p3, 2*1024*1024, SIZE))
		perror("memalign"), exit(1);

	memset(p, 0xff, SIZE);
	printf("%p\n", p);
	memset(p2, 0xff, SIZE);
	memset(p3, 0x77, 4096);
	if (memcmp(p, p2, SIZE))
		printf("error\n");
	p4 = mremap(p+SIZE/2, SIZE/2, SIZE/2, MREMAP_FIXED|MREMAP_MAYMOVE, p3);
	if (p4 != p3)
		perror("mremap"), exit(1);
	p4 = mremap(p4, SIZE/2, SIZE/2, MREMAP_FIXED|MREMAP_MAYMOVE, p+SIZE/2);
	if (p4 != p+SIZE/2)
		perror("mremap"), exit(1);
	if (memcmp(p, p2, SIZE))
		printf("error\n");
	printf("ok\n");

	return 0;
}
===

$ perf probe -a anon_vma_moveto_tail
Add new event:
  probe:anon_vma_moveto_tail (on anon_vma_moveto_tail)

You can now use it on all perf tools, such as:

        perf record -e probe:anon_vma_moveto_tail -aR sleep 1

$ perf record -e probe:anon_vma_moveto_tail -aR ./anon_vma_moveto_tail
0x7f2ca2800000
ok
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.043 MB perf.data (~1860 samples) ]
$ perf report --stdio
   100.00%  anon_vma_moveto  [kernel.kallsyms]  [k] anon_vma_moveto_tail

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reported-by: Nai Xia <nai.xia@gmail.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Pawel Sikora <pluto@agmk.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h |  1 +
 mm/mmap.c            | 24 +++++++++++++++++++++---
 mm/mremap.c          |  9 +++++++++
 mm/rmap.c            | 45 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 2148b122779b..1afb9954bbf1 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -120,6 +120,7 @@ void anon_vma_init(void);	/* create anon_vma_cachep */
 int  anon_vma_prepare(struct vm_area_struct *);
 void unlink_anon_vmas(struct vm_area_struct *);
 int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
+void anon_vma_moveto_tail(struct vm_area_struct *);
 int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
 void __anon_vma_link(struct vm_area_struct *);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index eae90af60ea6..adea3b8880e3 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2322,13 +2322,16 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	struct vm_area_struct *new_vma, *prev;
 	struct rb_node **rb_link, *rb_parent;
 	struct mempolicy *pol;
+	bool faulted_in_anon_vma = true;
 
 	/*
 	 * If anonymous vma has not yet been faulted, update new pgoff
 	 * to match new location, to increase its chance of merging.
 	 */
-	if (!vma->vm_file && !vma->anon_vma)
+	if (unlikely(!vma->vm_file && !vma->anon_vma)) {
 		pgoff = addr >> PAGE_SHIFT;
+		faulted_in_anon_vma = false;
+	}
 
 	find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
 	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
@@ -2337,9 +2340,24 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		/*
 		 * Source vma may have been merged into new_vma
 		 */
-		if (vma_start >= new_vma->vm_start &&
-		    vma_start < new_vma->vm_end)
+		if (unlikely(vma_start >= new_vma->vm_start &&
+			     vma_start < new_vma->vm_end)) {
+			/*
+			 * The only way we can get a vma_merge with
+			 * self during an mremap is if the vma hasn't
+			 * been faulted in yet and we were allowed to
+			 * reset the dst vma->vm_pgoff to the
+			 * destination address of the mremap to allow
+			 * the merge to happen. mremap must change the
+			 * vm_pgoff linearity between src and dst vmas
+			 * (in turn preventing a vma_merge) to be
+			 * safe. It is only safe to keep the vm_pgoff
+			 * linear if there are no pages mapped yet.
+			 */
+			VM_BUG_ON(faulted_in_anon_vma);
 			*vmap = new_vma;
+		} else
+			anon_vma_moveto_tail(new_vma);
 	} else {
 		new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 		if (new_vma) {
diff --git a/mm/mremap.c b/mm/mremap.c
index d6959cb4df58..87bb8393e7d2 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -220,6 +220,15 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 
 	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
 	if (moved_len < old_len) {
+		/*
+		 * Before moving the page tables from the new vma to
+		 * the old vma, we need to be sure the old vma is
+		 * queued after new vma in the same_anon_vma list to
+		 * prevent SMP races with rmap_walk (that could lead
+		 * rmap_walk to miss some page table).
+		 */
+		anon_vma_moveto_tail(vma);
+
 		/*
 		 * On error, move entries back from new area to old,
 		 * which will succeed since page tables still there,
diff --git a/mm/rmap.c b/mm/rmap.c
index a4fd3680038b..a2e5ce1fa081 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -271,6 +271,51 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 	return -ENOMEM;
 }
 
+/*
+ * Some rmap walk that needs to find all ptes/hugepmds without false
+ * negatives (like migrate and split_huge_page) running concurrent
+ * with operations that copy or move pagetables (like mremap() and
+ * fork()) to be safe. They depend on the anon_vma "same_anon_vma"
+ * list to be in a certain order: the dst_vma must be placed after the
+ * src_vma in the list. This is always guaranteed by fork() but
+ * mremap() needs to call this function to enforce it in case the
+ * dst_vma isn't newly allocated and chained with the anon_vma_clone()
+ * function but just an extension of a pre-existing vma through
+ * vma_merge.
+ *
+ * NOTE: the same_anon_vma list can still be changed by other
+ * processes while mremap runs because mremap doesn't hold the
+ * anon_vma mutex to prevent modifications to the list while it
+ * runs. All we need to enforce is that the relative order of this
+ * process vmas isn't changing (we don't care about other vmas
+ * order). Each vma corresponds to an anon_vma_chain structure so
+ * there's no risk that other processes calling anon_vma_moveto_tail()
+ * and changing the same_anon_vma list under mremap() will screw with
+ * the relative order of this process vmas in the list, because we
+ * they can't alter the order of any vma that belongs to this
+ * process. And there can't be another anon_vma_moveto_tail() running
+ * concurrently with mremap() coming from this process because we hold
+ * the mmap_sem for the whole mremap(). fork() ordering dependency
+ * also shouldn't be affected because fork() only cares that the
+ * parent vmas are placed in the list before the child vmas and
+ * anon_vma_moveto_tail() won't reorder vmas from either the fork()
+ * parent or child.
+ */
+void anon_vma_moveto_tail(struct vm_area_struct *dst)
+{
+	struct anon_vma_chain *pavc;
+	struct anon_vma *root = NULL;
+
+	list_for_each_entry_reverse(pavc, &dst->anon_vma_chain, same_vma) {
+		struct anon_vma *anon_vma = pavc->anon_vma;
+		VM_BUG_ON(pavc->vma != dst);
+		root = lock_anon_vma_root(root, anon_vma);
+		list_del(&pavc->same_anon_vma);
+		list_add_tail(&pavc->same_anon_vma, &anon_vma->head);
+	}
+	unlock_anon_vma_root(root);
+}
+
 /*
  * Attach vma to its own anon_vma, as well as to the anon_vmas that
  * the corresponding VMA in the parent process is attached to.
-- 
cgit v1.2.3


From fcfb4dcc9698f932836aa63ba0d82e7dbd300fb3 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 10 Jan 2012 15:08:21 -0800
Subject: mm/mempolicy.c: mpol_equal(): use bool

mpol_equal() logically returns a boolean.  Use a bool type to slightly
improve readability.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Stephen Wilson <wilsons@start.ca>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h | 10 +++++-----
 mm/mempolicy.c            | 14 +++++++-------
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 7978eec1b7d9..7c727a90d70d 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -164,11 +164,11 @@ static inline void mpol_get(struct mempolicy *pol)
 		atomic_inc(&pol->refcnt);
 }
 
-extern int __mpol_equal(struct mempolicy *a, struct mempolicy *b);
-static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
+extern bool __mpol_equal(struct mempolicy *a, struct mempolicy *b);
+static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
 	if (a == b)
-		return 1;
+		return true;
 	return __mpol_equal(a, b);
 }
 
@@ -257,9 +257,9 @@ static inline int vma_migratable(struct vm_area_struct *vma)
 
 struct mempolicy {};
 
-static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
+static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
-	return 1;
+	return true;
 }
 
 static inline void mpol_put(struct mempolicy *p)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c3fdbcb17658..e3d58f088466 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1983,28 +1983,28 @@ struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol,
 }
 
 /* Slow path of a mempolicy comparison */
-int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
+bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
 	if (!a || !b)
-		return 0;
+		return false;
 	if (a->mode != b->mode)
-		return 0;
+		return false;
 	if (a->flags != b->flags)
-		return 0;
+		return false;
 	if (mpol_store_user_nodemask(a))
 		if (!nodes_equal(a->w.user_nodemask, b->w.user_nodemask))
-			return 0;
+			return false;
 
 	switch (a->mode) {
 	case MPOL_BIND:
 		/* Fall through */
 	case MPOL_INTERLEAVE:
-		return nodes_equal(a->v.nodes, b->v.nodes);
+		return !!nodes_equal(a->v.nodes, b->v.nodes);
 	case MPOL_PREFERRED:
 		return a->v.preferred_node == b->v.preferred_node;
 	default:
 		BUG();
-		return 0;
+		return false;
 	}
 }
 
-- 
cgit v1.2.3


From a6d511e5155406cd214d3af3ff9cffc69548b006 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 10 Jan 2012 15:09:40 -0800
Subject: leds: add driver for TCA6507 LED controller

TI's TCA6507 is the LED driver in the GTA04 Openmoko motherboard.  The
driver provides full support for brightness levels and hardware blinking.

This driver can drive each of 7 outputs as an LED or a GPIO output,
and provides hardware-assist blinking.

[akpm@linux-foundation.org: fix __mod_i2c_device_table alias]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: NeilBrown <neilb@suse.de>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/leds/Kconfig         |   8 +
 drivers/leds/Makefile        |   1 +
 drivers/leds/leds-tca6507.c  | 779 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/leds-tca6507.h |  34 ++
 4 files changed, 822 insertions(+)
 create mode 100644 drivers/leds/leds-tca6507.c
 create mode 100644 include/linux/leds-tca6507.h

(limited to 'include/linux')

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 1b75a56ebd08..897a77dfa9d7 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -388,6 +388,14 @@ config LEDS_RENESAS_TPU
 	  pin function. The latter to support brightness control.
 	  Brightness control is supported but hardware blinking is not.
 
+config LEDS_TCA6507
+	tristate "LED Support for TCA6507 I2C chip"
+	depends on LEDS_CLASS && I2C
+	help
+	  This option enables support for LEDs connected to TC6507
+	  LED driver chips accessed via the I2C bus.
+	  Driver support brightness control and hardware-assisted blinking.
+
 config LEDS_TRIGGERS
 	bool "LED Trigger support"
 	depends on LEDS_CLASS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index e4f6bf568880..5c9dc4b000d5 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_LEDS_GPIO)			+= leds-gpio.o
 obj-$(CONFIG_LEDS_LP3944)		+= leds-lp3944.o
 obj-$(CONFIG_LEDS_LP5521)		+= leds-lp5521.o
 obj-$(CONFIG_LEDS_LP5523)		+= leds-lp5523.o
+obj-$(CONFIG_LEDS_TCA6507)		+= leds-tca6507.o
 obj-$(CONFIG_LEDS_CLEVO_MAIL)		+= leds-clevo-mail.o
 obj-$(CONFIG_LEDS_HP6XX)		+= leds-hp6xx.o
 obj-$(CONFIG_LEDS_FSG)			+= leds-fsg.o
diff --git a/drivers/leds/leds-tca6507.c b/drivers/leds/leds-tca6507.c
new file mode 100644
index 000000000000..133f89fb7071
--- /dev/null
+++ b/drivers/leds/leds-tca6507.c
@@ -0,0 +1,779 @@
+/*
+ * leds-tca6507
+ *
+ * The TCA6507 is a programmable LED controller that can drive 7
+ * separate lines either by holding them low, or by pulsing them
+ * with modulated width.
+ * The modulation can be varied in a simple pattern to produce a blink or
+ * double-blink.
+ *
+ * This driver can configure each line either as a 'GPIO' which is out-only
+ * (no pull-up) or as an LED with variable brightness and hardware-assisted
+ * blinking.
+ *
+ * Apart from OFF and ON there are three programmable brightness levels which
+ * can be programmed from 0 to 15 and indicate how many 500usec intervals in
+ * each 8msec that the led is 'on'.  The levels are named MASTER, BANK0 and
+ * BANK1.
+ *
+ * There are two different blink rates that can be programmed, each with
+ * separate time for rise, on, fall, off and second-off.  Thus if 3 or more
+ * different non-trivial rates are required, software must be used for the extra
+ * rates. The two different blink rates must align with the two levels BANK0 and
+ * BANK1.
+ * This driver does not support double-blink so 'second-off' always matches
+ * 'off'.
+ *
+ * Only 16 different times can be programmed in a roughly logarithmic scale from
+ * 64ms to 16320ms.  To be precise the possible times are:
+ *    0, 64, 128, 192, 256, 384, 512, 768,
+ *    1024, 1536, 2048, 3072, 4096, 5760, 8128, 16320
+ *
+ * Times that cannot be closely matched with these must be
+ * handled in software.  This driver allows 12.5% error in matching.
+ *
+ * This driver does not allow rise/fall rates to be set explicitly.  When trying
+ * to match a given 'on' or 'off' period, an appropriate pair of 'change' and
+ * 'hold' times are chosen to get a close match.  If the target delay is even,
+ * the 'change' number will be the smaller; if odd, the 'hold' number will be
+ * the smaller.
+
+ * Choosing pairs of delays with 12.5% errors allows us to match delays in the
+ * ranges: 56-72, 112-144, 168-216, 224-27504, 28560-36720.
+ * 26% of the achievable sums can be matched by multiple pairings. For example
+ * 1536 == 1536+0, 1024+512, or 768+768.  This driver will always choose the
+ * pairing with the least maximum - 768+768 in this case.  Other pairings are
+ * not available.
+ *
+ * Access to the 3 levels and 2 blinks are on a first-come, first-served basis.
+ * Access can be shared by multiple leds if they have the same level and
+ * either same blink rates, or some don't blink.
+ * When a led changes, it relinquishes access and tries again, so it might
+ * lose access to hardware blink.
+ * If a blink engine cannot be allocated, software blink is used.
+ * If the desired brightness cannot be allocated, the closest available non-zero
+ * brightness is used.  As 'full' is always available, the worst case would be
+ * to have two different blink rates at '1', with Max at '2', then other leds
+ * will have to choose between '2' and '16'.  Hopefully this is not likely.
+ *
+ * Each bank (BANK0 and BANK1) has two usage counts - LEDs using the brightness
+ * and LEDs using the blink.  It can only be reprogrammed when the appropriate
+ * counter is zero.  The MASTER level has a single usage count.
+ *
+ * Each Led has programmable 'on' and 'off' time as milliseconds.  With each
+ * there is a flag saying if it was explicitly requested or defaulted.
+ * Similarly the banks know if each time was explicit or a default.  Defaults
+ * are permitted to be changed freely - they are not recognised when matching.
+ *
+ *
+ * An led-tca6507 device must be provided with platform data.  This data
+ * lists for each output: the name, default trigger, and whether the signal
+ * is being used as a GPiO rather than an led.  'struct led_plaform_data'
+ * is used for this.  If 'name' is NULL, the output isn't used.  If 'flags'
+ * is TCA6507_MAKE_CPIO, the output is a GPO.
+ * The "struct led_platform_data" can be embedded in a
+ * "struct tca6507_platform_data" which adds a 'gpio_base' for the GPiOs,
+ * and a 'setup' callback which is called once the GPiOs are available.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/leds.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <linux/workqueue.h>
+#include <linux/leds-tca6507.h>
+
+/* LED select registers determine the source that drives LED outputs */
+#define TCA6507_LS_LED_OFF	0x0	/* Output HI-Z (off) */
+#define TCA6507_LS_LED_OFF1	0x1	/* Output HI-Z (off) - not used */
+#define TCA6507_LS_LED_PWM0	0x2	/* Output LOW with Bank0 rate */
+#define TCA6507_LS_LED_PWM1	0x3	/* Output LOW with Bank1 rate */
+#define TCA6507_LS_LED_ON	0x4	/* Output LOW (on) */
+#define TCA6507_LS_LED_MIR	0x5	/* Output LOW with Master Intensity */
+#define TCA6507_LS_BLINK0	0x6	/* Blink at Bank0 rate */
+#define TCA6507_LS_BLINK1	0x7	/* Blink at Bank1 rate */
+
+enum {
+	BANK0,
+	BANK1,
+	MASTER,
+};
+static int bank_source[3] = {
+	TCA6507_LS_LED_PWM0,
+	TCA6507_LS_LED_PWM1,
+	TCA6507_LS_LED_MIR,
+};
+static int blink_source[2] = {
+	TCA6507_LS_BLINK0,
+	TCA6507_LS_BLINK1,
+};
+
+/* PWM registers */
+#define	TCA6507_REG_CNT			11
+
+/*
+ * 0x00, 0x01, 0x02 encode the TCA6507_LS_* values, each output
+ * owns one bit in each register
+ */
+#define	TCA6507_FADE_ON			0x03
+#define	TCA6507_FULL_ON			0x04
+#define	TCA6507_FADE_OFF		0x05
+#define	TCA6507_FIRST_OFF		0x06
+#define	TCA6507_SECOND_OFF		0x07
+#define	TCA6507_MAX_INTENSITY		0x08
+#define	TCA6507_MASTER_INTENSITY	0x09
+#define	TCA6507_INITIALIZE		0x0A
+
+#define	INIT_CODE			0x8
+
+#define TIMECODES 16
+static int time_codes[TIMECODES] = {
+	0, 64, 128, 192, 256, 384, 512, 768,
+	1024, 1536, 2048, 3072, 4096, 5760, 8128, 16320
+};
+
+/* Convert an led.brightness level (0..255) to a TCA6507 level (0..15) */
+static inline int TO_LEVEL(int brightness)
+{
+	return brightness >> 4;
+}
+
+/* ...and convert back */
+static inline int TO_BRIGHT(int level)
+{
+	if (level)
+		return (level << 4) | 0xf;
+	return 0;
+}
+
+#define NUM_LEDS 7
+struct tca6507_chip {
+	int			reg_set;	/* One bit per register where
+						 * a '1' means the register
+						 * should be written */
+	u8			reg_file[TCA6507_REG_CNT];
+	/* Bank 2 is Master Intensity and doesn't use times */
+	struct bank {
+		int level;
+		int ontime, offtime;
+		int on_dflt, off_dflt;
+		int time_use, level_use;
+	} bank[3];
+	struct i2c_client	*client;
+	struct work_struct	work;
+	spinlock_t		lock;
+
+	struct tca6507_led {
+		struct tca6507_chip	*chip;
+		struct led_classdev	led_cdev;
+		int			num;
+		int			ontime, offtime;
+		int			on_dflt, off_dflt;
+		int			bank;	/* Bank used, or -1 */
+		int			blink;	/* Set if hardware-blinking */
+	} leds[NUM_LEDS];
+#ifdef CONFIG_GPIOLIB
+	struct gpio_chip		gpio;
+	const char			*gpio_name[NUM_LEDS];
+	int				gpio_map[NUM_LEDS];
+#endif
+};
+
+static const struct i2c_device_id tca6507_id[] = {
+	{ "tca6507" },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, tca6507_id);
+
+static int choose_times(int msec, int *c1p, int *c2p)
+{
+	/*
+	 * Choose two timecodes which add to 'msec' as near as possible.
+	 * The first returned is the 'on' or 'off' time.  The second is to be
+	 * used as a 'fade-on' or 'fade-off' time.  If 'msec' is even,
+	 * the first will not be smaller than the second.  If 'msec' is odd,
+	 * the first will not be larger than the second.
+	 * If we cannot get a sum within 1/8 of 'msec' fail with -EINVAL,
+	 * otherwise return the sum that was achieved, plus 1 if the first is
+	 * smaller.
+	 * If two possibilities are equally good (e.g. 512+0, 256+256), choose
+	 * the first pair so there is more change-time visible (i.e. it is
+	 * softer).
+	 */
+	int c1, c2;
+	int tmax = msec * 9 / 8;
+	int tmin = msec * 7 / 8;
+	int diff = 65536;
+
+	/* We start at '1' to ensure we never even think of choosing a
+	 * total time of '0'.
+	 */
+	for (c1 = 1; c1 < TIMECODES; c1++) {
+		int t = time_codes[c1];
+		if (t*2 < tmin)
+			continue;
+		if (t > tmax)
+			break;
+		for (c2 = 0; c2 <= c1; c2++) {
+			int tt = t + time_codes[c2];
+			int d;
+			if (tt < tmin)
+				continue;
+			if (tt > tmax)
+				break;
+			/* This works! */
+			d = abs(msec - tt);
+			if (d >= diff)
+				continue;
+			/* Best yet */
+			*c1p = c1;
+			*c2p = c2;
+			diff = d;
+			if (d == 0)
+				return msec;
+		}
+	}
+	if (diff < 65536) {
+		int actual;
+		if (msec & 1) {
+			c1 = *c2p;
+			*c2p = *c1p;
+			*c1p = c1;
+		}
+		actual = time_codes[*c1p] + time_codes[*c2p];
+		if (*c1p < *c2p)
+			return actual + 1;
+		else
+			return actual;
+	}
+	/* No close match */
+	return -EINVAL;
+}
+
+/*
+ * Update the register file with the appropriate 3-bit state for
+ * the given led.
+ */
+static void set_select(struct tca6507_chip *tca, int led, int val)
+{
+	int mask = (1 << led);
+	int bit;
+
+	for (bit = 0; bit < 3; bit++) {
+		int n = tca->reg_file[bit] & ~mask;
+		if (val & (1 << bit))
+			n |= mask;
+		if (tca->reg_file[bit] != n) {
+			tca->reg_file[bit] = n;
+			tca->reg_set |= (1 << bit);
+		}
+	}
+}
+
+/* Update the register file with the appropriate 4-bit code for
+ * one bank or other.  This can be used for timers, for levels, or
+ * for initialisation.
+ */
+static void set_code(struct tca6507_chip *tca, int reg, int bank, int new)
+{
+	int mask = 0xF;
+	int n;
+	if (bank) {
+		mask <<= 4;
+		new <<= 4;
+	}
+	n = tca->reg_file[reg] & ~mask;
+	n |= new;
+	if (tca->reg_file[reg] != n) {
+		tca->reg_file[reg] = n;
+		tca->reg_set |= 1 << reg;
+	}
+}
+
+/* Update brightness level. */
+static void set_level(struct tca6507_chip *tca, int bank, int level)
+{
+	switch (bank) {
+	case BANK0:
+	case BANK1:
+		set_code(tca, TCA6507_MAX_INTENSITY, bank, level);
+		break;
+	case MASTER:
+		set_code(tca, TCA6507_MASTER_INTENSITY, 0, level);
+		break;
+	}
+	tca->bank[bank].level = level;
+}
+
+/* Record all relevant time code for a given bank */
+static void set_times(struct tca6507_chip *tca, int bank)
+{
+	int c1, c2;
+	int result;
+
+	result = choose_times(tca->bank[bank].ontime, &c1, &c2);
+	dev_dbg(&tca->client->dev,
+		"Chose on  times %d(%d) %d(%d) for %dms\n", c1, time_codes[c1],
+		c2, time_codes[c2], tca->bank[bank].ontime);
+	set_code(tca, TCA6507_FADE_ON, bank, c2);
+	set_code(tca, TCA6507_FULL_ON, bank, c1);
+	tca->bank[bank].ontime = result;
+
+	result = choose_times(tca->bank[bank].offtime, &c1, &c2);
+	dev_dbg(&tca->client->dev,
+		"Chose off times %d(%d) %d(%d) for %dms\n", c1, time_codes[c1],
+		c2, time_codes[c2], tca->bank[bank].offtime);
+	set_code(tca, TCA6507_FADE_OFF, bank, c2);
+	set_code(tca, TCA6507_FIRST_OFF, bank, c1);
+	set_code(tca, TCA6507_SECOND_OFF, bank, c1);
+	tca->bank[bank].offtime = result;
+
+	set_code(tca, TCA6507_INITIALIZE, bank, INIT_CODE);
+}
+
+/* Write all needed register of tca6507 */
+
+static void tca6507_work(struct work_struct *work)
+{
+	struct tca6507_chip *tca = container_of(work, struct tca6507_chip,
+						work);
+	struct i2c_client *cl = tca->client;
+	int set;
+	u8 file[TCA6507_REG_CNT];
+	int r;
+
+	spin_lock_irq(&tca->lock);
+	set = tca->reg_set;
+	memcpy(file, tca->reg_file, TCA6507_REG_CNT);
+	tca->reg_set = 0;
+	spin_unlock_irq(&tca->lock);
+
+	for (r = 0; r < TCA6507_REG_CNT; r++)
+		if (set & (1<<r))
+			i2c_smbus_write_byte_data(cl, r, file[r]);
+}
+
+static void led_release(struct tca6507_led *led)
+{
+	/* If led owns any resource, release it. */
+	struct tca6507_chip *tca = led->chip;
+	if (led->bank >= 0) {
+		struct bank *b = tca->bank + led->bank;
+		if (led->blink)
+			b->time_use--;
+		b->level_use--;
+	}
+	led->blink = 0;
+	led->bank = -1;
+}
+
+static int led_prepare(struct tca6507_led *led)
+{
+	/* Assign this led to a bank, configuring that bank if necessary. */
+	int level = TO_LEVEL(led->led_cdev.brightness);
+	struct tca6507_chip *tca = led->chip;
+	int c1, c2;
+	int i;
+	struct bank *b;
+	int need_init = 0;
+
+	led->led_cdev.brightness = TO_BRIGHT(level);
+	if (level == 0) {
+		set_select(tca, led->num, TCA6507_LS_LED_OFF);
+		return 0;
+	}
+
+	if (led->ontime == 0 || led->offtime == 0) {
+		/*
+		 * Just set the brightness, choosing first usable bank.
+		 * If none perfect, choose best.
+		 * Count backwards so we check MASTER bank first
+		 * to avoid wasting a timer.
+		 */
+		int best = -1;/* full-on */
+		int diff = 15-level;
+
+		if (level == 15) {
+			set_select(tca, led->num, TCA6507_LS_LED_ON);
+			return 0;
+		}
+
+		for (i = MASTER; i >= BANK0; i--) {
+			int d;
+			if (tca->bank[i].level == level ||
+			    tca->bank[i].level_use == 0) {
+				best = i;
+				break;
+			}
+			d = abs(level - tca->bank[i].level);
+			if (d < diff) {
+				diff = d;
+				best = i;
+			}
+		}
+		if (best == -1) {
+			/* Best brightness is full-on */
+			set_select(tca, led->num, TCA6507_LS_LED_ON);
+			led->led_cdev.brightness = LED_FULL;
+			return 0;
+		}
+
+		if (!tca->bank[best].level_use)
+			set_level(tca, best, level);
+
+		tca->bank[best].level_use++;
+		led->bank = best;
+		set_select(tca, led->num, bank_source[best]);
+		led->led_cdev.brightness = TO_BRIGHT(tca->bank[best].level);
+		return 0;
+	}
+
+	/*
+	 * We have on/off time so we need to try to allocate a timing bank.
+	 * First check if times are compatible with hardware and give up if
+	 * not.
+	 */
+	if (choose_times(led->ontime, &c1, &c2) < 0)
+		return -EINVAL;
+	if (choose_times(led->offtime, &c1, &c2) < 0)
+		return -EINVAL;
+
+	for (i = BANK0; i <= BANK1; i++) {
+		if (tca->bank[i].level_use == 0)
+			/* not in use - it is ours! */
+			break;
+		if (tca->bank[i].level != level)
+			/* Incompatible level - skip */
+			/* FIX: if timer matches we maybe should consider
+			 * this anyway...
+			 */
+			continue;
+
+		if (tca->bank[i].time_use == 0)
+			/* Timer not in use, and level matches - use it */
+			break;
+
+		if (!(tca->bank[i].on_dflt ||
+		      led->on_dflt ||
+		      tca->bank[i].ontime == led->ontime))
+			/* on time is incompatible */
+			continue;
+
+		if (!(tca->bank[i].off_dflt ||
+		      led->off_dflt ||
+		      tca->bank[i].offtime == led->offtime))
+			/* off time is incompatible */
+			continue;
+
+		/* looks like a suitable match */
+		break;
+	}
+
+	if (i > BANK1)
+		/* Nothing matches - how sad */
+		return -EINVAL;
+
+	b = &tca->bank[i];
+	if (b->level_use == 0)
+		set_level(tca, i, level);
+	b->level_use++;
+	led->bank = i;
+
+	if (b->on_dflt ||
+	    !led->on_dflt ||
+	    b->time_use == 0) {
+		b->ontime = led->ontime;
+		b->on_dflt = led->on_dflt;
+		need_init = 1;
+	}
+
+	if (b->off_dflt ||
+	    !led->off_dflt ||
+	    b->time_use == 0) {
+		b->offtime = led->offtime;
+		b->off_dflt = led->off_dflt;
+		need_init = 1;
+	}
+
+	if (need_init)
+		set_times(tca, i);
+
+	led->ontime = b->ontime;
+	led->offtime = b->offtime;
+
+	b->time_use++;
+	led->blink = 1;
+	led->led_cdev.brightness = TO_BRIGHT(b->level);
+	set_select(tca, led->num, blink_source[i]);
+	return 0;
+}
+
+static int led_assign(struct tca6507_led *led)
+{
+	struct tca6507_chip *tca = led->chip;
+	int err;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tca->lock, flags);
+	led_release(led);
+	err = led_prepare(led);
+	if (err) {
+		/*
+		 * Can only fail on timer setup.  In that case we need to
+		 * re-establish as steady level.
+		 */
+		led->ontime = 0;
+		led->offtime = 0;
+		led_prepare(led);
+	}
+	spin_unlock_irqrestore(&tca->lock, flags);
+
+	if (tca->reg_set)
+		schedule_work(&tca->work);
+	return err;
+}
+
+static void tca6507_brightness_set(struct led_classdev *led_cdev,
+				   enum led_brightness brightness)
+{
+	struct tca6507_led *led = container_of(led_cdev, struct tca6507_led,
+					       led_cdev);
+	led->led_cdev.brightness = brightness;
+	led->ontime = 0;
+	led->offtime = 0;
+	led_assign(led);
+}
+
+static int tca6507_blink_set(struct led_classdev *led_cdev,
+			     unsigned long *delay_on,
+			     unsigned long *delay_off)
+{
+	struct tca6507_led *led = container_of(led_cdev, struct tca6507_led,
+					       led_cdev);
+
+	if (*delay_on == 0)
+		led->on_dflt = 1;
+	else if (delay_on != &led_cdev->blink_delay_on)
+		led->on_dflt = 0;
+	led->ontime = *delay_on;
+
+	if (*delay_off == 0)
+		led->off_dflt = 1;
+	else if (delay_off != &led_cdev->blink_delay_off)
+		led->off_dflt = 0;
+	led->offtime = *delay_off;
+
+	if (led->ontime == 0)
+		led->ontime = 512;
+	if (led->offtime == 0)
+		led->offtime = 512;
+
+	if (led->led_cdev.brightness == LED_OFF)
+		led->led_cdev.brightness = LED_FULL;
+	if (led_assign(led) < 0) {
+		led->ontime = 0;
+		led->offtime = 0;
+		led->led_cdev.brightness = LED_OFF;
+		return -EINVAL;
+	}
+	*delay_on = led->ontime;
+	*delay_off = led->offtime;
+	return 0;
+}
+
+#ifdef CONFIG_GPIOLIB
+static void tca6507_gpio_set_value(struct gpio_chip *gc,
+				   unsigned offset, int val)
+{
+	struct tca6507_chip *tca = container_of(gc, struct tca6507_chip, gpio);
+	unsigned long flags;
+
+	spin_lock_irqsave(&tca->lock, flags);
+	/*
+	 * 'OFF' is floating high, and 'ON' is pulled down, so it has the
+	 * inverse sense of 'val'.
+	 */
+	set_select(tca, tca->gpio_map[offset],
+		   val ? TCA6507_LS_LED_OFF : TCA6507_LS_LED_ON);
+	spin_unlock_irqrestore(&tca->lock, flags);
+	if (tca->reg_set)
+		schedule_work(&tca->work);
+}
+
+static int tca6507_gpio_direction_output(struct gpio_chip *gc,
+					  unsigned offset, int val)
+{
+	tca6507_gpio_set_value(gc, offset, val);
+	return 0;
+}
+
+static int tca6507_probe_gpios(struct i2c_client *client,
+			       struct tca6507_chip *tca,
+			       struct tca6507_platform_data *pdata)
+{
+	int err;
+	int i = 0;
+	int gpios = 0;
+
+	for (i = 0; i < NUM_LEDS; i++)
+		if (pdata->leds.leds[i].name && pdata->leds.leds[i].flags) {
+			/* Configure as a gpio */
+			tca->gpio_name[gpios] = pdata->leds.leds[i].name;
+			tca->gpio_map[gpios] = i;
+			gpios++;
+		}
+
+	if (!gpios)
+		return 0;
+
+	tca->gpio.label = "gpio-tca6507";
+	tca->gpio.names = tca->gpio_name;
+	tca->gpio.ngpio = gpios;
+	tca->gpio.base = pdata->gpio_base;
+	tca->gpio.owner = THIS_MODULE;
+	tca->gpio.direction_output = tca6507_gpio_direction_output;
+	tca->gpio.set = tca6507_gpio_set_value;
+	tca->gpio.dev = &client->dev;
+	err = gpiochip_add(&tca->gpio);
+	if (err) {
+		tca->gpio.ngpio = 0;
+		return err;
+	}
+	if (pdata->setup)
+		pdata->setup(tca->gpio.base, tca->gpio.ngpio);
+	return 0;
+}
+
+static void tca6507_remove_gpio(struct tca6507_chip *tca)
+{
+	if (tca->gpio.ngpio) {
+		int err = gpiochip_remove(&tca->gpio);
+		dev_err(&tca->client->dev, "%s failed, %d\n",
+			"gpiochip_remove()", err);
+	}
+}
+#else /* CONFIG_GPIOLIB */
+static int tca6507_probe_gpios(struct i2c_client *client,
+			       struct tca6507_chip *tca,
+			       struct tca6507_platform_data *pdata)
+{
+	return 0;
+}
+static void tca6507_remove_gpio(struct tca6507_chip *tca)
+{
+}
+#endif /* CONFIG_GPIOLIB */
+
+static int __devinit tca6507_probe(struct i2c_client *client,
+				   const struct i2c_device_id *id)
+{
+	struct tca6507_chip *tca;
+	struct i2c_adapter *adapter;
+	struct tca6507_platform_data *pdata;
+	int err;
+	int i = 0;
+
+	adapter = to_i2c_adapter(client->dev.parent);
+	pdata = client->dev.platform_data;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C))
+		return -EIO;
+
+	if (!pdata || pdata->leds.num_leds != NUM_LEDS) {
+		dev_err(&client->dev, "Need %d entries in platform-data list\n",
+			NUM_LEDS);
+		return -ENODEV;
+	}
+	err = -ENOMEM;
+	tca = kzalloc(sizeof(*tca), GFP_KERNEL);
+	if (!tca)
+		goto exit;
+
+	tca->client = client;
+	INIT_WORK(&tca->work, tca6507_work);
+	spin_lock_init(&tca->lock);
+	i2c_set_clientdata(client, tca);
+
+	for (i = 0; i < NUM_LEDS; i++) {
+		struct tca6507_led *l = tca->leds + i;
+
+		l->chip = tca;
+		l->num = i;
+		if (pdata->leds.leds[i].name && !pdata->leds.leds[i].flags) {
+			l->led_cdev.name = pdata->leds.leds[i].name;
+			l->led_cdev.default_trigger
+				= pdata->leds.leds[i].default_trigger;
+			l->led_cdev.brightness_set = tca6507_brightness_set;
+			l->led_cdev.blink_set = tca6507_blink_set;
+			l->bank = -1;
+			err = led_classdev_register(&client->dev,
+						    &l->led_cdev);
+			if (err < 0)
+				goto exit;
+		}
+	}
+	err = tca6507_probe_gpios(client, tca, pdata);
+	if (err)
+		goto exit;
+	/* set all registers to known state - zero */
+	tca->reg_set = 0x7f;
+	schedule_work(&tca->work);
+
+	return 0;
+exit:
+	while (i--)
+		if (tca->leds[i].led_cdev.name)
+			led_classdev_unregister(&tca->leds[i].led_cdev);
+	cancel_work_sync(&tca->work);
+	i2c_set_clientdata(client, NULL);
+	kfree(tca);
+	return err;
+}
+
+static int __devexit tca6507_remove(struct i2c_client *client)
+{
+	int i;
+	struct tca6507_chip *tca = i2c_get_clientdata(client);
+	struct tca6507_led *tca_leds = tca->leds;
+
+	for (i = 0; i < NUM_LEDS; i++) {
+		if (tca_leds[i].led_cdev.name)
+			led_classdev_unregister(&tca_leds[i].led_cdev);
+	}
+	tca6507_remove_gpio(tca);
+	cancel_work_sync(&tca->work);
+	i2c_set_clientdata(client, NULL);
+	kfree(tca);
+
+	return 0;
+}
+
+static struct i2c_driver tca6507_driver = {
+	.driver   = {
+		.name    = "leds-tca6507",
+		.owner   = THIS_MODULE,
+	},
+	.probe    = tca6507_probe,
+	.remove   = __devexit_p(tca6507_remove),
+	.id_table = tca6507_id,
+};
+
+static int __init tca6507_leds_init(void)
+{
+	return i2c_add_driver(&tca6507_driver);
+}
+
+static void __exit tca6507_leds_exit(void)
+{
+	i2c_del_driver(&tca6507_driver);
+}
+
+module_init(tca6507_leds_init);
+module_exit(tca6507_leds_exit);
+
+MODULE_AUTHOR("NeilBrown <neilb@suse.de>");
+MODULE_DESCRIPTION("TCA6507 LED/GPO driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/leds-tca6507.h b/include/linux/leds-tca6507.h
new file mode 100644
index 000000000000..dcabf4fa2aef
--- /dev/null
+++ b/include/linux/leds-tca6507.h
@@ -0,0 +1,34 @@
+/*
+ * TCA6507 LED chip driver.
+ *
+ * Copyright (C) 2011 Neil Brown <neil@brown.name>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef __LINUX_TCA6507_H
+#define __LINUX_TCA6507_H
+#include <linux/leds.h>
+
+struct tca6507_platform_data {
+	struct led_platform_data leds;
+#ifdef CONFIG_GPIOLIB
+	int gpio_base;
+	void (*setup)(unsigned gpio_base, unsigned ngpio);
+#endif
+};
+
+#define	TCA6507_MAKE_GPIO 1
+#endif /* __LINUX_TCA6507_H*/
-- 
cgit v1.2.3


From 5e6292c0f28f03dfdb8ea3d685f0b838a23bfba4 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Tue, 10 Jan 2012 15:11:17 -0800
Subject: signal: add block_sigmask() for adding sigmask to current->blocked

Abstract the code sequence for adding a signal handler's sa_mask to
current->blocked because the sequence is identical for all architectures.
Furthermore, in the past some architectures actually got this code wrong,
so introduce a wrapper that all architectures can use.

Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/signal.c |  6 +-----
 include/linux/signal.h   |  1 +
 kernel/signal.c          | 21 +++++++++++++++++++++
 3 files changed, 23 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 54ddaeb221c1..46a01bdc27e2 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -682,7 +682,6 @@ static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 		struct pt_regs *regs)
 {
-	sigset_t blocked;
 	int ret;
 
 	/* Are we from a system call? */
@@ -733,10 +732,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	 */
 	regs->flags &= ~X86_EFLAGS_TF;
 
-	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
-	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&blocked, sig);
-	set_current_blocked(&blocked);
+	block_sigmask(ka, sig);
 
 	tracehook_signal_handler(sig, info, ka, regs,
 				 test_thread_flag(TIF_SINGLESTEP));
diff --git a/include/linux/signal.h b/include/linux/signal.h
index a822300a253b..7987ce74874b 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -254,6 +254,7 @@ extern void set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
 
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
+extern void block_sigmask(struct k_sigaction *ka, int signr);
 extern void exit_signals(struct task_struct *tsk);
 
 extern struct kmem_cache *sighand_cachep;
diff --git a/kernel/signal.c b/kernel/signal.c
index bb0efa5705ed..d532f1709fbf 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2318,6 +2318,27 @@ relock:
 	return signr;
 }
 
+/**
+ * block_sigmask - add @ka's signal mask to current->blocked
+ * @ka: action for @signr
+ * @signr: signal that has been successfully delivered
+ *
+ * This function should be called when a signal has succesfully been
+ * delivered. It adds the mask of signals for @ka to current->blocked
+ * so that they are blocked during the execution of the signal
+ * handler. In addition, @signr will be blocked unless %SA_NODEFER is
+ * set in @ka->sa.sa_flags.
+ */
+void block_sigmask(struct k_sigaction *ka, int signr)
+{
+	sigset_t blocked;
+
+	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
+		sigaddset(&blocked, signr);
+	set_current_blocked(&blocked);
+}
+
 /*
  * It could be that complete_signal() picked us to notify about the
  * group-wide signal. Other threads should be notified now to take
-- 
cgit v1.2.3


From 7773fbc54182a90cd248656619c7d33859e5f91d Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Tue, 10 Jan 2012 15:11:20 -0800
Subject: procfs: make proc_get_link to use dentry instead of inode

Prepare the ground for the next "map_files" patch which needs a name of a
link file to analyse.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vasiliy Kulikov <segoon@openwall.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c          | 20 ++++++++++----------
 include/linux/proc_fs.h |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1aab5fe05a1b..e31d95055c67 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -166,9 +166,9 @@ static int get_task_root(struct task_struct *task, struct path *root)
 	return result;
 }
 
-static int proc_cwd_link(struct inode *inode, struct path *path)
+static int proc_cwd_link(struct dentry *dentry, struct path *path)
 {
-	struct task_struct *task = get_proc_task(inode);
+	struct task_struct *task = get_proc_task(dentry->d_inode);
 	int result = -ENOENT;
 
 	if (task) {
@@ -183,9 +183,9 @@ static int proc_cwd_link(struct inode *inode, struct path *path)
 	return result;
 }
 
-static int proc_root_link(struct inode *inode, struct path *path)
+static int proc_root_link(struct dentry *dentry, struct path *path)
 {
-	struct task_struct *task = get_proc_task(inode);
+	struct task_struct *task = get_proc_task(dentry->d_inode);
 	int result = -ENOENT;
 
 	if (task) {
@@ -1456,13 +1456,13 @@ static const struct file_operations proc_pid_set_comm_operations = {
 	.release	= single_release,
 };
 
-static int proc_exe_link(struct inode *inode, struct path *exe_path)
+static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
 {
 	struct task_struct *task;
 	struct mm_struct *mm;
 	struct file *exe_file;
 
-	task = get_proc_task(inode);
+	task = get_proc_task(dentry->d_inode);
 	if (!task)
 		return -ENOENT;
 	mm = get_task_mm(task);
@@ -1492,7 +1492,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 	if (!proc_fd_access_allowed(inode))
 		goto out;
 
-	error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
+	error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path);
 out:
 	return ERR_PTR(error);
 }
@@ -1531,7 +1531,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
 	if (!proc_fd_access_allowed(inode))
 		goto out;
 
-	error = PROC_I(inode)->op.proc_get_link(inode, &path);
+	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
 	if (error)
 		goto out;
 
@@ -1823,9 +1823,9 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
 	return -ENOENT;
 }
 
-static int proc_fd_link(struct inode *inode, struct path *path)
+static int proc_fd_link(struct dentry *dentry, struct path *path)
 {
-	return proc_fd_info(inode, path, NULL);
+	return proc_fd_info(dentry->d_inode, path, NULL);
 }
 
 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 6d9e575519cc..85c507306239 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -253,7 +253,7 @@ extern const struct proc_ns_operations utsns_operations;
 extern const struct proc_ns_operations ipcns_operations;
 
 union proc_op {
-	int (*proc_get_link)(struct inode *, struct path *);
+	int (*proc_get_link)(struct dentry *, struct path *);
 	int (*proc_read)(struct task_struct *task, char *page);
 	int (*proc_show)(struct seq_file *m,
 		struct pid_namespace *ns, struct pid *pid,
-- 
cgit v1.2.3


From 640708a2cff7f81e246243b0073c66e6ece7e53e Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 10 Jan 2012 15:11:23 -0800
Subject: procfs: introduce the /proc/<pid>/map_files/ directory

This one behaves similarly to the /proc/<pid>/fd/ one - it contains
symlinks one for each mapping with file, the name of a symlink is
"vma->vm_start-vma->vm_end", the target is the file.  Opening a symlink
results in a file that point exactly to the same inode as them vma's one.

For example the ls -l of some arbitrary /proc/<pid>/map_files/

 | lr-x------ 1 root root 64 Aug 26 06:40 7f8f80403000-7f8f80404000 -> /lib64/libc-2.5.so
 | lr-x------ 1 root root 64 Aug 26 06:40 7f8f8061e000-7f8f80620000 -> /lib64/libselinux.so.1
 | lr-x------ 1 root root 64 Aug 26 06:40 7f8f80826000-7f8f80827000 -> /lib64/libacl.so.1.1.0
 | lr-x------ 1 root root 64 Aug 26 06:40 7f8f80a2f000-7f8f80a30000 -> /lib64/librt-2.5.so
 | lr-x------ 1 root root 64 Aug 26 06:40 7f8f80a30000-7f8f80a4c000 -> /lib64/ld-2.5.so

This *helps* checkpointing process in three ways:

1. When dumping a task mappings we do know exact file that is mapped
   by particular region.  We do this by opening
   /proc/$pid/map_files/$address symlink the way we do with file
   descriptors.

2. This also helps in determining which anonymous shared mappings are
   shared with each other by comparing the inodes of them.

3. When restoring a set of processes in case two of them has a mapping
   shared, we map the memory by the 1st one and then open its
   /proc/$pid/map_files/$address file and map it by the 2nd task.

Using /proc/$pid/maps for this is quite inconvenient since it brings
repeatable re-reading and reparsing for this text file which slows down
restore procedure significantly.  Also as being pointed in (3) it is a way
easier to use top level shared mapping in children as
/proc/$pid/map_files/$address when needed.

[akpm@linux-foundation.org: coding-style fixes]
[gorcunov@openvz.org: make map_files depend on CHECKPOINT_RESTORE]
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Reviewed-by: Vasiliy Kulikov <segoon@openwall.com>
Reviewed-by: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Tejun Heo <tj@kernel.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c     | 355 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h |  12 ++
 2 files changed, 367 insertions(+)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index e31d95055c67..4d755fed3ecb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -83,6 +83,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/fs_struct.h>
 #include <linux/slab.h>
+#include <linux/flex_array.h>
 #ifdef CONFIG_HARDWALL
 #include <asm/hardwall.h>
 #endif
@@ -134,6 +135,8 @@ struct pid_entry {
 		NULL, &proc_single_file_operations,	\
 		{ .proc_show = show } )
 
+static int proc_fd_permission(struct inode *inode, int mask);
+
 /*
  * Count the number of hardlinks for the pid_entry table, excluding the .
  * and .. links.
@@ -2046,6 +2049,355 @@ static const struct file_operations proc_fd_operations = {
 	.llseek		= default_llseek,
 };
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+
+/*
+ * dname_to_vma_addr - maps a dentry name into two unsigned longs
+ * which represent vma start and end addresses.
+ */
+static int dname_to_vma_addr(struct dentry *dentry,
+			     unsigned long *start, unsigned long *end)
+{
+	if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+	unsigned long vm_start, vm_end;
+	bool exact_vma_exists = false;
+	struct mm_struct *mm = NULL;
+	struct task_struct *task;
+	const struct cred *cred;
+	struct inode *inode;
+	int status = 0;
+
+	if (nd && nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	if (!capable(CAP_SYS_ADMIN)) {
+		status = -EACCES;
+		goto out_notask;
+	}
+
+	inode = dentry->d_inode;
+	task = get_proc_task(inode);
+	if (!task)
+		goto out_notask;
+
+	if (!ptrace_may_access(task, PTRACE_MODE_READ))
+		goto out;
+
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out;
+
+	if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
+		down_read(&mm->mmap_sem);
+		exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
+		up_read(&mm->mmap_sem);
+	}
+
+	mmput(mm);
+
+	if (exact_vma_exists) {
+		if (task_dumpable(task)) {
+			rcu_read_lock();
+			cred = __task_cred(task);
+			inode->i_uid = cred->euid;
+			inode->i_gid = cred->egid;
+			rcu_read_unlock();
+		} else {
+			inode->i_uid = 0;
+			inode->i_gid = 0;
+		}
+		security_task_to_inode(task, inode);
+		status = 1;
+	}
+
+out:
+	put_task_struct(task);
+
+out_notask:
+	if (status <= 0)
+		d_drop(dentry);
+
+	return status;
+}
+
+static const struct dentry_operations tid_map_files_dentry_operations = {
+	.d_revalidate	= map_files_d_revalidate,
+	.d_delete	= pid_delete_dentry,
+};
+
+static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
+{
+	unsigned long vm_start, vm_end;
+	struct vm_area_struct *vma;
+	struct task_struct *task;
+	struct mm_struct *mm;
+	int rc;
+
+	rc = -ENOENT;
+	task = get_proc_task(dentry->d_inode);
+	if (!task)
+		goto out;
+
+	mm = get_task_mm(task);
+	put_task_struct(task);
+	if (!mm)
+		goto out;
+
+	rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
+	if (rc)
+		goto out_mmput;
+
+	down_read(&mm->mmap_sem);
+	vma = find_exact_vma(mm, vm_start, vm_end);
+	if (vma && vma->vm_file) {
+		*path = vma->vm_file->f_path;
+		path_get(path);
+		rc = 0;
+	}
+	up_read(&mm->mmap_sem);
+
+out_mmput:
+	mmput(mm);
+out:
+	return rc;
+}
+
+struct map_files_info {
+	struct file	*file;
+	unsigned long	len;
+	unsigned char	name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
+};
+
+static struct dentry *
+proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
+			   struct task_struct *task, const void *ptr)
+{
+	const struct file *file = ptr;
+	struct proc_inode *ei;
+	struct inode *inode;
+
+	if (!file)
+		return ERR_PTR(-ENOENT);
+
+	inode = proc_pid_make_inode(dir->i_sb, task);
+	if (!inode)
+		return ERR_PTR(-ENOENT);
+
+	ei = PROC_I(inode);
+	ei->op.proc_get_link = proc_map_files_get_link;
+
+	inode->i_op = &proc_pid_link_inode_operations;
+	inode->i_size = 64;
+	inode->i_mode = S_IFLNK;
+
+	if (file->f_mode & FMODE_READ)
+		inode->i_mode |= S_IRUSR;
+	if (file->f_mode & FMODE_WRITE)
+		inode->i_mode |= S_IWUSR;
+
+	d_set_d_op(dentry, &tid_map_files_dentry_operations);
+	d_add(dentry, inode);
+
+	return NULL;
+}
+
+static struct dentry *proc_map_files_lookup(struct inode *dir,
+		struct dentry *dentry, struct nameidata *nd)
+{
+	unsigned long vm_start, vm_end;
+	struct vm_area_struct *vma;
+	struct task_struct *task;
+	struct dentry *result;
+	struct mm_struct *mm;
+
+	result = ERR_PTR(-EACCES);
+	if (!capable(CAP_SYS_ADMIN))
+		goto out;
+
+	result = ERR_PTR(-ENOENT);
+	task = get_proc_task(dir);
+	if (!task)
+		goto out;
+
+	result = ERR_PTR(-EACCES);
+	if (lock_trace(task))
+		goto out_put_task;
+
+	result = ERR_PTR(-ENOENT);
+	if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
+		goto out_unlock;
+
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out_unlock;
+
+	down_read(&mm->mmap_sem);
+	vma = find_exact_vma(mm, vm_start, vm_end);
+	if (!vma)
+		goto out_no_vma;
+
+	result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file);
+
+out_no_vma:
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+out_unlock:
+	unlock_trace(task);
+out_put_task:
+	put_task_struct(task);
+out:
+	return result;
+}
+
+static const struct inode_operations proc_map_files_inode_operations = {
+	.lookup		= proc_map_files_lookup,
+	.permission	= proc_fd_permission,
+	.setattr	= proc_setattr,
+};
+
+static int
+proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct dentry *dentry = filp->f_path.dentry;
+	struct inode *inode = dentry->d_inode;
+	struct vm_area_struct *vma;
+	struct task_struct *task;
+	struct mm_struct *mm;
+	ino_t ino;
+	int ret;
+
+	ret = -EACCES;
+	if (!capable(CAP_SYS_ADMIN))
+		goto out;
+
+	ret = -ENOENT;
+	task = get_proc_task(inode);
+	if (!task)
+		goto out;
+
+	ret = -EACCES;
+	if (lock_trace(task))
+		goto out_put_task;
+
+	ret = 0;
+	switch (filp->f_pos) {
+	case 0:
+		ino = inode->i_ino;
+		if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0)
+			goto out_unlock;
+		filp->f_pos++;
+	case 1:
+		ino = parent_ino(dentry);
+		if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
+			goto out_unlock;
+		filp->f_pos++;
+	default:
+	{
+		unsigned long nr_files, pos, i;
+		struct flex_array *fa = NULL;
+		struct map_files_info info;
+		struct map_files_info *p;
+
+		mm = get_task_mm(task);
+		if (!mm)
+			goto out_unlock;
+		down_read(&mm->mmap_sem);
+
+		nr_files = 0;
+
+		/*
+		 * We need two passes here:
+		 *
+		 *  1) Collect vmas of mapped files with mmap_sem taken
+		 *  2) Release mmap_sem and instantiate entries
+		 *
+		 * otherwise we get lockdep complained, since filldir()
+		 * routine might require mmap_sem taken in might_fault().
+		 */
+
+		for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
+			if (vma->vm_file && ++pos > filp->f_pos)
+				nr_files++;
+		}
+
+		if (nr_files) {
+			fa = flex_array_alloc(sizeof(info), nr_files,
+						GFP_KERNEL);
+			if (!fa || flex_array_prealloc(fa, 0, nr_files,
+							GFP_KERNEL)) {
+				ret = -ENOMEM;
+				if (fa)
+					flex_array_free(fa);
+				up_read(&mm->mmap_sem);
+				mmput(mm);
+				goto out_unlock;
+			}
+			for (i = 0, vma = mm->mmap, pos = 2; vma;
+					vma = vma->vm_next) {
+				if (!vma->vm_file)
+					continue;
+				if (++pos <= filp->f_pos)
+					continue;
+
+				get_file(vma->vm_file);
+				info.file = vma->vm_file;
+				info.len = snprintf(info.name,
+						sizeof(info.name), "%lx-%lx",
+						vma->vm_start, vma->vm_end);
+				if (flex_array_put(fa, i++, &info, GFP_KERNEL))
+					BUG();
+			}
+		}
+		up_read(&mm->mmap_sem);
+
+		for (i = 0; i < nr_files; i++) {
+			p = flex_array_get(fa, i);
+			ret = proc_fill_cache(filp, dirent, filldir,
+					      p->name, p->len,
+					      proc_map_files_instantiate,
+					      task, p->file);
+			if (ret)
+				break;
+			filp->f_pos++;
+			fput(p->file);
+		}
+		for (; i < nr_files; i++) {
+			/*
+			 * In case of error don't forget
+			 * to put rest of file refs.
+			 */
+			p = flex_array_get(fa, i);
+			fput(p->file);
+		}
+		if (fa)
+			flex_array_free(fa);
+		mmput(mm);
+	}
+	}
+
+out_unlock:
+	unlock_trace(task);
+out_put_task:
+	put_task_struct(task);
+out:
+	return ret;
+}
+
+static const struct file_operations proc_map_files_operations = {
+	.read		= generic_read_dir,
+	.readdir	= proc_map_files_readdir,
+	.llseek		= default_llseek,
+};
+
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+
 /*
  * /proc/pid/fd needs a special permission handler so that a process can still
  * access /proc/self/fd after it has executed a setuid().
@@ -2661,6 +3013,9 @@ static const struct inode_operations proc_task_inode_operations;
 static const struct pid_entry tgid_base_stuff[] = {
 	DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
 	DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	DIR("map_files",  S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
+#endif
 	DIR("fdinfo",     S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
 	DIR("ns",	  S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
 #ifdef CONFIG_NET
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5568553a41fd..6eba2cc016c9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1482,6 +1482,18 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 }
 
+/* Look up the first VMA which exactly match the interval vm_start ... vm_end */
+static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
+				unsigned long vm_start, unsigned long vm_end)
+{
+	struct vm_area_struct *vma = find_vma(mm, vm_start);
+
+	if (vma && (vma->vm_start != vm_start || vma->vm_end != vm_end))
+		vma = NULL;
+
+	return vma;
+}
+
 #ifdef CONFIG_MMU
 pgprot_t vm_get_page_prot(unsigned long vm_flags);
 #else
-- 
cgit v1.2.3


From 0499680a42141d86417a8fbaa8c8db806bea1201 Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segooon@gmail.com>
Date: Tue, 10 Jan 2012 15:11:31 -0800
Subject: procfs: add hidepid= and gid= mount options

Add support for mount options to restrict access to /proc/PID/
directories.  The default backward-compatible "relaxed" behaviour is left
untouched.

The first mount option is called "hidepid" and its value defines how much
info about processes we want to be available for non-owners:

hidepid=0 (default) means the old behavior - anybody may read all
world-readable /proc/PID/* files.

hidepid=1 means users may not access any /proc/<pid>/ directories, but
their own.  Sensitive files like cmdline, sched*, status are now protected
against other users.  As permission checking done in proc_pid_permission()
and files' permissions are left untouched, programs expecting specific
files' modes are not confused.

hidepid=2 means hidepid=1 plus all /proc/PID/ will be invisible to other
users.  It doesn't mean that it hides whether a process exists (it can be
learned by other means, e.g.  by kill -0 $PID), but it hides process' euid
and egid.  It compicates intruder's task of gathering info about running
processes, whether some daemon runs with elevated privileges, whether
another user runs some sensitive program, whether other users run any
program at all, etc.

gid=XXX defines a group that will be able to gather all processes' info
(as in hidepid=0 mode).  This group should be used instead of putting
nonroot user in sudoers file or something.  However, untrusted users (like
daemons, etc.) which are not supposed to monitor the tasks in the whole
system should not be added to the group.

hidepid=1 or higher is designed to restrict access to procfs files, which
might reveal some sensitive private information like precise keystrokes
timings:

http://www.openwall.com/lists/oss-security/2011/11/05/3

hidepid=1/2 doesn't break monitoring userspace tools.  ps, top, pgrep, and
conky gracefully handle EPERM/ENOENT and behave as if the current user is
the only user running processes.  pstree shows the process subtree which
contains "pstree" process.

Note: the patch doesn't deal with setuid/setgid issues of keeping
preopened descriptors of procfs files (like
https://lkml.org/lkml/2011/2/7/368).  We rely on that the leaked
information like the scheduling counters of setuid apps doesn't threaten
anybody's privacy - only the user started the setuid program may read the
counters.

Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Greg KH <greg@kroah.com>
Cc: Theodore Tso <tytso@MIT.EDU>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: James Morris <jmorris@namei.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt | 39 +++++++++++++++++++++
 fs/proc/base.c                     | 69 +++++++++++++++++++++++++++++++++++++-
 fs/proc/inode.c                    |  8 +++++
 fs/proc/root.c                     | 21 ++++++++++--
 include/linux/pid_namespace.h      |  2 ++
 5 files changed, 135 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 0ec91f03422e..12fee132fbe2 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -41,6 +41,8 @@ Table of Contents
   3.5	/proc/<pid>/mountinfo - Information about mounts
   3.6	/proc/<pid>/comm  & /proc/<pid>/task/<tid>/comm
 
+  4	Configuring procfs
+  4.1	Mount options
 
 ------------------------------------------------------------------------------
 Preface
@@ -1542,3 +1544,40 @@ a task to set its own or one of its thread siblings comm value. The comm value
 is limited in size compared to the cmdline value, so writing anything longer
 then the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated
 comm value.
+
+
+------------------------------------------------------------------------------
+Configuring procfs
+------------------------------------------------------------------------------
+
+4.1	Mount options
+---------------------
+
+The following mount options are supported:
+
+	hidepid=	Set /proc/<pid>/ access mode.
+	gid=		Set the group authorized to learn processes information.
+
+hidepid=0 means classic mode - everybody may access all /proc/<pid>/ directories
+(default).
+
+hidepid=1 means users may not access any /proc/<pid>/ directories but their
+own.  Sensitive files like cmdline, sched*, status are now protected against
+other users.  This makes it impossible to learn whether any user runs
+specific program (given the program doesn't reveal itself by its behaviour).
+As an additional bonus, as /proc/<pid>/cmdline is unaccessible for other users,
+poorly written programs passing sensitive information via program arguments are
+now protected against local eavesdroppers.
+
+hidepid=2 means hidepid=1 plus all /proc/<pid>/ will be fully invisible to other
+users.  It doesn't mean that it hides a fact whether a process with a specific
+pid value exists (it can be learned by other means, e.g. by "kill -0 $PID"),
+but it hides process' uid and gid, which may be learned by stat()'ing
+/proc/<pid>/ otherwise.  It greatly complicates an intruder's task of gathering
+information about running processes, whether some daemon runs with elevated
+privileges, whether other user runs some sensitive program, whether other users
+run any program at all, etc.
+
+gid= defines a group authorized to learn processes information otherwise
+prohibited by hidepid=.  If you use some daemon like identd which needs to learn
+information about processes information, just add identd to this group.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4d755fed3ecb..8173dfd89cb2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -631,6 +631,50 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr)
 	return 0;
 }
 
+/*
+ * May current process learn task's sched/cmdline info (for hide_pid_min=1)
+ * or euid/egid (for hide_pid_min=2)?
+ */
+static bool has_pid_permissions(struct pid_namespace *pid,
+				 struct task_struct *task,
+				 int hide_pid_min)
+{
+	if (pid->hide_pid < hide_pid_min)
+		return true;
+	if (in_group_p(pid->pid_gid))
+		return true;
+	return ptrace_may_access(task, PTRACE_MODE_READ);
+}
+
+
+static int proc_pid_permission(struct inode *inode, int mask)
+{
+	struct pid_namespace *pid = inode->i_sb->s_fs_info;
+	struct task_struct *task;
+	bool has_perms;
+
+	task = get_proc_task(inode);
+	has_perms = has_pid_permissions(pid, task, 1);
+	put_task_struct(task);
+
+	if (!has_perms) {
+		if (pid->hide_pid == 2) {
+			/*
+			 * Let's make getdents(), stat(), and open()
+			 * consistent with each other.  If a process
+			 * may not stat() a file, it shouldn't be seen
+			 * in procfs at all.
+			 */
+			return -ENOENT;
+		}
+
+		return -EPERM;
+	}
+	return generic_permission(inode, mask);
+}
+
+
+
 static const struct inode_operations proc_def_inode_operations = {
 	.setattr	= proc_setattr,
 };
@@ -1615,6 +1659,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	struct inode *inode = dentry->d_inode;
 	struct task_struct *task;
 	const struct cred *cred;
+	struct pid_namespace *pid = dentry->d_sb->s_fs_info;
 
 	generic_fillattr(inode, stat);
 
@@ -1623,6 +1668,14 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	stat->gid = 0;
 	task = pid_task(proc_pid(inode), PIDTYPE_PID);
 	if (task) {
+		if (!has_pid_permissions(pid, task, 2)) {
+			rcu_read_unlock();
+			/*
+			 * This doesn't prevent learning whether PID exists,
+			 * it only makes getattr() consistent with readdir().
+			 */
+			return -ENOENT;
+		}
 		if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
 		    task_dumpable(task)) {
 			cred = __task_cred(task);
@@ -3119,6 +3172,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
 	.lookup		= proc_tgid_base_lookup,
 	.getattr	= pid_getattr,
 	.setattr	= proc_setattr,
+	.permission	= proc_pid_permission,
 };
 
 static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
@@ -3322,6 +3376,12 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
 				proc_pid_instantiate, iter.task, NULL);
 }
 
+static int fake_filldir(void *buf, const char *name, int namelen,
+			loff_t offset, u64 ino, unsigned d_type)
+{
+	return 0;
+}
+
 /* for the /proc/ directory itself, after non-process stuff has been done */
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
@@ -3329,6 +3389,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	struct task_struct *reaper;
 	struct tgid_iter iter;
 	struct pid_namespace *ns;
+	filldir_t __filldir;
 
 	if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
 		goto out_no_task;
@@ -3350,8 +3411,13 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	for (iter = next_tgid(ns, iter);
 	     iter.task;
 	     iter.tgid += 1, iter = next_tgid(ns, iter)) {
+		if (has_pid_permissions(ns, iter.task, 2))
+			__filldir = filldir;
+		else
+			__filldir = fake_filldir;
+
 		filp->f_pos = iter.tgid + TGID_OFFSET;
-		if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) {
+		if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) {
 			put_task_struct(iter.task);
 			goto out;
 		}
@@ -3686,6 +3752,7 @@ static const struct inode_operations proc_task_inode_operations = {
 	.lookup		= proc_task_lookup,
 	.getattr	= proc_task_getattr,
 	.setattr	= proc_setattr,
+	.permission	= proc_pid_permission,
 };
 
 static const struct file_operations proc_task_operations = {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 27c762f34870..84fd3235a590 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -106,6 +106,14 @@ void __init proc_init_inodecache(void)
 
 static int proc_show_options(struct seq_file *seq, struct dentry *root)
 {
+	struct super_block *sb = root->d_sb;
+	struct pid_namespace *pid = sb->s_fs_info;
+
+	if (pid->pid_gid)
+		seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid);
+	if (pid->hide_pid != 0)
+		seq_printf(seq, ",hidepid=%u", pid->hide_pid);
+
 	return 0;
 }
 
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 6a8ac1d361a9..46a15d8a29ca 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -38,10 +38,12 @@ static int proc_set_super(struct super_block *sb, void *data)
 }
 
 enum {
-	Opt_err,
+	Opt_gid, Opt_hidepid, Opt_err,
 };
 
 static const match_table_t tokens = {
+	{Opt_hidepid, "hidepid=%u"},
+	{Opt_gid, "gid=%u"},
 	{Opt_err, NULL},
 };
 
@@ -49,8 +51,7 @@ static int proc_parse_options(char *options, struct pid_namespace *pid)
 {
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
-
-	pr_debug("proc: options = %s\n", options);
+	int option;
 
 	if (!options)
 		return 1;
@@ -63,6 +64,20 @@ static int proc_parse_options(char *options, struct pid_namespace *pid)
 		args[0].to = args[0].from = 0;
 		token = match_token(p, tokens, args);
 		switch (token) {
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+				return 0;
+			pid->pid_gid = option;
+			break;
+		case Opt_hidepid:
+			if (match_int(&args[0], &option))
+				return 0;
+			if (option < 0 || option > 2) {
+				pr_err("proc: hidepid value must be between 0 and 2.\n");
+				return 0;
+			}
+			pid->hide_pid = option;
+			break;
 		default:
 			pr_err("proc: unrecognized mount option \"%s\" "
 			       "or missing value\n", p);
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 38d10326246a..e7cf6669ac34 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -30,6 +30,8 @@ struct pid_namespace {
 #ifdef CONFIG_BSD_PROCESS_ACCT
 	struct bsd_acct_struct *bacct;
 #endif
+	gid_t pid_gid;
+	int hide_pid;
 };
 
 extern struct pid_namespace init_pid_ns;
-- 
cgit v1.2.3


From b196be89cdc14a88cc637cdad845a75c5886c82d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 10 Jan 2012 15:11:35 -0800
Subject: workqueue: make alloc_workqueue() take printf fmt and args for name

alloc_workqueue() currently expects the passed in @name pointer to remain
accessible.  This is inconvenient and a bit silly given that the whole wq
is being dynamically allocated.  This patch updates alloc_workqueue() and
friends to take printf format string instead of opaque string and matching
varargs at the end.  The name is allocated together with the wq and
formatted.

alloc_ordered_workqueue() is converted to a macro to unify varargs
handling with alloc_workqueue(), and, while at it, add comment to
alloc_workqueue().

None of the current in-kernel users pass in string with '%' as constant
name and this change shouldn't cause any problem.

[akpm@linux-foundation.org: use __printf]
Signed-off-by: Tejun Heo <tj@kernel.org>
Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/workqueue.h | 47 +++++++++++++++++++++++++++++++----------------
 kernel/workqueue.c        | 32 ++++++++++++++++++++++----------
 2 files changed, 53 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 0d556deb497b..eb8b9f15f2e0 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -297,32 +297,50 @@ extern struct workqueue_struct *system_unbound_wq;
 extern struct workqueue_struct *system_freezable_wq;
 
 extern struct workqueue_struct *
-__alloc_workqueue_key(const char *name, unsigned int flags, int max_active,
-		      struct lock_class_key *key, const char *lock_name);
+__alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
+	struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6);
 
+/**
+ * alloc_workqueue - allocate a workqueue
+ * @fmt: printf format for the name of the workqueue
+ * @flags: WQ_* flags
+ * @max_active: max in-flight work items, 0 for default
+ * @args: args for @fmt
+ *
+ * Allocate a workqueue with the specified parameters.  For detailed
+ * information on WQ_* flags, please refer to Documentation/workqueue.txt.
+ *
+ * The __lock_name macro dance is to guarantee that single lock_class_key
+ * doesn't end up with different namesm, which isn't allowed by lockdep.
+ *
+ * RETURNS:
+ * Pointer to the allocated workqueue on success, %NULL on failure.
+ */
 #ifdef CONFIG_LOCKDEP
-#define alloc_workqueue(name, flags, max_active)		\
+#define alloc_workqueue(fmt, flags, max_active, args...)	\
 ({								\
 	static struct lock_class_key __key;			\
 	const char *__lock_name;				\
 								\
-	if (__builtin_constant_p(name))				\
-		__lock_name = (name);				\
+	if (__builtin_constant_p(fmt))				\
+		__lock_name = (fmt);				\
 	else							\
-		__lock_name = #name;				\
+		__lock_name = #fmt;				\
 								\
-	__alloc_workqueue_key((name), (flags), (max_active),	\
-			      &__key, __lock_name);		\
+	__alloc_workqueue_key((fmt), (flags), (max_active),	\
+			      &__key, __lock_name, ##args);	\
 })
 #else
-#define alloc_workqueue(name, flags, max_active)		\
-	__alloc_workqueue_key((name), (flags), (max_active), NULL, NULL)
+#define alloc_workqueue(fmt, flags, max_active, args...)	\
+	__alloc_workqueue_key((fmt), (flags), (max_active),	\
+			      NULL, NULL, ##args)
 #endif
 
 /**
  * alloc_ordered_workqueue - allocate an ordered workqueue
- * @name: name of the workqueue
+ * @fmt: printf format for the name of the workqueue
  * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful)
+ * @args: args for @fmt
  *
  * Allocate an ordered workqueue.  An ordered workqueue executes at
  * most one work item at any given time in the queued order.  They are
@@ -331,11 +349,8 @@ __alloc_workqueue_key(const char *name, unsigned int flags, int max_active,
  * RETURNS:
  * Pointer to the allocated workqueue on success, %NULL on failure.
  */
-static inline struct workqueue_struct *
-alloc_ordered_workqueue(const char *name, unsigned int flags)
-{
-	return alloc_workqueue(name, WQ_UNBOUND | flags, 1);
-}
+#define alloc_ordered_workqueue(fmt, flags, args...)		\
+	alloc_workqueue(fmt, WQ_UNBOUND | (flags), 1, ##args)
 
 #define create_workqueue(name)					\
 	alloc_workqueue((name), WQ_MEM_RECLAIM, 1)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 42fa9ad0a810..bec7b5b53e03 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -242,10 +242,10 @@ struct workqueue_struct {
 
 	int			nr_drainers;	/* W: drain in progress */
 	int			saved_max_active; /* W: saved cwq max_active */
-	const char		*name;		/* I: workqueue name */
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map	lockdep_map;
 #endif
+	char			name[];		/* I: workqueue name */
 };
 
 struct workqueue_struct *system_wq __read_mostly;
@@ -2954,14 +2954,29 @@ static int wq_clamp_max_active(int max_active, unsigned int flags,
 	return clamp_val(max_active, 1, lim);
 }
 
-struct workqueue_struct *__alloc_workqueue_key(const char *name,
+struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 					       unsigned int flags,
 					       int max_active,
 					       struct lock_class_key *key,
-					       const char *lock_name)
+					       const char *lock_name, ...)
 {
+	va_list args, args1;
 	struct workqueue_struct *wq;
 	unsigned int cpu;
+	size_t namelen;
+
+	/* determine namelen, allocate wq and format name */
+	va_start(args, lock_name);
+	va_copy(args1, args);
+	namelen = vsnprintf(NULL, 0, fmt, args) + 1;
+
+	wq = kzalloc(sizeof(*wq) + namelen, GFP_KERNEL);
+	if (!wq)
+		goto err;
+
+	vsnprintf(wq->name, namelen, fmt, args1);
+	va_end(args);
+	va_end(args1);
 
 	/*
 	 * Workqueues which may be used during memory reclaim should
@@ -2978,12 +2993,9 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
 		flags |= WQ_HIGHPRI;
 
 	max_active = max_active ?: WQ_DFL_ACTIVE;
-	max_active = wq_clamp_max_active(max_active, flags, name);
-
-	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
-	if (!wq)
-		goto err;
+	max_active = wq_clamp_max_active(max_active, flags, wq->name);
 
+	/* init wq */
 	wq->flags = flags;
 	wq->saved_max_active = max_active;
 	mutex_init(&wq->flush_mutex);
@@ -2991,7 +3003,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
 	INIT_LIST_HEAD(&wq->flusher_queue);
 	INIT_LIST_HEAD(&wq->flusher_overflow);
 
-	wq->name = name;
 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
 	INIT_LIST_HEAD(&wq->list);
 
@@ -3020,7 +3031,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
 		if (!rescuer)
 			goto err;
 
-		rescuer->task = kthread_create(rescuer_thread, wq, "%s", name);
+		rescuer->task = kthread_create(rescuer_thread, wq, "%s",
+					       wq->name);
 		if (IS_ERR(rescuer->task))
 			goto err;
 
-- 
cgit v1.2.3


From 0e8caaceff160ad821c83d798fc03812cb810560 Mon Sep 17 00:00:00 2001
From: Tomasz Stanislawski <t.stanislaws@samsung.com>
Date: Wed, 10 Aug 2011 10:37:47 -0300
Subject: [media] v4l: add support for selection api

This patch introduces new api for a precise control of cropping and composing
features for video devices. The new ioctls are VIDIOC_S_SELECTION and
VIDIOC_G_SELECTION.

Signed-off-by: Tomasz Stanislawski <t.stanislaws@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-compat-ioctl32.c |  2 ++
 drivers/media/video/v4l2-ioctl.c          | 34 +++++++++++++++++++++++
 include/linux/videodev2.h                 | 46 +++++++++++++++++++++++++++++++
 include/media/v4l2-ioctl.h                |  4 +++
 4 files changed, 86 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index c68531b88279..af4419e6c658 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -985,6 +985,8 @@ long v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 	case VIDIOC_CROPCAP:
 	case VIDIOC_G_CROP:
 	case VIDIOC_S_CROP:
+	case VIDIOC_G_SELECTION:
+	case VIDIOC_S_SELECTION:
 	case VIDIOC_G_JPEGCOMP:
 	case VIDIOC_S_JPEGCOMP:
 	case VIDIOC_QUERYSTD:
diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index 639abeee3392..072cfc1a166f 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -238,6 +238,8 @@ static const char *v4l2_ioctls[] = {
 	[_IOC_NR(VIDIOC_CROPCAP)]          = "VIDIOC_CROPCAP",
 	[_IOC_NR(VIDIOC_G_CROP)]           = "VIDIOC_G_CROP",
 	[_IOC_NR(VIDIOC_S_CROP)]           = "VIDIOC_S_CROP",
+	[_IOC_NR(VIDIOC_G_SELECTION)]      = "VIDIOC_G_SELECTION",
+	[_IOC_NR(VIDIOC_S_SELECTION)]      = "VIDIOC_S_SELECTION",
 	[_IOC_NR(VIDIOC_G_JPEGCOMP)]       = "VIDIOC_G_JPEGCOMP",
 	[_IOC_NR(VIDIOC_S_JPEGCOMP)]       = "VIDIOC_S_JPEGCOMP",
 	[_IOC_NR(VIDIOC_QUERYSTD)]         = "VIDIOC_QUERYSTD",
@@ -1571,6 +1573,38 @@ static long __video_do_ioctl(struct file *file,
 		ret = ops->vidioc_s_crop(file, fh, p);
 		break;
 	}
+	case VIDIOC_G_SELECTION:
+	{
+		struct v4l2_selection *p = arg;
+
+		if (!ops->vidioc_g_selection)
+			break;
+
+		dbgarg(cmd, "type=%s\n", prt_names(p->type, v4l2_type_names));
+
+		ret = ops->vidioc_g_selection(file, fh, p);
+		if (!ret)
+			dbgrect(vfd, "", &p->r);
+		break;
+	}
+	case VIDIOC_S_SELECTION:
+	{
+		struct v4l2_selection *p = arg;
+
+		if (!ops->vidioc_s_selection)
+			break;
+
+		if (ret_prio) {
+			ret = ret_prio;
+			break;
+		}
+
+		dbgarg(cmd, "type=%s\n", prt_names(p->type, v4l2_type_names));
+		dbgrect(vfd, "", &p->r);
+
+		ret = ops->vidioc_s_selection(file, fh, p);
+		break;
+	}
 	case VIDIOC_CROPCAP:
 	{
 		struct v4l2_cropcap *p = arg;
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index b2e1331ca76b..012a29604522 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -743,6 +743,48 @@ struct v4l2_crop {
 	struct v4l2_rect        c;
 };
 
+/* Hints for adjustments of selection rectangle */
+#define V4L2_SEL_FLAG_GE	0x00000001
+#define V4L2_SEL_FLAG_LE	0x00000002
+
+/* Selection targets */
+
+/* current cropping area */
+#define V4L2_SEL_TGT_CROP_ACTIVE	0
+/* default cropping area */
+#define V4L2_SEL_TGT_CROP_DEFAULT	1
+/* cropping bounds */
+#define V4L2_SEL_TGT_CROP_BOUNDS	2
+/* current composing area */
+#define V4L2_SEL_TGT_COMPOSE_ACTIVE	256
+/* default composing area */
+#define V4L2_SEL_TGT_COMPOSE_DEFAULT	257
+/* composing bounds */
+#define V4L2_SEL_TGT_COMPOSE_BOUNDS	258
+/* current composing area plus all padding pixels */
+#define V4L2_SEL_TGT_COMPOSE_PADDED	259
+
+/**
+ * struct v4l2_selection - selection info
+ * @type:	buffer type (do not use *_MPLANE types)
+ * @target:	selection target, used to choose one of possible rectangles
+ * @flags:	constraints flags
+ * @r:		coordinates of selection window
+ * @reserved:	for future use, rounds structure size to 64 bytes, set to zero
+ *
+ * Hardware may use multiple helper window to process a video stream.
+ * The structure is used to exchange this selection areas between
+ * an application and a driver.
+ */
+struct v4l2_selection {
+	__u32			type;
+	__u32			target;
+	__u32                   flags;
+	struct v4l2_rect        r;
+	__u32                   reserved[9];
+};
+
+
 /*
  *      A N A L O G   V I D E O   S T A N D A R D
  */
@@ -2259,6 +2301,10 @@ struct v4l2_create_buffers {
 #define VIDIOC_CREATE_BUFS	_IOWR('V', 92, struct v4l2_create_buffers)
 #define VIDIOC_PREPARE_BUF	_IOWR('V', 93, struct v4l2_buffer)
 
+/* Experimental selection API */
+#define VIDIOC_G_SELECTION	_IOWR('V', 94, struct v4l2_selection)
+#define VIDIOC_S_SELECTION	_IOWR('V', 95, struct v4l2_selection)
+
 /* Reminder: when adding new ioctls please add support for them to
    drivers/media/video/v4l2-compat-ioctl32.c as well! */
 
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index 4d1c74ad4c84..3f5d60fc5df6 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -196,6 +196,10 @@ struct v4l2_ioctl_ops {
 					struct v4l2_crop *a);
 	int (*vidioc_s_crop)           (struct file *file, void *fh,
 					struct v4l2_crop *a);
+	int (*vidioc_g_selection)      (struct file *file, void *fh,
+					struct v4l2_selection *s);
+	int (*vidioc_s_selection)      (struct file *file, void *fh,
+					struct v4l2_selection *s);
 	/* Compression ioctls */
 	int (*vidioc_g_jpegcomp)       (struct file *file, void *fh,
 					struct v4l2_jpegcompression *a);
-- 
cgit v1.2.3


From e0c2a9aa1e68455dc3439e95d85cabcaff073666 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Mon, 9 Jan 2012 17:18:05 -0500
Subject: GFS2: dlm based recovery coordination

This new method of managing recovery is an alternative to
the previous approach of using the userland gfs_controld.

- use dlm slot numbers to assign journal id's
- use dlm recovery callbacks to initiate journal recovery
- use a dlm lock to determine the first node to mount fs
- use a dlm lock to track journals that need recovery

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c             |   2 +-
 fs/gfs2/glock.h             |   7 +-
 fs/gfs2/incore.h            |  58 ++-
 fs/gfs2/lock_dlm.c          | 993 +++++++++++++++++++++++++++++++++++++++++++-
 fs/gfs2/main.c              |  10 +
 fs/gfs2/ops_fstype.c        |  29 +-
 fs/gfs2/recovery.c          |   4 +
 fs/gfs2/sys.c               |  33 +-
 fs/gfs2/sys.h               |   2 +
 include/linux/gfs2_ondisk.h |   2 +
 10 files changed, 1098 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 88e8a23d0026..376816fcd040 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1353,7 +1353,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
 	spin_lock(&gl->gl_spin);
 	gl->gl_reply = ret;
 
-	if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
+	if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
 		if (gfs2_should_freeze(gl)) {
 			set_bit(GLF_FROZEN, &gl->gl_flags);
 			spin_unlock(&gl->gl_spin);
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 2553b858a72e..307ac31df781 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -121,8 +121,11 @@ enum {
 
 struct lm_lockops {
 	const char *lm_proto_name;
-	int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
- 	void (*lm_unmount) (struct gfs2_sbd *sdp);
+	int (*lm_mount) (struct gfs2_sbd *sdp, const char *table);
+	void (*lm_first_done) (struct gfs2_sbd *sdp);
+	void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid,
+				    unsigned int result);
+	void (*lm_unmount) (struct gfs2_sbd *sdp);
 	void (*lm_withdraw) (struct gfs2_sbd *sdp);
 	void (*lm_put_lock) (struct gfs2_glock *gl);
 	int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index e1d3bb59945c..b9422bc8e2fe 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -139,8 +139,45 @@ struct gfs2_bufdata {
 #define GDLM_STRNAME_BYTES	25
 #define GDLM_LVB_SIZE		32
 
+/*
+ * ls_recover_flags:
+ *
+ * DFL_BLOCK_LOCKS: dlm is in recovery and will grant locks that had been
+ * held by failed nodes whose journals need recovery.  Those locks should
+ * only be used for journal recovery until the journal recovery is done.
+ * This is set by the dlm recover_prep callback and cleared by the
+ * gfs2_control thread when journal recovery is complete.  To avoid
+ * races between recover_prep setting and gfs2_control clearing, recover_spin
+ * is held while changing this bit and reading/writing recover_block
+ * and recover_start.
+ *
+ * DFL_NO_DLM_OPS: dlm lockspace ops/callbacks are not being used.
+ *
+ * DFL_FIRST_MOUNT: this node is the first to mount this fs and is doing
+ * recovery of all journals before allowing other nodes to mount the fs.
+ * This is cleared when FIRST_MOUNT_DONE is set.
+ *
+ * DFL_FIRST_MOUNT_DONE: this node was the first mounter, and has finished
+ * recovery of all journals, and now allows other nodes to mount the fs.
+ *
+ * DFL_MOUNT_DONE: gdlm_mount has completed successfully and cleared
+ * BLOCK_LOCKS for the first time.  The gfs2_control thread should now
+ * control clearing BLOCK_LOCKS for further recoveries.
+ *
+ * DFL_UNMOUNT: gdlm_unmount sets to keep sdp off gfs2_control_wq.
+ *
+ * DFL_DLM_RECOVERY: set while dlm is in recovery, between recover_prep()
+ * and recover_done(), i.e. set while recover_block == recover_start.
+ */
+
 enum {
 	DFL_BLOCK_LOCKS		= 0,
+	DFL_NO_DLM_OPS		= 1,
+	DFL_FIRST_MOUNT		= 2,
+	DFL_FIRST_MOUNT_DONE	= 3,
+	DFL_MOUNT_DONE		= 4,
+	DFL_UNMOUNT		= 5,
+	DFL_DLM_RECOVERY	= 6,
 };
 
 struct lm_lockname {
@@ -499,14 +536,26 @@ struct gfs2_sb_host {
 struct lm_lockstruct {
 	int ls_jid;
 	unsigned int ls_first;
-	unsigned int ls_first_done;
 	unsigned int ls_nodir;
 	const struct lm_lockops *ls_ops;
-	unsigned long ls_flags;
 	dlm_lockspace_t *ls_dlm;
 
-	int ls_recover_jid_done;
-	int ls_recover_jid_status;
+	int ls_recover_jid_done;   /* These two are deprecated, */
+	int ls_recover_jid_status; /* used previously by gfs_controld */
+
+	struct dlm_lksb ls_mounted_lksb; /* mounted_lock */
+	struct dlm_lksb ls_control_lksb; /* control_lock */
+	char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */
+	struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */
+
+	spinlock_t ls_recover_spin; /* protects following fields */
+	unsigned long ls_recover_flags; /* DFL_ */
+	uint32_t ls_recover_mount; /* gen in first recover_done cb */
+	uint32_t ls_recover_start; /* gen in last recover_done cb */
+	uint32_t ls_recover_block; /* copy recover_start in last recover_prep */
+	uint32_t ls_recover_size; /* size of recover_submit, recover_result */
+	uint32_t *ls_recover_submit; /* gen in last recover_slot cb per jid */
+	uint32_t *ls_recover_result; /* result of last jid recovery */
 };
 
 struct gfs2_sbd {
@@ -544,6 +593,7 @@ struct gfs2_sbd {
 	wait_queue_head_t sd_glock_wait;
 	atomic_t sd_glock_disposal;
 	struct completion sd_locking_init;
+	struct delayed_work sd_control_work;
 
 	/* Inode Stuff */
 
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index ce85b62bc0a2..8944d1e32ab5 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2009 Red Hat, Inc.  All rights reserved.
+ * Copyright 2004-2011 Red Hat, Inc.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
@@ -11,12 +11,15 @@
 #include <linux/dlm.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/delay.h>
 #include <linux/gfs2_ondisk.h>
 
 #include "incore.h"
 #include "glock.h"
 #include "util.h"
+#include "sys.h"
 
+extern struct workqueue_struct *gfs2_control_wq;
 
 static void gdlm_ast(void *arg)
 {
@@ -185,34 +188,1002 @@ static void gdlm_cancel(struct gfs2_glock *gl)
 	dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
 }
 
-static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname)
+/*
+ * dlm/gfs2 recovery coordination using dlm_recover callbacks
+ *
+ *  1. dlm_controld sees lockspace members change
+ *  2. dlm_controld blocks dlm-kernel locking activity
+ *  3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep)
+ *  4. dlm_controld starts and finishes its own user level recovery
+ *  5. dlm_controld starts dlm-kernel dlm_recoverd to do kernel recovery
+ *  6. dlm_recoverd notifies gfs2 of failed nodes (recover_slot)
+ *  7. dlm_recoverd does its own lock recovery
+ *  8. dlm_recoverd unblocks dlm-kernel locking activity
+ *  9. dlm_recoverd notifies gfs2 when done (recover_done with new generation)
+ * 10. gfs2_control updates control_lock lvb with new generation and jid bits
+ * 11. gfs2_control enqueues journals for gfs2_recover to recover (maybe none)
+ * 12. gfs2_recover dequeues and recovers journals of failed nodes
+ * 13. gfs2_recover provides recovery results to gfs2_control (recovery_result)
+ * 14. gfs2_control updates control_lock lvb jid bits for recovered journals
+ * 15. gfs2_control unblocks normal locking when all journals are recovered
+ *
+ * - failures during recovery
+ *
+ * recover_prep() may set BLOCK_LOCKS (step 3) again before gfs2_control
+ * clears BLOCK_LOCKS (step 15), e.g. another node fails while still
+ * recovering for a prior failure.  gfs2_control needs a way to detect
+ * this so it can leave BLOCK_LOCKS set in step 15.  This is managed using
+ * the recover_block and recover_start values.
+ *
+ * recover_done() provides a new lockspace generation number each time it
+ * is called (step 9).  This generation number is saved as recover_start.
+ * When recover_prep() is called, it sets BLOCK_LOCKS and sets
+ * recover_block = recover_start.  So, while recover_block is equal to
+ * recover_start, BLOCK_LOCKS should remain set.  (recover_spin must
+ * be held around the BLOCK_LOCKS/recover_block/recover_start logic.)
+ *
+ * - more specific gfs2 steps in sequence above
+ *
+ *  3. recover_prep sets BLOCK_LOCKS and sets recover_block = recover_start
+ *  6. recover_slot records any failed jids (maybe none)
+ *  9. recover_done sets recover_start = new generation number
+ * 10. gfs2_control sets control_lock lvb = new gen + bits for failed jids
+ * 12. gfs2_recover does journal recoveries for failed jids identified above
+ * 14. gfs2_control clears control_lock lvb bits for recovered jids
+ * 15. gfs2_control checks if recover_block == recover_start (step 3 occured
+ *     again) then do nothing, otherwise if recover_start > recover_block
+ *     then clear BLOCK_LOCKS.
+ *
+ * - parallel recovery steps across all nodes
+ *
+ * All nodes attempt to update the control_lock lvb with the new generation
+ * number and jid bits, but only the first to get the control_lock EX will
+ * do so; others will see that it's already done (lvb already contains new
+ * generation number.)
+ *
+ * . All nodes get the same recover_prep/recover_slot/recover_done callbacks
+ * . All nodes attempt to set control_lock lvb gen + bits for the new gen
+ * . One node gets control_lock first and writes the lvb, others see it's done
+ * . All nodes attempt to recover jids for which they see control_lock bits set
+ * . One node succeeds for a jid, and that one clears the jid bit in the lvb
+ * . All nodes will eventually see all lvb bits clear and unblock locks
+ *
+ * - is there a problem with clearing an lvb bit that should be set
+ *   and missing a journal recovery?
+ *
+ * 1. jid fails
+ * 2. lvb bit set for step 1
+ * 3. jid recovered for step 1
+ * 4. jid taken again (new mount)
+ * 5. jid fails (for step 4)
+ * 6. lvb bit set for step 5 (will already be set)
+ * 7. lvb bit cleared for step 3
+ *
+ * This is not a problem because the failure in step 5 does not
+ * require recovery, because the mount in step 4 could not have
+ * progressed far enough to unblock locks and access the fs.  The
+ * control_mount() function waits for all recoveries to be complete
+ * for the latest lockspace generation before ever unblocking locks
+ * and returning.  The mount in step 4 waits until the recovery in
+ * step 1 is done.
+ *
+ * - special case of first mounter: first node to mount the fs
+ *
+ * The first node to mount a gfs2 fs needs to check all the journals
+ * and recover any that need recovery before other nodes are allowed
+ * to mount the fs.  (Others may begin mounting, but they must wait
+ * for the first mounter to be done before taking locks on the fs
+ * or accessing the fs.)  This has two parts:
+ *
+ * 1. The mounted_lock tells a node it's the first to mount the fs.
+ * Each node holds the mounted_lock in PR while it's mounted.
+ * Each node tries to acquire the mounted_lock in EX when it mounts.
+ * If a node is granted the mounted_lock EX it means there are no
+ * other mounted nodes (no PR locks exist), and it is the first mounter.
+ * The mounted_lock is demoted to PR when first recovery is done, so
+ * others will fail to get an EX lock, but will get a PR lock.
+ *
+ * 2. The control_lock blocks others in control_mount() while the first
+ * mounter is doing first mount recovery of all journals.
+ * A mounting node needs to acquire control_lock in EX mode before
+ * it can proceed.  The first mounter holds control_lock in EX while doing
+ * the first mount recovery, blocking mounts from other nodes, then demotes
+ * control_lock to NL when it's done (others_may_mount/first_done),
+ * allowing other nodes to continue mounting.
+ *
+ * first mounter:
+ * control_lock EX/NOQUEUE success
+ * mounted_lock EX/NOQUEUE success (no other PR, so no other mounters)
+ * set first=1
+ * do first mounter recovery
+ * mounted_lock EX->PR
+ * control_lock EX->NL, write lvb generation
+ *
+ * other mounter:
+ * control_lock EX/NOQUEUE success (if fail -EAGAIN, retry)
+ * mounted_lock EX/NOQUEUE fail -EAGAIN (expected due to other mounters PR)
+ * mounted_lock PR/NOQUEUE success
+ * read lvb generation
+ * control_lock EX->NL
+ * set first=0
+ *
+ * - mount during recovery
+ *
+ * If a node mounts while others are doing recovery (not first mounter),
+ * the mounting node will get its initial recover_done() callback without
+ * having seen any previous failures/callbacks.
+ *
+ * It must wait for all recoveries preceding its mount to be finished
+ * before it unblocks locks.  It does this by repeating the "other mounter"
+ * steps above until the lvb generation number is >= its mount generation
+ * number (from initial recover_done) and all lvb bits are clear.
+ *
+ * - control_lock lvb format
+ *
+ * 4 bytes generation number: the latest dlm lockspace generation number
+ * from recover_done callback.  Indicates the jid bitmap has been updated
+ * to reflect all slot failures through that generation.
+ * 4 bytes unused.
+ * GDLM_LVB_SIZE-8 bytes of jid bit map. If bit N is set, it indicates
+ * that jid N needs recovery.
+ */
+
+#define JID_BITMAP_OFFSET 8 /* 4 byte generation number + 4 byte unused */
+
+static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen,
+			     char *lvb_bits)
+{
+	uint32_t gen;
+	memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE);
+	memcpy(&gen, lvb_bits, sizeof(uint32_t));
+	*lvb_gen = le32_to_cpu(gen);
+}
+
+static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
+			      char *lvb_bits)
+{
+	uint32_t gen;
+	memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE);
+	gen = cpu_to_le32(lvb_gen);
+	memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t));
+}
+
+static int all_jid_bits_clear(char *lvb)
+{
+	int i;
+	for (i = JID_BITMAP_OFFSET; i < GDLM_LVB_SIZE; i++) {
+		if (lvb[i])
+			return 0;
+	}
+	return 1;
+}
+
+static void sync_wait_cb(void *arg)
+{
+	struct lm_lockstruct *ls = arg;
+	complete(&ls->ls_sync_wait);
+}
+
+static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name)
 {
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 	int error;
 
-	if (fsname == NULL) {
-		fs_info(sdp, "no fsname found\n");
-		return -EINVAL;
+	error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
+	if (error) {
+		fs_err(sdp, "%s lkid %x error %d\n",
+		       name, lksb->sb_lkid, error);
+		return error;
+	}
+
+	wait_for_completion(&ls->ls_sync_wait);
+
+	if (lksb->sb_status != -DLM_EUNLOCK) {
+		fs_err(sdp, "%s lkid %x status %d\n",
+		       name, lksb->sb_lkid, lksb->sb_status);
+		return -1;
+	}
+	return 0;
+}
+
+static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags,
+		     unsigned int num, struct dlm_lksb *lksb, char *name)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char strname[GDLM_STRNAME_BYTES];
+	int error, status;
+
+	memset(strname, 0, GDLM_STRNAME_BYTES);
+	snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num);
+
+	error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
+			 strname, GDLM_STRNAME_BYTES - 1,
+			 0, sync_wait_cb, ls, NULL);
+	if (error) {
+		fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n",
+		       name, lksb->sb_lkid, flags, mode, error);
+		return error;
+	}
+
+	wait_for_completion(&ls->ls_sync_wait);
+
+	status = lksb->sb_status;
+
+	if (status && status != -EAGAIN) {
+		fs_err(sdp, "%s lkid %x flags %x mode %d status %d\n",
+		       name, lksb->sb_lkid, flags, mode, status);
+	}
+
+	return status;
+}
+
+static int mounted_unlock(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	return sync_unlock(sdp, &ls->ls_mounted_lksb, "mounted_lock");
+}
+
+static int mounted_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	return sync_lock(sdp, mode, flags, GFS2_MOUNTED_LOCK,
+			 &ls->ls_mounted_lksb, "mounted_lock");
+}
+
+static int control_unlock(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	return sync_unlock(sdp, &ls->ls_control_lksb, "control_lock");
+}
+
+static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	return sync_lock(sdp, mode, flags, GFS2_CONTROL_LOCK,
+			 &ls->ls_control_lksb, "control_lock");
+}
+
+static void gfs2_control_func(struct work_struct *work)
+{
+	struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work);
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char lvb_bits[GDLM_LVB_SIZE];
+	uint32_t block_gen, start_gen, lvb_gen, flags;
+	int recover_set = 0;
+	int write_lvb = 0;
+	int recover_size;
+	int i, error;
+
+	spin_lock(&ls->ls_recover_spin);
+	/*
+	 * No MOUNT_DONE means we're still mounting; control_mount()
+	 * will set this flag, after which this thread will take over
+	 * all further clearing of BLOCK_LOCKS.
+	 *
+	 * FIRST_MOUNT means this node is doing first mounter recovery,
+	 * for which recovery control is handled by
+	 * control_mount()/control_first_done(), not this thread.
+	 */
+	if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
+	     test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+	block_gen = ls->ls_recover_block;
+	start_gen = ls->ls_recover_start;
+	spin_unlock(&ls->ls_recover_spin);
+
+	/*
+	 * Equal block_gen and start_gen implies we are between
+	 * recover_prep and recover_done callbacks, which means
+	 * dlm recovery is in progress and dlm locking is blocked.
+	 * There's no point trying to do any work until recover_done.
+	 */
+
+	if (block_gen == start_gen)
+		return;
+
+	/*
+	 * Propagate recover_submit[] and recover_result[] to lvb:
+	 * dlm_recoverd adds to recover_submit[] jids needing recovery
+	 * gfs2_recover adds to recover_result[] journal recovery results
+	 *
+	 * set lvb bit for jids in recover_submit[] if the lvb has not
+	 * yet been updated for the generation of the failure
+	 *
+	 * clear lvb bit for jids in recover_result[] if the result of
+	 * the journal recovery is SUCCESS
+	 */
+
+	error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
+	if (error) {
+		fs_err(sdp, "control lock EX error %d\n", error);
+		return;
+	}
+
+	control_lvb_read(ls, &lvb_gen, lvb_bits);
+
+	spin_lock(&ls->ls_recover_spin);
+	if (block_gen != ls->ls_recover_block ||
+	    start_gen != ls->ls_recover_start) {
+		fs_info(sdp, "recover generation %u block1 %u %u\n",
+			start_gen, block_gen, ls->ls_recover_block);
+		spin_unlock(&ls->ls_recover_spin);
+		control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
+		return;
+	}
+
+	recover_size = ls->ls_recover_size;
+
+	if (lvb_gen <= start_gen) {
+		/*
+		 * Clear lvb bits for jids we've successfully recovered.
+		 * Because all nodes attempt to recover failed journals,
+		 * a journal can be recovered multiple times successfully
+		 * in succession.  Only the first will really do recovery,
+		 * the others find it clean, but still report a successful
+		 * recovery.  So, another node may have already recovered
+		 * the jid and cleared the lvb bit for it.
+		 */
+		for (i = 0; i < recover_size; i++) {
+			if (ls->ls_recover_result[i] != LM_RD_SUCCESS)
+				continue;
+
+			ls->ls_recover_result[i] = 0;
+
+			if (!test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET))
+				continue;
+
+			__clear_bit_le(i, lvb_bits + JID_BITMAP_OFFSET);
+			write_lvb = 1;
+		}
+	}
+
+	if (lvb_gen == start_gen) {
+		/*
+		 * Failed slots before start_gen are already set in lvb.
+		 */
+		for (i = 0; i < recover_size; i++) {
+			if (!ls->ls_recover_submit[i])
+				continue;
+			if (ls->ls_recover_submit[i] < lvb_gen)
+				ls->ls_recover_submit[i] = 0;
+		}
+	} else if (lvb_gen < start_gen) {
+		/*
+		 * Failed slots before start_gen are not yet set in lvb.
+		 */
+		for (i = 0; i < recover_size; i++) {
+			if (!ls->ls_recover_submit[i])
+				continue;
+			if (ls->ls_recover_submit[i] < start_gen) {
+				ls->ls_recover_submit[i] = 0;
+				__set_bit_le(i, lvb_bits + JID_BITMAP_OFFSET);
+			}
+		}
+		/* even if there are no bits to set, we need to write the
+		   latest generation to the lvb */
+		write_lvb = 1;
+	} else {
+		/*
+		 * we should be getting a recover_done() for lvb_gen soon
+		 */
+	}
+	spin_unlock(&ls->ls_recover_spin);
+
+	if (write_lvb) {
+		control_lvb_write(ls, start_gen, lvb_bits);
+		flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK;
+	} else {
+		flags = DLM_LKF_CONVERT;
+	}
+
+	error = control_lock(sdp, DLM_LOCK_NL, flags);
+	if (error) {
+		fs_err(sdp, "control lock NL error %d\n", error);
+		return;
+	}
+
+	/*
+	 * Everyone will see jid bits set in the lvb, run gfs2_recover_set(),
+	 * and clear a jid bit in the lvb if the recovery is a success.
+	 * Eventually all journals will be recovered, all jid bits will
+	 * be cleared in the lvb, and everyone will clear BLOCK_LOCKS.
+	 */
+
+	for (i = 0; i < recover_size; i++) {
+		if (test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) {
+			fs_info(sdp, "recover generation %u jid %d\n",
+				start_gen, i);
+			gfs2_recover_set(sdp, i);
+			recover_set++;
+		}
+	}
+	if (recover_set)
+		return;
+
+	/*
+	 * No more jid bits set in lvb, all recovery is done, unblock locks
+	 * (unless a new recover_prep callback has occured blocking locks
+	 * again while working above)
+	 */
+
+	spin_lock(&ls->ls_recover_spin);
+	if (ls->ls_recover_block == block_gen &&
+	    ls->ls_recover_start == start_gen) {
+		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		fs_info(sdp, "recover generation %u done\n", start_gen);
+		gfs2_glock_thaw(sdp);
+	} else {
+		fs_info(sdp, "recover generation %u block2 %u %u\n",
+			start_gen, block_gen, ls->ls_recover_block);
+		spin_unlock(&ls->ls_recover_spin);
+	}
+}
+
+static int control_mount(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char lvb_bits[GDLM_LVB_SIZE];
+	uint32_t start_gen, block_gen, mount_gen, lvb_gen;
+	int mounted_mode;
+	int retries = 0;
+	int error;
+
+	memset(&ls->ls_mounted_lksb, 0, sizeof(struct dlm_lksb));
+	memset(&ls->ls_control_lksb, 0, sizeof(struct dlm_lksb));
+	memset(&ls->ls_control_lvb, 0, GDLM_LVB_SIZE);
+	ls->ls_control_lksb.sb_lvbptr = ls->ls_control_lvb;
+	init_completion(&ls->ls_sync_wait);
+
+	set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+
+	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_VALBLK);
+	if (error) {
+		fs_err(sdp, "control_mount control_lock NL error %d\n", error);
+		return error;
+	}
+
+	error = mounted_lock(sdp, DLM_LOCK_NL, 0);
+	if (error) {
+		fs_err(sdp, "control_mount mounted_lock NL error %d\n", error);
+		control_unlock(sdp);
+		return error;
+	}
+	mounted_mode = DLM_LOCK_NL;
+
+restart:
+	if (retries++ && signal_pending(current)) {
+		error = -EINTR;
+		goto fail;
+	}
+
+	/*
+	 * We always start with both locks in NL. control_lock is
+	 * demoted to NL below so we don't need to do it here.
+	 */
+
+	if (mounted_mode != DLM_LOCK_NL) {
+		error = mounted_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
+		if (error)
+			goto fail;
+		mounted_mode = DLM_LOCK_NL;
+	}
+
+	/*
+	 * Other nodes need to do some work in dlm recovery and gfs2_control
+	 * before the recover_done and control_lock will be ready for us below.
+	 * A delay here is not required but often avoids having to retry.
+	 */
+
+	msleep_interruptible(500);
+
+	/*
+	 * Acquire control_lock in EX and mounted_lock in either EX or PR.
+	 * control_lock lvb keeps track of any pending journal recoveries.
+	 * mounted_lock indicates if any other nodes have the fs mounted.
+	 */
+
+	error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE|DLM_LKF_VALBLK);
+	if (error == -EAGAIN) {
+		goto restart;
+	} else if (error) {
+		fs_err(sdp, "control_mount control_lock EX error %d\n", error);
+		goto fail;
+	}
+
+	error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
+	if (!error) {
+		mounted_mode = DLM_LOCK_EX;
+		goto locks_done;
+	} else if (error != -EAGAIN) {
+		fs_err(sdp, "control_mount mounted_lock EX error %d\n", error);
+		goto fail;
+	}
+
+	error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
+	if (!error) {
+		mounted_mode = DLM_LOCK_PR;
+		goto locks_done;
+	} else {
+		/* not even -EAGAIN should happen here */
+		fs_err(sdp, "control_mount mounted_lock PR error %d\n", error);
+		goto fail;
+	}
+
+locks_done:
+	/*
+	 * If we got both locks above in EX, then we're the first mounter.
+	 * If not, then we need to wait for the control_lock lvb to be
+	 * updated by other mounted nodes to reflect our mount generation.
+	 *
+	 * In simple first mounter cases, first mounter will see zero lvb_gen,
+	 * but in cases where all existing nodes leave/fail before mounting
+	 * nodes finish control_mount, then all nodes will be mounting and
+	 * lvb_gen will be non-zero.
+	 */
+
+	control_lvb_read(ls, &lvb_gen, lvb_bits);
+
+	if (lvb_gen == 0xFFFFFFFF) {
+		/* special value to force mount attempts to fail */
+		fs_err(sdp, "control_mount control_lock disabled\n");
+		error = -EINVAL;
+		goto fail;
+	}
+
+	if (mounted_mode == DLM_LOCK_EX) {
+		/* first mounter, keep both EX while doing first recovery */
+		spin_lock(&ls->ls_recover_spin);
+		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+		set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
+		set_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		fs_info(sdp, "first mounter control generation %u\n", lvb_gen);
+		return 0;
+	}
+
+	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
+	if (error)
+		goto fail;
+
+	/*
+	 * We are not first mounter, now we need to wait for the control_lock
+	 * lvb generation to be >= the generation from our first recover_done
+	 * and all lvb bits to be clear (no pending journal recoveries.)
+	 */
+
+	if (!all_jid_bits_clear(lvb_bits)) {
+		/* journals need recovery, wait until all are clear */
+		fs_info(sdp, "control_mount wait for journal recovery\n");
+		goto restart;
+	}
+
+	spin_lock(&ls->ls_recover_spin);
+	block_gen = ls->ls_recover_block;
+	start_gen = ls->ls_recover_start;
+	mount_gen = ls->ls_recover_mount;
+
+	if (lvb_gen < mount_gen) {
+		/* wait for mounted nodes to update control_lock lvb to our
+		   generation, which might include new recovery bits set */
+		fs_info(sdp, "control_mount wait1 block %u start %u mount %u "
+			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
+			lvb_gen, ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		goto restart;
+	}
+
+	if (lvb_gen != start_gen) {
+		/* wait for mounted nodes to update control_lock lvb to the
+		   latest recovery generation */
+		fs_info(sdp, "control_mount wait2 block %u start %u mount %u "
+			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
+			lvb_gen, ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		goto restart;
+	}
+
+	if (block_gen == start_gen) {
+		/* dlm recovery in progress, wait for it to finish */
+		fs_info(sdp, "control_mount wait3 block %u start %u mount %u "
+			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
+			lvb_gen, ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		goto restart;
 	}
 
-	error = dlm_new_lockspace(fsname, NULL, 
-				  DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
-				  (ls->ls_nodir ? DLM_LSFL_NODIR : 0),
-				  GDLM_LVB_SIZE, NULL, NULL, NULL, &ls->ls_dlm);
+	clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+	set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
+	memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
+	memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
+	spin_unlock(&ls->ls_recover_spin);
+	return 0;
+
+fail:
+	mounted_unlock(sdp);
+	control_unlock(sdp);
+	return error;
+}
+
+static int dlm_recovery_wait(void *word)
+{
+	schedule();
+	return 0;
+}
+
+static int control_first_done(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char lvb_bits[GDLM_LVB_SIZE];
+	uint32_t start_gen, block_gen;
+	int error;
+
+restart:
+	spin_lock(&ls->ls_recover_spin);
+	start_gen = ls->ls_recover_start;
+	block_gen = ls->ls_recover_block;
+
+	if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags) ||
+	    !test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
+	    !test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
+		/* sanity check, should not happen */
+		fs_err(sdp, "control_first_done start %u block %u flags %lx\n",
+		       start_gen, block_gen, ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		control_unlock(sdp);
+		return -1;
+	}
+
+	if (start_gen == block_gen) {
+		/*
+		 * Wait for the end of a dlm recovery cycle to switch from
+		 * first mounter recovery.  We can ignore any recover_slot
+		 * callbacks between the recover_prep and next recover_done
+		 * because we are still the first mounter and any failed nodes
+		 * have not fully mounted, so they don't need recovery.
+		 */
+		spin_unlock(&ls->ls_recover_spin);
+		fs_info(sdp, "control_first_done wait gen %u\n", start_gen);
+
+		wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
+			    dlm_recovery_wait, TASK_UNINTERRUPTIBLE);
+		goto restart;
+	}
+
+	clear_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
+	set_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags);
+	memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
+	memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
+	spin_unlock(&ls->ls_recover_spin);
+
+	memset(lvb_bits, 0, sizeof(lvb_bits));
+	control_lvb_write(ls, start_gen, lvb_bits);
+
+	error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT);
+	if (error)
+		fs_err(sdp, "control_first_done mounted PR error %d\n", error);
+
+	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
 	if (error)
-		printk(KERN_ERR "dlm_new_lockspace error %d", error);
+		fs_err(sdp, "control_first_done control NL error %d\n", error);
 
 	return error;
 }
 
+/*
+ * Expand static jid arrays if necessary (by increments of RECOVER_SIZE_INC)
+ * to accomodate the largest slot number.  (NB dlm slot numbers start at 1,
+ * gfs2 jids start at 0, so jid = slot - 1)
+ */
+
+#define RECOVER_SIZE_INC 16
+
+static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
+			    int num_slots)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	uint32_t *submit = NULL;
+	uint32_t *result = NULL;
+	uint32_t old_size, new_size;
+	int i, max_jid;
+
+	max_jid = 0;
+	for (i = 0; i < num_slots; i++) {
+		if (max_jid < slots[i].slot - 1)
+			max_jid = slots[i].slot - 1;
+	}
+
+	old_size = ls->ls_recover_size;
+
+	if (old_size >= max_jid + 1)
+		return 0;
+
+	new_size = old_size + RECOVER_SIZE_INC;
+
+	submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
+	result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
+	if (!submit || !result) {
+		kfree(submit);
+		kfree(result);
+		return -ENOMEM;
+	}
+
+	spin_lock(&ls->ls_recover_spin);
+	memcpy(submit, ls->ls_recover_submit, old_size * sizeof(uint32_t));
+	memcpy(result, ls->ls_recover_result, old_size * sizeof(uint32_t));
+	kfree(ls->ls_recover_submit);
+	kfree(ls->ls_recover_result);
+	ls->ls_recover_submit = submit;
+	ls->ls_recover_result = result;
+	ls->ls_recover_size = new_size;
+	spin_unlock(&ls->ls_recover_spin);
+	return 0;
+}
+
+static void free_recover_size(struct lm_lockstruct *ls)
+{
+	kfree(ls->ls_recover_submit);
+	kfree(ls->ls_recover_result);
+	ls->ls_recover_submit = NULL;
+	ls->ls_recover_result = NULL;
+	ls->ls_recover_size = 0;
+}
+
+/* dlm calls before it does lock recovery */
+
+static void gdlm_recover_prep(void *arg)
+{
+	struct gfs2_sbd *sdp = arg;
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
+	spin_lock(&ls->ls_recover_spin);
+	ls->ls_recover_block = ls->ls_recover_start;
+	set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
+
+	if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
+	     test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+	set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+	spin_unlock(&ls->ls_recover_spin);
+}
+
+/* dlm calls after recover_prep has been completed on all lockspace members;
+   identifies slot/jid of failed member */
+
+static void gdlm_recover_slot(void *arg, struct dlm_slot *slot)
+{
+	struct gfs2_sbd *sdp = arg;
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	int jid = slot->slot - 1;
+
+	spin_lock(&ls->ls_recover_spin);
+	if (ls->ls_recover_size < jid + 1) {
+		fs_err(sdp, "recover_slot jid %d gen %u short size %d",
+		       jid, ls->ls_recover_block, ls->ls_recover_size);
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+
+	if (ls->ls_recover_submit[jid]) {
+		fs_info(sdp, "recover_slot jid %d gen %u prev %u",
+			jid, ls->ls_recover_block, ls->ls_recover_submit[jid]);
+	}
+	ls->ls_recover_submit[jid] = ls->ls_recover_block;
+	spin_unlock(&ls->ls_recover_spin);
+}
+
+/* dlm calls after recover_slot and after it completes lock recovery */
+
+static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
+			      int our_slot, uint32_t generation)
+{
+	struct gfs2_sbd *sdp = arg;
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
+	/* ensure the ls jid arrays are large enough */
+	set_recover_size(sdp, slots, num_slots);
+
+	spin_lock(&ls->ls_recover_spin);
+	ls->ls_recover_start = generation;
+
+	if (!ls->ls_recover_mount) {
+		ls->ls_recover_mount = generation;
+		ls->ls_jid = our_slot - 1;
+	}
+
+	if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
+		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);
+
+	clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY);
+	spin_unlock(&ls->ls_recover_spin);
+}
+
+/* gfs2_recover thread has a journal recovery result */
+
+static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
+				 unsigned int result)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
+	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
+		return;
+
+	/* don't care about the recovery of own journal during mount */
+	if (jid == ls->ls_jid)
+		return;
+
+	spin_lock(&ls->ls_recover_spin);
+	if (test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+	if (ls->ls_recover_size < jid + 1) {
+		fs_err(sdp, "recovery_result jid %d short size %d",
+		       jid, ls->ls_recover_size);
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+
+	fs_info(sdp, "recover jid %d result %s\n", jid,
+		result == LM_RD_GAVEUP ? "busy" : "success");
+
+	ls->ls_recover_result[jid] = result;
+
+	/* GAVEUP means another node is recovering the journal; delay our
+	   next attempt to recover it, to give the other node a chance to
+	   finish before trying again */
+
+	if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
+		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work,
+				   result == LM_RD_GAVEUP ? HZ : 0);
+	spin_unlock(&ls->ls_recover_spin);
+}
+
+const struct dlm_lockspace_ops gdlm_lockspace_ops = {
+	.recover_prep = gdlm_recover_prep,
+	.recover_slot = gdlm_recover_slot,
+	.recover_done = gdlm_recover_done,
+};
+
+static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char cluster[GFS2_LOCKNAME_LEN];
+	const char *fsname;
+	uint32_t flags;
+	int error, ops_result;
+
+	/*
+	 * initialize everything
+	 */
+
+	INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func);
+	spin_lock_init(&ls->ls_recover_spin);
+	ls->ls_recover_flags = 0;
+	ls->ls_recover_mount = 0;
+	ls->ls_recover_start = 0;
+	ls->ls_recover_block = 0;
+	ls->ls_recover_size = 0;
+	ls->ls_recover_submit = NULL;
+	ls->ls_recover_result = NULL;
+
+	error = set_recover_size(sdp, NULL, 0);
+	if (error)
+		goto fail;
+
+	/*
+	 * prepare dlm_new_lockspace args
+	 */
+
+	fsname = strchr(table, ':');
+	if (!fsname) {
+		fs_info(sdp, "no fsname found\n");
+		error = -EINVAL;
+		goto fail_free;
+	}
+	memset(cluster, 0, sizeof(cluster));
+	memcpy(cluster, table, strlen(table) - strlen(fsname));
+	fsname++;
+
+	flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL;
+	if (ls->ls_nodir)
+		flags |= DLM_LSFL_NODIR;
+
+	/*
+	 * create/join lockspace
+	 */
+
+	error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE,
+				  &gdlm_lockspace_ops, sdp, &ops_result,
+				  &ls->ls_dlm);
+	if (error) {
+		fs_err(sdp, "dlm_new_lockspace error %d\n", error);
+		goto fail_free;
+	}
+
+	if (ops_result < 0) {
+		/*
+		 * dlm does not support ops callbacks,
+		 * old dlm_controld/gfs_controld are used, try without ops.
+		 */
+		fs_info(sdp, "dlm lockspace ops not used\n");
+		free_recover_size(ls);
+		set_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags);
+		return 0;
+	}
+
+	if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) {
+		fs_err(sdp, "dlm lockspace ops disallow jid preset\n");
+		error = -EINVAL;
+		goto fail_release;
+	}
+
+	/*
+	 * control_mount() uses control_lock to determine first mounter,
+	 * and for later mounts, waits for any recoveries to be cleared.
+	 */
+
+	error = control_mount(sdp);
+	if (error) {
+		fs_err(sdp, "mount control error %d\n", error);
+		goto fail_release;
+	}
+
+	ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
+	clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
+	return 0;
+
+fail_release:
+	dlm_release_lockspace(ls->ls_dlm, 2);
+fail_free:
+	free_recover_size(ls);
+fail:
+	return error;
+}
+
+static void gdlm_first_done(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	int error;
+
+	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
+		return;
+
+	error = control_first_done(sdp);
+	if (error)
+		fs_err(sdp, "mount first_done error %d\n", error);
+}
+
 static void gdlm_unmount(struct gfs2_sbd *sdp)
 {
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 
+	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
+		goto release;
+
+	/* wait for gfs2_control_wq to be done with this mount */
+
+	spin_lock(&ls->ls_recover_spin);
+	set_bit(DFL_UNMOUNT, &ls->ls_recover_flags);
+	spin_unlock(&ls->ls_recover_spin);
+	flush_delayed_work_sync(&sdp->sd_control_work);
+
+	/* mounted_lock and control_lock will be purged in dlm recovery */
+release:
 	if (ls->ls_dlm) {
 		dlm_release_lockspace(ls->ls_dlm, 2);
 		ls->ls_dlm = NULL;
 	}
+
+	free_recover_size(ls);
 }
 
 static const match_table_t dlm_tokens = {
@@ -226,6 +1197,8 @@ static const match_table_t dlm_tokens = {
 const struct lm_lockops gfs2_dlm_ops = {
 	.lm_proto_name = "lock_dlm",
 	.lm_mount = gdlm_mount,
+	.lm_first_done = gdlm_first_done,
+	.lm_recovery_result = gdlm_recovery_result,
 	.lm_unmount = gdlm_unmount,
 	.lm_put_lock = gdlm_put_lock,
 	.lm_lock = gdlm_lock,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index c150298e2d8e..a8d9bcd0e19c 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -28,6 +28,8 @@
 #include "recovery.h"
 #include "dir.h"
 
+struct workqueue_struct *gfs2_control_wq;
+
 static struct shrinker qd_shrinker = {
 	.shrink = gfs2_shrink_qd_memory,
 	.seeks = DEFAULT_SEEKS,
@@ -146,12 +148,19 @@ static int __init init_gfs2_fs(void)
 	if (!gfs_recovery_wq)
 		goto fail_wq;
 
+	gfs2_control_wq = alloc_workqueue("gfs2_control",
+			       WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0);
+	if (!gfs2_control_wq)
+		goto fail_control;
+
 	gfs2_register_debugfs();
 
 	printk("GFS2 installed\n");
 
 	return 0;
 
+fail_control:
+	destroy_workqueue(gfs_recovery_wq);
 fail_wq:
 	unregister_filesystem(&gfs2meta_fs_type);
 fail_unregister:
@@ -195,6 +204,7 @@ static void __exit exit_gfs2_fs(void)
 	unregister_filesystem(&gfs2_fs_type);
 	unregister_filesystem(&gfs2meta_fs_type);
 	destroy_workqueue(gfs_recovery_wq);
+	destroy_workqueue(gfs2_control_wq);
 
 	rcu_barrier();
 
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index fe72e79e6ff9..b01573b7ad96 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -562,8 +562,12 @@ static void gfs2_others_may_mount(struct gfs2_sbd *sdp)
 {
 	char *message = "FIRSTMOUNT=Done";
 	char *envp[] = { message, NULL };
-	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-	ls->ls_first_done = 1;
+
+	fs_info(sdp, "first mount done, others may mount\n");
+
+	if (sdp->sd_lockstruct.ls_ops->lm_first_done)
+		sdp->sd_lockstruct.ls_ops->lm_first_done(sdp);
+
 	kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
 }
 
@@ -944,7 +948,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
 	struct gfs2_args *args = &sdp->sd_args;
 	const char *proto = sdp->sd_proto_name;
 	const char *table = sdp->sd_table_name;
-	const char *fsname;
 	char *o, *options;
 	int ret;
 
@@ -1004,21 +1007,12 @@ hostdata_error:
 		}
 	}
 
-	if (sdp->sd_args.ar_spectator)
-		snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
-	else
-		snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
-			 sdp->sd_lockstruct.ls_jid);
-
-	fsname = strchr(table, ':');
-	if (fsname)
-		fsname++;
 	if (lm->lm_mount == NULL) {
 		fs_info(sdp, "Now mounting FS...\n");
 		complete_all(&sdp->sd_locking_init);
 		return 0;
 	}
-	ret = lm->lm_mount(sdp, fsname);
+	ret = lm->lm_mount(sdp, table);
 	if (ret == 0)
 		fs_info(sdp, "Joined cluster. Now mounting FS...\n");
 	complete_all(&sdp->sd_locking_init);
@@ -1124,6 +1118,8 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
 	if (error)
 		goto fail;
 
+	snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);
+
 	gfs2_create_debugfs_file(sdp);
 
 	error = gfs2_sys_fs_add(sdp);
@@ -1160,6 +1156,13 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
 		goto fail_sb;
 	}
 
+	if (sdp->sd_args.ar_spectator)
+		snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s",
+			 sdp->sd_table_name);
+	else
+		snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u",
+			 sdp->sd_table_name, sdp->sd_lockstruct.ls_jid);
+
 	error = init_inodes(sdp, DO);
 	if (error)
 		goto fail_sb;
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index f2a02edcac8f..af49e8f432fe 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -436,12 +436,16 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
 	char env_status[20];
 	char *envp[] = { env_jid, env_status, NULL };
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
         ls->ls_recover_jid_done = jid;
         ls->ls_recover_jid_status = message;
 	sprintf(env_jid, "JID=%d", jid);
 	sprintf(env_status, "RECOVERY=%s",
 		message == LM_RD_SUCCESS ? "Done" : "Failed");
         kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
+
+	if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
+		sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
 }
 
 void gfs2_recover_func(struct work_struct *work)
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 443cabcfcd23..d33172c291ba 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -298,7 +298,7 @@ static ssize_t block_show(struct gfs2_sbd *sdp, char *buf)
 	ssize_t ret;
 	int val = 0;
 
-	if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))
+	if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))
 		val = 1;
 	ret = sprintf(buf, "%d\n", val);
 	return ret;
@@ -313,9 +313,9 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 	val = simple_strtol(buf, NULL, 0);
 
 	if (val == 1)
-		set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
+		set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
 	else if (val == 0) {
-		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
+		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
 		smp_mb__after_clear_bit();
 		gfs2_glock_thaw(sdp);
 	} else {
@@ -350,8 +350,8 @@ static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 		goto out;
 	if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
 		goto out;
-        sdp->sd_lockstruct.ls_first = first;
-        rv = 0;
+	sdp->sd_lockstruct.ls_first = first;
+	rv = 0;
 out:
         spin_unlock(&sdp->sd_jindex_spin);
         return rv ? rv : len;
@@ -360,19 +360,14 @@ out:
 static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
 {
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-	return sprintf(buf, "%d\n", ls->ls_first_done);
+	return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags));
 }
 
-static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid)
 {
-	unsigned jid;
 	struct gfs2_jdesc *jd;
 	int rv;
 
-	rv = sscanf(buf, "%u", &jid);
-	if (rv != 1)
-		return -EINVAL;
-
 	rv = -ESHUTDOWN;
 	spin_lock(&sdp->sd_jindex_spin);
 	if (test_bit(SDF_NORECOVERY, &sdp->sd_flags))
@@ -389,6 +384,20 @@ static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 	}
 out:
 	spin_unlock(&sdp->sd_jindex_spin);
+	return rv;
+}
+
+static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+{
+	unsigned jid;
+	int rv;
+
+	rv = sscanf(buf, "%u", &jid);
+	if (rv != 1)
+		return -EINVAL;
+
+	rv = gfs2_recover_set(sdp, jid);
+
 	return rv ? rv : len;
 }
 
diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h
index e94560e836d7..79182d6ad6ac 100644
--- a/fs/gfs2/sys.h
+++ b/fs/gfs2/sys.h
@@ -19,5 +19,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
 int gfs2_sys_init(void);
 void gfs2_sys_uninit(void);
 
+int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid);
+
 #endif /* __SYS_DOT_H__ */
 
diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h
index 4f4462974c14..b148087f49a6 100644
--- a/include/linux/gfs2_ondisk.h
+++ b/include/linux/gfs2_ondisk.h
@@ -22,6 +22,8 @@
 #define GFS2_LIVE_LOCK		1
 #define GFS2_TRANS_LOCK		2
 #define GFS2_RENAME_LOCK	3
+#define GFS2_CONTROL_LOCK	4
+#define GFS2_MOUNTED_LOCK	5
 
 /* Format numbers for various metadata types */
 
-- 
cgit v1.2.3


From b1bd055d397e09f99dcef9b138ed104ff1812fcb Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Wed, 11 Jan 2012 16:27:11 +0100
Subject: block: Introduce blk_set_stacking_limits function

Stacking driver queue limits are typically bounded exclusively by the
capabilities of the low level devices, not by the stacking driver
itself.

This patch introduces blk_set_stacking_limits() which has more liberal
metrics than the default queue limits function. This allows us to
inherit topology parameters from bottom devices without manually
tweaking the default limits in each driver prior to calling the stacking
function.

Since there is now a clear distinction between stacking and low-level
devices, blk_set_default_limits() has been modified to carry the more
conservative values that we used to manually set in
blk_queue_make_request().

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-settings.c   | 32 ++++++++++++++++++++++++--------
 drivers/md/dm-table.c  |  6 +++---
 drivers/md/md.c        |  1 +
 include/linux/blkdev.h |  1 +
 4 files changed, 29 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index fa1eb0449a05..d3234fc494ad 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -104,9 +104,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
  * @lim:  the queue_limits structure to reset
  *
  * Description:
- *   Returns a queue_limit struct to its default state.  Can be used by
- *   stacking drivers like DM that stage table swaps and reuse an
- *   existing device queue.
+ *   Returns a queue_limit struct to its default state.
  */
 void blk_set_default_limits(struct queue_limits *lim)
 {
@@ -114,13 +112,12 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->max_integrity_segments = 0;
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
 	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
-	lim->max_sectors = BLK_DEF_MAX_SECTORS;
-	lim->max_hw_sectors = INT_MAX;
+	lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
 	lim->max_discard_sectors = 0;
 	lim->discard_granularity = 0;
 	lim->discard_alignment = 0;
 	lim->discard_misaligned = 0;
-	lim->discard_zeroes_data = 1;
+	lim->discard_zeroes_data = 0;
 	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
 	lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
 	lim->alignment_offset = 0;
@@ -130,6 +127,27 @@ void blk_set_default_limits(struct queue_limits *lim)
 }
 EXPORT_SYMBOL(blk_set_default_limits);
 
+/**
+ * blk_set_stacking_limits - set default limits for stacking devices
+ * @lim:  the queue_limits structure to reset
+ *
+ * Description:
+ *   Returns a queue_limit struct to its default state. Should be used
+ *   by stacking drivers like DM that have no internal limits.
+ */
+void blk_set_stacking_limits(struct queue_limits *lim)
+{
+	blk_set_default_limits(lim);
+
+	/* Inherit limits from component devices */
+	lim->discard_zeroes_data = 1;
+	lim->max_segments = USHRT_MAX;
+	lim->max_hw_sectors = UINT_MAX;
+
+	lim->max_sectors = BLK_DEF_MAX_SECTORS;
+}
+EXPORT_SYMBOL(blk_set_stacking_limits);
+
 /**
  * blk_queue_make_request - define an alternate make_request function for a device
  * @q:  the request queue for the device to be affected
@@ -165,8 +183,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	q->nr_batching = BLK_BATCH_REQ;
 
 	blk_set_default_limits(&q->limits);
-	blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
-	q->limits.discard_zeroes_data = 0;
 
 	/*
 	 * by default assume old behaviour and bounce for any highmem page
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 8e9132130142..63cc54289aff 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -699,7 +699,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table,
 	while (i < dm_table_get_num_targets(table)) {
 		ti = dm_table_get_target(table, i++);
 
-		blk_set_default_limits(&ti_limits);
+		blk_set_stacking_limits(&ti_limits);
 
 		/* combine all target devices' limits */
 		if (ti->type->iterate_devices)
@@ -1221,10 +1221,10 @@ int dm_calculate_queue_limits(struct dm_table *table,
 	struct queue_limits ti_limits;
 	unsigned i = 0;
 
-	blk_set_default_limits(limits);
+	blk_set_stacking_limits(limits);
 
 	while (i < dm_table_get_num_targets(table)) {
-		blk_set_default_limits(&ti_limits);
+		blk_set_stacking_limits(&ti_limits);
 
 		ti = dm_table_get_target(table, i++);
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index ee981737edfc..114ba155af87 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4622,6 +4622,7 @@ static int md_alloc(dev_t dev, char *name)
 	mddev->queue->queuedata = mddev;
 
 	blk_queue_make_request(mddev->queue, md_make_request);
+	blk_set_stacking_limits(&mddev->queue->limits);
 
 	disk = alloc_disk(1 << shift);
 	if (!disk) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8bca04873f53..adc34133a56a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -844,6 +844,7 @@ extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
 extern void blk_set_default_limits(struct queue_limits *lim);
+extern void blk_set_stacking_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			    sector_t offset);
 extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
-- 
cgit v1.2.3


From ef00f59c95fe6e002e7c6e3663cdea65e253f4cc Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Wed, 11 Jan 2012 16:29:31 +0100
Subject: block: Add BLKROTATIONAL ioctl

Introduce an ioctl which permits applications to query whether a block
device is rotational.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/compat_ioctl.c | 3 +++
 block/ioctl.c        | 2 ++
 include/linux/fs.h   | 1 +
 3 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 7b725020823c..7c668c8a6f95 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -719,6 +719,9 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	case BLKSECTGET:
 		return compat_put_ushort(arg,
 					 queue_max_sectors(bdev_get_queue(bdev)));
+	case BLKROTATIONAL:
+		return compat_put_ushort(arg,
+					 !blk_queue_nonrot(bdev_get_queue(bdev)));
 	case BLKRASET: /* compatible, but no compat_ptr (!) */
 	case BLKFRASET:
 		if (!capable(CAP_SYS_ADMIN))
diff --git a/block/ioctl.c b/block/ioctl.c
index ca939fc1030f..337d207ab14d 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -278,6 +278,8 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 		return put_uint(arg, bdev_discard_zeroes_data(bdev));
 	case BLKSECTGET:
 		return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
+	case BLKROTATIONAL:
+		return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev)));
 	case BLKRASET:
 	case BLKFRASET:
 		if(!capable(CAP_SYS_ADMIN))
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e0bc4ffb8e7f..95dd911506f1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -319,6 +319,7 @@ struct inodes_stat_t {
 #define BLKPBSZGET _IO(0x12,123)
 #define BLKDISCARDZEROES _IO(0x12,124)
 #define BLKSECDISCARD _IO(0x12,125)
+#define BLKROTATIONAL _IO(0x12,126)
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
-- 
cgit v1.2.3


From c8991362a0d3cf317dfbfb6cb946607870654e6d Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 10 Jan 2012 22:36:35 +0000
Subject: inet_diag: Rename inet_diag_req into inet_diag_req_v2

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 12 ++++++------
 net/dccp/diag.c           |  4 ++--
 net/ipv4/inet_diag.c      | 34 +++++++++++++++++-----------------
 net/ipv4/tcp_diag.c       |  4 ++--
 net/ipv4/udp_diag.c       | 14 +++++++-------
 5 files changed, 34 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 34e8d52c1925..a5b7e910eea9 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -34,7 +34,7 @@ struct inet_diag_req_compat {
 	__u32	idiag_dbs;		/* Tables to dump (NI) */
 };
 
-struct inet_diag_req {
+struct inet_diag_req_v2 {
 	__u8	sdiag_family;
 	__u8	sdiag_protocol;
 	__u8	idiag_ext;
@@ -143,12 +143,12 @@ struct netlink_callback;
 struct inet_diag_handler {
 	void			(*dump)(struct sk_buff *skb,
 					struct netlink_callback *cb,
-					struct inet_diag_req *r,
+					struct inet_diag_req_v2 *r,
 					struct nlattr *bc);
 
 	int			(*dump_one)(struct sk_buff *in_skb,
 					const struct nlmsghdr *nlh,
-					struct inet_diag_req *req);
+					struct inet_diag_req_v2 *req);
 
 	void			(*idiag_get_info)(struct sock *sk,
 						  struct inet_diag_msg *r,
@@ -158,15 +158,15 @@ struct inet_diag_handler {
 
 struct inet_connection_sock;
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
-			      struct sk_buff *skb, struct inet_diag_req *req,
+			      struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			      u32 pid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh);
 void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
-		struct netlink_callback *cb, struct inet_diag_req *r,
+		struct netlink_callback *cb, struct inet_diag_req_v2 *r,
 		struct nlattr *bc);
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 		struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req);
+		struct inet_diag_req_v2 *req);
 
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 8f1625753377..028fc43aacbd 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -49,13 +49,13 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 }
 
 static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc);
 }
 
 static int dccp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req)
+		struct inet_diag_req_v2 *req)
 {
 	return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req);
 }
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 2240a8e8c44d..cf23a7cacdd4 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -71,7 +71,7 @@ static inline void inet_diag_unlock_handler(
 }
 
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
-			      struct sk_buff *skb, struct inet_diag_req *req,
+			      struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			      u32 pid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh)
 {
@@ -193,7 +193,7 @@ nlmsg_failure:
 EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
 
 static int inet_csk_diag_fill(struct sock *sk,
-			      struct sk_buff *skb, struct inet_diag_req *req,
+			      struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			      u32 pid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh)
 {
@@ -202,7 +202,7 @@ static int inet_csk_diag_fill(struct sock *sk,
 }
 
 static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
-			       struct sk_buff *skb, struct inet_diag_req *req,
+			       struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			       u32 pid, u32 seq, u16 nlmsg_flags,
 			       const struct nlmsghdr *unlh)
 {
@@ -253,7 +253,7 @@ nlmsg_failure:
 }
 
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
-			struct inet_diag_req *r, u32 pid, u32 seq, u16 nlmsg_flags,
+			struct inet_diag_req_v2 *r, u32 pid, u32 seq, u16 nlmsg_flags,
 			const struct nlmsghdr *unlh)
 {
 	if (sk->sk_state == TCP_TIME_WAIT)
@@ -264,7 +264,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 }
 
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
-		const struct nlmsghdr *nlh, struct inet_diag_req *req)
+		const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req)
 {
 	int err;
 	struct sock *sk;
@@ -333,7 +333,7 @@ EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
 
 static int inet_diag_get_exact(struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh,
-			       struct inet_diag_req *req)
+			       struct inet_diag_req_v2 *req)
 {
 	const struct inet_diag_handler *handler;
 	int err;
@@ -540,7 +540,7 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
 static int inet_csk_diag_dump(struct sock *sk,
 			      struct sk_buff *skb,
 			      struct netlink_callback *cb,
-			      struct inet_diag_req *r,
+			      struct inet_diag_req_v2 *r,
 			      const struct nlattr *bc)
 {
 	if (!inet_diag_bc_sk(bc, sk))
@@ -554,7 +554,7 @@ static int inet_csk_diag_dump(struct sock *sk,
 static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
 			       struct sk_buff *skb,
 			       struct netlink_callback *cb,
-			       struct inet_diag_req *r,
+			       struct inet_diag_req_v2 *r,
 			       const struct nlattr *bc)
 {
 	if (bc != NULL) {
@@ -639,7 +639,7 @@ nlmsg_failure:
 
 static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 			       struct netlink_callback *cb,
-			       struct inet_diag_req *r,
+			       struct inet_diag_req_v2 *r,
 			       const struct nlattr *bc)
 {
 	struct inet_diag_entry entry;
@@ -721,7 +721,7 @@ out:
 }
 
 void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
-		struct netlink_callback *cb, struct inet_diag_req *r, struct nlattr *bc)
+		struct netlink_callback *cb, struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	int i, num;
 	int s_i, s_num;
@@ -872,7 +872,7 @@ out:
 EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
 
 static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	const struct inet_diag_handler *handler;
 
@@ -887,12 +887,12 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct nlattr *bc = NULL;
-	int hdrlen = sizeof(struct inet_diag_req);
+	int hdrlen = sizeof(struct inet_diag_req_v2);
 
 	if (nlmsg_attrlen(cb->nlh, hdrlen))
 		bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
 
-	return __inet_diag_dump(skb, cb, (struct inet_diag_req *)NLMSG_DATA(cb->nlh), bc);
+	return __inet_diag_dump(skb, cb, (struct inet_diag_req_v2 *)NLMSG_DATA(cb->nlh), bc);
 }
 
 static inline int inet_diag_type2proto(int type)
@@ -910,7 +910,7 @@ static inline int inet_diag_type2proto(int type)
 static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct inet_diag_req_compat *rc = NLMSG_DATA(cb->nlh);
-	struct inet_diag_req req;
+	struct inet_diag_req_v2 req;
 	struct nlattr *bc = NULL;
 	int hdrlen = sizeof(struct inet_diag_req_compat);
 
@@ -930,7 +930,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh)
 {
 	struct inet_diag_req_compat *rc = NLMSG_DATA(nlh);
-	struct inet_diag_req req;
+	struct inet_diag_req_v2 req;
 
 	req.sdiag_family = rc->idiag_family;
 	req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
@@ -970,7 +970,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 {
-	int hdrlen = sizeof(struct inet_diag_req);
+	int hdrlen = sizeof(struct inet_diag_req_v2);
 
 	if (nlmsg_len(h) < hdrlen)
 		return -EINVAL;
@@ -990,7 +990,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 					  inet_diag_dump, NULL, 0);
 	}
 
-	return inet_diag_get_exact(skb, h, (struct inet_diag_req *)NLMSG_DATA(h));
+	return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h));
 }
 
 static struct sock_diag_handler inet_diag_handler = {
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 8cd357a8be79..ed3f2ad42e0f 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -35,13 +35,13 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 }
 
 static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc);
 }
 
 static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req)
+		struct inet_diag_req_v2 *req)
 {
 	return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
 }
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 69f8a7ca63dd..e5e18cb8a586 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -19,7 +19,7 @@
 #include <linux/sock_diag.h>
 
 static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
-		struct netlink_callback *cb, struct inet_diag_req *req,
+		struct netlink_callback *cb, struct inet_diag_req_v2 *req,
 		struct nlattr *bc)
 {
 	if (!inet_diag_bc_sk(bc, sk))
@@ -30,7 +30,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
 }
 
 static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
-		const struct nlmsghdr *nlh, struct inet_diag_req *req)
+		const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req)
 {
 	int err = -EINVAL;
 	struct sock *sk;
@@ -88,7 +88,7 @@ out_nosk:
 }
 
 static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	int num, s_num, slot, s_slot;
 
@@ -136,13 +136,13 @@ done:
 }
 
 static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	udp_dump(&udp_table, skb, cb, r, bc);
 }
 
 static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req)
+		struct inet_diag_req_v2 *req)
 {
 	return udp_dump_one(&udp_table, in_skb, nlh, req);
 }
@@ -154,13 +154,13 @@ static const struct inet_diag_handler udp_diag_handler = {
 };
 
 static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	udp_dump(&udplite_table, skb, cb, r, bc);
 }
 
 static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req)
+		struct inet_diag_req_v2 *req)
 {
 	return udp_dump_one(&udplite_table, in_skb, nlh, req);
 }
-- 
cgit v1.2.3


From 3b09c84cb622ffbcdb5d541986b1eaf7d5812602 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 10 Jan 2012 22:37:26 +0000
Subject: inet_diag: Rename inet_diag_req_compat into inet_diag_req

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 2 +-
 net/ipv4/inet_diag.c      | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index a5b7e910eea9..f1362b5447fc 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -22,7 +22,7 @@ struct inet_diag_sockid {
 
 /* Request structure */
 
-struct inet_diag_req_compat {
+struct inet_diag_req {
 	__u8	idiag_family;		/* Family of addresses. */
 	__u8	idiag_src_len;
 	__u8	idiag_dst_len;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index cf23a7cacdd4..fcf281819cd4 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -909,10 +909,10 @@ static inline int inet_diag_type2proto(int type)
 
 static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct inet_diag_req_compat *rc = NLMSG_DATA(cb->nlh);
+	struct inet_diag_req *rc = NLMSG_DATA(cb->nlh);
 	struct inet_diag_req_v2 req;
 	struct nlattr *bc = NULL;
-	int hdrlen = sizeof(struct inet_diag_req_compat);
+	int hdrlen = sizeof(struct inet_diag_req);
 
 	req.sdiag_family = AF_UNSPEC; /* compatibility */
 	req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
@@ -929,7 +929,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *c
 static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh)
 {
-	struct inet_diag_req_compat *rc = NLMSG_DATA(nlh);
+	struct inet_diag_req *rc = NLMSG_DATA(nlh);
 	struct inet_diag_req_v2 req;
 
 	req.sdiag_family = rc->idiag_family;
@@ -943,7 +943,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
 
 static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	int hdrlen = sizeof(struct inet_diag_req_compat);
+	int hdrlen = sizeof(struct inet_diag_req);
 
 	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
 	    nlmsg_len(nlh) < hdrlen)
-- 
cgit v1.2.3


From fffe5d5aa05b4e69f79bc75a51c5ee0fc6203fa5 Mon Sep 17 00:00:00 2001
From: Qiang Liu <qiang.liu@freescale.com>
Date: Tue, 8 Nov 2011 08:43:08 -0500
Subject: mmc: sd: Macro name cleanup for high speed dtr

Add new macros for the high speed 50MHz case, rather than having
a confusing reuse of the value for UHS SDR50, which is 100MHz.

Reported-by: Aaron Lu <aaron.lu@amd.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sd.c    | 4 ++--
 include/linux/mmc/card.h | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index f2a05ea40f2a..f54392c4638a 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -307,8 +307,8 @@ static int mmc_read_switch(struct mmc_card *card)
 		goto out;
 	}
 
-	if (status[13] & UHS_SDR50_BUS_SPEED)
-		card->sw_caps.hs_max_dtr = 50000000;
+	if (status[13] & SD_MODE_HIGH_SPEED)
+		card->sw_caps.hs_max_dtr = HIGH_SPEED_MAX_DTR;
 
 	if (card->scr.sda_spec3) {
 		card->sw_caps.sd3_bus_mode = status[13];
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index c8ef9bc54d50..2c9be29684cf 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -110,6 +110,7 @@ struct sd_ssr {
 struct sd_switch_caps {
 	unsigned int		hs_max_dtr;
 	unsigned int		uhs_max_dtr;
+#define HIGH_SPEED_MAX_DTR	50000000
 #define UHS_SDR104_MAX_DTR	208000000
 #define UHS_SDR50_MAX_DTR	100000000
 #define UHS_DDR50_MAX_DTR	50000000
@@ -117,11 +118,13 @@ struct sd_switch_caps {
 #define UHS_SDR12_MAX_DTR	25000000
 	unsigned int		sd3_bus_mode;
 #define UHS_SDR12_BUS_SPEED	0
+#define HIGH_SPEED_BUS_SPEED	1
 #define UHS_SDR25_BUS_SPEED	1
 #define UHS_SDR50_BUS_SPEED	2
 #define UHS_SDR104_BUS_SPEED	3
 #define UHS_DDR50_BUS_SPEED	4
 
+#define SD_MODE_HIGH_SPEED	(1 << HIGH_SPEED_BUS_SPEED)
 #define SD_MODE_UHS_SDR12	(1 << UHS_SDR12_BUS_SPEED)
 #define SD_MODE_UHS_SDR25	(1 << UHS_SDR25_BUS_SPEED)
 #define SD_MODE_UHS_SDR50	(1 << UHS_SDR50_BUS_SPEED)
-- 
cgit v1.2.3


From df16219f365f7f5a2d88a6e123251d57255cca3f Mon Sep 17 00:00:00 2001
From: Giuseppe CAVALLARO <peppe.cavallaro@st.com>
Date: Fri, 4 Nov 2011 13:53:19 +0100
Subject: mmc: debugfs: expose the SDCLK frq in sys ios

This patch is to expose the actual SDCLK frequency in
/sys/kernel/debug/mmcX/ios entry.

For example, if the max clk for a normal speed card is 20MHz this
is reported in /sys/kernel/debug/mmcX/ios.  Unfortunately the actual
SDCLK frequency (i.e. Baseclock / divisor) is not reported at all:
for example, in that case, on Arasan HC, it should be 48/4=12 (MHz).

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/debugfs.c |  2 ++
 drivers/mmc/host/sdhci.c   | 10 ++++++++++
 include/linux/mmc/host.h   |  2 ++
 3 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 3923880118b6..027615d3bf3e 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -57,6 +57,8 @@ static int mmc_ios_show(struct seq_file *s, void *data)
 	const char *str;
 
 	seq_printf(s, "clock:\t\t%u Hz\n", ios->clock);
+	if (host->actual_clock)
+		seq_printf(s, "actual clock:\t%u Hz\n", host->actual_clock);
 	seq_printf(s, "vdd:\t\t%u ", ios->vdd);
 	if ((1 << ios->vdd) & MMC_VDD_165_195)
 		seq_printf(s, "(1.65 - 1.95 V)\n");
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 19ed580f2cab..a7c23118dab2 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1066,12 +1066,15 @@ static void sdhci_finish_command(struct sdhci_host *host)
 static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 {
 	int div = 0; /* Initialized for compiler warning */
+	int real_div = div, clk_mul = 1;
 	u16 clk = 0;
 	unsigned long timeout;
 
 	if (clock == host->clock)
 		return;
 
+	host->mmc->actual_clock = 0;
+
 	if (host->ops->set_clock) {
 		host->ops->set_clock(host, clock);
 		if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK)
@@ -1109,6 +1112,8 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 				 * Control register.
 				 */
 				clk = SDHCI_PROG_CLOCK_MODE;
+				real_div = div;
+				clk_mul = host->clk_mul;
 				div--;
 			}
 		} else {
@@ -1122,6 +1127,7 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 						break;
 				}
 			}
+			real_div = div;
 			div >>= 1;
 		}
 	} else {
@@ -1130,9 +1136,13 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 			if ((host->max_clk / div) <= clock)
 				break;
 		}
+		real_div = div;
 		div >>= 1;
 	}
 
+	if (real_div)
+		host->mmc->actual_clock = (host->max_clk * clk_mul) / real_div;
+
 	clk |= (div & SDHCI_DIV_MASK) << SDHCI_DIVIDER_SHIFT;
 	clk |= ((div & SDHCI_DIV_HI_MASK) >> SDHCI_DIV_MASK_LEN)
 		<< SDHCI_DIVIDER_HI_SHIFT;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index a3ac9c48e5de..cea064f73514 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -323,6 +323,8 @@ struct mmc_host {
 	struct fault_attr	fail_mmc_request;
 #endif
 
+	unsigned int		actual_clock;	/* Actual HC clock rate */
+
 	unsigned long		private[0] ____cacheline_aligned;
 };
 
-- 
cgit v1.2.3


From 597dd9d79cfbbb1636d00a7fd0880355d9b20c41 Mon Sep 17 00:00:00 2001
From: Sujit Reddy Thumma <sthumma@codeaurora.org>
Date: Mon, 14 Nov 2011 13:53:29 +0530
Subject: mmc: core: Use delayed work in clock gating framework

Current clock gating framework disables the MCI clock as soon as the
request is completed and enables it when a request arrives. This aggressive
clock gating framework, when enabled, cause following issues:

When there are back-to-back requests from the Queue layer, we unnecessarily
end up disabling and enabling the clocks between these requests since 8MCLK
clock cycles is a very short duration compared to the time delay between
back to back requests reaching the MMC layer. This overhead can effect the
overall performance depending on how long the clock enable and disable
calls take which is platform dependent. For example on some platforms we
can have clock control not on the local processor, but on a different
subsystem and the time taken to perform the clock enable/disable can add
significant overhead.

Also if the host controller driver decides to disable the host clock too
when mmc_set_ios function is called with ios.clock=0, it adds additional
delay and it is highly possible that the next request had already arrived
and unnecessarily blocked in enabling the clocks. This is seen frequently
when the processor is executing at high speeds and in multi-core platforms
thus reduces the overall throughput compared to if clock gating is
disabled.

Fix this by delaying turning off the clocks by posting request on
delayed workqueue. Also cancel the unscheduled pending work, if any,
when there is access to card.

sysfs entry is provided to tune the delay as needed, default
value set to 200ms.

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 Documentation/mmc/mmc-dev-attrs.txt | 10 +++++++
 drivers/mmc/core/host.c             | 57 ++++++++++++++++++++++++++++++++++---
 include/linux/mmc/host.h            |  4 ++-
 3 files changed, 66 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt
index 8898a95b41e5..b0245565875a 100644
--- a/Documentation/mmc/mmc-dev-attrs.txt
+++ b/Documentation/mmc/mmc-dev-attrs.txt
@@ -64,3 +64,13 @@ Note on Erase Size and Preferred Erase Size:
 	size specified by the card.
 
 	"preferred_erase_size" is in bytes.
+
+SD/MMC/SDIO Clock Gating Attribute
+==================================
+
+Read and write access is provided to following attribute.
+This attribute appears only if CONFIG_MMC_CLKGATE is enabled.
+
+	clkgate_delay	Tune the clock gating delay with desired value in milli seconds.
+
+echo <desired delay> > /sys/class/mmc_host/mmcX/clkgate_delay
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index d31c78b72b0f..817a76039743 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -54,6 +54,31 @@ static DEFINE_IDR(mmc_host_idr);
 static DEFINE_SPINLOCK(mmc_host_lock);
 
 #ifdef CONFIG_MMC_CLKGATE
+static ssize_t clkgate_delay_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+	return snprintf(buf, PAGE_SIZE, "%lu millisecs\n",
+			host->clkgate_delay);
+}
+
+static ssize_t clkgate_delay_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+	unsigned long flags, value;
+
+	if (kstrtoul(buf, 0, &value))
+		return -EINVAL;
+
+	spin_lock_irqsave(&host->clk_lock, flags);
+	host->clkgate_delay = value;
+	spin_unlock_irqrestore(&host->clk_lock, flags);
+
+	pr_info("%s: clock gate delay set to %lu ms\n",
+			mmc_hostname(host), value);
+	return count;
+}
 
 /*
  * Enabling clock gating will make the core call out to the host
@@ -114,7 +139,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host)
 static void mmc_host_clk_gate_work(struct work_struct *work)
 {
 	struct mmc_host *host = container_of(work, struct mmc_host,
-					      clk_gate_work);
+					      clk_gate_work.work);
 
 	mmc_host_clk_gate_delayed(host);
 }
@@ -131,6 +156,8 @@ void mmc_host_clk_hold(struct mmc_host *host)
 {
 	unsigned long flags;
 
+	/* cancel any clock gating work scheduled by mmc_host_clk_release() */
+	cancel_delayed_work_sync(&host->clk_gate_work);
 	mutex_lock(&host->clk_gate_mutex);
 	spin_lock_irqsave(&host->clk_lock, flags);
 	if (host->clk_gated) {
@@ -180,7 +207,8 @@ void mmc_host_clk_release(struct mmc_host *host)
 	host->clk_requests--;
 	if (mmc_host_may_gate_card(host->card) &&
 	    !host->clk_requests)
-		queue_work(system_nrt_wq, &host->clk_gate_work);
+		queue_delayed_work(system_nrt_wq, &host->clk_gate_work,
+				msecs_to_jiffies(host->clkgate_delay));
 	spin_unlock_irqrestore(&host->clk_lock, flags);
 }
 
@@ -213,8 +241,13 @@ static inline void mmc_host_clk_init(struct mmc_host *host)
 	host->clk_requests = 0;
 	/* Hold MCI clock for 8 cycles by default */
 	host->clk_delay = 8;
+	/*
+	 * Default clock gating delay is 200ms.
+	 * This value can be tuned by writing into sysfs entry.
+	 */
+	host->clkgate_delay = 200;
 	host->clk_gated = false;
-	INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work);
+	INIT_DELAYED_WORK(&host->clk_gate_work, mmc_host_clk_gate_work);
 	spin_lock_init(&host->clk_lock);
 	mutex_init(&host->clk_gate_mutex);
 }
@@ -229,7 +262,7 @@ static inline void mmc_host_clk_exit(struct mmc_host *host)
 	 * Wait for any outstanding gate and then make sure we're
 	 * ungated before exiting.
 	 */
-	if (cancel_work_sync(&host->clk_gate_work))
+	if (cancel_delayed_work_sync(&host->clk_gate_work))
 		mmc_host_clk_gate_delayed(host);
 	if (host->clk_gated)
 		mmc_host_clk_hold(host);
@@ -237,6 +270,17 @@ static inline void mmc_host_clk_exit(struct mmc_host *host)
 	WARN_ON(host->clk_requests > 1);
 }
 
+static inline void mmc_host_clk_sysfs_init(struct mmc_host *host)
+{
+	host->clkgate_delay_attr.show = clkgate_delay_show;
+	host->clkgate_delay_attr.store = clkgate_delay_store;
+	sysfs_attr_init(&host->clkgate_delay_attr.attr);
+	host->clkgate_delay_attr.attr.name = "clkgate_delay";
+	host->clkgate_delay_attr.attr.mode = S_IRUGO | S_IWUSR;
+	if (device_create_file(&host->class_dev, &host->clkgate_delay_attr))
+		pr_err("%s: Failed to create clkgate_delay sysfs entry\n",
+				mmc_hostname(host));
+}
 #else
 
 static inline void mmc_host_clk_init(struct mmc_host *host)
@@ -247,6 +291,10 @@ static inline void mmc_host_clk_exit(struct mmc_host *host)
 {
 }
 
+static inline void mmc_host_clk_sysfs_init(struct mmc_host *host)
+{
+}
+
 #endif
 
 /**
@@ -335,6 +383,7 @@ int mmc_add_host(struct mmc_host *host)
 #ifdef CONFIG_DEBUG_FS
 	mmc_add_host_debugfs(host);
 #endif
+	mmc_host_clk_sysfs_init(host);
 
 	mmc_start_host(host);
 	register_pm_notifier(&host->pm_notify);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index cea064f73514..706f72279a17 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -253,10 +253,12 @@ struct mmc_host {
 	int			clk_requests;	/* internal reference counter */
 	unsigned int		clk_delay;	/* number of MCI clk hold cycles */
 	bool			clk_gated;	/* clock gated */
-	struct work_struct	clk_gate_work; /* delayed clock gate */
+	struct delayed_work	clk_gate_work; /* delayed clock gate */
 	unsigned int		clk_old;	/* old clock value cache */
 	spinlock_t		clk_lock;	/* lock for clk fields */
 	struct mutex		clk_gate_mutex;	/* mutex for clock gating */
+	struct device_attribute clkgate_delay_attr;
+	unsigned long           clkgate_delay;
 #endif
 
 	/* host specific block data */
-- 
cgit v1.2.3


From a303c5319c8e6ab0e744ebca118da8420043b2c3 Mon Sep 17 00:00:00 2001
From: Philip Rakity <prakity@marvell.com>
Date: Mon, 14 Nov 2011 19:14:38 -0800
Subject: mmc: sdio: support SDIO UHS cards

This patch adds support for sdio UHS cards per the version 3.0
spec.

UHS mode is only enabled for version 3.0 cards when both the
host and the controller support UHS modes.

1.8v signaling support is removed if both the card and the
host do not support UHS.  This is done to maintain
compatibility and some system/card combinations break when
1.8v signaling is enabled when the host does not support UHS.

Signed-off-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Aaron Lu <Aaron.lu@amd.com>
Reviewed-by: Arindam Nath <arindam.nath@amd.com>
Tested-by: Bing Zhao <bzhao@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/bus.c   |   2 +-
 drivers/mmc/core/sd.c    |   2 +-
 drivers/mmc/core/sdio.c  | 329 +++++++++++++++++++++++++++++++++++++++++++----
 include/linux/mmc/card.h |   4 +-
 include/linux/mmc/sdio.h |  29 ++++-
 5 files changed, 336 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 6be49249895a..f8a228a61fd4 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -305,7 +305,7 @@ int mmc_add_card(struct mmc_card *card)
 	} else {
 		printk(KERN_INFO "%s: new %s%s%s card at address %04x\n",
 			mmc_hostname(card->host),
-			mmc_sd_card_uhs(card) ? "ultra high speed " :
+			mmc_card_uhs(card) ? "ultra high speed " :
 			(mmc_card_highspeed(card) ? "high speed " : ""),
 			mmc_card_ddr_mode(card) ? "DDR " : "",
 			type, card->rca);
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index f54392c4638a..85b858f6d5d4 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -960,7 +960,7 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 			goto free_card;
 
 		/* Card is an ultra-high-speed card */
-		mmc_sd_card_set_uhs(card);
+		mmc_card_set_uhs(card);
 
 		/*
 		 * Since initialization is now complete, enable preset
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 3ab565e32a6a..8c04f7f46dec 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -102,6 +102,7 @@ static int sdio_read_cccr(struct mmc_card *card)
 	int ret;
 	int cccr_vsn;
 	unsigned char data;
+	unsigned char speed;
 
 	memset(&card->cccr, 0, sizeof(struct sdio_cccr));
 
@@ -140,12 +141,60 @@ static int sdio_read_cccr(struct mmc_card *card)
 	}
 
 	if (cccr_vsn >= SDIO_CCCR_REV_1_20) {
-		ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_SPEED, 0, &data);
+		ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_SPEED, 0, &speed);
 		if (ret)
 			goto out;
 
-		if (data & SDIO_SPEED_SHS)
-			card->cccr.high_speed = 1;
+		card->scr.sda_spec3 = 0;
+		card->sw_caps.sd3_bus_mode = 0;
+		card->sw_caps.sd3_drv_type = 0;
+		if (cccr_vsn >= SDIO_CCCR_REV_3_00) {
+			card->scr.sda_spec3 = 1;
+			ret = mmc_io_rw_direct(card, 0, 0,
+				SDIO_CCCR_UHS, 0, &data);
+			if (ret)
+				goto out;
+
+			if (card->host->caps &
+				(MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
+				 MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 |
+				 MMC_CAP_UHS_DDR50)) {
+				if (data & SDIO_UHS_DDR50)
+					card->sw_caps.sd3_bus_mode
+						|= SD_MODE_UHS_DDR50;
+
+				if (data & SDIO_UHS_SDR50)
+					card->sw_caps.sd3_bus_mode
+						|= SD_MODE_UHS_SDR50;
+
+				if (data & SDIO_UHS_SDR104)
+					card->sw_caps.sd3_bus_mode
+						|= SD_MODE_UHS_SDR104;
+			}
+
+			ret = mmc_io_rw_direct(card, 0, 0,
+				SDIO_CCCR_DRIVE_STRENGTH, 0, &data);
+			if (ret)
+				goto out;
+
+			if (data & SDIO_DRIVE_SDTA)
+				card->sw_caps.sd3_drv_type |= SD_DRIVER_TYPE_A;
+			if (data & SDIO_DRIVE_SDTC)
+				card->sw_caps.sd3_drv_type |= SD_DRIVER_TYPE_C;
+			if (data & SDIO_DRIVE_SDTD)
+				card->sw_caps.sd3_drv_type |= SD_DRIVER_TYPE_D;
+		}
+
+		/* if no uhs mode ensure we check for high speed */
+		if (!card->sw_caps.sd3_bus_mode) {
+			if (speed & SDIO_SPEED_SHS) {
+				card->cccr.high_speed = 1;
+				card->sw_caps.hs_max_dtr = 50000000;
+			} else {
+				card->cccr.high_speed = 0;
+				card->sw_caps.hs_max_dtr = 25000000;
+			}
+		}
 	}
 
 out:
@@ -327,6 +376,193 @@ static unsigned mmc_sdio_get_max_clock(struct mmc_card *card)
 	return max_dtr;
 }
 
+static unsigned char host_drive_to_sdio_drive(int host_strength)
+{
+	switch (host_strength) {
+	case MMC_SET_DRIVER_TYPE_A:
+		return SDIO_DTSx_SET_TYPE_A;
+	case MMC_SET_DRIVER_TYPE_B:
+		return SDIO_DTSx_SET_TYPE_B;
+	case MMC_SET_DRIVER_TYPE_C:
+		return SDIO_DTSx_SET_TYPE_C;
+	case MMC_SET_DRIVER_TYPE_D:
+		return SDIO_DTSx_SET_TYPE_D;
+	default:
+		return SDIO_DTSx_SET_TYPE_B;
+	}
+}
+
+static void sdio_select_driver_type(struct mmc_card *card)
+{
+	int host_drv_type = SD_DRIVER_TYPE_B;
+	int card_drv_type = SD_DRIVER_TYPE_B;
+	int drive_strength;
+	unsigned char card_strength;
+	int err;
+
+	/*
+	 * If the host doesn't support any of the Driver Types A,C or D,
+	 * or there is no board specific handler then default Driver
+	 * Type B is used.
+	 */
+	if (!(card->host->caps &
+		(MMC_CAP_DRIVER_TYPE_A |
+		 MMC_CAP_DRIVER_TYPE_C |
+		 MMC_CAP_DRIVER_TYPE_D)))
+		return;
+
+	if (!card->host->ops->select_drive_strength)
+		return;
+
+	if (card->host->caps & MMC_CAP_DRIVER_TYPE_A)
+		host_drv_type |= SD_DRIVER_TYPE_A;
+
+	if (card->host->caps & MMC_CAP_DRIVER_TYPE_C)
+		host_drv_type |= SD_DRIVER_TYPE_C;
+
+	if (card->host->caps & MMC_CAP_DRIVER_TYPE_D)
+		host_drv_type |= SD_DRIVER_TYPE_D;
+
+	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A)
+		card_drv_type |= SD_DRIVER_TYPE_A;
+
+	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
+		card_drv_type |= SD_DRIVER_TYPE_C;
+
+	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_D)
+		card_drv_type |= SD_DRIVER_TYPE_D;
+
+	/*
+	 * The drive strength that the hardware can support
+	 * depends on the board design.  Pass the appropriate
+	 * information and let the hardware specific code
+	 * return what is possible given the options
+	 */
+	drive_strength = card->host->ops->select_drive_strength(
+		card->sw_caps.uhs_max_dtr,
+		host_drv_type, card_drv_type);
+
+	/* if error just use default for drive strength B */
+	err = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_DRIVE_STRENGTH, 0,
+		&card_strength);
+	if (err)
+		return;
+
+	card_strength &= ~(SDIO_DRIVE_DTSx_MASK<<SDIO_DRIVE_DTSx_SHIFT);
+	card_strength |= host_drive_to_sdio_drive(drive_strength);
+
+	err = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_DRIVE_STRENGTH,
+		card_strength, NULL);
+
+	/* if error default to drive strength B */
+	if (!err)
+		mmc_set_driver_type(card->host, drive_strength);
+}
+
+
+static int sdio_set_bus_speed_mode(struct mmc_card *card)
+{
+	unsigned int bus_speed, timing;
+	int err;
+	unsigned char speed;
+
+	/*
+	 * If the host doesn't support any of the UHS-I modes, fallback on
+	 * default speed.
+	 */
+	if (!(card->host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
+	    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50)))
+		return 0;
+
+	bus_speed = SDIO_SPEED_SDR12;
+	timing = MMC_TIMING_UHS_SDR12;
+	if ((card->host->caps & MMC_CAP_UHS_SDR104) &&
+	    (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)) {
+			bus_speed = SDIO_SPEED_SDR104;
+			timing = MMC_TIMING_UHS_SDR104;
+			card->sw_caps.uhs_max_dtr = UHS_SDR104_MAX_DTR;
+	} else if ((card->host->caps & MMC_CAP_UHS_DDR50) &&
+		   (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_DDR50)) {
+			bus_speed = SDIO_SPEED_DDR50;
+			timing = MMC_TIMING_UHS_DDR50;
+			card->sw_caps.uhs_max_dtr = UHS_DDR50_MAX_DTR;
+	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
+		    MMC_CAP_UHS_SDR50)) && (card->sw_caps.sd3_bus_mode &
+		    SD_MODE_UHS_SDR50)) {
+			bus_speed = SDIO_SPEED_SDR50;
+			timing = MMC_TIMING_UHS_SDR50;
+			card->sw_caps.uhs_max_dtr = UHS_SDR50_MAX_DTR;
+	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
+		    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25)) &&
+		   (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR25)) {
+			bus_speed = SDIO_SPEED_SDR25;
+			timing = MMC_TIMING_UHS_SDR25;
+			card->sw_caps.uhs_max_dtr = UHS_SDR25_MAX_DTR;
+	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
+		    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25 |
+		    MMC_CAP_UHS_SDR12)) && (card->sw_caps.sd3_bus_mode &
+		    SD_MODE_UHS_SDR12)) {
+			bus_speed = SDIO_SPEED_SDR12;
+			timing = MMC_TIMING_UHS_SDR12;
+			card->sw_caps.uhs_max_dtr = UHS_SDR12_MAX_DTR;
+	}
+
+	err = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_SPEED, 0, &speed);
+	if (err)
+		return err;
+
+	speed &= ~SDIO_SPEED_BSS_MASK;
+	speed |= bus_speed;
+	err = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_SPEED, speed, NULL);
+	if (err)
+		return err;
+
+	if (bus_speed) {
+		mmc_set_timing(card->host, timing);
+		mmc_set_clock(card->host, card->sw_caps.uhs_max_dtr);
+	}
+
+	return 0;
+}
+
+/*
+ * UHS-I specific initialization procedure
+ */
+static int mmc_sdio_init_uhs_card(struct mmc_card *card)
+{
+	int err;
+
+	if (!card->scr.sda_spec3)
+		return 0;
+
+	/*
+	 * Switch to wider bus (if supported).
+	 */
+	if (card->host->caps & MMC_CAP_4_BIT_DATA) {
+		err = sdio_enable_4bit_bus(card);
+		if (err > 0) {
+			mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
+			err = 0;
+		}
+	}
+
+	/* Set the driver strength for the card */
+	sdio_select_driver_type(card);
+
+	/* Set bus speed mode of the card */
+	err = sdio_set_bus_speed_mode(card);
+	if (err)
+		goto out;
+
+	/* Initialize and start re-tuning timer */
+	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning)
+		err = card->host->ops->execute_tuning(card->host);
+
+out:
+
+	return err;
+}
+
 /*
  * Handle the detection and initialisation of a card.
  *
@@ -393,6 +629,30 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr,
 	if (host->ops->init_card)
 		host->ops->init_card(host, card);
 
+	/*
+	 * If the host and card support UHS-I mode request the card
+	 * to switch to 1.8V signaling level.  No 1.8v signalling if
+	 * UHS mode is not enabled to maintain compatibilty and some
+	 * systems that claim 1.8v signalling in fact do not support
+	 * it.
+	 */
+	if ((ocr & R4_18V_PRESENT) &&
+		(host->caps &
+			(MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
+			 MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 |
+			 MMC_CAP_UHS_DDR50))) {
+		err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180,
+				true);
+		if (err) {
+			ocr &= ~R4_18V_PRESENT;
+			host->ocr &= ~R4_18V_PRESENT;
+		}
+		err = 0;
+	} else {
+		ocr &= ~R4_18V_PRESENT;
+		host->ocr &= ~R4_18V_PRESENT;
+	}
+
 	/*
 	 * For native busses:  set card RCA and quit open drain mode.
 	 */
@@ -492,29 +752,39 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr,
 	if (err)
 		goto remove;
 
-	/*
-	 * Switch to high-speed (if supported).
-	 */
-	err = sdio_enable_hs(card);
-	if (err > 0)
-		mmc_sd_go_highspeed(card);
-	else if (err)
-		goto remove;
+	/* Initialization sequence for UHS-I cards */
+	/* Only if card supports 1.8v and UHS signaling */
+	if ((ocr & R4_18V_PRESENT) && card->sw_caps.sd3_bus_mode) {
+		err = mmc_sdio_init_uhs_card(card);
+		if (err)
+			goto remove;
 
-	/*
-	 * Change to the card's maximum speed.
-	 */
-	mmc_set_clock(host, mmc_sdio_get_max_clock(card));
+		/* Card is an ultra-high-speed card */
+		mmc_card_set_uhs(card);
+	} else {
+		/*
+		 * Switch to high-speed (if supported).
+		 */
+		err = sdio_enable_hs(card);
+		if (err > 0)
+			mmc_sd_go_highspeed(card);
+		else if (err)
+			goto remove;
 
-	/*
-	 * Switch to wider bus (if supported).
-	 */
-	err = sdio_enable_4bit_bus(card);
-	if (err > 0)
-		mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
-	else if (err)
-		goto remove;
+		/*
+		 * Change to the card's maximum speed.
+		 */
+		mmc_set_clock(host, mmc_sdio_get_max_clock(card));
 
+		/*
+		 * Switch to wider bus (if supported).
+		 */
+		err = sdio_enable_4bit_bus(card);
+		if (err > 0)
+			mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
+		else if (err)
+			goto remove;
+	}
 finish:
 	if (!oldcard)
 		host->card = card;
@@ -797,8 +1067,17 @@ int mmc_attach_sdio(struct mmc_host *host)
 	 * Detect and init the card.
 	 */
 	err = mmc_sdio_init_card(host, host->ocr, NULL, 0);
-	if (err)
-		goto err;
+	if (err) {
+		if (err == -EAGAIN) {
+			/*
+			 * Retry initialization with S18R set to 0.
+			 */
+			host->ocr &= ~R4_18V_PRESENT;
+			err = mmc_sdio_init_card(host, host->ocr, NULL, 0);
+		}
+		if (err)
+			goto err;
+	}
 	card = host->card;
 
 	/*
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 2c9be29684cf..534974c3ef0c 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -367,7 +367,8 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_highspeed(c)	((c)->state & MMC_STATE_HIGHSPEED)
 #define mmc_card_blockaddr(c)	((c)->state & MMC_STATE_BLOCKADDR)
 #define mmc_card_ddr_mode(c)	((c)->state & MMC_STATE_HIGHSPEED_DDR)
-#define mmc_sd_card_uhs(c) ((c)->state & MMC_STATE_ULTRAHIGHSPEED)
+#define mmc_card_uhs(c)		((c)->state & MMC_STATE_ULTRAHIGHSPEED)
+#define mmc_sd_card_uhs(c)	((c)->state & MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
 
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
@@ -375,6 +376,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED)
 #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
 #define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR)
+#define mmc_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
 
diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h
index e0b1123497b9..c9fe66c58f8f 100644
--- a/include/linux/mmc/sdio.h
+++ b/include/linux/mmc/sdio.h
@@ -38,6 +38,7 @@
  *      [8:0] Byte/block count
  */
 
+#define R4_18V_PRESENT (1<<24)
 #define R4_MEMORY_PRESENT (1 << 27)
 
 /*
@@ -85,6 +86,7 @@
 #define  SDIO_SD_REV_1_01	0	/* SD Physical Spec Version 1.01 */
 #define  SDIO_SD_REV_1_10	1	/* SD Physical Spec Version 1.10 */
 #define  SDIO_SD_REV_2_00	2	/* SD Physical Spec Version 2.00 */
+#define  SDIO_SD_REV_3_00	3	/* SD Physical Spev Version 3.00 */
 
 #define SDIO_CCCR_IOEx		0x02
 #define SDIO_CCCR_IORx		0x03
@@ -134,8 +136,31 @@
 #define SDIO_CCCR_SPEED		0x13
 
 #define  SDIO_SPEED_SHS		0x01	/* Supports High-Speed mode */
-#define  SDIO_SPEED_EHS		0x02	/* Enable High-Speed mode */
-
+#define  SDIO_SPEED_BSS_SHIFT	1
+#define  SDIO_SPEED_BSS_MASK	(7<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_SDR12	(0<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_SDR25	(1<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_SDR50	(2<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_SDR104	(3<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_DDR50	(4<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_EHS		SDIO_SPEED_SDR25	/* Enable High-Speed */
+
+#define SDIO_CCCR_UHS		0x14
+#define  SDIO_UHS_SDR50		0x01
+#define  SDIO_UHS_SDR104	0x02
+#define  SDIO_UHS_DDR50		0x04
+
+#define SDIO_CCCR_DRIVE_STRENGTH 0x15
+#define  SDIO_SDTx_MASK		0x07
+#define  SDIO_DRIVE_SDTA	(1<<0)
+#define  SDIO_DRIVE_SDTC	(1<<1)
+#define  SDIO_DRIVE_SDTD	(1<<2)
+#define  SDIO_DRIVE_DTSx_MASK	0x03
+#define  SDIO_DRIVE_DTSx_SHIFT	4
+#define  SDIO_DTSx_SET_TYPE_B	(0 << SDIO_DRIVE_DTSx_SHIFT)
+#define  SDIO_DTSx_SET_TYPE_A	(1 << SDIO_DRIVE_DTSx_SHIFT)
+#define  SDIO_DTSx_SET_TYPE_C	(2 << SDIO_DRIVE_DTSx_SHIFT)
+#define  SDIO_DTSx_SET_TYPE_D	(3 << SDIO_DRIVE_DTSx_SHIFT)
 /*
  * Function Basic Registers (FBR)
  */
-- 
cgit v1.2.3


From 5a09262744a0b84719b933ac66801de058776755 Mon Sep 17 00:00:00 2001
From: Per Forlin <per.forlin@stericsson.com>
Date: Mon, 14 Nov 2011 12:02:28 +0100
Subject: mmc: mmci: add capabilities2 for MMC_CAP2

Signed-off-by: Per Forlin <per.forlin@stericsson.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/mmci.c   | 1 +
 include/linux/amba/mmci.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 50b5f9926f64..8eabf999a858 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -1239,6 +1239,7 @@ static int __devinit mmci_probe(struct amba_device *dev,
 	if (host->vcc == NULL)
 		mmc->ocr_avail = plat->ocr_mask;
 	mmc->caps = plat->capabilities;
+	mmc->caps2 = plat->capabilities2;
 
 	/*
 	 * We can do SGIO
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index 21114810c7c0..0101e9c17fa1 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -30,6 +30,7 @@ struct dma_chan;
  * @cd_invert: true if the gpio_cd pin value is active low
  * @capabilities: the capabilities of the block as implemented in
  * this platform, signify anything MMC_CAP_* from mmc/host.h
+ * @capabilities2: more capabilities, MMC_CAP2_* from mmc/host.h
  * @dma_filter: function used to select an appropriate RX and TX
  * DMA channel to be used for DMA, if and only if you're deploying the
  * generic DMA engine
@@ -52,6 +53,7 @@ struct mmci_platform_data {
 	int	gpio_cd;
 	bool	cd_invert;
 	unsigned long capabilities;
+	unsigned long capabilities2;
 	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
 	void *dma_rx_param;
 	void *dma_tx_param;
-- 
cgit v1.2.3


From d30495048892980e5d453328d1cc9343b3f7e917 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 28 Nov 2011 16:22:00 +0200
Subject: mmc: allow upper layers to know immediately if card has been removed

Add a function mmc_detect_card_removed() which upper layers can use to
determine immediately if a card has been removed. This function should
be called after an I/O request fails so that all queued I/O requests
can be errored out immediately instead of waiting for the card device
to be removed.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  | 51 +++++++++++++++++++++++++++++++++++++++++++++---
 drivers/mmc/core/core.h  |  3 +++
 drivers/mmc/core/mmc.c   | 12 +++++++++++-
 drivers/mmc/core/sd.c    | 12 +++++++++++-
 drivers/mmc/core/sdio.c  | 11 ++++++++++-
 include/linux/mmc/card.h |  3 +++
 include/linux/mmc/core.h |  2 ++
 include/linux/mmc/host.h |  1 +
 8 files changed, 89 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 950b97d7412a..a2aa860956ef 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -140,7 +140,7 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
 			cmd->retries = 0;
 	}
 
-	if (err && cmd->retries) {
+	if (err && cmd->retries && !mmc_card_removed(host->card)) {
 		/*
 		 * Request starter must handle retries - see
 		 * mmc_wait_for_req_done().
@@ -247,6 +247,11 @@ static void __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq)
 {
 	init_completion(&mrq->completion);
 	mrq->done = mmc_wait_done;
+	if (mmc_card_removed(host->card)) {
+		mrq->cmd->error = -ENOMEDIUM;
+		complete(&mrq->completion);
+		return;
+	}
 	mmc_start_request(host, mrq);
 }
 
@@ -259,7 +264,8 @@ static void mmc_wait_for_req_done(struct mmc_host *host,
 		wait_for_completion(&mrq->completion);
 
 		cmd = mrq->cmd;
-		if (!cmd->error || !cmd->retries)
+		if (!cmd->error || !cmd->retries ||
+		    mmc_card_removed(host->card))
 			break;
 
 		pr_debug("%s: req failed (CMD%u): %d, retrying...\n",
@@ -1456,7 +1462,7 @@ void mmc_detect_change(struct mmc_host *host, unsigned long delay)
 	WARN_ON(host->removed);
 	spin_unlock_irqrestore(&host->lock, flags);
 #endif
-
+	host->detect_change = 1;
 	mmc_schedule_delayed_work(&host->detect, delay);
 }
 
@@ -2049,6 +2055,43 @@ static int mmc_rescan_try_freq(struct mmc_host *host, unsigned freq)
 	return -EIO;
 }
 
+int _mmc_detect_card_removed(struct mmc_host *host)
+{
+	int ret;
+
+	if ((host->caps & MMC_CAP_NONREMOVABLE) || !host->bus_ops->alive)
+		return 0;
+
+	if (!host->card || mmc_card_removed(host->card))
+		return 1;
+
+	ret = host->bus_ops->alive(host);
+	if (ret) {
+		mmc_card_set_removed(host->card);
+		pr_debug("%s: card remove detected\n", mmc_hostname(host));
+	}
+
+	return ret;
+}
+
+int mmc_detect_card_removed(struct mmc_host *host)
+{
+	struct mmc_card *card = host->card;
+
+	WARN_ON(!host->claimed);
+	/*
+	 * The card will be considered unchanged unless we have been asked to
+	 * detect a change or host requires polling to provide card detection.
+	 */
+	if (card && !host->detect_change && !(host->caps & MMC_CAP_NEEDS_POLL))
+		return mmc_card_removed(card);
+
+	host->detect_change = 0;
+
+	return _mmc_detect_card_removed(host);
+}
+EXPORT_SYMBOL(mmc_detect_card_removed);
+
 void mmc_rescan(struct work_struct *work)
 {
 	static const unsigned freqs[] = { 400000, 300000, 200000, 100000 };
@@ -2069,6 +2112,8 @@ void mmc_rescan(struct work_struct *work)
 	    && !(host->caps & MMC_CAP_NONREMOVABLE))
 		host->bus_ops->detect(host);
 
+	host->detect_change = 0;
+
 	/*
 	 * Let mmc_bus_put() free the bus/bus_ops if we've found that
 	 * the card is no longer present.
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 14664f1fb16f..34009241213c 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -24,6 +24,7 @@ struct mmc_bus_ops {
 	int (*resume)(struct mmc_host *);
 	int (*power_save)(struct mmc_host *);
 	int (*power_restore)(struct mmc_host *);
+	int (*alive)(struct mmc_host *);
 };
 
 void mmc_attach_bus(struct mmc_host *host, const struct mmc_bus_ops *ops);
@@ -59,6 +60,8 @@ void mmc_rescan(struct work_struct *work);
 void mmc_start_host(struct mmc_host *host);
 void mmc_stop_host(struct mmc_host *host);
 
+int _mmc_detect_card_removed(struct mmc_host *host);
+
 int mmc_attach_mmc(struct mmc_host *host);
 int mmc_attach_sd(struct mmc_host *host);
 int mmc_attach_sdio(struct mmc_host *host);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index d240427c1246..fc1059bb6a08 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1104,6 +1104,14 @@ static void mmc_remove(struct mmc_host *host)
 	host->card = NULL;
 }
 
+/*
+ * Card detection - card is alive.
+ */
+static int mmc_alive(struct mmc_host *host)
+{
+	return mmc_send_status(host->card, NULL);
+}
+
 /*
  * Card detection callback from host.
  */
@@ -1119,7 +1127,7 @@ static void mmc_detect(struct mmc_host *host)
 	/*
 	 * Just check if our card has been removed.
 	 */
-	err = mmc_send_status(host->card, NULL);
+	err = _mmc_detect_card_removed(host);
 
 	mmc_release_host(host);
 
@@ -1224,6 +1232,7 @@ static const struct mmc_bus_ops mmc_ops = {
 	.suspend = NULL,
 	.resume = NULL,
 	.power_restore = mmc_power_restore,
+	.alive = mmc_alive,
 };
 
 static const struct mmc_bus_ops mmc_ops_unsafe = {
@@ -1234,6 +1243,7 @@ static const struct mmc_bus_ops mmc_ops_unsafe = {
 	.suspend = mmc_suspend,
 	.resume = mmc_resume,
 	.power_restore = mmc_power_restore,
+	.alive = mmc_alive,
 };
 
 static void mmc_attach_bus_ops(struct mmc_host *host)
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 85b858f6d5d4..6f27d35081b8 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -1018,6 +1018,14 @@ static void mmc_sd_remove(struct mmc_host *host)
 	host->card = NULL;
 }
 
+/*
+ * Card detection - card is alive.
+ */
+static int mmc_sd_alive(struct mmc_host *host)
+{
+	return mmc_send_status(host->card, NULL);
+}
+
 /*
  * Card detection callback from host.
  */
@@ -1033,7 +1041,7 @@ static void mmc_sd_detect(struct mmc_host *host)
 	/*
 	 * Just check if our card has been removed.
 	 */
-	err = mmc_send_status(host->card, NULL);
+	err = _mmc_detect_card_removed(host);
 
 	mmc_release_host(host);
 
@@ -1102,6 +1110,7 @@ static const struct mmc_bus_ops mmc_sd_ops = {
 	.suspend = NULL,
 	.resume = NULL,
 	.power_restore = mmc_sd_power_restore,
+	.alive = mmc_sd_alive,
 };
 
 static const struct mmc_bus_ops mmc_sd_ops_unsafe = {
@@ -1110,6 +1119,7 @@ static const struct mmc_bus_ops mmc_sd_ops_unsafe = {
 	.suspend = mmc_sd_suspend,
 	.resume = mmc_sd_resume,
 	.power_restore = mmc_sd_power_restore,
+	.alive = mmc_sd_alive,
 };
 
 static void mmc_sd_attach_bus_ops(struct mmc_host *host)
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 8c04f7f46dec..b77f770ce5d1 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -819,6 +819,14 @@ static void mmc_sdio_remove(struct mmc_host *host)
 	host->card = NULL;
 }
 
+/*
+ * Card detection - card is alive.
+ */
+static int mmc_sdio_alive(struct mmc_host *host)
+{
+	return mmc_select_card(host->card);
+}
+
 /*
  * Card detection callback from host.
  */
@@ -841,7 +849,7 @@ static void mmc_sdio_detect(struct mmc_host *host)
 	/*
 	 * Just check if our card has been removed.
 	 */
-	err = mmc_select_card(host->card);
+	err = _mmc_detect_card_removed(host);
 
 	mmc_release_host(host);
 
@@ -1019,6 +1027,7 @@ static const struct mmc_bus_ops mmc_sdio_ops = {
 	.suspend = mmc_sdio_suspend,
 	.resume = mmc_sdio_resume,
 	.power_restore = mmc_sdio_power_restore,
+	.alive = mmc_sdio_alive,
 };
 
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 534974c3ef0c..6402d9224d6a 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -209,6 +209,7 @@ struct mmc_card {
 #define MMC_STATE_HIGHSPEED_DDR (1<<4)		/* card is in high speed mode */
 #define MMC_STATE_ULTRAHIGHSPEED (1<<5)		/* card is in ultra high speed mode */
 #define MMC_CARD_SDXC		(1<<6)		/* card is SDXC */
+#define MMC_CARD_REMOVED	(1<<7)		/* card has been removed */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -370,6 +371,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_uhs(c)		((c)->state & MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_sd_card_uhs(c)	((c)->state & MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
+#define mmc_card_removed(c)	((c) && ((c)->state & MMC_CARD_REMOVED))
 
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
 #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
@@ -379,6 +381,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
+#define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED)
 
 /*
  * Quirk add/remove for MMC products.
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 174a844a5dda..87a976cc5654 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -180,6 +180,8 @@ extern int mmc_try_claim_host(struct mmc_host *host);
 
 extern int mmc_flush_cache(struct mmc_card *);
 
+extern int mmc_detect_card_removed(struct mmc_host *host);
+
 /**
  *	mmc_claim_host - exclusively claim a host
  *	@host: mmc host to claim
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 706f72279a17..9a03d0335745 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -299,6 +299,7 @@ struct mmc_host {
 	int			claim_cnt;	/* "claim" nesting count */
 
 	struct delayed_work	detect;
+	int			detect_change;	/* card detect flag */
 
 	const struct mmc_bus_ops *bus_ops;	/* current bus driver */
 	unsigned int		bus_refs;	/* reference counter */
-- 
cgit v1.2.3


From add710eaa88606de8ba98a014d37178579e6dbaf Mon Sep 17 00:00:00 2001
From: Johan Rudholm <johan.rudholm@stericsson.com>
Date: Fri, 2 Dec 2011 08:51:06 +0100
Subject: mmc: boot partition ro lock support

Enable boot partitions to be read-only locked until next power on via
a sysfs entry. There will be one sysfs entry for each boot partition:

/sys/block/mmcblkXbootY/ro_lock_until_next_power_on

Each boot partition is locked by writing 1 to its file.

Signed-off-by: Johan Rudholm <johan.rudholm@stericsson.com>
Signed-off-by: John Beckett <john.beckett@stericsson.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 Documentation/mmc/mmc-dev-parts.txt |  13 ++++
 drivers/mmc/card/block.c            | 121 +++++++++++++++++++++++++++++++++---
 drivers/mmc/core/mmc.c              |  14 ++++-
 include/linux/mmc/card.h            |  10 ++-
 include/linux/mmc/mmc.h             |   6 ++
 5 files changed, 153 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/mmc/mmc-dev-parts.txt b/Documentation/mmc/mmc-dev-parts.txt
index 2db28b8e662f..f08d078d43cf 100644
--- a/Documentation/mmc/mmc-dev-parts.txt
+++ b/Documentation/mmc/mmc-dev-parts.txt
@@ -25,3 +25,16 @@ echo 0 > /sys/block/mmcblkXbootY/force_ro
 To re-enable read-only access:
 
 echo 1 > /sys/block/mmcblkXbootY/force_ro
+
+The boot partitions can also be locked read only until the next power on,
+with:
+
+echo 1 > /sys/block/mmcblkXbootY/ro_lock_until_next_power_on
+
+This is a feature of the card and not of the kernel. If the card does
+not support boot partition locking, the file will not exist. If the
+feature has been disabled on the card, the file will be read-only.
+
+The boot partitions can also be locked permanently, but this feature is
+not accessible through sysfs in order to avoid accidental or malicious
+bricking.
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index ad0fb8d74dda..0c959c96005e 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -107,6 +107,8 @@ struct mmc_blk_data {
 	 */
 	unsigned int	part_curr;
 	struct device_attribute force_ro;
+	struct device_attribute power_ro_lock;
+	int	area_type;
 };
 
 static DEFINE_MUTEX(open_lock);
@@ -165,6 +167,70 @@ static void mmc_blk_put(struct mmc_blk_data *md)
 	mutex_unlock(&open_lock);
 }
 
+static ssize_t power_ro_lock_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	int ret;
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	struct mmc_card *card = md->queue.card;
+	int locked = 0;
+
+	if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PERM_WP_EN)
+		locked = 2;
+	else if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PWR_WP_EN)
+		locked = 1;
+
+	ret = snprintf(buf, PAGE_SIZE, "%d\n", locked);
+
+	return ret;
+}
+
+static ssize_t power_ro_lock_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	struct mmc_blk_data *md, *part_md;
+	struct mmc_card *card;
+	unsigned long set;
+
+	if (kstrtoul(buf, 0, &set))
+		return -EINVAL;
+
+	if (set != 1)
+		return count;
+
+	md = mmc_blk_get(dev_to_disk(dev));
+	card = md->queue.card;
+
+	mmc_claim_host(card->host);
+
+	ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_WP,
+				card->ext_csd.boot_ro_lock |
+				EXT_CSD_BOOT_WP_B_PWR_WP_EN,
+				card->ext_csd.part_time);
+	if (ret)
+		pr_err("%s: Locking boot partition ro until next power on failed: %d\n", md->disk->disk_name, ret);
+	else
+		card->ext_csd.boot_ro_lock |= EXT_CSD_BOOT_WP_B_PWR_WP_EN;
+
+	mmc_release_host(card->host);
+
+	if (!ret) {
+		pr_info("%s: Locking boot partition ro until next power on\n",
+			md->disk->disk_name);
+		set_disk_ro(md->disk, 1);
+
+		list_for_each_entry(part_md, &md->part, part)
+			if (part_md->area_type == MMC_BLK_DATA_AREA_BOOT) {
+				pr_info("%s: Locking boot partition ro until next power on\n", part_md->disk->disk_name);
+				set_disk_ro(part_md->disk, 1);
+			}
+	}
+
+	mmc_blk_put(md);
+	return count;
+}
+
 static ssize_t force_ro_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
@@ -1347,7 +1413,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
 					      struct device *parent,
 					      sector_t size,
 					      bool default_ro,
-					      const char *subname)
+					      const char *subname,
+					      int area_type)
 {
 	struct mmc_blk_data *md;
 	int devidx, ret;
@@ -1372,11 +1439,12 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
 	if (!subname) {
 		md->name_idx = find_first_zero_bit(name_use, max_devices);
 		__set_bit(md->name_idx, name_use);
-	}
-	else
+	} else
 		md->name_idx = ((struct mmc_blk_data *)
 				dev_to_disk(parent)->private_data)->name_idx;
 
+	md->area_type = area_type;
+
 	/*
 	 * Set the read-only status based on the supported commands
 	 * and the write protect switch.
@@ -1470,7 +1538,8 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
 		size = card->csd.capacity << (card->csd.read_blkbits - 9);
 	}
 
-	md = mmc_blk_alloc_req(card, &card->dev, size, false, NULL);
+	md = mmc_blk_alloc_req(card, &card->dev, size, false, NULL,
+					MMC_BLK_DATA_AREA_MAIN);
 	return md;
 }
 
@@ -1479,13 +1548,14 @@ static int mmc_blk_alloc_part(struct mmc_card *card,
 			      unsigned int part_type,
 			      sector_t size,
 			      bool default_ro,
-			      const char *subname)
+			      const char *subname,
+			      int area_type)
 {
 	char cap_str[10];
 	struct mmc_blk_data *part_md;
 
 	part_md = mmc_blk_alloc_req(card, disk_to_dev(md->disk), size, default_ro,
-				    subname);
+				    subname, area_type);
 	if (IS_ERR(part_md))
 		return PTR_ERR(part_md);
 	part_md->part_type = part_type;
@@ -1518,7 +1588,8 @@ static int mmc_blk_alloc_parts(struct mmc_card *card, struct mmc_blk_data *md)
 				card->part[idx].part_cfg,
 				card->part[idx].size >> 9,
 				card->part[idx].force_ro,
-				card->part[idx].name);
+				card->part[idx].name,
+				card->part[idx].area_type);
 			if (ret)
 				return ret;
 		}
@@ -1547,9 +1618,16 @@ mmc_blk_set_blksize(struct mmc_blk_data *md, struct mmc_card *card)
 
 static void mmc_blk_remove_req(struct mmc_blk_data *md)
 {
+	struct mmc_card *card;
+
 	if (md) {
+		card = md->queue.card;
 		if (md->disk->flags & GENHD_FL_UP) {
 			device_remove_file(disk_to_dev(md->disk), &md->force_ro);
+			if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
+					card->ext_csd.boot_ro_lockable)
+				device_remove_file(disk_to_dev(md->disk),
+					&md->power_ro_lock);
 
 			/* Stop new requests from getting into the queue */
 			del_gendisk(md->disk);
@@ -1578,6 +1656,7 @@ static void mmc_blk_remove_parts(struct mmc_card *card,
 static int mmc_add_disk(struct mmc_blk_data *md)
 {
 	int ret;
+	struct mmc_card *card = md->queue.card;
 
 	add_disk(md->disk);
 	md->force_ro.show = force_ro_show;
@@ -1587,7 +1666,33 @@ static int mmc_add_disk(struct mmc_blk_data *md)
 	md->force_ro.attr.mode = S_IRUGO | S_IWUSR;
 	ret = device_create_file(disk_to_dev(md->disk), &md->force_ro);
 	if (ret)
-		del_gendisk(md->disk);
+		goto force_ro_fail;
+
+	if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
+	     card->ext_csd.boot_ro_lockable) {
+		mode_t mode;
+
+		if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PWR_WP_DIS)
+			mode = S_IRUGO;
+		else
+			mode = S_IRUGO | S_IWUSR;
+
+		md->power_ro_lock.show = power_ro_lock_show;
+		md->power_ro_lock.store = power_ro_lock_store;
+		md->power_ro_lock.attr.mode = mode;
+		md->power_ro_lock.attr.name =
+					"ro_lock_until_next_power_on";
+		ret = device_create_file(disk_to_dev(md->disk),
+				&md->power_ro_lock);
+		if (ret)
+			goto power_ro_lock_fail;
+	}
+	return ret;
+
+power_ro_lock_fail:
+	device_remove_file(disk_to_dev(md->disk), &md->force_ro);
+force_ro_fail:
+	del_gendisk(md->disk);
 
 	return ret;
 }
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index fc1059bb6a08..006e932a3ae3 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -348,7 +348,8 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 				part_size = ext_csd[EXT_CSD_BOOT_MULT] << 17;
 				mmc_part_add(card, part_size,
 					EXT_CSD_PART_CONFIG_ACC_BOOT0 + idx,
-					"boot%d", idx, true);
+					"boot%d", idx, true,
+					MMC_BLK_DATA_AREA_BOOT);
 			}
 		}
 	}
@@ -435,7 +436,8 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 					hc_wp_grp_sz);
 				mmc_part_add(card, part_size << 19,
 					EXT_CSD_PART_CONFIG_ACC_GP0 + idx,
-					"gp%d", idx, false);
+					"gp%d", idx, false,
+					MMC_BLK_DATA_AREA_GP);
 			}
 		}
 		card->ext_csd.sec_trim_mult =
@@ -446,6 +448,14 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 			ext_csd[EXT_CSD_SEC_FEATURE_SUPPORT];
 		card->ext_csd.trim_timeout = 300 *
 			ext_csd[EXT_CSD_TRIM_MULT];
+
+		/*
+		 * Note that the call to mmc_part_add above defaults to read
+		 * only. If this default assumption is changed, the call must
+		 * take into account the value of boot_locked below.
+		 */
+		card->ext_csd.boot_ro_lock = ext_csd[EXT_CSD_BOOT_WP];
+		card->ext_csd.boot_ro_lockable = true;
 	}
 
 	if (card->ext_csd.rev >= 5) {
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 6402d9224d6a..9478a6bf1bb1 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -71,6 +71,8 @@ struct mmc_ext_csd {
 	bool			hpi_en;			/* HPI enablebit */
 	bool			hpi;			/* HPI support bit */
 	unsigned int		hpi_cmd;		/* cmd used as HPI */
+	unsigned int		boot_ro_lock;		/* ro lock support */
+	bool			boot_ro_lockable;
 	u8			raw_partition_support;	/* 160 */
 	u8			raw_erased_mem_count;	/* 181 */
 	u8			raw_ext_csd_structure;	/* 194 */
@@ -187,6 +189,10 @@ struct mmc_part {
 	unsigned int	part_cfg;	/* partition type */
 	char	name[MAX_MMC_PART_NAME_LEN];
 	bool	force_ro;	/* to make boot parts RO by default */
+	unsigned int	area_type;
+#define MMC_BLK_DATA_AREA_MAIN	(1<<0)
+#define MMC_BLK_DATA_AREA_BOOT	(1<<1)
+#define MMC_BLK_DATA_AREA_GP	(1<<2)
 };
 
 /*
@@ -265,12 +271,14 @@ struct mmc_card {
  * This function fill contents in mmc_part.
  */
 static inline void mmc_part_add(struct mmc_card *card, unsigned int size,
-			unsigned int part_cfg, char *name, int idx, bool ro)
+			unsigned int part_cfg, char *name, int idx, bool ro,
+			int area_type)
 {
 	card->part[card->nr_parts].size = size;
 	card->part[card->nr_parts].part_cfg = part_cfg;
 	sprintf(card->part[card->nr_parts].name, name, idx);
 	card->part[card->nr_parts].force_ro = ro;
+	card->part[card->nr_parts].area_type = area_type;
 	card->nr_parts++;
 }
 
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 0e7135697d11..665548e639e8 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -280,6 +280,7 @@ struct _mmc_csd {
 #define EXT_CSD_RST_N_FUNCTION		162	/* R/W */
 #define EXT_CSD_SANITIZE_START		165     /* W */
 #define EXT_CSD_WR_REL_PARAM		166	/* RO */
+#define EXT_CSD_BOOT_WP			173	/* R/W */
 #define EXT_CSD_ERASE_GROUP_DEF		175	/* R/W */
 #define EXT_CSD_PART_CONFIG		179	/* R/W */
 #define EXT_CSD_ERASED_MEM_CONT		181	/* RO */
@@ -321,6 +322,11 @@ struct _mmc_csd {
 
 #define EXT_CSD_WR_REL_PARAM_EN		(1<<2)
 
+#define EXT_CSD_BOOT_WP_B_PWR_WP_DIS	(0x40)
+#define EXT_CSD_BOOT_WP_B_PERM_WP_DIS	(0x10)
+#define EXT_CSD_BOOT_WP_B_PERM_WP_EN	(0x04)
+#define EXT_CSD_BOOT_WP_B_PWR_WP_EN	(0x01)
+
 #define EXT_CSD_PART_CONFIG_ACC_MASK	(0x7)
 #define EXT_CSD_PART_CONFIG_ACC_BOOT0	(0x1)
 #define EXT_CSD_PART_CONFIG_ACC_GP0	(0x4)
-- 
cgit v1.2.3


From 4f408cc67a0613f969d1e02fff6de74d31a29fb3 Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Fri, 9 Dec 2011 14:55:52 +0900
Subject: mmc: dw_mmc: Add more capabilities field

This patch adds another capabilities field for MMC_CAPS2_XXX.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/dw_mmc.c  | 6 ++++++
 include/linux/mmc/dw_mmc.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 44bc11e8761e..69e588960e79 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -1681,6 +1681,12 @@ static int __init dw_mci_init_slot(struct dw_mci *host, unsigned int id)
 	else
 		mmc->caps = 0;
 
+	if (host->pdata->caps2)
+		mmc->caps2 = host->pdata->caps2;
+	else
+		mmc->caps2 = 0;
+
+
 	if (host->pdata->get_bus_wd)
 		if (host->pdata->get_bus_wd(slot->id) >= 4)
 			mmc->caps |= MMC_CAP_4_BIT_DATA;
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 6dc9b80568a0..e8779c6d1759 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -214,6 +214,7 @@ struct dw_mci_board {
 	unsigned int bus_hz; /* Bus speed */
 
 	unsigned int caps;	/* Capabilities */
+	unsigned int caps2;	/* More capabilities */
 	/*
 	 * Override fifo depth. If 0, autodetect it from the FIFOTH register,
 	 * but note that this may not be reliable after a bootloader has used
-- 
cgit v1.2.3


From b67e198073b2d2f16572f5fa77553fec14775f69 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Sun, 25 Dec 2011 20:40:03 -0500
Subject: mmc: add a card hotplug handler context

SD/MMC controllers provide different card insertion and removal detection
methods. On some of them the controller itself issues an interrupt, on
others polling is used, on yet others auxiliary means are used for this
purpose, e.g., a GPIO IRQ. Further, on some systems one of those methods
can be chosen at driver probing time and configured in software. E.g., on
some systems the SD/MMC controller card hot-plug detection pin can be
configured either as a respective controller functions, or an IRQ-capable
GPIO. To support such flexible configurations a card hot-plug context
is added by this patch.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 include/linux/mmc/host.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 9a03d0335745..742f0e102e1e 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -167,6 +167,11 @@ struct mmc_async_req {
 	int (*err_check) (struct mmc_card *, struct mmc_async_req *);
 };
 
+struct mmc_hotplug {
+	unsigned int irq;
+	void *handler_priv;
+};
+
 struct mmc_host {
 	struct device		*parent;
 	struct device		class_dev;
@@ -300,6 +305,7 @@ struct mmc_host {
 
 	struct delayed_work	detect;
 	int			detect_change;	/* card detect flag */
+	struct mmc_hotplug	hotplug;
 
 	const struct mmc_bus_ops *bus_ops;	/* current bus driver */
 	unsigned int		bus_refs;	/* reference counter */
-- 
cgit v1.2.3


From 349ab52446772a359bc7e7699cae3880d48fa5c9 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Sun, 25 Dec 2011 21:36:02 +0100
Subject: mmc: add a generic GPIO card-detect helper

This patch adds a primitive helper to support card hotplug detection on
platforms, where a GPIO, capable of producing interrupts, is used for
detection of card-insertion and -removal events.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/Makefile   |  2 +-
 drivers/mmc/core/cd-gpio.c  | 74 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mmc/cd-gpio.h | 19 ++++++++++++
 3 files changed, 94 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mmc/core/cd-gpio.c
 create mode 100644 include/linux/mmc/cd-gpio.h

(limited to 'include/linux')

diff --git a/drivers/mmc/core/Makefile b/drivers/mmc/core/Makefile
index 639501970b41..dca4428380f1 100644
--- a/drivers/mmc/core/Makefile
+++ b/drivers/mmc/core/Makefile
@@ -7,6 +7,6 @@ mmc_core-y			:= core.o bus.o host.o \
 				   mmc.o mmc_ops.o sd.o sd_ops.o \
 				   sdio.o sdio_ops.o sdio_bus.o \
 				   sdio_cis.o sdio_io.o sdio_irq.o \
-				   quirks.o
+				   quirks.o cd-gpio.o
 
 mmc_core-$(CONFIG_DEBUG_FS)	+= debugfs.o
diff --git a/drivers/mmc/core/cd-gpio.c b/drivers/mmc/core/cd-gpio.c
new file mode 100644
index 000000000000..082202ae4a03
--- /dev/null
+++ b/drivers/mmc/core/cd-gpio.c
@@ -0,0 +1,74 @@
+/*
+ * Generic GPIO card-detect helper
+ *
+ * Copyright (C) 2011, Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/mmc/host.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+struct mmc_cd_gpio {
+	unsigned int gpio;
+	char label[0];
+};
+
+static irqreturn_t mmc_cd_gpio_irqt(int irq, void *dev_id)
+{
+	/* Schedule a card detection after a debounce timeout */
+	mmc_detect_change(dev_id, msecs_to_jiffies(100));
+	return IRQ_HANDLED;
+}
+
+int mmc_cd_gpio_request(struct mmc_host *host, unsigned int gpio,
+			unsigned int irq, unsigned long flags)
+{
+	size_t len = strlen(dev_name(host->parent)) + 4;
+	struct mmc_cd_gpio *cd = kmalloc(sizeof(*cd) + len, GFP_KERNEL);
+	int ret;
+
+	if (!cd)
+		return -ENOMEM;
+
+	snprintf(cd->label, len, "%s cd", dev_name(host->parent));
+
+	ret = gpio_request_one(gpio, GPIOF_DIR_IN, cd->label);
+	if (ret < 0)
+		goto egpioreq;
+
+	ret = request_threaded_irq(irq, NULL, mmc_cd_gpio_irqt,
+				   flags, cd->label, host);
+	if (ret < 0)
+		goto eirqreq;
+
+	cd->gpio = gpio;
+	host->hotplug.irq = irq;
+	host->hotplug.handler_priv = cd;
+
+	return 0;
+
+eirqreq:
+	gpio_free(gpio);
+egpioreq:
+	kfree(cd);
+	return ret;
+}
+EXPORT_SYMBOL(mmc_cd_gpio_request);
+
+void mmc_cd_gpio_free(struct mmc_host *host)
+{
+	struct mmc_cd_gpio *cd = host->hotplug.handler_priv;
+
+	free_irq(host->hotplug.irq, host);
+	gpio_free(cd->gpio);
+	kfree(cd);
+}
+EXPORT_SYMBOL(mmc_cd_gpio_free);
diff --git a/include/linux/mmc/cd-gpio.h b/include/linux/mmc/cd-gpio.h
new file mode 100644
index 000000000000..a8e469783318
--- /dev/null
+++ b/include/linux/mmc/cd-gpio.h
@@ -0,0 +1,19 @@
+/*
+ * Generic GPIO card-detect helper header
+ *
+ * Copyright (C) 2011, Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MMC_CD_GPIO_H
+#define MMC_CD_GPIO_H
+
+struct mmc_host;
+int mmc_cd_gpio_request(struct mmc_host *host, unsigned int gpio,
+			unsigned int irq, unsigned long flags);
+void mmc_cd_gpio_free(struct mmc_host *host);
+
+#endif
-- 
cgit v1.2.3


From 52c506f0bc72530fb786838e7ffd4f158a2e5c3a Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Dec 2011 15:48:43 +0200
Subject: mmc: sdhci-pci: add platform data

Add a means of getting platform data for the SDHCI PCI
devices.  The data is stored against the slot not the
device in order to support multi-slot devices.

The data allows platform-specific setup (such as getting
GPIO numbers from firmware or setting up wl12xx for SDIO)
to be done in platform support files instead of the
sdhci-pci driver.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/Makefile                   |  2 +-
 drivers/mmc/Makefile               |  3 +--
 drivers/mmc/host/Makefile          |  1 +
 drivers/mmc/host/sdhci-pci-data.c  |  5 +++++
 drivers/mmc/host/sdhci-pci.c       | 32 ++++++++++++++++++++++++++++----
 include/linux/mmc/sdhci-pci-data.h | 18 ++++++++++++++++++
 6 files changed, 54 insertions(+), 7 deletions(-)
 create mode 100644 drivers/mmc/host/sdhci-pci-data.c
 create mode 100644 include/linux/mmc/sdhci-pci-data.h

(limited to 'include/linux')

diff --git a/drivers/Makefile b/drivers/Makefile
index 1b3142127bf5..c07be024b962 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -97,7 +97,7 @@ obj-$(CONFIG_EISA)		+= eisa/
 obj-y				+= lguest/
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle/
-obj-$(CONFIG_MMC)		+= mmc/
+obj-y				+= mmc/
 obj-$(CONFIG_MEMSTICK)		+= memstick/
 obj-y				+= leds/
 obj-$(CONFIG_INFINIBAND)	+= infiniband/
diff --git a/drivers/mmc/Makefile b/drivers/mmc/Makefile
index 12eef393e216..400756ec7c49 100644
--- a/drivers/mmc/Makefile
+++ b/drivers/mmc/Makefile
@@ -6,5 +6,4 @@ subdir-ccflags-$(CONFIG_MMC_DEBUG) := -DDEBUG
 
 obj-$(CONFIG_MMC)		+= core/
 obj-$(CONFIG_MMC)		+= card/
-obj-$(CONFIG_MMC)		+= host/
-
+obj-$(subst m,y,$(CONFIG_MMC))	+= host/
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index b4b83f302e32..745f8fce2519 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_MMC_MXC)		+= mxcmmc.o
 obj-$(CONFIG_MMC_MXS)		+= mxs-mmc.o
 obj-$(CONFIG_MMC_SDHCI)		+= sdhci.o
 obj-$(CONFIG_MMC_SDHCI_PCI)	+= sdhci-pci.o
+obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI))	+= sdhci-pci-data.o
 obj-$(CONFIG_MMC_SDHCI_PXAV3)	+= sdhci-pxav3.o
 obj-$(CONFIG_MMC_SDHCI_PXAV2)	+= sdhci-pxav2.o
 obj-$(CONFIG_MMC_SDHCI_S3C)	+= sdhci-s3c.o
diff --git a/drivers/mmc/host/sdhci-pci-data.c b/drivers/mmc/host/sdhci-pci-data.c
new file mode 100644
index 000000000000..a611217769f5
--- /dev/null
+++ b/drivers/mmc/host/sdhci-pci-data.c
@@ -0,0 +1,5 @@
+#include <linux/module.h>
+#include <linux/mmc/sdhci-pci-data.h>
+
+struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev, int slotno);
+EXPORT_SYMBOL_GPL(sdhci_pci_get_data);
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index d2e77fb21b26..4e8f324e2c84 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -25,6 +25,7 @@
 #include <linux/gpio.h>
 #include <linux/sfi.h>
 #include <linux/pm_runtime.h>
+#include <linux/mmc/sdhci-pci-data.h>
 
 #include "sdhci.h"
 
@@ -61,6 +62,7 @@ struct sdhci_pci_fixes {
 struct sdhci_pci_slot {
 	struct sdhci_pci_chip	*chip;
 	struct sdhci_host	*host;
+	struct sdhci_pci_data	*data;
 
 	int			pci_bar;
 	int			rst_n_gpio;
@@ -1188,11 +1190,12 @@ static const struct dev_pm_ops sdhci_pci_pm_ops = {
 \*****************************************************************************/
 
 static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot(
-	struct pci_dev *pdev, struct sdhci_pci_chip *chip, int bar)
+	struct pci_dev *pdev, struct sdhci_pci_chip *chip, int first_bar,
+	int slotno)
 {
 	struct sdhci_pci_slot *slot;
 	struct sdhci_host *host;
-	int ret;
+	int ret, bar = first_bar + slotno;
 
 	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
 		dev_err(&pdev->dev, "BAR %d is not iomem. Aborting.\n", bar);
@@ -1227,6 +1230,20 @@ static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot(
 	slot->pci_bar = bar;
 	slot->rst_n_gpio = -EINVAL;
 
+	/* Retrieve platform data if there is any */
+	if (*sdhci_pci_get_data)
+		slot->data = sdhci_pci_get_data(pdev, slotno);
+
+	if (slot->data) {
+		if (slot->data->setup) {
+			ret = slot->data->setup(slot->data);
+			if (ret) {
+				dev_err(&pdev->dev, "platform setup failed\n");
+				goto free;
+			}
+		}
+	}
+
 	host->hw_name = "PCI";
 	host->ops = &sdhci_pci_ops;
 	host->quirks = chip->quirks;
@@ -1236,7 +1253,7 @@ static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot(
 	ret = pci_request_region(pdev, bar, mmc_hostname(host->mmc));
 	if (ret) {
 		dev_err(&pdev->dev, "cannot request region\n");
-		goto free;
+		goto cleanup;
 	}
 
 	host->ioaddr = pci_ioremap_bar(pdev, bar);
@@ -1270,6 +1287,10 @@ unmap:
 release:
 	pci_release_region(pdev, bar);
 
+cleanup:
+	if (slot->data && slot->data->cleanup)
+		slot->data->cleanup(slot->data);
+
 free:
 	sdhci_free_host(host);
 
@@ -1291,6 +1312,9 @@ static void sdhci_pci_remove_slot(struct sdhci_pci_slot *slot)
 	if (slot->chip->fixes && slot->chip->fixes->remove_slot)
 		slot->chip->fixes->remove_slot(slot, dead);
 
+	if (slot->data && slot->data->cleanup)
+		slot->data->cleanup(slot->data);
+
 	pci_release_region(slot->chip->pdev, slot->pci_bar);
 
 	sdhci_free_host(slot->host);
@@ -1377,7 +1401,7 @@ static int __devinit sdhci_pci_probe(struct pci_dev *pdev,
 	slots = chip->num_slots;	/* Quirk may have changed this */
 
 	for (i = 0; i < slots; i++) {
-		slot = sdhci_pci_probe_slot(pdev, chip, first_bar + i);
+		slot = sdhci_pci_probe_slot(pdev, chip, first_bar, i);
 		if (IS_ERR(slot)) {
 			for (i--; i >= 0; i--)
 				sdhci_pci_remove_slot(chip->slots[i]);
diff --git a/include/linux/mmc/sdhci-pci-data.h b/include/linux/mmc/sdhci-pci-data.h
new file mode 100644
index 000000000000..8959604a13d3
--- /dev/null
+++ b/include/linux/mmc/sdhci-pci-data.h
@@ -0,0 +1,18 @@
+#ifndef LINUX_MMC_SDHCI_PCI_DATA_H
+#define LINUX_MMC_SDHCI_PCI_DATA_H
+
+struct pci_dev;
+
+struct sdhci_pci_data {
+	struct pci_dev	*pdev;
+	int		slotno;
+	int		rst_n_gpio; /* Set to -EINVAL if unused */
+	int		cd_gpio;    /* Set to -EINVAL if unused */
+	int		(*setup)(struct sdhci_pci_data *data);
+	void		(*cleanup)(struct sdhci_pci_data *data);
+};
+
+extern struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev,
+				int slotno);
+
+#endif
-- 
cgit v1.2.3


From e2a0a5829c4069ee4a0f28c7301187ffaba91a46 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Dec 2011 15:48:45 +0200
Subject: mmc: sdhci-pci: remove SDHCI_QUIRK2_OWN_CARD_DETECTION

Even if a driver provides separate card detection, an interrupt
is still needed to abort mmc requests that are in progress.
SDHCI_QUIRK2_OWN_CARD_DETECTION prevents that, so remove it.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci-pci.c | 1 -
 drivers/mmc/host/sdhci.c     | 1 -
 include/linux/mmc/sdhci.h    | 2 --
 3 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 646680a5993a..83a152e9b976 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -212,7 +212,6 @@ static void sdhci_pci_add_own_cd(struct sdhci_pci_slot *slot)
 
 	slot->cd_gpio = gpio;
 	slot->cd_irq = irq;
-	slot->host->quirks2 |= SDHCI_QUIRK2_OWN_CARD_DETECTION;
 
 	return;
 
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index e6c6cd6e95f2..6f1fd02fe01b 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -147,7 +147,6 @@ static void sdhci_set_card_detection(struct sdhci_host *host, bool enable)
 	u32 present, irqs;
 
 	if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) ||
-	    (host->quirks2 & SDHCI_QUIRK2_OWN_CARD_DETECTION) ||
 	    !mmc_card_is_removable(host->mmc))
 		return;
 
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index e4b69353678d..dad7a469f09c 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -90,8 +90,6 @@ struct sdhci_host {
 
 	unsigned int quirks2;	/* More deviations from spec. */
 
-#define SDHCI_QUIRK2_OWN_CARD_DETECTION			(1<<0)
-
 	int irq;		/* Device IRQ */
 	void __iomem *ioaddr;	/* Mapped address */
 
-- 
cgit v1.2.3


From aa9df4fb2adcc73d36fa41e23059519be770aaa5 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@stericsson.com>
Date: Mon, 19 Dec 2011 16:24:19 +0100
Subject: mmc: core: Add option to prevent eMMC sleep command

Host may now use MMC_CAP2_NO_SLEEP_CMD to disable the use
of eMMC sleep/awake command.

This option can be used when your platform has a buggy
kernel crash dump software, which is supposed to store
the dump on the eMMC, but is not able to wake up the eMMC
from sleep state.

In particular, failures have been seen with u-boot; even if
it is fixed there, platforms will be slow to update their
bootloader binaries.

Signed-off-by: Ulf Hansson <ulf.hansson@stericsson.com>
Reviewed-by: Hanumath Prasad <hanumath.prasad@stericsson.com>
Reviewed-by: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
Acked-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  | 6 ++++++
 include/linux/mmc/host.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 22050525be84..be7569f3fb56 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2246,6 +2246,9 @@ int mmc_card_awake(struct mmc_host *host)
 {
 	int err = -ENOSYS;
 
+	if (host->caps2 & MMC_CAP2_NO_SLEEP_CMD)
+		return 0;
+
 	mmc_bus_get(host);
 
 	if (host->bus_ops && !host->bus_dead && host->bus_ops->awake)
@@ -2261,6 +2264,9 @@ int mmc_card_sleep(struct mmc_host *host)
 {
 	int err = -ENOSYS;
 
+	if (host->caps2 & MMC_CAP2_NO_SLEEP_CMD)
+		return 0;
+
 	mmc_bus_get(host);
 
 	if (host->bus_ops && !host->bus_dead && host->bus_ops->sleep)
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 742f0e102e1e..031d865167a2 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -247,6 +247,7 @@ struct mmc_host {
 #define MMC_CAP2_CACHE_CTRL	(1 << 1)	/* Allow cache control */
 #define MMC_CAP2_POWEROFF_NOTIFY (1 << 2)	/* Notify poweroff supported */
 #define MMC_CAP2_NO_MULTI_READ	(1 << 3)	/* Multiblock reads don't work */
+#define MMC_CAP2_NO_SLEEP_CMD	(1 << 4)	/* Don't allow sleep command */
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 	unsigned int        power_notify_type;
-- 
cgit v1.2.3


From 7b21e34fd1c272e3a8c3846168f2f6287a4cd72b Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 12 Jan 2012 15:44:42 +1030
Subject: virtio: harsher barriers for rpmsg.

We were cheating with our barriers; using the smp ones rather than the
real device ones.  That was fine, until rpmsg came along, which is
used to talk to a real device (a non-SMP CPU).

Unfortunately, just putting back the real barriers (reverting
d57ed95d) causes a performance regression on virtio-pci.  In
particular, Amos reports netbench's TCP_RR over virtio_net CPU
utilization increased up to 35% while throughput went down by up to
14%.

By comparison, this branch is in the noise.

Reference: https://lkml.org/lkml/2011/12/11/22

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/lguest/lguest_device.c |  8 +++++---
 drivers/s390/kvm/kvm_virtio.c  |  2 +-
 drivers/virtio/virtio_mmio.c   |  4 ++--
 drivers/virtio/virtio_pci.c    |  4 ++--
 drivers/virtio/virtio_ring.c   | 34 +++++++++++++++++++++-------------
 include/linux/virtio_ring.h    |  1 +
 tools/virtio/linux/virtio.h    |  1 +
 tools/virtio/virtio_test.c     |  3 ++-
 8 files changed, 35 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 595d73197016..6a1d6447b864 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -292,10 +292,12 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 
 	/*
 	 * OK, tell virtio_ring.c to set up a virtqueue now we know its size
-	 * and we've got a pointer to its pages.
+	 * and we've got a pointer to its pages.  Note that we set weak_barriers
+	 * to 'true': the host just a(nother) SMP CPU, so we only need inter-cpu
+	 * barriers.
 	 */
-	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN,
-				 vdev, lvq->pages, lg_notify, callback, name);
+	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, vdev,
+				 true, lvq->pages, lg_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 8af868bab20b..7bc1955337ea 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -198,7 +198,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 		goto out;
 
 	vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
-				 vdev, (void *) config->address,
+				 vdev, true, (void *) config->address,
 				 kvm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 0269717436af..01d6dc250d5c 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -310,8 +310,8 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 			vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
 
 	/* Create the vring */
-	vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN,
-				 vdev, info->queue, vm_notify, callback, name);
+	vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN, vdev,
+				 true, info->queue, vm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto error_new_virtqueue;
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index baabb7937ec2..688b42d28dad 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -414,8 +414,8 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
 		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
-	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
-				 vdev, info->queue, vp_notify, callback, name);
+	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, vdev,
+				 true, info->queue, vp_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto out_activate_queue;
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index c7a2c208f6ea..50da92046092 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -28,17 +28,20 @@
 #ifdef CONFIG_SMP
 /* Where possible, use SMP barriers which are more lightweight than mandatory
  * barriers, because mandatory barriers control MMIO effects on accesses
- * through relaxed memory I/O windows (which virtio does not use). */
-#define virtio_mb() smp_mb()
-#define virtio_rmb() smp_rmb()
-#define virtio_wmb() smp_wmb()
+ * through relaxed memory I/O windows (which virtio-pci does not use). */
+#define virtio_mb(vq) \
+	do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0)
+#define virtio_rmb(vq) \
+	do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0)
+#define virtio_wmb(vq) \
+	do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0)
 #else
 /* We must force memory ordering even if guest is UP since host could be
  * running on another CPU, but SMP barriers are defined to barrier() in that
  * configuration. So fall back to mandatory barriers instead. */
-#define virtio_mb() mb()
-#define virtio_rmb() rmb()
-#define virtio_wmb() wmb()
+#define virtio_mb(vq) mb()
+#define virtio_rmb(vq) rmb()
+#define virtio_wmb(vq) wmb()
 #endif
 
 #ifdef DEBUG
@@ -77,6 +80,9 @@ struct vring_virtqueue
 	/* Actual memory layout for this queue */
 	struct vring vring;
 
+	/* Can we use weak barriers? */
+	bool weak_barriers;
+
 	/* Other side has made a mess, don't try any more. */
 	bool broken;
 
@@ -245,14 +251,14 @@ void virtqueue_kick(struct virtqueue *_vq)
 	START_USE(vq);
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
-	virtio_wmb();
+	virtio_wmb(vq);
 
 	old = vq->vring.avail->idx;
 	new = vq->vring.avail->idx = old + vq->num_added;
 	vq->num_added = 0;
 
 	/* Need to update avail index before checking if we should notify */
-	virtio_mb();
+	virtio_mb(vq);
 
 	if (vq->event ?
 	    vring_need_event(vring_avail_event(&vq->vring), new, old) :
@@ -314,7 +320,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 	}
 
 	/* Only get used array entries after they have been exposed by host. */
-	virtio_rmb();
+	virtio_rmb(vq);
 
 	i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id;
 	*len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len;
@@ -337,7 +343,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 	 * the read in the next get_buf call. */
 	if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
 		vring_used_event(&vq->vring) = vq->last_used_idx;
-		virtio_mb();
+		virtio_mb(vq);
 	}
 
 	END_USE(vq);
@@ -366,7 +372,7 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
 	 * entry. Always do both to keep code simple. */
 	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
 	vring_used_event(&vq->vring) = vq->last_used_idx;
-	virtio_mb();
+	virtio_mb(vq);
 	if (unlikely(more_used(vq))) {
 		END_USE(vq);
 		return false;
@@ -393,7 +399,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 	/* TODO: tune this threshold */
 	bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
 	vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
-	virtio_mb();
+	virtio_mb(vq);
 	if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
 		END_USE(vq);
 		return false;
@@ -453,6 +459,7 @@ EXPORT_SYMBOL_GPL(vring_interrupt);
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *),
 				      void (*callback)(struct virtqueue *),
@@ -476,6 +483,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 	vq->vq.vdev = vdev;
 	vq->vq.name = name;
 	vq->notify = notify;
+	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 36be0f6e18a9..e338730c2660 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -168,6 +168,7 @@ struct virtqueue;
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index 669bcdd45805..953db2abf6b9 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -214,6 +214,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq);
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index 74d3331bdaf9..0740284396c1 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -92,7 +92,8 @@ static void vq_info_add(struct vdev_info *dev, int num)
 	assert(r >= 0);
 	memset(info->ring, 0, vring_size(num, 4096));
 	vring_init(&info->vring, num, info->ring, 4096);
-	info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring,
+	info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev,
+				       true, info->ring,
 				       vq_notify, vq_callback, "test");
 	assert(info->vq);
 	info->vq->priv = info;
-- 
cgit v1.2.3


From 5dfc17628d57f9e62043ed0cba03a6e3eb019a78 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 12 Jan 2012 15:44:42 +1030
Subject: virtio: document functions better.

The old documentation is left over from when we used a structure with
strategy pointers.

And move the documentation to the C file as per kernel practice.
Though I disagree...

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 drivers/virtio/virtio_ring.c | 92 +++++++++++++++++++++++++++++++++++++++++++-
 include/linux/virtio.h       | 47 ----------------------
 2 files changed, 91 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 50da92046092..fe50486341a4 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -166,6 +166,23 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 	return head;
 }
 
+/**
+ * virtqueue_add_buf_gfp - expose buffer to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: the description of the buffer(s).
+ * @out_num: the number of sg readable by other side
+ * @in_num: the number of sg which are writable (after readable ones)
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns remaining capacity of queue or a negative error
+ * (ie. ENOSPC).  Note that it only really makes sense to treat all
+ * positive return values as "available": indirect buffers mean that
+ * we can put an entire sg[] array inside a single queue entry.
+ */
 int virtqueue_add_buf_gfp(struct virtqueue *_vq,
 			  struct scatterlist sg[],
 			  unsigned int out,
@@ -244,6 +261,16 @@ add_head:
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp);
 
+/**
+ * virtqueue_kick - update after add_buf
+ * @vq: the struct virtqueue
+ *
+ * After one or more virtqueue_add_buf_gfp calls, invoke this to kick
+ * the other side.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
 void virtqueue_kick(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -300,6 +327,22 @@ static inline bool more_used(const struct vring_virtqueue *vq)
 	return vq->last_used_idx != vq->vring.used->idx;
 }
 
+/**
+ * virtqueue_get_buf - get the next used buffer
+ * @vq: the struct virtqueue we're talking about.
+ * @len: the length written into the buffer
+ *
+ * If the driver wrote data into the buffer, @len will be set to the
+ * amount written.  This means you don't need to clear the buffer
+ * beforehand to ensure there's no data leakage in the case of short
+ * writes.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ *
+ * Returns NULL if there are no used buffers, or the "data" token
+ * handed to virtqueue_add_buf_gfp().
+ */
 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -351,6 +394,15 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
 
+/**
+ * virtqueue_disable_cb - disable callbacks
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * Note that this is not necessarily synchronous, hence unreliable and only
+ * useful as an optimization.
+ *
+ * Unlike other operations, this need not be serialized.
+ */
 void virtqueue_disable_cb(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -359,6 +411,17 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
 
+/**
+ * virtqueue_enable_cb - restart callbacks after disable_cb.
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * This re-enables callbacks; it returns "false" if there are pending
+ * buffers in the queue, to detect a possible race between the driver
+ * checking for more work, and enabling callbacks.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
 bool virtqueue_enable_cb(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -383,6 +446,19 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
 
+/**
+ * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * This re-enables callbacks but hints to the other side to delay
+ * interrupts until most of the available buffers have been processed;
+ * it returns "false" if there are many pending buffers in the queue,
+ * to detect a possible race between the driver checking for more work,
+ * and enabling callbacks.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -410,6 +486,14 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
 
+/**
+ * virtqueue_detach_unused_buf - detach first unused buffer
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * Returns NULL or the "data" token handed to virtqueue_add_buf_gfp().
+ * This is not valid on an active queue; it is useful only for device
+ * shutdown.
+ */
 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -538,7 +622,13 @@ void vring_transport_features(struct virtio_device *vdev)
 }
 EXPORT_SYMBOL_GPL(vring_transport_features);
 
-/* return the size of the vring within the virtqueue */
+/**
+ * virtqueue_get_vring_size - return the size of the virtqueue's vring
+ * @vq: the struct virtqueue containing the vring of interest.
+ *
+ * Returns the size of the vring.  This is mainly used for boasting to
+ * userspace.  Unlike other operations, this need not be serialized.
+ */
 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
 {
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 4c069d8bd740..73ad7243128f 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -25,53 +25,6 @@ struct virtqueue {
 	void *priv;
 };
 
-/**
- * operations for virtqueue
- * virtqueue_add_buf: expose buffer to other end
- *	vq: the struct virtqueue we're talking about.
- *	sg: the description of the buffer(s).
- *	out_num: the number of sg readable by other side
- *	in_num: the number of sg which are writable (after readable ones)
- *	data: the token identifying the buffer.
- *	gfp: how to do memory allocations (if necessary).
- *      Returns remaining capacity of queue (sg segments) or a negative error.
- * virtqueue_kick: update after add_buf
- *	vq: the struct virtqueue
- *	After one or more add_buf calls, invoke this to kick the other side.
- * virtqueue_get_buf: get the next used buffer
- *	vq: the struct virtqueue we're talking about.
- *	len: the length written into the buffer
- *	Returns NULL or the "data" token handed to add_buf.
- * virtqueue_disable_cb: disable callbacks
- *	vq: the struct virtqueue we're talking about.
- *	Note that this is not necessarily synchronous, hence unreliable and only
- *	useful as an optimization.
- * virtqueue_enable_cb: restart callbacks after disable_cb.
- *	vq: the struct virtqueue we're talking about.
- *	This re-enables callbacks; it returns "false" if there are pending
- *	buffers in the queue, to detect a possible race between the driver
- *	checking for more work, and enabling callbacks.
- * virtqueue_enable_cb_delayed: restart callbacks after disable_cb.
- *	vq: the struct virtqueue we're talking about.
- *	This re-enables callbacks but hints to the other side to delay
- *	interrupts until most of the available buffers have been processed;
- *	it returns "false" if there are many pending buffers in the queue,
- *	to detect a possible race between the driver checking for more work,
- *	and enabling callbacks.
- * virtqueue_detach_unused_buf: detach first unused buffer
- * 	vq: the struct virtqueue we're talking about.
- * 	Returns NULL or the "data" token handed to add_buf
- * virtqueue_get_vring_size: return the size of the virtqueue's vring
- *	vq: the struct virtqueue containing the vring of interest.
- *	Returns the size of the vring.
- *
- * Locking rules are straightforward: the driver is responsible for
- * locking.  No two operations may be invoked simultaneously, with the exception
- * of virtqueue_disable_cb.
- *
- * All operations can be called in any context.
- */
-
 int virtqueue_add_buf_gfp(struct virtqueue *vq,
 			  struct scatterlist sg[],
 			  unsigned int out_num,
-- 
cgit v1.2.3


From f96fde41f7f9af6cf20f6a1919f5d9670f84d574 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 12 Jan 2012 15:44:42 +1030
Subject: virtio: rename virtqueue_add_buf_gfp to virtqueue_add_buf

Remove wrapper functions. This makes the allocation type explicit in
all callers; I used GPF_KERNEL where it seemed obvious, left it at
GFP_ATOMIC otherwise.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 drivers/block/virtio_blk.c          |  2 +-
 drivers/char/hw_random/virtio-rng.c |  2 +-
 drivers/char/virtio_console.c       |  6 +++---
 drivers/net/virtio_net.c            | 12 ++++++------
 drivers/virtio/virtio_balloon.c     |  7 ++++---
 drivers/virtio/virtio_ring.c        | 22 +++++++++++-----------
 include/linux/virtio.h              | 21 ++++++---------------
 net/9p/trans_virtio.c               |  6 ++++--
 tools/virtio/linux/virtio.h         | 21 ++++++---------------
 tools/virtio/virtio_test.c          |  3 ++-
 10 files changed, 44 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4d0b70adf5f7..a345e40e1bca 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -172,7 +172,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		}
 	}
 
-	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) {
+	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) {
 		mempool_free(vbr, vblk->pool);
 		return false;
 	}
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index fd699ccecf5b..723725bbb96b 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -47,7 +47,7 @@ static void register_buffer(u8 *buf, size_t size)
 	sg_init_one(&sg, buf, size);
 
 	/* There should always be room for one buffer. */
-	if (virtqueue_add_buf(vq, &sg, 0, 1, buf) < 0)
+	if (virtqueue_add_buf(vq, &sg, 0, 1, buf, GFP_KERNEL) < 0)
 		BUG();
 
 	virtqueue_kick(vq);
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 8e3c46d67cb3..d1ae1492ee78 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -392,7 +392,7 @@ static int add_inbuf(struct virtqueue *vq, struct port_buffer *buf)
 
 	sg_init_one(sg, buf->buf, buf->size);
 
-	ret = virtqueue_add_buf(vq, sg, 0, 1, buf);
+	ret = virtqueue_add_buf(vq, sg, 0, 1, buf, GFP_ATOMIC);
 	virtqueue_kick(vq);
 	return ret;
 }
@@ -457,7 +457,7 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id,
 	vq = portdev->c_ovq;
 
 	sg_init_one(sg, &cpkt, sizeof(cpkt));
-	if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt) >= 0) {
+	if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt, GFP_ATOMIC) >= 0) {
 		virtqueue_kick(vq);
 		while (!virtqueue_get_buf(vq, &len))
 			cpu_relax();
@@ -506,7 +506,7 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
 	reclaim_consumed_buffers(port);
 
 	sg_init_one(sg, in_buf, in_count);
-	ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf);
+	ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf, GFP_ATOMIC);
 
 	/* Tell Host to go! */
 	virtqueue_kick(out_vq);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 76fe14efb2b5..6345a52194f9 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -370,7 +370,7 @@ static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp)
 
 	skb_to_sgvec(skb, vi->rx_sg + 1, 0, skb->len);
 
-	err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 2, skb, gfp);
+	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 2, skb, gfp);
 	if (err < 0)
 		dev_kfree_skb(skb);
 
@@ -415,8 +415,8 @@ static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp)
 
 	/* chain first in list head */
 	first->private = (unsigned long)list;
-	err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2,
-				    first, gfp);
+	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2,
+				first, gfp);
 	if (err < 0)
 		give_pages(vi, first);
 
@@ -434,7 +434,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp)
 
 	sg_init_one(vi->rx_sg, page_address(page), PAGE_SIZE);
 
-	err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 1, page, gfp);
+	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 1, page, gfp);
 	if (err < 0)
 		give_pages(vi, page);
 
@@ -609,7 +609,7 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 
 	hdr->num_sg = skb_to_sgvec(skb, vi->tx_sg + 1, 0, skb->len) + 1;
 	return virtqueue_add_buf(vi->svq, vi->tx_sg, hdr->num_sg,
-					0, skb);
+				 0, skb, GFP_ATOMIC);
 }
 
 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -767,7 +767,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 		sg_set_buf(&sg[i + 1], sg_virt(s), s->length);
 	sg_set_buf(&sg[out + in - 1], &status, sizeof(status));
 
-	BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi) < 0);
+	BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi, GFP_ATOMIC) < 0);
 
 	virtqueue_kick(vi->cvq);
 
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index f64ff185b8b5..0a6425aadf95 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -88,7 +88,7 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
 	init_completion(&vb->acked);
 
 	/* We should always be able to add one buffer to an empty queue. */
-	if (virtqueue_add_buf(vq, &sg, 1, 0, vb) < 0)
+	if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
 		BUG();
 	virtqueue_kick(vq);
 
@@ -220,7 +220,7 @@ static void stats_handle_request(struct virtio_balloon *vb)
 
 	vq = vb->stats_vq;
 	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
-	if (virtqueue_add_buf(vq, &sg, 1, 0, vb) < 0)
+	if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
 		BUG();
 	virtqueue_kick(vq);
 }
@@ -313,7 +313,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
 		 * use it to signal us later.
 		 */
 		sg_init_one(&sg, vb->stats, sizeof vb->stats);
-		if (virtqueue_add_buf(vb->stats_vq, &sg, 1, 0, vb) < 0)
+		if (virtqueue_add_buf(vb->stats_vq, &sg, 1, 0, vb, GFP_KERNEL)
+		    < 0)
 			BUG();
 		virtqueue_kick(vb->stats_vq);
 	}
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index fe50486341a4..6ea92a6d1134 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -167,7 +167,7 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 }
 
 /**
- * virtqueue_add_buf_gfp - expose buffer to other end
+ * virtqueue_add_buf - expose buffer to other end
  * @vq: the struct virtqueue we're talking about.
  * @sg: the description of the buffer(s).
  * @out_num: the number of sg readable by other side
@@ -183,12 +183,12 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
  * positive return values as "available": indirect buffers mean that
  * we can put an entire sg[] array inside a single queue entry.
  */
-int virtqueue_add_buf_gfp(struct virtqueue *_vq,
-			  struct scatterlist sg[],
-			  unsigned int out,
-			  unsigned int in,
-			  void *data,
-			  gfp_t gfp)
+int virtqueue_add_buf(struct virtqueue *_vq,
+		      struct scatterlist sg[],
+		      unsigned int out,
+		      unsigned int in,
+		      void *data,
+		      gfp_t gfp)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	unsigned int i, avail, uninitialized_var(prev);
@@ -259,13 +259,13 @@ add_head:
 
 	return vq->num_free;
 }
-EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp);
+EXPORT_SYMBOL_GPL(virtqueue_add_buf);
 
 /**
  * virtqueue_kick - update after add_buf
  * @vq: the struct virtqueue
  *
- * After one or more virtqueue_add_buf_gfp calls, invoke this to kick
+ * After one or more virtqueue_add_buf calls, invoke this to kick
  * the other side.
  *
  * Caller must ensure we don't call this with other virtqueue
@@ -341,7 +341,7 @@ static inline bool more_used(const struct vring_virtqueue *vq)
  * operations at the same time (except where noted).
  *
  * Returns NULL if there are no used buffers, or the "data" token
- * handed to virtqueue_add_buf_gfp().
+ * handed to virtqueue_add_buf().
  */
 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 {
@@ -490,7 +490,7 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
  * virtqueue_detach_unused_buf - detach first unused buffer
  * @vq: the struct virtqueue we're talking about.
  *
- * Returns NULL or the "data" token handed to virtqueue_add_buf_gfp().
+ * Returns NULL or the "data" token handed to virtqueue_add_buf().
  * This is not valid on an active queue; it is useful only for device
  * shutdown.
  */
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 73ad7243128f..ec1706e7df50 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -25,21 +25,12 @@ struct virtqueue {
 	void *priv;
 };
 
-int virtqueue_add_buf_gfp(struct virtqueue *vq,
-			  struct scatterlist sg[],
-			  unsigned int out_num,
-			  unsigned int in_num,
-			  void *data,
-			  gfp_t gfp);
-
-static inline int virtqueue_add_buf(struct virtqueue *vq,
-				    struct scatterlist sg[],
-				    unsigned int out_num,
-				    unsigned int in_num,
-				    void *data)
-{
-	return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC);
-}
+int virtqueue_add_buf(struct virtqueue *vq,
+		      struct scatterlist sg[],
+		      unsigned int out_num,
+		      unsigned int in_num,
+		      void *data,
+		      gfp_t gfp);
 
 void virtqueue_kick(struct virtqueue *vq);
 
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 330421e54713..3d432068f627 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -272,7 +272,8 @@ req_retry:
 	in = pack_sg_list(chan->sg, out,
 			  VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
 
-	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
+	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
+				GFP_ATOMIC);
 	if (err < 0) {
 		if (err == -ENOSPC) {
 			chan->ring_bufs_avail = 0;
@@ -414,7 +415,8 @@ req_retry_pinned:
 		in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
 				     in_pages, in_nr_pages, uidata, inlen);
 
-	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
+	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
+				GFP_ATOMIC);
 	if (err < 0) {
 		if (err == -ENOSPC) {
 			chan->ring_bufs_avail = 0;
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index 953db2abf6b9..b4fbc91c41b4 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -186,21 +186,12 @@ struct virtqueue {
 #endif
 
 /* Interfaces exported by virtio_ring. */
-int virtqueue_add_buf_gfp(struct virtqueue *vq,
-			  struct scatterlist sg[],
-			  unsigned int out_num,
-			  unsigned int in_num,
-			  void *data,
-			  gfp_t gfp);
-
-static inline int virtqueue_add_buf(struct virtqueue *vq,
-				    struct scatterlist sg[],
-				    unsigned int out_num,
-				    unsigned int in_num,
-				    void *data)
-{
-	return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC);
-}
+int virtqueue_add_buf(struct virtqueue *vq,
+		      struct scatterlist sg[],
+		      unsigned int out_num,
+		      unsigned int in_num,
+		      void *data,
+		      gfp_t gfp);
 
 void virtqueue_kick(struct virtqueue *vq);
 
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index 0740284396c1..6bf95f995364 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -161,7 +161,8 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs)
 			if (started < bufs) {
 				sg_init_one(&sl, dev->buf, dev->buf_size);
 				r = virtqueue_add_buf(vq->vq, &sl, 1, 0,
-						      dev->buf + started);
+						      dev->buf + started,
+						      GFP_ATOMIC);
 				if (likely(r >= 0)) {
 					++started;
 					virtqueue_kick(vq->vq);
-- 
cgit v1.2.3


From 41f0377f73039ca6fe97a469d1941a89cd9757f1 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 12 Jan 2012 15:44:43 +1030
Subject: virtio: support unlocked queue kick

Based on patch by Christoph for virtio_blk speedup:

	Split virtqueue_kick to be able to do the actual notification
	outside the lock protecting the virtqueue.  This patch was
	originally done by Stefan Hajnoczi, but I can't find the
	original one anymore and had to recreated it from memory.
	Pointers to the original or corrections for the commit message
	are welcome.

Stefan's patch was here:

	https://github.com/stefanha/linux/commit/a6d06644e3a58e57a774e77d7dc34c4a5a2e7496
	http://www.spinics.net/lists/linux-virtualization/msg14616.html

Third time's the charm!

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_ring.c | 60 +++++++++++++++++++++++++++++++++++---------
 include/linux/virtio.h       |  4 +++
 2 files changed, 52 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 6ea92a6d1134..c56bbe799241 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -262,19 +262,22 @@ add_head:
 EXPORT_SYMBOL_GPL(virtqueue_add_buf);
 
 /**
- * virtqueue_kick - update after add_buf
+ * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  * @vq: the struct virtqueue
  *
- * After one or more virtqueue_add_buf calls, invoke this to kick
- * the other side.
+ * Instead of virtqueue_kick(), you can do:
+ *	if (virtqueue_kick_prepare(vq))
+ *		virtqueue_notify(vq);
  *
- * Caller must ensure we don't call this with other virtqueue
- * operations at the same time (except where noted).
+ * This is sometimes useful because the virtqueue_kick_prepare() needs
+ * to be serialized, but the actual virtqueue_notify() call does not.
  */
-void virtqueue_kick(struct virtqueue *_vq)
+bool virtqueue_kick_prepare(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	u16 new, old;
+	bool needs_kick;
+
 	START_USE(vq);
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
@@ -287,13 +290,46 @@ void virtqueue_kick(struct virtqueue *_vq)
 	/* Need to update avail index before checking if we should notify */
 	virtio_mb(vq);
 
-	if (vq->event ?
-	    vring_need_event(vring_avail_event(&vq->vring), new, old) :
-	    !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
-		/* Prod other side to tell it about changes. */
-		vq->notify(&vq->vq);
-
+	if (vq->event) {
+		needs_kick = vring_need_event(vring_avail_event(&vq->vring),
+					      new, old);
+	} else {
+		needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY);
+	}
 	END_USE(vq);
+	return needs_kick;
+}
+EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
+
+/**
+ * virtqueue_notify - second half of split virtqueue_kick call.
+ * @vq: the struct virtqueue
+ *
+ * This does not need to be serialized.
+ */
+void virtqueue_notify(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	/* Prod other side to tell it about changes. */
+	vq->notify(_vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_notify);
+
+/**
+ * virtqueue_kick - update after add_buf
+ * @vq: the struct virtqueue
+ *
+ * After one or more virtqueue_add_buf calls, invoke this to kick
+ * the other side.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
+void virtqueue_kick(struct virtqueue *vq)
+{
+	if (virtqueue_kick_prepare(vq))
+		virtqueue_notify(vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick);
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index ec1706e7df50..31fe3a62874b 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -34,6 +34,10 @@ int virtqueue_add_buf(struct virtqueue *vq,
 
 void virtqueue_kick(struct virtqueue *vq);
 
+bool virtqueue_kick_prepare(struct virtqueue *vq);
+
+void virtqueue_notify(struct virtqueue *vq);
+
 void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
 
 void virtqueue_disable_cb(struct virtqueue *vq);
-- 
cgit v1.2.3


From f0fe6f11503fa9880867554350ac5d3092c47251 Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Thu, 22 Dec 2011 16:58:26 +0530
Subject: virtio: pci: add PM notification handlers for restore, freeze, thaw,
 poweroff

Handle thaw, restore and freeze notifications from the PM core.  Expose
these to individual virtio drivers that can quiesce and resume vq
operations.  For drivers not implementing the thaw() method, use the
restore method instead.

These functions also save device-specific data so that the device can be
put in pre-suspend state after resume, and disable and enable the PCI
device in the freeze and resume functions, respectively.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_pci.c | 94 ++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/virtio.h      |  5 +++
 2 files changed, 97 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 7f9ac1af7cfd..635e1efb3792 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -55,6 +55,10 @@ struct virtio_pci_device
 	unsigned msix_vectors;
 	/* Vectors allocated, excluding per-vq vectors if any */
 	unsigned msix_used_vectors;
+
+	/* Status saved during hibernate/restore */
+	u8 saved_status;
+
 	/* Whether we have vector per vq */
 	bool per_vq_vectors;
 };
@@ -734,9 +738,95 @@ static int virtio_pci_resume(struct device *dev)
 	return 0;
 }
 
+static int virtio_pci_freeze(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_driver *drv;
+	int ret;
+
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	ret = 0;
+	vp_dev->saved_status = vp_get_status(&vp_dev->vdev);
+	if (drv && drv->freeze)
+		ret = drv->freeze(&vp_dev->vdev);
+
+	if (!ret)
+		pci_disable_device(pci_dev);
+	return ret;
+}
+
+static int restore_common(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	int ret;
+
+	ret = pci_enable_device(pci_dev);
+	if (ret)
+		return ret;
+	pci_set_master(pci_dev);
+	vp_finalize_features(&vp_dev->vdev);
+
+	return ret;
+}
+
+static int virtio_pci_thaw(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_driver *drv;
+	int ret;
+
+	ret = restore_common(dev);
+	if (ret)
+		return ret;
+
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	if (drv && drv->thaw)
+		ret = drv->thaw(&vp_dev->vdev);
+	else if (drv && drv->restore)
+		ret = drv->restore(&vp_dev->vdev);
+
+	/* Finally, tell the device we're all set */
+	if (!ret)
+		vp_set_status(&vp_dev->vdev, vp_dev->saved_status);
+
+	return ret;
+}
+
+static int virtio_pci_restore(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_driver *drv;
+	int ret;
+
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	ret = restore_common(dev);
+	if (!ret && drv && drv->restore)
+		ret = drv->restore(&vp_dev->vdev);
+
+	/* Finally, tell the device we're all set */
+	if (!ret)
+		vp_set_status(&vp_dev->vdev, vp_dev->saved_status);
+
+	return ret;
+}
+
 static const struct dev_pm_ops virtio_pci_pm_ops = {
-	.suspend = virtio_pci_suspend,
-	.resume  = virtio_pci_resume,
+	.suspend	= virtio_pci_suspend,
+	.resume		= virtio_pci_resume,
+	.freeze		= virtio_pci_freeze,
+	.thaw		= virtio_pci_thaw,
+	.restore	= virtio_pci_restore,
+	.poweroff	= virtio_pci_suspend,
 };
 #endif
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 31fe3a62874b..d0018d27c281 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -94,6 +94,11 @@ struct virtio_driver {
 	int (*probe)(struct virtio_device *dev);
 	void (*remove)(struct virtio_device *dev);
 	void (*config_changed)(struct virtio_device *dev);
+#ifdef CONFIG_PM
+	int (*freeze)(struct virtio_device *dev);
+	int (*thaw)(struct virtio_device *dev);
+	int (*restore)(struct virtio_device *dev);
+#endif
 };
 
 int register_virtio_driver(struct virtio_driver *drv);
-- 
cgit v1.2.3


From fd83240a60ecc59849420df3393e9e6d35c77683 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 12 Jan 2012 09:17:30 +0100
Subject: blockdev: convert some macros to static inlines

We prefer to program in C rather than preprocessor and it fixes this
warning when CONFIG_BLK_DEV_INTEGRITY is not set:

drivers/md/dm-table.c: In function 'dm_table_set_integrity':
drivers/md/dm-table.c:1285:3: warning: statement with no effect [-Wunused-value]

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 77 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 64 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index adc34133a56a..5cfb9b22627f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1298,19 +1298,70 @@ queue_max_integrity_segments(struct request_queue *q)
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
-#define blk_integrity_rq(rq)			(0)
-#define blk_rq_count_integrity_sg(a, b)		(0)
-#define blk_rq_map_integrity_sg(a, b, c)	(0)
-#define bdev_get_integrity(a)			(0)
-#define blk_get_integrity(a)			(0)
-#define blk_integrity_compare(a, b)		(0)
-#define blk_integrity_register(a, b)		(0)
-#define blk_integrity_unregister(a)		do { } while (0)
-#define blk_queue_max_integrity_segments(a, b)	do { } while (0)
-#define queue_max_integrity_segments(a)		(0)
-#define blk_integrity_merge_rq(a, b, c)		(0)
-#define blk_integrity_merge_bio(a, b, c)	(0)
-#define blk_integrity_is_initialized(a)		(0)
+struct bio;
+struct block_device;
+struct gendisk;
+struct blk_integrity;
+
+static inline int blk_integrity_rq(struct request *rq)
+{
+	return 0;
+}
+static inline int blk_rq_count_integrity_sg(struct request_queue *q,
+					    struct bio *b)
+{
+	return 0;
+}
+static inline int blk_rq_map_integrity_sg(struct request_queue *q,
+					  struct bio *b,
+					  struct scatterlist *s)
+{
+	return 0;
+}
+static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
+{
+	return 0;
+}
+static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
+{
+	return NULL;
+}
+static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b)
+{
+	return 0;
+}
+static inline int blk_integrity_register(struct gendisk *d,
+					 struct blk_integrity *b)
+{
+	return 0;
+}
+static inline void blk_integrity_unregister(struct gendisk *d)
+{
+}
+static inline void blk_queue_max_integrity_segments(struct request_queue *q,
+						    unsigned int segs)
+{
+}
+static inline unsigned short queue_max_integrity_segments(struct request_queue *q)
+{
+	return 0;
+}
+static inline int blk_integrity_merge_rq(struct request_queue *rq,
+					 struct request *r1,
+					 struct request *r2)
+{
+	return 0;
+}
+static inline int blk_integrity_merge_bio(struct request_queue *rq,
+					  struct request *r,
+					  struct bio *b)
+{
+	return 0;
+}
+static inline bool blk_integrity_is_initialized(struct gendisk *g)
+{
+	return 0;
+}
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
-- 
cgit v1.2.3


From 46fe44ce8777f087aa8ad4a2605fdcfb9c2d63af Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 16 Nov 2011 15:03:59 +0100
Subject: quota: Pass information that quota is stored in system file to
 userspace

Quota tools need to know whether quota is stored in a system file or in
classical aquota.{user|group} files. So pass this information as a flag
in GETINFO quotactl.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c      | 8 +++++---
 include/linux/quota.h | 6 +++++-
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 5ec59b20cf76..46741970371b 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2125,6 +2125,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 		mutex_unlock(&dqopt->dqio_mutex);
 		goto out_file_init;
 	}
+	if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
+		dqopt->info[type].dqi_flags |= DQF_SYS_FILE;
 	mutex_unlock(&dqopt->dqio_mutex);
 	spin_lock(&dq_state_lock);
 	dqopt->flags |= dquot_state_flag(flags, type);
@@ -2464,7 +2466,7 @@ int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 	spin_lock(&dq_data_lock);
 	ii->dqi_bgrace = mi->dqi_bgrace;
 	ii->dqi_igrace = mi->dqi_igrace;
-	ii->dqi_flags = mi->dqi_flags & DQF_MASK;
+	ii->dqi_flags = mi->dqi_flags & DQF_GETINFO_MASK;
 	ii->dqi_valid = IIF_ALL;
 	spin_unlock(&dq_data_lock);
 	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
@@ -2490,8 +2492,8 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 	if (ii->dqi_valid & IIF_IGRACE)
 		mi->dqi_igrace = ii->dqi_igrace;
 	if (ii->dqi_valid & IIF_FLAGS)
-		mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) |
-				(ii->dqi_flags & DQF_MASK);
+		mi->dqi_flags = (mi->dqi_flags & ~DQF_SETINFO_MASK) |
+				(ii->dqi_flags & DQF_SETINFO_MASK);
 	spin_unlock(&dq_data_lock);
 	mark_info_dirty(sb, type);
 	/* Force write to disk */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index cb7855699037..c09fa042b5ea 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -230,7 +230,11 @@ struct mem_dqinfo {
 struct super_block;
 
 #define DQF_MASK 0xffff		/* Mask for format specific flags */
-#define DQF_INFO_DIRTY_B 16
+#define DQF_GETINFO_MASK 0x1ffff	/* Mask for flags passed to userspace */
+#define DQF_SETINFO_MASK 0xffff		/* Mask for flags modifiable from userspace */
+#define DQF_SYS_FILE_B		16
+#define DQF_SYS_FILE (1 << DQF_SYS_FILE_B)	/* Quota file stored as system file */
+#define DQF_INFO_DIRTY_B	31
 #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B)	/* Is info dirty? */
 
 extern void mark_info_dirty(struct super_block *sb, int type);
-- 
cgit v1.2.3


From 46f72b349290d2bd7aecea38f02609d814332df6 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 10 Jan 2012 09:04:37 -0800
Subject: vfs: export symbol d_find_any_alias()

Ceph needs this.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/dcache.c            | 11 +++++++++--
 include/linux/dcache.h |  1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 89509b5a090e..ba960051dfb7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1471,7 +1471,14 @@ static struct dentry * __d_find_any_alias(struct inode *inode)
 	return alias;
 }
 
-static struct dentry * d_find_any_alias(struct inode *inode)
+/**
+ * d_find_any_alias - find any alias for a given inode
+ * @inode: inode to find an alias for
+ *
+ * If any aliases exist for the given inode, take and return a
+ * reference for one of them.  If no aliases exist, return %NULL.
+ */
+struct dentry *d_find_any_alias(struct inode *inode)
 {
 	struct dentry *de;
 
@@ -1480,7 +1487,7 @@ static struct dentry * d_find_any_alias(struct inode *inode)
 	spin_unlock(&inode->i_lock);
 	return de;
 }
-
+EXPORT_SYMBOL(d_find_any_alias);
 
 /**
  * d_obtain_alias - find or allocate a dentry for a given inode
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index ed9f74f6c519..3871ba743b4c 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -241,6 +241,7 @@ extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
 extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
 extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
+extern struct dentry *d_find_any_alias(struct inode *inode);
 extern struct dentry * d_obtain_alias(struct inode *);
 extern void shrink_dcache_sb(struct super_block *);
 extern void shrink_dcache_parent(struct dentry *);
-- 
cgit v1.2.3


From a4924c71aa43d4f8a3f342b1f71788349472e684 Mon Sep 17 00:00:00 2001
From: Girish K S <girish.shivananjappa@linaro.org>
Date: Wed, 11 Jan 2012 14:04:52 -0500
Subject: mmc: core: HS200 mode support for eMMC 4.5

This patch adds the support of the HS200 bus speed for eMMC 4.5 devices.
The eMMC 4.5 devices have support for 200MHz bus speed. The function
prototype of the tuning function is modified to handle the tuning
command number which is different in sd and mmc case.

Signed-off-by: Girish K S <girish.shivananjappa@linaro.org>
Signed-off-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/bus.c     |   3 +-
 drivers/mmc/core/debugfs.c |   3 +
 drivers/mmc/core/mmc.c     | 162 ++++++++++++++++++++++++++++++++++++++++++---
 drivers/mmc/core/sd.c      |   3 +-
 drivers/mmc/core/sdio.c    |   4 +-
 include/linux/mmc/card.h   |   3 +
 include/linux/mmc/host.h   |  11 ++-
 include/linux/mmc/mmc.h    |  66 +++++++++++++++++-
 8 files changed, 241 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index f8a228a61fd4..5d011a39dfff 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -303,10 +303,11 @@ int mmc_add_card(struct mmc_card *card)
 			mmc_card_ddr_mode(card) ? "DDR " : "",
 			type);
 	} else {
-		printk(KERN_INFO "%s: new %s%s%s card at address %04x\n",
+		pr_info("%s: new %s%s%s%s card at address %04x\n",
 			mmc_hostname(card->host),
 			mmc_card_uhs(card) ? "ultra high speed " :
 			(mmc_card_highspeed(card) ? "high speed " : ""),
+			(mmc_card_hs200(card) ? "HS200 " : ""),
 			mmc_card_ddr_mode(card) ? "DDR " : "",
 			type, card->rca);
 	}
diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 027615d3bf3e..9ab5b17d488a 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -135,6 +135,9 @@ static int mmc_ios_show(struct seq_file *s, void *data)
 	case MMC_TIMING_UHS_DDR50:
 		str = "sd uhs DDR50";
 		break;
+	case MMC_TIMING_MMC_HS200:
+		str = "mmc high-speed SDR200";
+		break;
 	default:
 		str = "invalid";
 		break;
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 67f346e0d105..59b9ba52e66a 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -286,6 +286,27 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 	}
 	card->ext_csd.raw_card_type = ext_csd[EXT_CSD_CARD_TYPE];
 	switch (ext_csd[EXT_CSD_CARD_TYPE] & EXT_CSD_CARD_TYPE_MASK) {
+	case EXT_CSD_CARD_TYPE_SDR_ALL:
+	case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_8V:
+	case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_2V:
+	case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_52:
+		card->ext_csd.hs_max_dtr = 200000000;
+		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_200;
+		break;
+	case EXT_CSD_CARD_TYPE_SDR_1_2V_ALL:
+	case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_8V:
+	case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_2V:
+	case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_52:
+		card->ext_csd.hs_max_dtr = 200000000;
+		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_1_2V;
+		break;
+	case EXT_CSD_CARD_TYPE_SDR_1_8V_ALL:
+	case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_8V:
+	case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_2V:
+	case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_52:
+		card->ext_csd.hs_max_dtr = 200000000;
+		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_1_8V;
+		break;
 	case EXT_CSD_CARD_TYPE_DDR_52 | EXT_CSD_CARD_TYPE_52 |
 	     EXT_CSD_CARD_TYPE_26:
 		card->ext_csd.hs_max_dtr = 52000000;
@@ -699,6 +720,79 @@ static int mmc_select_powerclass(struct mmc_card *card,
 	return err;
 }
 
+/*
+ * Selects the desired buswidth and switch to the HS200 mode
+ * if bus width set without error
+ */
+static int mmc_select_hs200(struct mmc_card *card)
+{
+	int idx, err = 0;
+	struct mmc_host *host;
+	static unsigned ext_csd_bits[] = {
+		EXT_CSD_BUS_WIDTH_4,
+		EXT_CSD_BUS_WIDTH_8,
+	};
+	static unsigned bus_widths[] = {
+		MMC_BUS_WIDTH_4,
+		MMC_BUS_WIDTH_8,
+	};
+
+	BUG_ON(!card);
+
+	host = card->host;
+
+	if (card->ext_csd.card_type & EXT_CSD_CARD_TYPE_SDR_1_2V &&
+	    host->caps2 & MMC_CAP2_HS200_1_2V_SDR)
+		if (mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120, 0))
+			err = mmc_set_signal_voltage(host,
+						     MMC_SIGNAL_VOLTAGE_180, 0);
+
+	/* If fails try again during next card power cycle */
+	if (err)
+		goto err;
+
+	idx = (host->caps & MMC_CAP_8_BIT_DATA) ? 1 : 0;
+
+	/*
+	 * Unlike SD, MMC cards dont have a configuration register to notify
+	 * supported bus width. So bus test command should be run to identify
+	 * the supported bus width or compare the ext csd values of current
+	 * bus width and ext csd values of 1 bit mode read earlier.
+	 */
+	for (; idx >= 0; idx--) {
+
+		/*
+		 * Host is capable of 8bit transfer, then switch
+		 * the device to work in 8bit transfer mode. If the
+		 * mmc switch command returns error then switch to
+		 * 4bit transfer mode. On success set the corresponding
+		 * bus width on the host.
+		 */
+		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+				 EXT_CSD_BUS_WIDTH,
+				 ext_csd_bits[idx],
+				 card->ext_csd.generic_cmd6_time);
+		if (err)
+			continue;
+
+		mmc_set_bus_width(card->host, bus_widths[idx]);
+
+		if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
+			err = mmc_compare_ext_csds(card, bus_widths[idx]);
+		else
+			err = mmc_bus_test(card, bus_widths[idx]);
+		if (!err)
+			break;
+	}
+
+	/* switch to HS200 mode if bus width set successfully */
+	if (!err)
+		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+				 EXT_CSD_HS_TIMING, 2, 0);
+err:
+	return err;
+}
+
 /*
  * Handle the detection and initialisation of a card.
  *
@@ -905,11 +999,15 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	/*
 	 * Activate high speed (if supported)
 	 */
-	if ((card->ext_csd.hs_max_dtr != 0) &&
-		(host->caps & MMC_CAP_MMC_HIGHSPEED)) {
-		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-				 EXT_CSD_HS_TIMING, 1,
-				 card->ext_csd.generic_cmd6_time);
+	if (card->ext_csd.hs_max_dtr != 0) {
+		err = 0;
+		if (card->ext_csd.hs_max_dtr > 52000000 &&
+		    host->caps2 & MMC_CAP2_HS200)
+			err = mmc_select_hs200(card);
+		else if	(host->caps & MMC_CAP_MMC_HIGHSPEED)
+			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+					 EXT_CSD_HS_TIMING, 1, 0);
+
 		if (err && err != -EBADMSG)
 			goto free_card;
 
@@ -918,8 +1016,15 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 			       mmc_hostname(card->host));
 			err = 0;
 		} else {
-			mmc_card_set_highspeed(card);
-			mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+			if (card->ext_csd.hs_max_dtr > 52000000 &&
+			    host->caps2 & MMC_CAP2_HS200) {
+				mmc_card_set_hs200(card);
+				mmc_set_timing(card->host,
+					       MMC_TIMING_MMC_HS200);
+			} else {
+				mmc_card_set_highspeed(card);
+				mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+			}
 		}
 	}
 
@@ -944,7 +1049,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	 */
 	max_dtr = (unsigned int)-1;
 
-	if (mmc_card_highspeed(card)) {
+	if (mmc_card_highspeed(card) || mmc_card_hs200(card)) {
 		if (max_dtr > card->ext_csd.hs_max_dtr)
 			max_dtr = card->ext_csd.hs_max_dtr;
 	} else if (max_dtr > card->csd.max_dtr) {
@@ -969,10 +1074,49 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 				ddr = MMC_1_2V_DDR_MODE;
 	}
 
+	/*
+	 * Indicate HS200 SDR mode (if supported).
+	 */
+	if (mmc_card_hs200(card)) {
+		u32 ext_csd_bits;
+		u32 bus_width = card->host->ios.bus_width;
+
+		/*
+		 * For devices supporting HS200 mode, the bus width has
+		 * to be set before executing the tuning function. If
+		 * set before tuning, then device will respond with CRC
+		 * errors for responses on CMD line. So for HS200 the
+		 * sequence will be
+		 * 1. set bus width 4bit / 8 bit (1 bit not supported)
+		 * 2. switch to HS200 mode
+		 * 3. set the clock to > 52Mhz <=200MHz and
+		 * 4. execute tuning for HS200
+		 */
+		if ((host->caps2 & MMC_CAP2_HS200) &&
+		    card->host->ops->execute_tuning)
+			err = card->host->ops->execute_tuning(card->host,
+				MMC_SEND_TUNING_BLOCK_HS200);
+		if (err) {
+			pr_warning("%s: tuning execution failed\n",
+				   mmc_hostname(card->host));
+			goto err;
+		}
+
+		ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
+				EXT_CSD_BUS_WIDTH_8 : EXT_CSD_BUS_WIDTH_4;
+		err = mmc_select_powerclass(card, ext_csd_bits, ext_csd);
+		if (err) {
+			pr_err("%s: power class selection to bus width %d failed\n",
+				mmc_hostname(card->host), 1 << bus_width);
+			goto err;
+		}
+	}
+
 	/*
 	 * Activate wide bus and DDR (if supported).
 	 */
-	if ((card->csd.mmca_vsn >= CSD_SPEC_VER_4) &&
+	if (!mmc_card_hs200(card) &&
+	    (card->csd.mmca_vsn >= CSD_SPEC_VER_3) &&
 	    (host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) {
 		static unsigned ext_csd_bits[][2] = {
 			{ EXT_CSD_BUS_WIDTH_8, EXT_CSD_DDR_BUS_WIDTH_8 },
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 6f27d35081b8..c63ad03c29c7 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -661,7 +661,8 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
 
 	/* SPI mode doesn't define CMD19 */
 	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning)
-		err = card->host->ops->execute_tuning(card->host);
+		err = card->host->ops->execute_tuning(card->host,
+						      MMC_SEND_TUNING_BLOCK);
 
 out:
 	kfree(status);
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index b77f770ce5d1..bd7bacc950dc 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -14,6 +14,7 @@
 
 #include <linux/mmc/host.h>
 #include <linux/mmc/card.h>
+#include <linux/mmc/mmc.h>
 #include <linux/mmc/sdio.h>
 #include <linux/mmc/sdio_func.h>
 #include <linux/mmc/sdio_ids.h>
@@ -556,7 +557,8 @@ static int mmc_sdio_init_uhs_card(struct mmc_card *card)
 
 	/* Initialize and start re-tuning timer */
 	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning)
-		err = card->host->ops->execute_tuning(card->host);
+		err = card->host->ops->execute_tuning(card->host,
+						      MMC_SEND_TUNING_BLOCK);
 
 out:
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 9478a6bf1bb1..9f22ba572de0 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -216,6 +216,7 @@ struct mmc_card {
 #define MMC_STATE_ULTRAHIGHSPEED (1<<5)		/* card is in ultra high speed mode */
 #define MMC_CARD_SDXC		(1<<6)		/* card is SDXC */
 #define MMC_CARD_REMOVED	(1<<7)		/* card has been removed */
+#define MMC_STATE_HIGHSPEED_200	(1<<8)		/* card is in HS200 mode */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -374,6 +375,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_present(c)	((c)->state & MMC_STATE_PRESENT)
 #define mmc_card_readonly(c)	((c)->state & MMC_STATE_READONLY)
 #define mmc_card_highspeed(c)	((c)->state & MMC_STATE_HIGHSPEED)
+#define mmc_card_hs200(c)	((c)->state & MMC_STATE_HIGHSPEED_200)
 #define mmc_card_blockaddr(c)	((c)->state & MMC_STATE_BLOCKADDR)
 #define mmc_card_ddr_mode(c)	((c)->state & MMC_STATE_HIGHSPEED_DDR)
 #define mmc_card_uhs(c)		((c)->state & MMC_STATE_ULTRAHIGHSPEED)
@@ -384,6 +386,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
 #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
 #define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED)
+#define mmc_card_set_hs200(c)	((c)->state |= MMC_STATE_HIGHSPEED_200)
 #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
 #define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR)
 #define mmc_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 031d865167a2..dd13e0539092 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -56,10 +56,13 @@ struct mmc_ios {
 #define MMC_TIMING_UHS_SDR50	3
 #define MMC_TIMING_UHS_SDR104	4
 #define MMC_TIMING_UHS_DDR50	5
+#define MMC_TIMING_MMC_HS200	6
 
 #define MMC_SDR_MODE		0
 #define MMC_1_2V_DDR_MODE	1
 #define MMC_1_8V_DDR_MODE	2
+#define MMC_1_2V_SDR_MODE	3
+#define MMC_1_8V_SDR_MODE	4
 
 	unsigned char	signal_voltage;		/* signalling voltage (1.8V or 3.3V) */
 
@@ -148,7 +151,9 @@ struct mmc_host_ops {
 	void	(*init_card)(struct mmc_host *host, struct mmc_card *card);
 
 	int	(*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios);
-	int	(*execute_tuning)(struct mmc_host *host);
+
+	/* The tuning command opcode value is different for SD and eMMC cards */
+	int	(*execute_tuning)(struct mmc_host *host, u32 opcode);
 	void	(*enable_preset_value)(struct mmc_host *host, bool enable);
 	int	(*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv);
 	void	(*hw_reset)(struct mmc_host *host);
@@ -248,6 +253,10 @@ struct mmc_host {
 #define MMC_CAP2_POWEROFF_NOTIFY (1 << 2)	/* Notify poweroff supported */
 #define MMC_CAP2_NO_MULTI_READ	(1 << 3)	/* Multiblock reads don't work */
 #define MMC_CAP2_NO_SLEEP_CMD	(1 << 4)	/* Don't allow sleep command */
+#define MMC_CAP2_HS200_1_8V_SDR	(1 << 5)        /* can support */
+#define MMC_CAP2_HS200_1_2V_SDR	(1 << 6)        /* can support */
+#define MMC_CAP2_HS200		(MMC_CAP2_HS200_1_8V_SDR | \
+				 MMC_CAP2_HS200_1_2V_SDR)
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 	unsigned int        power_notify_type;
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 665548e639e8..fb9f6e116e1c 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -51,6 +51,7 @@
 #define MMC_READ_SINGLE_BLOCK    17   /* adtc [31:0] data addr   R1  */
 #define MMC_READ_MULTIPLE_BLOCK  18   /* adtc [31:0] data addr   R1  */
 #define MMC_SEND_TUNING_BLOCK    19   /* adtc                    R1  */
+#define MMC_SEND_TUNING_BLOCK_HS200	21	/* adtc R1  */
 
   /* class 3 */
 #define MMC_WRITE_DAT_UNTIL_STOP 20   /* adtc [31:0] data addr   R1  */
@@ -339,13 +340,76 @@ struct _mmc_csd {
 
 #define EXT_CSD_CARD_TYPE_26	(1<<0)	/* Card can run at 26MHz */
 #define EXT_CSD_CARD_TYPE_52	(1<<1)	/* Card can run at 52MHz */
-#define EXT_CSD_CARD_TYPE_MASK	0xF	/* Mask out reserved bits */
+#define EXT_CSD_CARD_TYPE_MASK	0x3F	/* Mask out reserved bits */
 #define EXT_CSD_CARD_TYPE_DDR_1_8V  (1<<2)   /* Card can run at 52MHz */
 					     /* DDR mode @1.8V or 3V I/O */
 #define EXT_CSD_CARD_TYPE_DDR_1_2V  (1<<3)   /* Card can run at 52MHz */
 					     /* DDR mode @1.2V I/O */
 #define EXT_CSD_CARD_TYPE_DDR_52       (EXT_CSD_CARD_TYPE_DDR_1_8V  \
 					| EXT_CSD_CARD_TYPE_DDR_1_2V)
+#define EXT_CSD_CARD_TYPE_SDR_1_8V	(1<<4)	/* Card can run at 200MHz */
+#define EXT_CSD_CARD_TYPE_SDR_1_2V	(1<<5)	/* Card can run at 200MHz */
+						/* SDR mode @1.2V I/O */
+
+#define EXT_CSD_CARD_TYPE_SDR_200	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+					 EXT_CSD_CARD_TYPE_SDR_1_2V)
+
+#define EXT_CSD_CARD_TYPE_SDR_ALL	(EXT_CSD_CARD_TYPE_SDR_200 | \
+					 EXT_CSD_CARD_TYPE_52 | \
+					 EXT_CSD_CARD_TYPE_26)
+
+#define	EXT_CSD_CARD_TYPE_SDR_1_2V_ALL	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
+					 EXT_CSD_CARD_TYPE_52 | \
+					 EXT_CSD_CARD_TYPE_26)
+
+#define	EXT_CSD_CARD_TYPE_SDR_1_8V_ALL	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+					 EXT_CSD_CARD_TYPE_52 | \
+					 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_8V	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_DDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_8V	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_DDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_2V	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_DDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_2V	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_DDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_52	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_DDR_52 | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_52	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_DDR_52 | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_8V	(EXT_CSD_CARD_TYPE_SDR_200 | \
+						 EXT_CSD_CARD_TYPE_DDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_2V	(EXT_CSD_CARD_TYPE_SDR_200 | \
+						 EXT_CSD_CARD_TYPE_DDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_52	(EXT_CSD_CARD_TYPE_SDR_200 | \
+						 EXT_CSD_CARD_TYPE_DDR_52 | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
 
 #define EXT_CSD_BUS_WIDTH_1	0	/* Card is in 1 bit mode */
 #define EXT_CSD_BUS_WIDTH_4	1	/* Card is in 4 bit mode */
-- 
cgit v1.2.3


From 069c9f142822d552ec885572945d8bce9eff0519 Mon Sep 17 00:00:00 2001
From: Girish K S <girish.shivananjappa@linaro.org>
Date: Fri, 6 Jan 2012 09:56:39 +0530
Subject: mmc: host: Adds support for eMMC 4.5 HS200 mode

This patch adds support for the HS200 mode on the host side.
Also enables the tuning feature required when the HS200 mode
is selected.

Signed-off-by: Girish K S <girish.shivananjappa@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci.c  | 57 ++++++++++++++++++++++++++++++++++-------------
 drivers/mmc/host/sdhci.h  |  1 +
 include/linux/mmc/sdhci.h |  1 +
 3 files changed, 44 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 0636e9a587b1..96f4e548ba22 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -49,7 +49,7 @@ static void sdhci_finish_data(struct sdhci_host *);
 
 static void sdhci_send_command(struct sdhci_host *, struct mmc_command *);
 static void sdhci_finish_command(struct sdhci_host *);
-static int sdhci_execute_tuning(struct mmc_host *mmc);
+static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode);
 static void sdhci_tuning_timer(unsigned long data);
 
 #ifdef CONFIG_PM_RUNTIME
@@ -1014,7 +1014,8 @@ static void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
 		flags |= SDHCI_CMD_INDEX;
 
 	/* CMD19 is special in that the Data Present Select should be set */
-	if (cmd->data || (cmd->opcode == MMC_SEND_TUNING_BLOCK))
+	if (cmd->data || cmd->opcode == MMC_SEND_TUNING_BLOCK ||
+	    cmd->opcode == MMC_SEND_TUNING_BLOCK_HS200)
 		flags |= SDHCI_CMD_DATA;
 
 	sdhci_writew(host, SDHCI_MAKE_CMD(cmd->opcode, flags), SDHCI_COMMAND);
@@ -1287,7 +1288,7 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 		if ((host->flags & SDHCI_NEEDS_RETUNING) &&
 		    !(present_state & (SDHCI_DOING_WRITE | SDHCI_DOING_READ))) {
 			spin_unlock_irqrestore(&host->lock, flags);
-			sdhci_execute_tuning(mmc);
+			sdhci_execute_tuning(mmc, mrq->cmd->opcode);
 			spin_lock_irqsave(&host->lock, flags);
 
 			/* Restore original mmc_request structure */
@@ -1382,7 +1383,8 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 		unsigned int clock;
 
 		/* In case of UHS-I modes, set High Speed Enable */
-		if ((ios->timing == MMC_TIMING_UHS_SDR50) ||
+		if ((ios->timing == MMC_TIMING_MMC_HS200) ||
+		    (ios->timing == MMC_TIMING_UHS_SDR50) ||
 		    (ios->timing == MMC_TIMING_UHS_SDR104) ||
 		    (ios->timing == MMC_TIMING_UHS_DDR50) ||
 		    (ios->timing == MMC_TIMING_UHS_SDR25))
@@ -1435,7 +1437,9 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 			ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
 			/* Select Bus Speed Mode for host */
 			ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
-			if (ios->timing == MMC_TIMING_UHS_SDR12)
+			if (ios->timing == MMC_TIMING_MMC_HS200)
+				ctrl_2 |= SDHCI_CTRL_HS_SDR200;
+			else if (ios->timing == MMC_TIMING_UHS_SDR12)
 				ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
 			else if (ios->timing == MMC_TIMING_UHS_SDR25)
 				ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
@@ -1682,7 +1686,7 @@ static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
 	return err;
 }
 
-static int sdhci_execute_tuning(struct mmc_host *mmc)
+static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 {
 	struct sdhci_host *host;
 	u16 ctrl;
@@ -1690,6 +1694,7 @@ static int sdhci_execute_tuning(struct mmc_host *mmc)
 	int tuning_loop_counter = MAX_TUNING_LOOP;
 	unsigned long timeout;
 	int err = 0;
+	bool requires_tuning_nonuhs = false;
 
 	host = mmc_priv(mmc);
 
@@ -1700,13 +1705,19 @@ static int sdhci_execute_tuning(struct mmc_host *mmc)
 	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
 
 	/*
-	 * Host Controller needs tuning only in case of SDR104 mode
-	 * and for SDR50 mode when Use Tuning for SDR50 is set in
+	 * The Host Controller needs tuning only in case of SDR104 mode
+	 * and for SDR50 mode when Use Tuning for SDR50 is set in the
 	 * Capabilities register.
+	 * If the Host Controller supports the HS200 mode then the
+	 * tuning function has to be executed.
 	 */
+	if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) &&
+	    (host->flags & SDHCI_SDR50_NEEDS_TUNING ||
+	     host->flags & SDHCI_HS200_NEEDS_TUNING))
+		requires_tuning_nonuhs = true;
+
 	if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR104) ||
-	    (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) &&
-	    (host->flags & SDHCI_SDR50_NEEDS_TUNING)))
+	    requires_tuning_nonuhs)
 		ctrl |= SDHCI_CTRL_EXEC_TUNING;
 	else {
 		spin_unlock(&host->lock);
@@ -1742,7 +1753,7 @@ static int sdhci_execute_tuning(struct mmc_host *mmc)
 		if (!tuning_loop_counter && !timeout)
 			break;
 
-		cmd.opcode = MMC_SEND_TUNING_BLOCK;
+		cmd.opcode = opcode;
 		cmd.arg = 0;
 		cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC;
 		cmd.retries = 0;
@@ -1757,7 +1768,17 @@ static int sdhci_execute_tuning(struct mmc_host *mmc)
 		 * block to the Host Controller. So we set the block size
 		 * to 64 here.
 		 */
-		sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64), SDHCI_BLOCK_SIZE);
+		if (cmd.opcode == MMC_SEND_TUNING_BLOCK_HS200) {
+			if (mmc->ios.bus_width == MMC_BUS_WIDTH_8)
+				sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 128),
+					     SDHCI_BLOCK_SIZE);
+			else if (mmc->ios.bus_width == MMC_BUS_WIDTH_4)
+				sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64),
+					     SDHCI_BLOCK_SIZE);
+		} else {
+			sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64),
+				     SDHCI_BLOCK_SIZE);
+		}
 
 		/*
 		 * The tuning block is sent by the card to the host controller.
@@ -2140,12 +2161,14 @@ static void sdhci_show_adma_error(struct sdhci_host *host) { }
 
 static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 {
+	u32 command;
 	BUG_ON(intmask == 0);
 
 	/* CMD19 generates _only_ Buffer Read Ready interrupt */
 	if (intmask & SDHCI_INT_DATA_AVAIL) {
-		if (SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND)) ==
-		    MMC_SEND_TUNING_BLOCK) {
+		command = SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND));
+		if (command == MMC_SEND_TUNING_BLOCK ||
+		    command == MMC_SEND_TUNING_BLOCK_HS200) {
 			host->tuning_done = 1;
 			wake_up(&host->buf_ready_int);
 			return;
@@ -2747,10 +2770,14 @@ int sdhci_add_host(struct sdhci_host *host)
 	if (caps[1] & SDHCI_SUPPORT_DDR50)
 		mmc->caps |= MMC_CAP_UHS_DDR50;
 
-	/* Does the host needs tuning for SDR50? */
+	/* Does the host need tuning for SDR50? */
 	if (caps[1] & SDHCI_USE_SDR50_TUNING)
 		host->flags |= SDHCI_SDR50_NEEDS_TUNING;
 
+	/* Does the host need tuning for HS200? */
+	if (mmc->caps2 & MMC_CAP2_HS200)
+		host->flags |= SDHCI_HS200_NEEDS_TUNING;
+
 	/* Driver Type(s) (A, C, D) supported by the host */
 	if (caps[1] & SDHCI_DRIVER_TYPE_A)
 		mmc->caps |= MMC_CAP_DRIVER_TYPE_A;
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index a04d4d0c6fd2..ad265b96b75b 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -158,6 +158,7 @@
 #define   SDHCI_CTRL_UHS_SDR50		0x0002
 #define   SDHCI_CTRL_UHS_SDR104		0x0003
 #define   SDHCI_CTRL_UHS_DDR50		0x0004
+#define   SDHCI_CTRL_HS_SDR200		0x0005 /* reserved value in SDIO spec */
 #define  SDHCI_CTRL_VDD_180		0x0008
 #define  SDHCI_CTRL_DRV_TYPE_MASK	0x0030
 #define   SDHCI_CTRL_DRV_TYPE_B		0x0000
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index dad7a469f09c..c750f85177d9 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -119,6 +119,7 @@ struct sdhci_host {
 #define SDHCI_AUTO_CMD23	(1<<7)	/* Auto CMD23 support */
 #define SDHCI_PV_ENABLED	(1<<8)	/* Preset value enabled */
 #define SDHCI_SDIO_IRQ_ENABLED	(1<<9)	/* SDIO irq enabled */
+#define SDHCI_HS200_NEEDS_TUNING (1<<10)	/* HS200 needs tuning */
 
 	unsigned int version;	/* SDHCI spec. version */
 
-- 
cgit v1.2.3


From bd77c04772da38fca510c81f78e51f727123b919 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 13 Jan 2012 09:32:14 +1030
Subject: module: struct module_ref should contains long fields

module_ref contains two "unsigned int" fields.

Thats now too small, since some machines can open more than 2^32 files.

Check commit 518de9b39e8 (fs: allow for more than 2^31 files) for
reference.

We can add an aligned(2 * sizeof(unsigned long)) attribute to force
alloc_percpu() allocating module_ref areas in single cache lines.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Rusty Russell <rusty@rustcorp.com.au>
CC: Tejun Heo <tj@kernel.org>
CC: Robin Holt <holt@sgi.com>
CC: David Miller <davem@davemloft.net>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/module.h      | 21 ++++++++++++++++-----
 kernel/debug/kdb/kdb_main.c |  2 +-
 kernel/module.c             |  8 ++++----
 3 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 3cb7839a60b9..4598bf03e98b 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -205,6 +205,20 @@ enum module_state
 	MODULE_STATE_GOING,
 };
 
+/**
+ * struct module_ref - per cpu module reference counts
+ * @incs: number of module get on this cpu
+ * @decs: number of module put on this cpu
+ *
+ * We force an alignment on 8 or 16 bytes, so that alloc_percpu()
+ * put @incs/@decs in same cache line, with no extra memory cost,
+ * since alloc_percpu() is fine grained.
+ */
+struct module_ref {
+	unsigned long incs;
+	unsigned long decs;
+} __attribute((aligned(2 * sizeof(unsigned long))));
+
 struct module
 {
 	enum module_state state;
@@ -347,10 +361,7 @@ struct module
 	/* Destruction function. */
 	void (*exit)(void);
 
-	struct module_ref {
-		unsigned int incs;
-		unsigned int decs;
-	} __percpu *refptr;
+	struct module_ref __percpu *refptr;
 #endif
 
 #ifdef CONFIG_CONSTRUCTORS
@@ -434,7 +445,7 @@ extern void __module_put_and_exit(struct module *mod, long code)
 #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code);
 
 #ifdef CONFIG_MODULE_UNLOAD
-unsigned int module_refcount(struct module *mod);
+unsigned long module_refcount(struct module *mod);
 void __symbol_put(const char *symbol);
 #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x)
 void symbol_put_addr(void *addr);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 63786e71a3cd..e2ae7349437f 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1982,7 +1982,7 @@ static int kdb_lsmod(int argc, const char **argv)
 		kdb_printf("%-20s%8u  0x%p ", mod->name,
 			   mod->core_size, (void *)mod);
 #ifdef CONFIG_MODULE_UNLOAD
-		kdb_printf("%4d ", module_refcount(mod));
+		kdb_printf("%4ld ", module_refcount(mod));
 #endif
 		if (mod->state == MODULE_STATE_GOING)
 			kdb_printf(" (Unloading)");
diff --git a/kernel/module.c b/kernel/module.c
index 4928cffc3dcc..14b8e82e05d4 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -725,9 +725,9 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
 	}
 }
 
-unsigned int module_refcount(struct module *mod)
+unsigned long module_refcount(struct module *mod)
 {
-	unsigned int incs = 0, decs = 0;
+	unsigned long incs = 0, decs = 0;
 	int cpu;
 
 	for_each_possible_cpu(cpu)
@@ -853,7 +853,7 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
 	struct module_use *use;
 	int printed_something = 0;
 
-	seq_printf(m, " %u ", module_refcount(mod));
+	seq_printf(m, " %lu ", module_refcount(mod));
 
 	/* Always include a trailing , so userspace can differentiate
            between this and the old multi-field proc format. */
@@ -903,7 +903,7 @@ EXPORT_SYMBOL_GPL(symbol_put_addr);
 static ssize_t show_refcnt(struct module_attribute *mattr,
 			   struct module_kobject *mk, char *buffer)
 {
-	return sprintf(buffer, "%u\n", module_refcount(mk->mod));
+	return sprintf(buffer, "%lu\n", module_refcount(mk->mod));
 }
 
 static struct module_attribute refcnt = {
-- 
cgit v1.2.3


From bafeafeab94b8d3019aac15c2df2ce47b08a6363 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 13 Jan 2012 09:32:16 +1030
Subject: module_param: check type correctness for module_param_array

module_param_array(), unlike its non-array cousins, didn't check the type
of the variable.  Fixing this found two bugs.

Cc: Luca Risolia <luca.risolia@studio.unibo.it>
Cc: Mauro Carvalho Chehab <mchehab@infradead.org>
Cc: Eric Piel <eric.piel@tremplin-utc.net>
Cc: linux-media@vger.kernel.org
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/media/video/et61x251/et61x251_core.c | 4 ++--
 drivers/media/video/sn9c102/sn9c102_core.c   | 4 ++--
 drivers/mfd/janz-cmodio.c                    | 2 +-
 drivers/misc/lis3lv02d/lis3lv02d.c           | 2 ++
 include/linux/moduleparam.h                  | 1 +
 5 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/video/et61x251/et61x251_core.c b/drivers/media/video/et61x251/et61x251_core.c
index 40f214ab924f..5539f09440ac 100644
--- a/drivers/media/video/et61x251/et61x251_core.c
+++ b/drivers/media/video/et61x251/et61x251_core.c
@@ -76,8 +76,8 @@ MODULE_PARM_DESC(video_nr,
 		 "\none and for every other camera."
 		 "\n");
 
-static short force_munmap[] = {[0 ... ET61X251_MAX_DEVICES-1] =
-			       ET61X251_FORCE_MUNMAP};
+static bool force_munmap[] = {[0 ... ET61X251_MAX_DEVICES-1] =
+			      ET61X251_FORCE_MUNMAP};
 module_param_array(force_munmap, bool, NULL, 0444);
 MODULE_PARM_DESC(force_munmap,
 		 "\n<0|1[,...]> Force the application to unmap previously"
diff --git a/drivers/media/video/sn9c102/sn9c102_core.c b/drivers/media/video/sn9c102/sn9c102_core.c
index 7025be129286..c2882fa5be85 100644
--- a/drivers/media/video/sn9c102/sn9c102_core.c
+++ b/drivers/media/video/sn9c102/sn9c102_core.c
@@ -75,8 +75,8 @@ MODULE_PARM_DESC(video_nr,
 		 "\none and for every other camera."
 		 "\n");
 
-static short force_munmap[] = {[0 ... SN9C102_MAX_DEVICES-1] =
-			       SN9C102_FORCE_MUNMAP};
+static bool force_munmap[] = {[0 ... SN9C102_MAX_DEVICES-1] =
+			      SN9C102_FORCE_MUNMAP};
 module_param_array(force_munmap, bool, NULL, 0444);
 MODULE_PARM_DESC(force_munmap,
 		 " <0|1[,...]>"
diff --git a/drivers/mfd/janz-cmodio.c b/drivers/mfd/janz-cmodio.c
index 5c2a06acb77f..a9223ed1b7c5 100644
--- a/drivers/mfd/janz-cmodio.c
+++ b/drivers/mfd/janz-cmodio.c
@@ -33,7 +33,7 @@
 
 /* Module Parameters */
 static unsigned int num_modules = CMODIO_MAX_MODULES;
-static unsigned char *modules[CMODIO_MAX_MODULES] = {
+static char *modules[CMODIO_MAX_MODULES] = {
 	"empty", "empty", "empty", "empty",
 };
 
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c
index 29d12a70eb1b..a981e2a42f92 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d.c
@@ -111,6 +111,8 @@ static struct kernel_param_ops param_ops_axis = {
 	.get = param_get_int,
 };
 
+#define param_check_axis(name, p) param_check_int(name, p)
+
 module_param_array_named(axes, lis3_dev.ac.as_array, axis, NULL, 0644);
 MODULE_PARM_DESC(axes, "Axis-mapping for x,y,z directions");
 
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 7939f636c8ba..794d4b0f1215 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -395,6 +395,7 @@ extern int param_get_invbool(char *buffer, const struct kernel_param *kp);
  * module_param_named() for why this might be necessary.
  */
 #define module_param_array_named(name, array, type, nump, perm)		\
+	param_check_##type(name, &(array)[0]);				\
 	static const struct kparam_array __param_arr_##name		\
 	= { .max = ARRAY_SIZE(array), .num = nump,                      \
 	    .ops = &param_ops_##type,					\
-- 
cgit v1.2.3


From 69116f279a9eaf4c540934269342d9149538fc79 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 13 Jan 2012 09:32:17 +1030
Subject: module_param: avoid bool abuse, add bint for special cases.

For historical reasons, we allow module_param(bool) to take an int (or
an unsigned int).  That's going away.

A few drivers really want an int: they set it to -1 and a parameter
will set it to 0 or 1.  This sucks: reading them from sysfs will give
'Y' for both -1 and 1, but if we change it to an int, then the users
might be broken (if they did "param" instead of "param=1").

Use a new 'bint' parser for them.

(ntfs has a different problem: it needs an int for debug_msgs because
it's also exposed via sysctl.)

Cc: Steve Glendinning <steve.glendinning@smsc.com>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Guenter Roeck <guenter.roeck@ericsson.com>
Cc: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
Cc: Christoph Raisch <raisch@de.ibm.com>
Cc: Roland Dreier <roland@kernel.org>
Cc: Sean Hefty <sean.hefty@intel.com>
Cc: Hal Rosenstock <hal.rosenstock@gmail.com>
Cc: linux390@de.ibm.com
Cc: Anton Altaparmakov <anton@tuxera.com>
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: lm-sensors@lm-sensors.org
Cc: linux-rdma@vger.kernel.org
Cc: linux-s390@vger.kernel.org
Cc: linux-ntfs-dev@lists.sourceforge.net
Cc: alsa-devel@alsa-project.org
Acked-by: Takashi Iwai <tiwai@suse.de> (For the sound part)
Acked-by: Guenter Roeck <guenter.roeck@ericsson.com> (For the hwmon driver)
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/hwmon/emc2103.c                |  2 +-
 drivers/infiniband/hw/ehca/ehca_main.c |  2 +-
 drivers/s390/cio/cmf.c                 |  2 +-
 fs/ntfs/super.c                        |  2 +-
 include/linux/moduleparam.h            |  6 ++++++
 kernel/params.c                        | 24 ++++++++++++++++++++++++
 sound/pci/intel8x0.c                   |  4 ++--
 7 files changed, 36 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/emc2103.c b/drivers/hwmon/emc2103.c
index 848a2b0bc83f..865063914d76 100644
--- a/drivers/hwmon/emc2103.c
+++ b/drivers/hwmon/emc2103.c
@@ -55,7 +55,7 @@ static const u8 REG_TEMP_MAX[4] = { 0x34, 0x30, 0x31, 0x32 };
  * it.  Default is to leave the device in the state it's already in (-1).
  * This parameter allows APD mode to be optionally forced on or off */
 static int apd = -1;
-module_param(apd, bool, 0);
+module_param(apd, bint, 0);
 MODULE_PARM_DESC(init, "Set to zero to disable anti-parallel diode mode");
 
 struct temperature {
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index c240e9972cb0..8af8d4f7bdb1 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -82,7 +82,7 @@ module_param_named(port_act_time, ehca_port_act_time, int,  S_IRUGO);
 module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  bool, S_IRUGO);
 module_param_named(static_rate,   ehca_static_rate,   int,  S_IRUGO);
 module_param_named(scaling_code,  ehca_scaling_code,  bool, S_IRUGO);
-module_param_named(lock_hcalls,   ehca_lock_hcalls,   bool, S_IRUGO);
+module_param_named(lock_hcalls,   ehca_lock_hcalls,   bint, S_IRUGO);
 module_param_named(number_of_cqs, ehca_max_cq,        int,  S_IRUGO);
 module_param_named(number_of_qps, ehca_max_qp,        int,  S_IRUGO);
 
diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c
index 2985eb439485..204ca728e7fd 100644
--- a/drivers/s390/cio/cmf.c
+++ b/drivers/s390/cio/cmf.c
@@ -98,7 +98,7 @@ enum cmb_format {
  * enum cmb_format.
  */
 static int format = CMF_AUTODETECT;
-module_param(format, bool, 0444);
+module_param(format, bint, 0444);
 
 /**
  * struct cmb_operations - functions to use depending on cmb_format
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 608be4516091..5a4a8af5c406 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3198,7 +3198,7 @@ MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2011 Anton Altaparm
 MODULE_VERSION(NTFS_VERSION);
 MODULE_LICENSE("GPL");
 #ifdef DEBUG
-module_param(debug_msgs, bool, 0);
+module_param(debug_msgs, bint, 0);
 MODULE_PARM_DESC(debug_msgs, "Enable debug messages.");
 #endif
 
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 794d4b0f1215..6bdde0c3bcca 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -367,6 +367,12 @@ extern int param_set_invbool(const char *val, const struct kernel_param *kp);
 extern int param_get_invbool(char *buffer, const struct kernel_param *kp);
 #define param_check_invbool(name, p) __param_check(name, p, bool)
 
+/* An int, which can only be set like a bool (though it shows as an int). */
+extern struct kernel_param_ops param_ops_bint;
+extern int param_set_bint(const char *val, const struct kernel_param *kp);
+#define param_get_bint param_get_int
+#define param_check_bint param_check_int
+
 /**
  * module_param_array - a parameter which is an array of some type
  * @name: the name of the array variable
diff --git a/kernel/params.c b/kernel/params.c
index 9240664af110..32ee04308285 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -363,6 +363,30 @@ struct kernel_param_ops param_ops_invbool = {
 };
 EXPORT_SYMBOL(param_ops_invbool);
 
+int param_set_bint(const char *val, const struct kernel_param *kp)
+{
+	struct kernel_param boolkp;
+	bool v;
+	int ret;
+
+	/* Match bool exactly, by re-using it. */
+	boolkp = *kp;
+	boolkp.arg = &v;
+	boolkp.flags |= KPARAM_ISBOOL;
+
+	ret = param_set_bool(val, &boolkp);
+	if (ret == 0)
+		*(int *)kp->arg = v;
+	return ret;
+}
+EXPORT_SYMBOL(param_set_bint);
+
+struct kernel_param_ops param_ops_bint = {
+	.set = param_set_bint,
+	.get = param_get_int,
+};
+EXPORT_SYMBOL(param_ops_bint);
+
 /* We break the rule and mangle the string. */
 static int param_array(const char *name,
 		       const char *val,
diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index 40b181bab930..9f3b01bb72c8 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -95,13 +95,13 @@ module_param(ac97_quirk, charp, 0444);
 MODULE_PARM_DESC(ac97_quirk, "AC'97 workaround for strange hardware.");
 module_param(buggy_semaphore, bool, 0444);
 MODULE_PARM_DESC(buggy_semaphore, "Enable workaround for hardwares with problematic codec semaphores.");
-module_param(buggy_irq, bool, 0444);
+module_param(buggy_irq, bint, 0444);
 MODULE_PARM_DESC(buggy_irq, "Enable workaround for buggy interrupts on some motherboards.");
 module_param(xbox, bool, 0444);
 MODULE_PARM_DESC(xbox, "Set to 1 for Xbox, if you have problems with the AC'97 codec detection.");
 module_param(spdif_aclink, int, 0444);
 MODULE_PARM_DESC(spdif_aclink, "S/PDIF over AC-link.");
-module_param(inside_vm, bool, 0444);
+module_param(inside_vm, bint, 0444);
 MODULE_PARM_DESC(inside_vm, "KVM/Parallels optimization.");
 
 /* just for backward compatibility */
-- 
cgit v1.2.3


From 2329abfa344a9a824bc4c71f2415528777265510 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 13 Jan 2012 09:32:18 +1030
Subject: module_param: make bool parameters really bool (core code)

module_param(bool) used to counter-intuitively take an int.  In
fddd5201 (mid-2009) we allowed bool or int/unsigned int using a messy
trick.

It's time to remove the int/unsigned int option.  For this version
it'll simply give a warning, but it'll break next kernel version.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/init.h   | 3 ++-
 init/main.c            | 2 +-
 kernel/irq/internals.h | 2 +-
 kernel/irq/spurious.c  | 2 +-
 kernel/printk.c        | 8 ++++----
 5 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index 9146f39cdddf..6b951095a42f 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -2,6 +2,7 @@
 #define _LINUX_INIT_H
 
 #include <linux/compiler.h>
+#include <linux/types.h>
 
 /* These macros are used to mark some functions or 
  * initialized data (doesn't apply to uninitialized data)
@@ -156,7 +157,7 @@ void prepare_namespace(void);
 
 extern void (*late_time_init)(void);
 
-extern int initcall_debug;
+extern bool initcall_debug;
 
 #endif
   
diff --git a/init/main.c b/init/main.c
index 415548e808d2..ff49a6dacfbb 100644
--- a/init/main.c
+++ b/init/main.c
@@ -648,7 +648,7 @@ static void __init do_ctors(void)
 #endif
 }
 
-int initcall_debug;
+bool initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
 
 static char msgbuf[64];
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index a73dd6c7372d..b7952316016a 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -15,7 +15,7 @@
 
 #define istate core_internal_state__do_not_mess_with_it
 
-extern int noirqdebug;
+extern bool noirqdebug;
 
 /*
  * Bits used by threaded handlers:
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index dc813a948be2..611cd6003c45 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -325,7 +325,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
 	desc->irqs_unhandled = 0;
 }
 
-int noirqdebug __read_mostly;
+bool noirqdebug __read_mostly;
 
 int noirqdebug_setup(char *str)
 {
diff --git a/kernel/printk.c b/kernel/printk.c
index 63b3bc31fe32..13c0a1143f49 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -521,7 +521,7 @@ static void __call_console_drivers(unsigned start, unsigned end)
 	}
 }
 
-static int __read_mostly ignore_loglevel;
+static bool __read_mostly ignore_loglevel;
 
 static int __init ignore_loglevel_setup(char *str)
 {
@@ -696,9 +696,9 @@ static void zap_locks(void)
 }
 
 #if defined(CONFIG_PRINTK_TIME)
-static int printk_time = 1;
+static bool printk_time = 1;
 #else
-static int printk_time = 0;
+static bool printk_time = 0;
 #endif
 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
 
@@ -1098,7 +1098,7 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
 	return -1;
 }
 
-int console_suspend_enabled = 1;
+bool console_suspend_enabled = 1;
 EXPORT_SYMBOL(console_suspend_enabled);
 
 static int __init console_suspend_disable(char *str)
-- 
cgit v1.2.3


From 90ab5ee94171b3e28de6bb42ee30b527014e0be7 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 13 Jan 2012 09:32:20 +1030
Subject: module_param: make bool parameters really bool (drivers & misc)

module_param(bool) used to counter-intuitively take an int.  In
fddd5201 (mid-2009) we allowed bool or int/unsigned int using a messy
trick.

It's time to remove the int/unsigned int option.  For this version
it'll simply give a warning, but it'll break next kernel version.

Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/accessibility/braille/braille_console.c |  2 +-
 drivers/acpi/acpica/acglobal.h                  |  2 +-
 drivers/acpi/apei/ghes.c                        |  2 +-
 drivers/acpi/apei/hest.c                        |  2 +-
 drivers/acpi/dock.c                             |  2 +-
 drivers/acpi/pci_slot.c                         |  2 +-
 drivers/acpi/video.c                            |  6 +++---
 drivers/ata/sata_nv.c                           |  6 +++---
 drivers/ata/sata_sil24.c                        |  2 +-
 drivers/atm/he.c                                |  6 +++---
 drivers/block/drbd/drbd_int.h                   |  4 ++--
 drivers/block/drbd/drbd_main.c                  |  4 ++--
 drivers/block/paride/bpck6.c                    |  5 ++---
 drivers/block/paride/pd.c                       |  3 ++-
 drivers/block/paride/pf.c                       |  4 +++-
 drivers/block/paride/pg.c                       |  3 ++-
 drivers/block/paride/pt.c                       |  4 +++-
 drivers/block/xd.c                              |  2 +-
 drivers/bluetooth/btusb.c                       | 12 ++++++------
 drivers/bluetooth/hci_bcsp.c                    |  4 ++--
 drivers/bluetooth/hci_ldisc.c                   |  2 +-
 drivers/cdrom/cdrom.c                           | 12 ++++++------
 drivers/char/agp/amd64-agp.c                    |  2 +-
 drivers/char/agp/sis-agp.c                      |  2 +-
 drivers/char/i8k.c                              |  8 ++++----
 drivers/char/ipmi/ipmi_si_intf.c                |  2 +-
 drivers/char/lp.c                               |  2 +-
 drivers/char/nwflash.c                          |  2 +-
 drivers/char/pcmcia/synclink_cs.c               |  2 +-
 drivers/char/random.c                           |  2 +-
 drivers/char/tpm/tpm_tis.c                      |  6 +++---
 drivers/edac/r82600_edac.c                      |  2 +-
 drivers/firewire/sbp2.c                         |  2 +-
 drivers/hid/hid-prodikeys.c                     |  2 +-
 drivers/hwmon/abituguru.c                       |  2 +-
 drivers/hwmon/abituguru3.c                      |  4 ++--
 drivers/hwmon/acpi_power_meter.c                |  2 +-
 drivers/hwmon/adm1021.c                         |  2 +-
 drivers/hwmon/ads7828.c                         |  4 ++--
 drivers/hwmon/dme1737.c                         |  4 ++--
 drivers/hwmon/it87.c                            |  4 ++--
 drivers/hwmon/lm93.c                            |  4 ++--
 drivers/hwmon/max1668.c                         |  2 +-
 drivers/hwmon/w83627hf.c                        |  2 +-
 drivers/hwmon/w83781d.c                         |  4 ++--
 drivers/hwmon/w83791d.c                         |  4 ++--
 drivers/hwmon/w83792d.c                         |  2 +-
 drivers/hwmon/w83793.c                          |  2 +-
 drivers/hwmon/w83795.c                          |  2 +-
 drivers/hwmon/w83l786ng.c                       |  2 +-
 drivers/i2c/busses/i2c-highlander.c             |  2 +-
 drivers/i2c/busses/i2c-ibm_iic.c                |  4 ++--
 drivers/i2c/busses/i2c-sis630.c                 |  4 ++--
 drivers/i2c/busses/i2c-viapro.c                 |  2 +-
 drivers/ide/ali14xx.c                           |  2 +-
 drivers/ide/cmd640.c                            |  2 +-
 drivers/ide/dtc2278.c                           |  2 +-
 drivers/ide/gayle.c                             |  2 +-
 drivers/ide/ht6560b.c                           |  2 +-
 drivers/ide/ide-4drives.c                       |  2 +-
 drivers/ide/ide-acpi.c                          |  6 +++---
 drivers/ide/ide-pci-generic.c                   |  2 +-
 drivers/ide/qd65xx.c                            |  2 +-
 drivers/ide/umc8672.c                           |  2 +-
 drivers/infiniband/hw/ehca/ehca_classes.h       |  4 ++--
 drivers/infiniband/hw/ehca/ehca_main.c          |  8 ++++----
 drivers/infiniband/hw/nes/nes.c                 |  2 +-
 drivers/input/joystick/xpad.c                   |  6 +++---
 drivers/input/misc/wistron_btns.c               |  2 +-
 drivers/input/mouse/psmouse-base.c              |  2 +-
 drivers/input/mouse/synaptics_i2c.c             |  6 +++---
 drivers/input/serio/hp_sdc.c                    |  2 +-
 drivers/input/touchscreen/eeti_ts.c             |  4 ++--
 drivers/input/touchscreen/htcpen.c              |  4 ++--
 drivers/input/touchscreen/ucb1400_ts.c          |  2 +-
 drivers/input/touchscreen/usbtouchscreen.c      |  4 ++--
 drivers/isdn/hardware/avm/b1dma.c               |  2 +-
 drivers/isdn/hardware/avm/c4.c                  |  2 +-
 drivers/isdn/sc/init.c                          |  2 +-
 drivers/leds/leds-clevo-mail.c                  |  2 +-
 drivers/leds/leds-ss4200.c                      |  2 +-
 drivers/macintosh/ams/ams-core.c                |  2 +-
 drivers/macintosh/ams/ams-input.c               |  4 ++--
 drivers/macintosh/therm_adt746x.c               |  2 +-
 drivers/media/dvb/dvb-usb/af9005.c              |  2 +-
 drivers/media/dvb/dvb-usb/af9005.h              |  2 +-
 drivers/media/radio/radio-gemtek.c              | 10 +++++-----
 drivers/media/radio/radio-miropcm20.c           |  2 +-
 drivers/media/rc/lirc_dev.c                     |  2 +-
 drivers/media/rc/mceusb.c                       |  4 ++--
 drivers/media/rc/streamzap.c                    |  4 ++--
 drivers/media/rc/winbond-cir.c                  |  4 ++--
 drivers/media/video/c-qcam.c                    |  2 +-
 drivers/media/video/cs5345.c                    |  2 +-
 drivers/media/video/cs53l32a.c                  |  2 +-
 drivers/media/video/cx18/cx18-driver.c          |  2 +-
 drivers/media/video/cx25821/cx25821-alsa.c      |  2 +-
 drivers/media/video/cx88/cx88-alsa.c            |  2 +-
 drivers/media/video/gspca/m5602/m5602_core.c    |  4 ++--
 drivers/media/video/gspca/m5602/m5602_mt9m111.h |  2 +-
 drivers/media/video/gspca/m5602/m5602_ov7660.h  |  2 +-
 drivers/media/video/gspca/m5602/m5602_ov9650.h  |  2 +-
 drivers/media/video/gspca/m5602/m5602_po1030.h  |  2 +-
 drivers/media/video/gspca/m5602/m5602_s5k4aa.h  |  2 +-
 drivers/media/video/gspca/m5602/m5602_s5k83a.h  |  2 +-
 drivers/media/video/gspca/stv06xx/stv06xx.c     |  4 ++--
 drivers/media/video/hdpvr/hdpvr-core.c          |  2 +-
 drivers/media/video/ivtv/ivtv-driver.c          |  2 +-
 drivers/media/video/ivtv/ivtvfb.c               |  2 +-
 drivers/media/video/marvell-ccic/mcam-core.c    |  6 +++---
 drivers/media/video/msp3400-driver.c            |  6 +++---
 drivers/media/video/msp3400-driver.h            |  6 +++---
 drivers/media/video/omap/omap_vout.c            |  6 +++---
 drivers/media/video/omap/omap_vout_vrfb.c       |  2 +-
 drivers/media/video/ov7670.c                    |  2 +-
 drivers/media/video/saa7115.c                   |  2 +-
 drivers/media/video/stk-webcam.c                |  4 ++--
 drivers/media/video/tm6000/tm6000-alsa.c        |  2 +-
 drivers/media/video/tvp514x.c                   |  2 +-
 drivers/media/video/tvp7002.c                   |  2 +-
 drivers/media/video/upd64083.c                  |  2 +-
 drivers/media/video/via-camera.c                |  4 ++--
 drivers/media/video/zoran/zoran_device.c        |  2 +-
 drivers/media/video/zoran/zr36060.c             |  2 +-
 drivers/memstick/host/jmb38x_ms.c               |  2 +-
 drivers/memstick/host/r592.c                    |  2 +-
 drivers/memstick/host/tifm_ms.c                 |  2 +-
 drivers/misc/iwmc3200top/main.c                 | 12 ++++++------
 drivers/mmc/core/core.c                         |  6 +++---
 drivers/mmc/core/core.h                         |  2 +-
 drivers/mmc/host/tifm_sd.c                      |  4 ++--
 drivers/mmc/host/vub300.c                       | 10 +++++-----
 drivers/mtd/nand/pxa3xx_nand.c                  |  2 +-
 drivers/mtd/nand/r852.c                         |  2 +-
 drivers/parport/parport_ip32.c                  |  2 +-
 drivers/pci/hotplug/acpi_pcihp.c                |  2 +-
 drivers/pci/hotplug/acpiphp_core.c              |  2 +-
 drivers/pci/hotplug/acpiphp_ibm.c               |  2 +-
 drivers/pci/hotplug/cpcihp_zt5550.c             |  4 ++--
 drivers/pci/hotplug/cpqphp_core.c               |  4 ++--
 drivers/pci/hotplug/ibmphp_core.c               |  2 +-
 drivers/pci/hotplug/pci_hotplug_core.c          |  2 +-
 drivers/pci/hotplug/pciehp.h                    |  6 +++---
 drivers/pci/hotplug/pciehp_core.c               |  6 +++---
 drivers/pci/hotplug/pcihp_skeleton.c            |  2 +-
 drivers/pci/hotplug/rpaphp.h                    |  2 +-
 drivers/pci/hotplug/rpaphp_core.c               |  2 +-
 drivers/pci/hotplug/shpchp.h                    |  4 ++--
 drivers/pci/hotplug/shpchp_core.c               |  4 ++--
 drivers/pci/pcie/aer/aer_inject.c               |  2 +-
 drivers/pci/pcie/aer/aerdrv_core.c              |  4 ++--
 drivers/pcmcia/yenta_socket.c                   |  6 +++---
 drivers/platform/x86/compal-laptop.c            |  2 +-
 drivers/platform/x86/intel_oaktrail.c           |  2 +-
 drivers/platform/x86/msi-laptop.c               |  2 +-
 drivers/platform/x86/samsung-laptop.c           |  4 ++--
 drivers/platform/x86/thinkpad_acpi.c            | 16 ++++++++--------
 drivers/platform/x86/wmi.c                      |  4 ++--
 drivers/power/ds2760_battery.c                  |  2 +-
 drivers/s390/char/raw3270.c                     |  2 +-
 drivers/s390/char/vmwatchdog.c                  |  4 ++--
 drivers/scsi/aha1542.c                          |  2 +-
 drivers/scsi/dc395x.c                           |  2 +-
 drivers/scsi/nsp32.c                            |  4 ++--
 drivers/scsi/pcmcia/nsp_cs.c                    |  2 +-
 drivers/staging/comedi/comedi_fops.c            |  2 +-
 drivers/staging/comedi/comedi_fops.h            |  3 ++-
 drivers/staging/media/go7007/snd-go7007.c       |  2 +-
 drivers/staging/media/lirc/lirc_bt829.c         |  2 +-
 drivers/staging/media/lirc/lirc_igorplugusb.c   |  4 ++--
 drivers/staging/media/lirc/lirc_parallel.c      |  4 ++--
 drivers/staging/media/lirc/lirc_serial.c        | 10 +++++-----
 drivers/staging/media/lirc/lirc_sir.c           |  2 +-
 drivers/staging/media/lirc/lirc_zilog.c         |  4 ++--
 drivers/staging/quatech_usb2/quatech_usb2.c     |  2 +-
 drivers/staging/serqt_usb2/serqt_usb2.c         |  2 +-
 drivers/staging/speakup/speakup.h               |  2 +-
 drivers/staging/speakup/synth.c                 |  2 +-
 drivers/staging/vme/bridges/vme_tsi148.c        |  2 +-
 drivers/tty/rocket.c                            |  2 +-
 drivers/tty/synclink.c                          |  2 +-
 drivers/tty/synclinkmp.c                        |  2 +-
 drivers/usb/atm/speedtch.c                      |  6 +++---
 drivers/usb/atm/ueagle-atm.c                    |  2 +-
 drivers/usb/core/devio.c                        |  2 +-
 drivers/usb/core/hub.c                          |  8 ++++----
 drivers/usb/core/usb.c                          |  2 +-
 drivers/usb/gadget/amd5536udc.c                 |  8 ++++----
 drivers/usb/gadget/ether.c                      |  4 ++--
 drivers/usb/gadget/file_storage.c               | 10 +++++-----
 drivers/usb/gadget/net2272.c                    |  2 +-
 drivers/usb/gadget/net2280.c                    |  6 +++---
 drivers/usb/gadget/omap_udc.c                   |  2 +-
 drivers/usb/gadget/pch_udc.c                    |  2 +-
 drivers/usb/gadget/serial.c                     |  4 ++--
 drivers/usb/gadget/zero.c                       |  2 +-
 drivers/usb/host/ehci-hcd.c                     |  2 +-
 drivers/usb/host/ohci-hcd.c                     |  4 ++--
 drivers/usb/host/oxu210hp-hcd.c                 |  2 +-
 drivers/usb/host/u132-hcd.c                     |  2 +-
 drivers/usb/host/uhci-hcd.c                     |  2 +-
 drivers/usb/misc/ftdi-elan.c                    |  2 +-
 drivers/usb/misc/iowarrior.c                    |  2 +-
 drivers/usb/musb/cppi_dma.c                     |  2 +-
 drivers/usb/musb/musb_core.c                    |  2 +-
 drivers/usb/serial/aircable.c                   |  2 +-
 drivers/usb/serial/ark3116.c                    |  2 +-
 drivers/usb/serial/belkin_sa.c                  |  2 +-
 drivers/usb/serial/ch341.c                      |  2 +-
 drivers/usb/serial/cp210x.c                     |  2 +-
 drivers/usb/serial/cyberjack.c                  |  2 +-
 drivers/usb/serial/cypress_m8.c                 |  6 +++---
 drivers/usb/serial/digi_acceleport.c            |  2 +-
 drivers/usb/serial/empeg.c                      |  2 +-
 drivers/usb/serial/ftdi_sio.c                   |  2 +-
 drivers/usb/serial/funsoft.c                    |  2 +-
 drivers/usb/serial/garmin_gps.c                 |  2 +-
 drivers/usb/serial/io_edgeport.c                |  2 +-
 drivers/usb/serial/io_ti.c                      |  4 ++--
 drivers/usb/serial/ipaq.c                       |  2 +-
 drivers/usb/serial/ipw.c                        |  2 +-
 drivers/usb/serial/ir-usb.c                     |  2 +-
 drivers/usb/serial/iuu_phoenix.c                |  6 +++---
 drivers/usb/serial/keyspan.c                    |  2 +-
 drivers/usb/serial/keyspan_pda.c                |  2 +-
 drivers/usb/serial/kl5kusb105.c                 |  2 +-
 drivers/usb/serial/mct_u232.c                   |  2 +-
 drivers/usb/serial/mos7720.c                    |  2 +-
 drivers/usb/serial/mos7840.c                    |  2 +-
 drivers/usb/serial/navman.c                     |  2 +-
 drivers/usb/serial/omninet.c                    |  2 +-
 drivers/usb/serial/opticon.c                    |  2 +-
 drivers/usb/serial/option.c                     |  2 +-
 drivers/usb/serial/oti6858.c                    |  2 +-
 drivers/usb/serial/pl2303.c                     |  2 +-
 drivers/usb/serial/qcserial.c                   |  2 +-
 drivers/usb/serial/safe_serial.c                |  6 +++---
 drivers/usb/serial/sierra.c                     |  4 ++--
 drivers/usb/serial/spcp8x5.c                    |  2 +-
 drivers/usb/serial/ssu100.c                     |  2 +-
 drivers/usb/serial/symbolserial.c               |  2 +-
 drivers/usb/serial/ti_usb_3410_5052.c           |  2 +-
 drivers/usb/serial/usb-serial.c                 |  2 +-
 drivers/usb/serial/usb_wwan.c                   |  2 +-
 drivers/usb/serial/visor.c                      |  2 +-
 drivers/usb/serial/whiteheat.c                  |  2 +-
 drivers/video/aty/atyfb_base.c                  |  4 ++--
 drivers/video/aty/radeon_base.c                 | 18 +++++++++---------
 drivers/video/cirrusfb.c                        |  2 +-
 drivers/video/hgafb.c                           |  2 +-
 drivers/video/intelfb/intelfbdrv.c              | 16 ++++++++--------
 drivers/video/logo/logo.c                       |  2 +-
 drivers/video/neofb.c                           | 10 +++++-----
 drivers/video/omap/omapfb_main.c                |  4 ++--
 drivers/video/omap2/dss/core.c                  |  2 +-
 drivers/video/omap2/dss/dsi.c                   |  4 ++--
 drivers/video/omap2/dss/dss.h                   |  2 +-
 drivers/video/omap2/omapfb/omapfb-main.c        |  8 ++++----
 drivers/video/omap2/omapfb/omapfb.h             |  2 +-
 drivers/video/pm2fb.c                           |  8 ++++----
 drivers/video/pm3fb.c                           |  4 ++--
 drivers/video/riva/fbdev.c                      |  6 +++---
 drivers/video/smscufx.c                         |  4 ++--
 drivers/video/sstfb.c                           |  6 +++---
 drivers/video/tdfxfb.c                          |  2 +-
 drivers/video/udlfb.c                           |  6 +++---
 drivers/video/uvesafb.c                         |  6 +++---
 drivers/video/vfb.c                             |  2 +-
 drivers/watchdog/f71808e_wdt.c                  |  2 +-
 drivers/watchdog/mpc8xxx_wdt.c                  |  2 +-
 drivers/xen/xen-pciback/conf_space.c            |  2 +-
 drivers/xen/xen-pciback/xenbus.c                |  2 +-
 fs/lockd/mon.c                                  |  2 +-
 fs/nfs/client.c                                 |  2 +-
 fs/nfs/inode.c                                  |  2 +-
 include/acpi/acpixf.h                           |  2 +-
 include/acpi/apei.h                             |  4 ++--
 include/linux/console.h                         |  2 +-
 include/linux/lockd/lockd.h                     |  2 +-
 include/linux/mmc/host.h                        |  2 +-
 security/apparmor/include/apparmor.h            | 10 +++++-----
 security/apparmor/lsm.c                         | 12 ++++++------
 virt/kvm/iommu.c                                |  2 +-
 283 files changed, 471 insertions(+), 465 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/accessibility/braille/braille_console.c b/drivers/accessibility/braille/braille_console.c
index cb423f5aef24..c339a0880e6e 100644
--- a/drivers/accessibility/braille/braille_console.c
+++ b/drivers/accessibility/braille/braille_console.c
@@ -44,7 +44,7 @@ MODULE_LICENSE("GPL");
  */
 
 /* Emit various sounds */
-static int sound;
+static bool sound;
 module_param(sound, bool, 0);
 MODULE_PARM_DESC(sound, "emit sounds");
 
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 76dc02f15574..e6652d716e45 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -108,7 +108,7 @@ u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE);
 /*
  * Optionally enable output from the AML Debug Object.
  */
-u32 ACPI_INIT_GLOBAL(acpi_gbl_enable_aml_debug_object, FALSE);
+bool ACPI_INIT_GLOBAL(acpi_gbl_enable_aml_debug_object, FALSE);
 
 /*
  * Optionally copy the entire DSDT to local memory (instead of simply
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index b8e08cb67a18..ebaf037a787b 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -118,7 +118,7 @@ struct ghes_estatus_cache {
 	struct rcu_head rcu;
 };
 
-int ghes_disable;
+bool ghes_disable;
 module_param_named(disable, ghes_disable, bool, 0);
 
 static int ghes_panic_timeout	__read_mostly = 30;
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 05fee06f4d6e..ee7fddc4665c 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -41,7 +41,7 @@
 
 #define HEST_PFX "HEST: "
 
-int hest_disable;
+bool hest_disable;
 EXPORT_SYMBOL_GPL(hest_disable);
 
 /* HEST table parsing */
diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c
index 19a61136d848..88eb14304667 100644
--- a/drivers/acpi/dock.c
+++ b/drivers/acpi/dock.c
@@ -43,7 +43,7 @@ MODULE_AUTHOR("Kristen Carlson Accardi");
 MODULE_DESCRIPTION(ACPI_DOCK_DRIVER_DESCRIPTION);
 MODULE_LICENSE("GPL");
 
-static int immediate_undock = 1;
+static bool immediate_undock = 1;
 module_param(immediate_undock, bool, 0644);
 MODULE_PARM_DESC(immediate_undock, "1 (default) will cause the driver to "
 	"undock immediately when the undock button is pressed, 0 will cause"
diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c
index 07f7fea8a4e2..e50e31a518af 100644
--- a/drivers/acpi/pci_slot.c
+++ b/drivers/acpi/pci_slot.c
@@ -34,7 +34,7 @@
 #include <acpi/acpi_drivers.h>
 #include <linux/dmi.h>
 
-static int debug;
+static bool debug;
 static int check_sta_before_sun;
 
 #define DRIVER_VERSION 	"0.1"
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 08a44b532f7c..eaef02afc7cf 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -69,21 +69,21 @@ MODULE_AUTHOR("Bruno Ducrot");
 MODULE_DESCRIPTION("ACPI Video Driver");
 MODULE_LICENSE("GPL");
 
-static int brightness_switch_enabled = 1;
+static bool brightness_switch_enabled = 1;
 module_param(brightness_switch_enabled, bool, 0644);
 
 /*
  * By default, we don't allow duplicate ACPI video bus devices
  * under the same VGA controller
  */
-static int allow_duplicates;
+static bool allow_duplicates;
 module_param(allow_duplicates, bool, 0644);
 
 /*
  * Some BIOSes claim they use minimum backlight at boot,
  * and this may bring dimming screen after boot
  */
-static int use_bios_initial_backlight = 1;
+static bool use_bios_initial_backlight = 1;
 module_param(use_bios_initial_backlight, bool, 0644);
 
 static int register_count = 0;
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index e0bc9646a38e..55d6179dde58 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -599,9 +599,9 @@ MODULE_LICENSE("GPL");
 MODULE_DEVICE_TABLE(pci, nv_pci_tbl);
 MODULE_VERSION(DRV_VERSION);
 
-static int adma_enabled;
-static int swncq_enabled = 1;
-static int msi_enabled;
+static bool adma_enabled;
+static bool swncq_enabled = 1;
+static bool msi_enabled;
 
 static void nv_adma_register_mode(struct ata_port *ap)
 {
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index 1e9140626a83..e7e610aa9a7a 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -417,7 +417,7 @@ static struct ata_port_operations sil24_ops = {
 #endif
 };
 
-static int sata_sil24_msi;    /* Disable MSI */
+static bool sata_sil24_msi;    /* Disable MSI */
 module_param_named(msi, sata_sil24_msi, bool, S_IRUGO);
 MODULE_PARM_DESC(msi, "Enable MSI (Default: false)");
 
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 9a51df4f5b74..b182c2f7d777 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -112,12 +112,12 @@ static u8 read_prom_byte(struct he_dev *he_dev, int addr);
 /* globals */
 
 static struct he_dev *he_devs;
-static int disable64;
+static bool disable64;
 static short nvpibits = -1;
 static short nvcibits = -1;
 static short rx_skb_reserve = 16;
-static int irq_coalesce = 1;
-static int sdh = 0;
+static bool irq_coalesce = 1;
+static bool sdh = 0;
 
 /* Read from EEPROM = 0000 0011b */
 static unsigned int readtab[] = {
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 9cf20355ceec..8d680562ba73 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -59,8 +59,8 @@
 
 /* module parameter, defined in drbd_main.c */
 extern unsigned int minor_count;
-extern int disable_sendpage;
-extern int allow_oos;
+extern bool disable_sendpage;
+extern bool allow_oos;
 extern unsigned int cn_idx;
 
 #ifdef CONFIG_DRBD_FAULT_INJECTION
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 0358e55356c8..211fc44f84be 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -117,8 +117,8 @@ module_param(fault_devs, int, 0644);
 
 /* module parameter, defined */
 unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
-int disable_sendpage;
-int allow_oos;
+bool disable_sendpage;
+bool allow_oos;
 unsigned int cn_idx = CN_IDX_DRBD;
 int proc_details;       /* Detail level in proc drbd*/
 
diff --git a/drivers/block/paride/bpck6.c b/drivers/block/paride/bpck6.c
index ad124525ac23..ec64e7f5d1ce 100644
--- a/drivers/block/paride/bpck6.c
+++ b/drivers/block/paride/bpck6.c
@@ -20,9 +20,6 @@
 */
 
 
-/* PARAMETERS */
-static int verbose; /* set this to 1 to see debugging messages and whatnot */
-
 #define BACKPACK_VERSION "2.0.2"
 
 #include <linux/module.h>
@@ -36,6 +33,8 @@ static int verbose; /* set this to 1 to see debugging messages and whatnot */
 #include "ppc6lnx.c"
 #include "paride.h"
 
+/* PARAMETERS */
+static bool verbose; /* set this to 1 to see debugging messages and whatnot */
  
 
 #define PPCSTRUCT(pi) ((Interface *)(pi->private))
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 869e7676d46f..831e3ac156e6 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -124,8 +124,9 @@
    by default.
 
 */
+#include <linux/types.h>
 
-static int verbose = 0;
+static bool verbose = 0;
 static int major = PD_MAJOR;
 static char *name = PD_NAME;
 static int cluster = 64;
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index f21b520ef419..ec8f9ed6326e 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -118,13 +118,15 @@
 #define PF_NAME		"pf"
 #define PF_UNITS	4
 
+#include <linux/types.h>
+
 /* Here are things one can override from the insmod command.
    Most are autoprobed by paride unless set here.  Verbose is off
    by default.
 
 */
 
-static int verbose = 0;
+static bool verbose = 0;
 static int major = PF_MAJOR;
 static char *name = PF_NAME;
 static int cluster = 64;
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index a79fb4f7ff62..4a27b1de5fcb 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -130,13 +130,14 @@
 #define PI_PG	4
 #endif
 
+#include <linux/types.h>
 /* Here are things one can override from the insmod command.
    Most are autoprobed by paride unless set here.  Verbose is 0
    by default.
 
 */
 
-static int verbose = 0;
+static bool verbose = 0;
 static int major = PG_MAJOR;
 static char *name = PG_NAME;
 static int disable = 0;
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index 7179f79d7468..2596042eb987 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -109,13 +109,15 @@
 #define PT_NAME		"pt"
 #define PT_UNITS	4
 
+#include <linux/types.h>
+
 /* Here are things one can override from the insmod command.
    Most are autoprobed by paride unless set here.  Verbose is on
    by default.
 
 */
 
-static int verbose = 0;
+static bool verbose = 0;
 static int major = PT_MAJOR;
 static char *name = PT_NAME;
 static int disable = 0;
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 4abd2bcd20fb..51a972704db5 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -148,7 +148,7 @@ static volatile int xdc_busy;
 static struct timer_list xd_watchdog_int;
 
 static volatile u_char xd_error;
-static int nodma = XD_DONT_USE_DMA;
+static bool nodma = XD_DONT_USE_DMA;
 
 static struct request_queue *xd_queue;
 
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 55ac349695c4..f00f596c1029 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -37,13 +37,13 @@
 
 #define VERSION "0.6"
 
-static int ignore_dga;
-static int ignore_csr;
-static int ignore_sniffer;
-static int disable_scofix;
-static int force_scofix;
+static bool ignore_dga;
+static bool ignore_csr;
+static bool ignore_sniffer;
+static bool disable_scofix;
+static bool force_scofix;
 
-static int reset = 1;
+static bool reset = 1;
 
 static struct usb_driver btusb_driver;
 
diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c
index 9c5b2dc38e29..a767d4de45a4 100644
--- a/drivers/bluetooth/hci_bcsp.c
+++ b/drivers/bluetooth/hci_bcsp.c
@@ -49,8 +49,8 @@
 
 #define VERSION "0.3"
 
-static int txcrc = 1;
-static int hciextn = 1;
+static bool txcrc = 1;
+static bool hciextn = 1;
 
 #define BCSP_TXWINSIZE	4
 
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 48ad2a7ab080..07114489994f 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -48,7 +48,7 @@
 
 #define VERSION "2.2"
 
-static int reset = 0;
+static bool reset = 0;
 
 static struct hci_uart_proto *hup[HCI_UART_MAX_PROTO];
 
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 2118211aff99..1bbf7645a97c 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -285,17 +285,17 @@
 #include <asm/uaccess.h>
 
 /* used to tell the module to turn on full debugging messages */
-static int debug;
+static bool debug;
 /* used to keep tray locked at all times */
 static int keeplocked;
 /* default compatibility mode */
-static int autoclose=1;
-static int autoeject;
-static int lockdoor = 1;
+static bool autoclose=1;
+static bool autoeject;
+static bool lockdoor = 1;
 /* will we ever get to use this... sigh. */
-static int check_media_type;
+static bool check_media_type;
 /* automatically restart mrw format */
-static int mrw_format_restart = 1;
+static bool mrw_format_restart = 1;
 module_param(debug, bool, 0);
 module_param(autoclose, bool, 0);
 module_param(autoeject, bool, 0);
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 780498d76581..444f8b6ab411 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -33,7 +33,7 @@
 #define ULI_X86_64_ENU_SCR_REG		0x54
 
 static struct resource *aperture_resource;
-static int __initdata agp_try_unsupported = 1;
+static bool __initdata agp_try_unsupported = 1;
 static int agp_bridges_found;
 
 static void amd64_tlbflush(struct agp_memory *temp)
diff --git a/drivers/char/agp/sis-agp.c b/drivers/char/agp/sis-agp.c
index 29aacd81de78..08704ae53956 100644
--- a/drivers/char/agp/sis-agp.c
+++ b/drivers/char/agp/sis-agp.c
@@ -17,7 +17,7 @@
 #define PCI_DEVICE_ID_SI_662	0x0662
 #define PCI_DEVICE_ID_SI_671	0x0671
 
-static int __devinitdata agp_sis_force_delay = 0;
+static bool __devinitdata agp_sis_force_delay = 0;
 static int __devinitdata agp_sis_agp_spec = -1;
 
 static int sis_fetch_size(void)
diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c
index 6e40072fbf67..40cc0cf2ded6 100644
--- a/drivers/char/i8k.c
+++ b/drivers/char/i8k.c
@@ -69,19 +69,19 @@ MODULE_AUTHOR("Massimo Dal Zotto (dz@debian.org)");
 MODULE_DESCRIPTION("Driver for accessing SMM BIOS on Dell laptops");
 MODULE_LICENSE("GPL");
 
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Force loading without checking for supported models");
 
-static int ignore_dmi;
+static bool ignore_dmi;
 module_param(ignore_dmi, bool, 0);
 MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match");
 
-static int restricted;
+static bool restricted;
 module_param(restricted, bool, 0);
 MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set");
 
-static int power_status;
+static bool power_status;
 module_param(power_status, bool, 0600);
 MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k");
 
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 9397ab49b72e..50fcf9c04569 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1227,7 +1227,7 @@ static int smi_num; /* Used to sequence the SMIs */
 #define DEFAULT_REGSPACING	1
 #define DEFAULT_REGSIZE		1
 
-static int           si_trydefaults = 1;
+static bool          si_trydefaults = 1;
 static char          *si_type[SI_MAX_PARMS];
 #define MAX_SI_TYPE_STR 30
 static char          si_type_str[MAX_SI_TYPE_STR];
diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index 97c3edb95ae7..f43485607063 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -829,7 +829,7 @@ static struct console lpcons = {
 
 static int parport_nr[LP_NO] = { [0 ... LP_NO-1] = LP_PARPORT_UNSPEC };
 static char *parport[LP_NO];
-static int reset;
+static bool reset;
 
 module_param_array(parport, charp, NULL, 0);
 module_param(reset, bool, 0);
diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c
index a12f52400dbc..bf586ae1ee83 100644
--- a/drivers/char/nwflash.c
+++ b/drivers/char/nwflash.c
@@ -51,7 +51,7 @@ static int write_block(unsigned long p, const char __user *buf, int count);
 #define KFLASH_ID	0x89A6		//Intel flash
 #define KFLASH_ID4	0xB0D4		//Intel flash 4Meg
 
-static int flashdebug;		//if set - we will display progress msgs
+static bool flashdebug;		//if set - we will display progress msgs
 
 static int gbWriteEnable;
 static int gbWriteBase64Enable;
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 15781396af25..07f6a5abe372 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -439,7 +439,7 @@ static int mgslpc_device_count = 0;
  * .text section address and breakpoint on module load.
  * This is useful for use with gdb and add-symbol-file command.
  */
-static int break_on_load=0;
+static bool break_on_load=0;
 
 /*
  * Driver major number, defaults to zero to get auto
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 85da8740586b..732215b805c1 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -387,7 +387,7 @@ static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
 static struct fasync_struct *fasync;
 
 #if 0
-static int debug;
+static bool debug;
 module_param(debug, bool, 0644);
 #define DEBUG_ENT(fmt, arg...) do { \
 	if (debug) \
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 10cc44ceb5d1..a1748621111b 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -255,7 +255,7 @@ out:
 	return size;
 }
 
-static int itpm;
+static bool itpm;
 module_param(itpm, bool, 0444);
 MODULE_PARM_DESC(itpm, "Force iTPM workarounds (found on some Lenovo laptops)");
 
@@ -500,7 +500,7 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int interrupts = 1;
+static bool interrupts = 1;
 module_param(interrupts, bool, 0444);
 MODULE_PARM_DESC(interrupts, "Enable interrupts");
 
@@ -828,7 +828,7 @@ static struct platform_driver tis_drv = {
 
 static struct platform_device *pdev;
 
-static int force;
+static bool force;
 module_param(force, bool, 0444);
 MODULE_PARM_DESC(force, "Force device probe rather than using ACPI entry");
 static int __init init_tis(void)
diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c
index b153674431f1..e294e1b3616c 100644
--- a/drivers/edac/r82600_edac.c
+++ b/drivers/edac/r82600_edac.c
@@ -131,7 +131,7 @@ struct r82600_error_info {
 	u32 eapr;
 };
 
-static unsigned int disable_hardware_scrub;
+static bool disable_hardware_scrub;
 
 static struct edac_pci_ctl_info *r82600_pci;
 
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index 68375bc3aef6..80e95aa3bf14 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -66,7 +66,7 @@
  *
  * Concurrent logins are useful together with cluster filesystems.
  */
-static int sbp2_param_exclusive_login = 1;
+static bool sbp2_param_exclusive_login = 1;
 module_param_named(exclusive_login, sbp2_param_exclusive_login, bool, 0644);
 MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device "
 		 "(default = Y, use N for concurrent initiators)");
diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c
index f779009104eb..b71b77ab0dc7 100644
--- a/drivers/hid/hid-prodikeys.c
+++ b/drivers/hid/hid-prodikeys.c
@@ -90,7 +90,7 @@ static const char longname[] = "Prodikeys PC-MIDI Keyboard";
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
-static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
+static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
 
 module_param_array(index, int, NULL, 0444);
 module_param_array(id, charp, NULL, 0444);
diff --git a/drivers/hwmon/abituguru.c b/drivers/hwmon/abituguru.c
index 65a35cf5b3c5..3b728e8f169b 100644
--- a/drivers/hwmon/abituguru.c
+++ b/drivers/hwmon/abituguru.c
@@ -145,7 +145,7 @@ static const u8 abituguru_pwm_max[5] = { 0, 255, 255, 75, 75 };
 
 
 /* Insmod parameters */
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Set to one to force detection.");
 static int bank1_types[ABIT_UGURU_MAX_BANK1_SENSORS] = { -1, -1, -1, -1, -1,
diff --git a/drivers/hwmon/abituguru3.c b/drivers/hwmon/abituguru3.c
index d30855a75786..34a14a77e008 100644
--- a/drivers/hwmon/abituguru3.c
+++ b/drivers/hwmon/abituguru3.c
@@ -603,11 +603,11 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = {
 
 
 /* Insmod parameters */
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Set to one to force detection.");
 /* Default verbose is 1, since this driver is still in the testing phase */
-static int verbose = 1;
+static bool verbose = 1;
 module_param(verbose, bool, 0644);
 MODULE_PARM_DESC(verbose, "Enable/disable verbose error reporting");
 
diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c
index 522860ab6ce8..554f046bcf20 100644
--- a/drivers/hwmon/acpi_power_meter.c
+++ b/drivers/hwmon/acpi_power_meter.c
@@ -58,7 +58,7 @@ ACPI_MODULE_NAME(ACPI_POWER_METER_NAME);
 #define POWER_ALARM_NAME	"power1_alarm"
 
 static int cap_in_hardware;
-static int force_cap_on;
+static bool force_cap_on;
 
 static int can_cap_in_hardware(void)
 {
diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c
index 1ad0a885c5a5..0158cc35cb2e 100644
--- a/drivers/hwmon/adm1021.c
+++ b/drivers/hwmon/adm1021.c
@@ -103,7 +103,7 @@ static int adm1021_remove(struct i2c_client *client);
 static struct adm1021_data *adm1021_update_device(struct device *dev);
 
 /* (amalysh) read only mode, otherwise any limit's writing confuse BIOS */
-static int read_only;
+static bool read_only;
 
 
 static const struct i2c_device_id adm1021_id[] = {
diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c
index cfcc3b6fb6bf..ed60242d6a0a 100644
--- a/drivers/hwmon/ads7828.c
+++ b/drivers/hwmon/ads7828.c
@@ -48,8 +48,8 @@ static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b,
 	I2C_CLIENT_END };
 
 /* Module parameters */
-static int se_input = 1; /* Default is SE, 0 == diff */
-static int int_vref = 1; /* Default is internal ref ON */
+static bool se_input = 1; /* Default is SE, 0 == diff */
+static bool int_vref = 1; /* Default is internal ref ON */
 static int vref_mv = ADS7828_INT_VREF_MV; /* set if vref != 2.5V */
 module_param(se_input, bool, S_IRUGO);
 module_param(int_vref, bool, S_IRUGO);
diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c
index d9803958e49f..ffb229af7861 100644
--- a/drivers/hwmon/dme1737.c
+++ b/drivers/hwmon/dme1737.c
@@ -45,7 +45,7 @@
 static struct platform_device *pdev;
 
 /* Module load parameters */
-static int force_start;
+static bool force_start;
 module_param(force_start, bool, 0);
 MODULE_PARM_DESC(force_start, "Force the chip to start monitoring inputs");
 
@@ -53,7 +53,7 @@ static unsigned short force_id;
 module_param(force_id, ushort, 0);
 MODULE_PARM_DESC(force_id, "Override the detected device ID");
 
-static int probe_all_addr;
+static bool probe_all_addr;
 module_param(probe_all_addr, bool, 0);
 MODULE_PARM_DESC(probe_all_addr, "Include probing of non-standard LPC "
 		 "addresses");
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index 38c0b87676de..603ef2af2707 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -146,10 +146,10 @@ static inline void superio_exit(void)
 #define IT87_SIO_BEEP_PIN_REG	0xf6	/* Beep pin mapping */
 
 /* Update battery voltage after every reading if true */
-static int update_vbat;
+static bool update_vbat;
 
 /* Not all BIOSes properly configure the PWM registers */
-static int fix_pwm_polarity;
+static bool fix_pwm_polarity;
 
 /* Many IT87 constants specified below */
 
diff --git a/drivers/hwmon/lm93.c b/drivers/hwmon/lm93.c
index 3b43df418613..8bd6c5c9e05b 100644
--- a/drivers/hwmon/lm93.c
+++ b/drivers/hwmon/lm93.c
@@ -151,12 +151,12 @@ static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
 
 /* Insmod parameters */
 
-static int disable_block;
+static bool disable_block;
 module_param(disable_block, bool, 0);
 MODULE_PARM_DESC(disable_block,
 	"Set to non-zero to disable SMBus block data transactions.");
 
-static int init;
+static bool init;
 module_param(init, bool, 0);
 MODULE_PARM_DESC(init, "Set to non-zero to force chip initialization.");
 
diff --git a/drivers/hwmon/max1668.c b/drivers/hwmon/max1668.c
index 6914195cfd35..88953f99e914 100644
--- a/drivers/hwmon/max1668.c
+++ b/drivers/hwmon/max1668.c
@@ -59,7 +59,7 @@ static unsigned short max1668_addr_list[] = {
 #define DEV_ID_MAX1989		0xb
 
 /* read only mode module parameter */
-static int read_only;
+static bool read_only;
 module_param(read_only, bool, 0);
 MODULE_PARM_DESC(read_only, "Don't set any values, read only mode");
 
diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c
index bde50e34d013..374118f2b9f9 100644
--- a/drivers/hwmon/w83627hf.c
+++ b/drivers/hwmon/w83627hf.c
@@ -71,7 +71,7 @@ module_param(force_i2c, byte, 0);
 MODULE_PARM_DESC(force_i2c,
 		 "Initialize the i2c address of the sensors");
 
-static int init = 1;
+static bool init = 1;
 module_param(init, bool, 0);
 MODULE_PARM_DESC(init, "Set to zero to bypass chip initialization");
 
diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c
index 65b685e2c7b7..17a8fa2d9ae9 100644
--- a/drivers/hwmon/w83781d.c
+++ b/drivers/hwmon/w83781d.c
@@ -67,11 +67,11 @@ module_param_array(force_subclients, short, NULL, 0);
 MODULE_PARM_DESC(force_subclients, "List of subclient addresses: "
 		    "{bus, clientaddr, subclientaddr1, subclientaddr2}");
 
-static int reset;
+static bool reset;
 module_param(reset, bool, 0);
 MODULE_PARM_DESC(reset, "Set to one to reset chip on load");
 
-static int init = 1;
+static bool init = 1;
 module_param(init, bool, 0);
 MODULE_PARM_DESC(init, "Set to zero to bypass chip initialization");
 
diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c
index 6e5d0ae594b0..35aa5149307a 100644
--- a/drivers/hwmon/w83791d.c
+++ b/drivers/hwmon/w83791d.c
@@ -58,11 +58,11 @@ module_param_array(force_subclients, short, NULL, 0);
 MODULE_PARM_DESC(force_subclients, "List of subclient addresses: "
 			"{bus, clientaddr, subclientaddr1, subclientaddr2}");
 
-static int reset;
+static bool reset;
 module_param(reset, bool, 0);
 MODULE_PARM_DESC(reset, "Set to one to force a hardware chip reset");
 
-static int init;
+static bool init;
 module_param(init, bool, 0);
 MODULE_PARM_DESC(init, "Set to one to force extra software initialization");
 
diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c
index 9ded133e43f0..d3100eab6b2f 100644
--- a/drivers/hwmon/w83792d.c
+++ b/drivers/hwmon/w83792d.c
@@ -56,7 +56,7 @@ module_param_array(force_subclients, short, NULL, 0);
 MODULE_PARM_DESC(force_subclients, "List of subclient addresses: "
 			"{bus, clientaddr, subclientaddr1, subclientaddr2}");
 
-static int init;
+static bool init;
 module_param(init, bool, 0);
 MODULE_PARM_DESC(init, "Set to one to force chip initialization");
 
diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c
index 3cc6fef22087..45ec7e7c3c27 100644
--- a/drivers/hwmon/w83793.c
+++ b/drivers/hwmon/w83793.c
@@ -61,7 +61,7 @@ module_param_array(force_subclients, short, NULL, 0);
 MODULE_PARM_DESC(force_subclients, "List of subclient addresses: "
 		       "{bus, clientaddr, subclientaddr1, subclientaddr2}");
 
-static int reset;
+static bool reset;
 module_param(reset, bool, 0);
 MODULE_PARM_DESC(reset, "Set to 1 to reset chip, not recommended");
 
diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c
index 3ee398d0e4c9..aa58b25565bc 100644
--- a/drivers/hwmon/w83795.c
+++ b/drivers/hwmon/w83795.c
@@ -42,7 +42,7 @@ static const unsigned short normal_i2c[] = {
 };
 
 
-static int reset;
+static bool reset;
 module_param(reset, bool, 0);
 MODULE_PARM_DESC(reset, "Set to 1 to reset chip, not recommended");
 
diff --git a/drivers/hwmon/w83l786ng.c b/drivers/hwmon/w83l786ng.c
index 0254e181893d..063bd9508d8a 100644
--- a/drivers/hwmon/w83l786ng.c
+++ b/drivers/hwmon/w83l786ng.c
@@ -39,7 +39,7 @@ static const unsigned short normal_i2c[] = { 0x2e, 0x2f, I2C_CLIENT_END };
 
 /* Insmod parameters */
 
-static int reset;
+static bool reset;
 module_param(reset, bool, 0);
 MODULE_PARM_DESC(reset, "Set to 1 to reset chip, not recommended");
 
diff --git a/drivers/i2c/busses/i2c-highlander.c b/drivers/i2c/busses/i2c-highlander.c
index 63bb1cc2a042..fa88868cb556 100644
--- a/drivers/i2c/busses/i2c-highlander.c
+++ b/drivers/i2c/busses/i2c-highlander.c
@@ -52,7 +52,7 @@ struct highlander_i2c_dev {
 	size_t			buf_len;
 };
 
-static int iic_force_poll, iic_force_normal;
+static bool iic_force_poll, iic_force_normal;
 static int iic_timeout = 1000, iic_read_delay;
 
 static inline void highlander_i2c_irq_enable(struct highlander_i2c_dev *dev)
diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c
index 3c110fbc409b..c08ceb957aa7 100644
--- a/drivers/i2c/busses/i2c-ibm_iic.c
+++ b/drivers/i2c/busses/i2c-ibm_iic.c
@@ -51,11 +51,11 @@
 MODULE_DESCRIPTION("IBM IIC driver v" DRIVER_VERSION);
 MODULE_LICENSE("GPL");
 
-static int iic_force_poll;
+static bool iic_force_poll;
 module_param(iic_force_poll, bool, 0);
 MODULE_PARM_DESC(iic_force_poll, "Force polling mode");
 
-static int iic_force_fast;
+static bool iic_force_fast;
 module_param(iic_force_fast, bool, 0);
 MODULE_PARM_DESC(iic_force_fast, "Force fast mode (400 kHz)");
 
diff --git a/drivers/i2c/busses/i2c-sis630.c b/drivers/i2c/busses/i2c-sis630.c
index e6f539e26f65..58893772c3d8 100644
--- a/drivers/i2c/busses/i2c-sis630.c
+++ b/drivers/i2c/busses/i2c-sis630.c
@@ -93,8 +93,8 @@
 static struct pci_driver sis630_driver;
 
 /* insmod parameters */
-static int high_clock;
-static int force;
+static bool high_clock;
+static bool force;
 module_param(high_clock, bool, 0);
 MODULE_PARM_DESC(high_clock, "Set Host Master Clock to 56KHz (default 14KHz).");
 module_param(force, bool, 0);
diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c
index 0b012f1f8ac5..2a62c998044a 100644
--- a/drivers/i2c/busses/i2c-viapro.c
+++ b/drivers/i2c/busses/i2c-viapro.c
@@ -91,7 +91,7 @@ static unsigned short SMBHSTCFG = 0xD2;
 
 /* If force is set to anything different from 0, we forcibly enable the
    VT596. DANGEROUS! */
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Forcibly enable the SMBus. DANGEROUS!");
 
diff --git a/drivers/ide/ali14xx.c b/drivers/ide/ali14xx.c
index 25b9fe3a9f8e..d3be99fb4154 100644
--- a/drivers/ide/ali14xx.c
+++ b/drivers/ide/ali14xx.c
@@ -221,7 +221,7 @@ static int __init ali14xx_probe(void)
 	return ide_legacy_device_add(&ali14xx_port_info, 0);
 }
 
-static int probe_ali14xx;
+static bool probe_ali14xx;
 
 module_param_named(probe, probe_ali14xx, bool, 0);
 MODULE_PARM_DESC(probe, "probe for ALI M14xx chipsets");
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
index a81bd7575792..14717304b388 100644
--- a/drivers/ide/cmd640.c
+++ b/drivers/ide/cmd640.c
@@ -111,7 +111,7 @@
 
 #define DRV_NAME "cmd640"
 
-static int cmd640_vlb;
+static bool cmd640_vlb;
 
 /*
  * CMD640 specific registers definition.
diff --git a/drivers/ide/dtc2278.c b/drivers/ide/dtc2278.c
index 6929f7fce93a..46af4743b3e6 100644
--- a/drivers/ide/dtc2278.c
+++ b/drivers/ide/dtc2278.c
@@ -130,7 +130,7 @@ static int __init dtc2278_probe(void)
 	return ide_legacy_device_add(&dtc2278_port_info, 0);
 }
 
-static int probe_dtc2278;
+static bool probe_dtc2278;
 
 module_param_named(probe, probe_dtc2278, bool, 0);
 MODULE_PARM_DESC(probe, "probe for DTC2278xx chipsets");
diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c
index 3feaa26410be..51beb85250d4 100644
--- a/drivers/ide/gayle.c
+++ b/drivers/ide/gayle.c
@@ -50,7 +50,7 @@
 					       GAYLE_NUM_HWIFS-1)
 #define GAYLE_HAS_CONTROL_REG	(!ide_doubler)
 
-static int ide_doubler;
+static bool ide_doubler;
 module_param_named(doubler, ide_doubler, bool, 0);
 MODULE_PARM_DESC(doubler, "enable support for IDE doublers");
 
diff --git a/drivers/ide/ht6560b.c b/drivers/ide/ht6560b.c
index 808bcdcbf8e1..986f2513eab4 100644
--- a/drivers/ide/ht6560b.c
+++ b/drivers/ide/ht6560b.c
@@ -317,7 +317,7 @@ static void __init ht6560b_init_dev(ide_drive_t *drive)
 	ide_set_drivedata(drive, (void *)t);
 }
 
-static int probe_ht6560b;
+static bool probe_ht6560b;
 
 module_param_named(probe, probe_ht6560b, bool, 0);
 MODULE_PARM_DESC(probe, "probe for HT6560B chipset");
diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c
index 979d342c338a..547d7cf2e016 100644
--- a/drivers/ide/ide-4drives.c
+++ b/drivers/ide/ide-4drives.c
@@ -6,7 +6,7 @@
 
 #define DRV_NAME "ide-4drives"
 
-static int probe_4drives;
+static bool probe_4drives;
 
 module_param_named(probe, probe_4drives, bool, 0);
 MODULE_PARM_DESC(probe, "probe for generic IDE chipset with 4 drives/port");
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index f22edc66b030..f1a6796b165c 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -53,15 +53,15 @@ struct ide_acpi_hwif_link {
 #define DEBPRINT(fmt, args...)	do {} while (0)
 #endif	/* DEBUGGING */
 
-static int ide_noacpi;
+static bool ide_noacpi;
 module_param_named(noacpi, ide_noacpi, bool, 0);
 MODULE_PARM_DESC(noacpi, "disable IDE ACPI support");
 
-static int ide_acpigtf;
+static bool ide_acpigtf;
 module_param_named(acpigtf, ide_acpigtf, bool, 0);
 MODULE_PARM_DESC(acpigtf, "enable IDE ACPI _GTF support");
 
-static int ide_acpionboot;
+static bool ide_acpionboot;
 module_param_named(acpionboot, ide_acpionboot, bool, 0);
 MODULE_PARM_DESC(acpionboot, "call IDE ACPI methods on boot");
 
diff --git a/drivers/ide/ide-pci-generic.c b/drivers/ide/ide-pci-generic.c
index a743e68a8903..7f56b738d762 100644
--- a/drivers/ide/ide-pci-generic.c
+++ b/drivers/ide/ide-pci-generic.c
@@ -28,7 +28,7 @@
 
 #define DRV_NAME "ide_pci_generic"
 
-static int ide_generic_all;		/* Set to claim all devices */
+static bool ide_generic_all;		/* Set to claim all devices */
 
 module_param_named(all_generic_ide, ide_generic_all, bool, 0444);
 MODULE_PARM_DESC(all_generic_ide, "IDE generic will claim all unknown PCI IDE storage controllers.");
diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c
index 3f0244fd8e62..8bbfe5557c7b 100644
--- a/drivers/ide/qd65xx.c
+++ b/drivers/ide/qd65xx.c
@@ -417,7 +417,7 @@ static int __init qd_probe(int base)
 	return rc;
 }
 
-static int probe_qd65xx;
+static bool probe_qd65xx;
 
 module_param_named(probe, probe_qd65xx, bool, 0);
 MODULE_PARM_DESC(probe, "probe for QD65xx chipsets");
diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c
index 47adcd09cb26..5cfb78120669 100644
--- a/drivers/ide/umc8672.c
+++ b/drivers/ide/umc8672.c
@@ -160,7 +160,7 @@ static int __init umc8672_probe(void)
 	return ide_legacy_device_add(&umc8672_port_info, 0);
 }
 
-static int probe_umc8672;
+static bool probe_umc8672;
 
 module_param_named(probe, probe_umc8672, bool, 0);
 MODULE_PARM_DESC(probe, "probe for UMC8672 chipset");
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index aaf6023a4835..f08f6eaf3fa8 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -379,8 +379,8 @@ extern spinlock_t shca_list_lock;
 
 extern int ehca_static_rate;
 extern int ehca_port_act_time;
-extern int ehca_use_hp_mr;
-extern int ehca_scaling_code;
+extern bool ehca_use_hp_mr;
+extern bool ehca_scaling_code;
 extern int ehca_lock_hcalls;
 extern int ehca_nr_ports;
 extern int ehca_max_cq;
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 8af8d4f7bdb1..832e7a7d0aee 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -59,16 +59,16 @@ MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
 MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
 MODULE_VERSION(HCAD_VERSION);
 
-static int ehca_open_aqp1     = 0;
+static bool ehca_open_aqp1    = 0;
 static int ehca_hw_level      = 0;
-static int ehca_poll_all_eqs  = 1;
+static bool ehca_poll_all_eqs = 1;
 
 int ehca_debug_level   = 0;
 int ehca_nr_ports      = -1;
-int ehca_use_hp_mr     = 0;
+bool ehca_use_hp_mr    = 0;
 int ehca_port_act_time = 30;
 int ehca_static_rate   = -1;
-int ehca_scaling_code  = 0;
+bool ehca_scaling_code = 0;
 int ehca_lock_hcalls   = -1;
 int ehca_max_cq        = -1;
 int ehca_max_qp        = -1;
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 5965b3df8f2f..7013da5e9eda 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -96,7 +96,7 @@ unsigned int wqm_quanta = 0x10000;
 module_param(wqm_quanta, int, 0644);
 MODULE_PARM_DESC(wqm_quanta, "WQM quanta");
 
-static unsigned int limit_maxrdreqsz;
+static bool limit_maxrdreqsz;
 module_param(limit_maxrdreqsz, bool, 0644);
 MODULE_PARM_DESC(limit_maxrdreqsz, "Limit max read request size to 256 Bytes");
 
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 32bbd4c77b7c..fd7a0d5bc94d 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -98,15 +98,15 @@
 #define XTYPE_XBOX360W    2
 #define XTYPE_UNKNOWN     3
 
-static int dpad_to_buttons;
+static bool dpad_to_buttons;
 module_param(dpad_to_buttons, bool, S_IRUGO);
 MODULE_PARM_DESC(dpad_to_buttons, "Map D-PAD to buttons rather than axes for unknown pads");
 
-static int triggers_to_buttons;
+static bool triggers_to_buttons;
 module_param(triggers_to_buttons, bool, S_IRUGO);
 MODULE_PARM_DESC(triggers_to_buttons, "Map triggers to buttons rather than axes for unknown pads");
 
-static int sticks_to_null;
+static bool sticks_to_null;
 module_param(sticks_to_null, bool, S_IRUGO);
 MODULE_PARM_DESC(sticks_to_null, "Do not map sticks at all for unknown pads");
 
diff --git a/drivers/input/misc/wistron_btns.c b/drivers/input/misc/wistron_btns.c
index 52b419348983..e2bdfd4bea70 100644
--- a/drivers/input/misc/wistron_btns.c
+++ b/drivers/input/misc/wistron_btns.c
@@ -48,7 +48,7 @@ MODULE_DESCRIPTION("Wistron laptop button driver");
 MODULE_LICENSE("GPL v2");
 MODULE_VERSION("0.3");
 
-static int force; /* = 0; */
+static bool force; /* = 0; */
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Load even if computer is not in database");
 
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index de7e8bc17b1f..e6c9931f02c7 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -60,7 +60,7 @@ static unsigned int psmouse_rate = 100;
 module_param_named(rate, psmouse_rate, uint, 0644);
 MODULE_PARM_DESC(rate, "Report rate, in reports per second.");
 
-static unsigned int psmouse_smartscroll = 1;
+static bool psmouse_smartscroll = 1;
 module_param_named(smartscroll, psmouse_smartscroll, bool, 0644);
 MODULE_PARM_DESC(smartscroll, "Logitech Smartscroll autorepeat, 1 = enabled (default), 0 = disabled.");
 
diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c
index 4b755cb5b38c..1c58aafa523f 100644
--- a/drivers/input/mouse/synaptics_i2c.c
+++ b/drivers/input/mouse/synaptics_i2c.c
@@ -185,17 +185,17 @@
 #define NO_DATA_SLEEP_MSECS	(MSEC_PER_SEC / 4)
 
 /* Control touchpad's No Deceleration option */
-static int no_decel = 1;
+static bool no_decel = 1;
 module_param(no_decel, bool, 0644);
 MODULE_PARM_DESC(no_decel, "No Deceleration. Default = 1 (on)");
 
 /* Control touchpad's Reduced Reporting option */
-static int reduce_report;
+static bool reduce_report;
 module_param(reduce_report, bool, 0644);
 MODULE_PARM_DESC(reduce_report, "Reduced Reporting. Default = 0 (off)");
 
 /* Control touchpad's No Filter option */
-static int no_filter;
+static bool no_filter;
 module_param(no_filter, bool, 0644);
 MODULE_PARM_DESC(no_filter, "No Filter. Default = 0 (off)");
 
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c
index 979c443bf1ef..be3316073ae7 100644
--- a/drivers/input/serio/hp_sdc.c
+++ b/drivers/input/serio/hp_sdc.c
@@ -105,7 +105,7 @@ EXPORT_SYMBOL(__hp_sdc_enqueue_transaction);
 EXPORT_SYMBOL(hp_sdc_enqueue_transaction);
 EXPORT_SYMBOL(hp_sdc_dequeue_transaction);
 
-static unsigned int hp_sdc_disabled;
+static bool hp_sdc_disabled;
 module_param_named(no_hpsdc, hp_sdc_disabled, bool, 0);
 MODULE_PARM_DESC(no_hpsdc, "Do not enable HP SDC driver.");
 
diff --git a/drivers/input/touchscreen/eeti_ts.c b/drivers/input/touchscreen/eeti_ts.c
index 7f8f538a9806..1df19bb8534a 100644
--- a/drivers/input/touchscreen/eeti_ts.c
+++ b/drivers/input/touchscreen/eeti_ts.c
@@ -35,11 +35,11 @@
 #include <linux/input/eeti_ts.h>
 #include <linux/slab.h>
 
-static int flip_x;
+static bool flip_x;
 module_param(flip_x, bool, 0644);
 MODULE_PARM_DESC(flip_x, "flip x coordinate");
 
-static int flip_y;
+static bool flip_y;
 module_param(flip_y, bool, 0644);
 MODULE_PARM_DESC(flip_y, "flip y coordinate");
 
diff --git a/drivers/input/touchscreen/htcpen.c b/drivers/input/touchscreen/htcpen.c
index 81e338623944..d13143b68b3e 100644
--- a/drivers/input/touchscreen/htcpen.c
+++ b/drivers/input/touchscreen/htcpen.c
@@ -40,10 +40,10 @@ MODULE_LICENSE("GPL");
 #define X_AXIS_MAX		2040
 #define Y_AXIS_MAX		2040
 
-static int invert_x;
+static bool invert_x;
 module_param(invert_x, bool, 0644);
 MODULE_PARM_DESC(invert_x, "If set, X axis is inverted");
-static int invert_y;
+static bool invert_y;
 module_param(invert_y, bool, 0644);
 MODULE_PARM_DESC(invert_y, "If set, Y axis is inverted");
 
diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c
index d2b57536feea..46e83ad53f43 100644
--- a/drivers/input/touchscreen/ucb1400_ts.c
+++ b/drivers/input/touchscreen/ucb1400_ts.c
@@ -30,7 +30,7 @@
 
 #define UCB1400_TS_POLL_PERIOD	10 /* ms */
 
-static int adcsync;
+static bool adcsync;
 static int ts_delay = 55; /* us */
 static int ts_delay_pressure;	/* us */
 
diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c
index 06cef3ccc63a..3a5ebf452e81 100644
--- a/drivers/input/touchscreen/usbtouchscreen.c
+++ b/drivers/input/touchscreen/usbtouchscreen.c
@@ -60,11 +60,11 @@
 #define DRIVER_AUTHOR		"Daniel Ritz <daniel.ritz@gmx.ch>"
 #define DRIVER_DESC		"USB Touchscreen Driver"
 
-static int swap_xy;
+static bool swap_xy;
 module_param(swap_xy, bool, 0644);
 MODULE_PARM_DESC(swap_xy, "If set X and Y axes are swapped.");
 
-static int hwcalib_xy;
+static bool hwcalib_xy;
 module_param(hwcalib_xy, bool, 0644);
 MODULE_PARM_DESC(hwcalib_xy, "If set hw-calibrated X/Y are used if available");
 
diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c
index 9c8d7aa053c5..a0ed668d4d2a 100644
--- a/drivers/isdn/hardware/avm/b1dma.c
+++ b/drivers/isdn/hardware/avm/b1dma.c
@@ -40,7 +40,7 @@ MODULE_DESCRIPTION("CAPI4Linux: DMA support for active AVM cards");
 MODULE_AUTHOR("Carsten Paeth");
 MODULE_LICENSE("GPL");
 
-static int suppress_pollack = 0;
+static bool suppress_pollack = 0;
 module_param(suppress_pollack, bool, 0);
 
 /* ------------------------------------------------------------- */
diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index d3530f6e8115..9743b24ef9d6 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -40,7 +40,7 @@ static char *revision = "$Revision: 1.1.2.2 $";
 
 /* ------------------------------------------------------------- */
 
-static int suppress_pollack;
+static bool suppress_pollack;
 
 static struct pci_device_id c4_pci_tbl[] = {
 	{ PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21285, PCI_VENDOR_ID_AVM, PCI_DEVICE_ID_AVM_C4, 0, 0, (unsigned long)4 },
diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c
index ca710ab278ec..023de789f250 100644
--- a/drivers/isdn/sc/init.c
+++ b/drivers/isdn/sc/init.c
@@ -30,7 +30,7 @@ static const char *boardname[] = { "DataCommute/BRI", "DataCommute/PRI", "TeleCo
 static unsigned int io[] = {0,0,0,0};
 static unsigned char irq[] = {0,0,0,0};
 static unsigned long ram[] = {0,0,0,0};
-static int do_reset = 0;
+static bool do_reset = 0;
 
 module_param_array(io, int, NULL, 0);
 module_param_array(irq, int, NULL, 0);
diff --git a/drivers/leds/leds-clevo-mail.c b/drivers/leds/leds-clevo-mail.c
index a498135a4e80..1ed1677c916f 100644
--- a/drivers/leds/leds-clevo-mail.c
+++ b/drivers/leds/leds-clevo-mail.c
@@ -18,7 +18,7 @@ MODULE_AUTHOR("Márton Németh <nm127@freemail.hu>");
 MODULE_DESCRIPTION("Clevo mail LED driver");
 MODULE_LICENSE("GPL");
 
-static unsigned int __initdata nodetect;
+static bool __initdata nodetect;
 module_param_named(nodetect, nodetect, bool, 0);
 MODULE_PARM_DESC(nodetect, "Skip DMI hardware detection");
 
diff --git a/drivers/leds/leds-ss4200.c b/drivers/leds/leds-ss4200.c
index 614ebebaaa28..57371e1485ab 100644
--- a/drivers/leds/leds-ss4200.c
+++ b/drivers/leds/leds-ss4200.c
@@ -79,7 +79,7 @@ static int __init ss4200_led_dmi_callback(const struct dmi_system_id *id)
 	return 1;
 }
 
-static unsigned int __initdata nodetect;
+static bool __initdata nodetect;
 module_param_named(nodetect, nodetect, bool, 0);
 MODULE_PARM_DESC(nodetect, "Skip DMI-based hardware detection");
 
diff --git a/drivers/macintosh/ams/ams-core.c b/drivers/macintosh/ams/ams-core.c
index 399beb1638d1..5c6a2d876562 100644
--- a/drivers/macintosh/ams/ams-core.c
+++ b/drivers/macintosh/ams/ams-core.c
@@ -31,7 +31,7 @@
 /* There is only one motion sensor per machine */
 struct ams ams_info;
 
-static unsigned int verbose;
+static bool verbose;
 module_param(verbose, bool, 0644);
 MODULE_PARM_DESC(verbose, "Show free falls and shocks in kernel output");
 
diff --git a/drivers/macintosh/ams/ams-input.c b/drivers/macintosh/ams/ams-input.c
index 8a712392cd38..b27e530a87a4 100644
--- a/drivers/macintosh/ams/ams-input.c
+++ b/drivers/macintosh/ams/ams-input.c
@@ -19,11 +19,11 @@
 
 #include "ams.h"
 
-static unsigned int joystick;
+static bool joystick;
 module_param(joystick, bool, S_IRUGO);
 MODULE_PARM_DESC(joystick, "Enable the input class device on module load");
 
-static unsigned int invert;
+static bool invert;
 module_param(invert, bool, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(invert, "Invert input data on X and Y axis");
 
diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c
index 02367308ff2e..c60d025044ee 100644
--- a/drivers/macintosh/therm_adt746x.c
+++ b/drivers/macintosh/therm_adt746x.c
@@ -52,7 +52,7 @@ static const char *sensor_location[3];
 
 static int limit_adjust;
 static int fan_speed = -1;
-static int verbose;
+static bool verbose;
 
 MODULE_AUTHOR("Colin Leroy <colin@colino.net>");
 MODULE_DESCRIPTION("Driver for ADT746x thermostat in iBook G4 and "
diff --git a/drivers/media/dvb/dvb-usb/af9005.c b/drivers/media/dvb/dvb-usb/af9005.c
index bd51a764351b..4fc024d77040 100644
--- a/drivers/media/dvb/dvb-usb/af9005.c
+++ b/drivers/media/dvb/dvb-usb/af9005.c
@@ -30,7 +30,7 @@ MODULE_PARM_DESC(debug,
 		 "set debugging level (1=info,xfer=2,rc=4,reg=8,i2c=16,fw=32 (or-able))."
 		 DVB_USB_DEBUG_STATUS);
 /* enable obnoxious led */
-int dvb_usb_af9005_led = 1;
+bool dvb_usb_af9005_led = 1;
 module_param_named(led, dvb_usb_af9005_led, bool, 0644);
 MODULE_PARM_DESC(led, "enable led (default: 1).");
 
diff --git a/drivers/media/dvb/dvb-usb/af9005.h b/drivers/media/dvb/dvb-usb/af9005.h
index c71c77bd7f4b..6a2bf3de8456 100644
--- a/drivers/media/dvb/dvb-usb/af9005.h
+++ b/drivers/media/dvb/dvb-usb/af9005.h
@@ -35,7 +35,7 @@ extern int dvb_usb_af9005_debug;
 #define deb_i2c(args...)  dprintk(dvb_usb_af9005_debug,0x10,args)
 #define deb_fw(args...)   dprintk(dvb_usb_af9005_debug,0x20,args)
 
-extern int dvb_usb_af9005_led;
+extern bool dvb_usb_af9005_led;
 
 /* firmware */
 #define FW_BULKOUT_SIZE 250
diff --git a/drivers/media/radio/radio-gemtek.c b/drivers/media/radio/radio-gemtek.c
index edadc8449a3d..36ce0611c037 100644
--- a/drivers/media/radio/radio-gemtek.c
+++ b/drivers/media/radio/radio-gemtek.c
@@ -47,11 +47,11 @@ MODULE_VERSION("0.0.4");
 #endif
 
 static int io		= CONFIG_RADIO_GEMTEK_PORT;
-static int probe	= CONFIG_RADIO_GEMTEK_PROBE;
-static int hardmute;
-static int shutdown	= 1;
-static int keepmuted	= 1;
-static int initmute	= 1;
+static bool probe	= CONFIG_RADIO_GEMTEK_PROBE;
+static bool hardmute;
+static bool shutdown	= 1;
+static bool keepmuted	= 1;
+static bool initmute	= 1;
 static int radio_nr	= -1;
 
 module_param(io, int, 0444);
diff --git a/drivers/media/radio/radio-miropcm20.c b/drivers/media/radio/radio-miropcm20.c
index 3fb76e3834c9..87c1ee13b058 100644
--- a/drivers/media/radio/radio-miropcm20.c
+++ b/drivers/media/radio/radio-miropcm20.c
@@ -23,7 +23,7 @@ static int radio_nr = -1;
 module_param(radio_nr, int, 0);
 MODULE_PARM_DESC(radio_nr, "Set radio device number (/dev/radioX).  Default: -1 (autodetect)");
 
-static int mono;
+static bool mono;
 module_param(mono, bool, 0);
 MODULE_PARM_DESC(mono, "Force tuner into mono mode.");
 
diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c
index 27997a9ceb0d..ca12d3289bfe 100644
--- a/drivers/media/rc/lirc_dev.c
+++ b/drivers/media/rc/lirc_dev.c
@@ -38,7 +38,7 @@
 #include <media/lirc.h>
 #include <media/lirc_dev.h>
 
-static int debug;
+static bool debug;
 
 #define IRCTL_DEV_NAME	"BaseRemoteCtl"
 #define NOPLUG		-1
diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c
index 20bb12d6fbbe..21105bf9594d 100644
--- a/drivers/media/rc/mceusb.c
+++ b/drivers/media/rc/mceusb.c
@@ -156,9 +156,9 @@
 
 /* module parameters */
 #ifdef CONFIG_USB_DEBUG
-static int debug = 1;
+static bool debug = 1;
 #else
-static int debug;
+static bool debug;
 #endif
 
 #define mce_dbg(dev, fmt, ...)					\
diff --git a/drivers/media/rc/streamzap.c b/drivers/media/rc/streamzap.c
index b1d29d09eeae..d6f4bfe09391 100644
--- a/drivers/media/rc/streamzap.c
+++ b/drivers/media/rc/streamzap.c
@@ -43,9 +43,9 @@
 #define DRIVER_DESC	"Streamzap Remote Control driver"
 
 #ifdef CONFIG_USB_DEBUG
-static int debug = 1;
+static bool debug = 1;
 #else
-static int debug;
+static bool debug;
 #endif
 
 #define USB_STREAMZAP_VENDOR_ID		0x0e9c
diff --git a/drivers/media/rc/winbond-cir.c b/drivers/media/rc/winbond-cir.c
index e7f7a57bf684..b09c5fae489b 100644
--- a/drivers/media/rc/winbond-cir.c
+++ b/drivers/media/rc/winbond-cir.c
@@ -226,11 +226,11 @@ module_param(protocol, uint, 0444);
 MODULE_PARM_DESC(protocol, "IR protocol to use for the power-on command "
 		 "(0 = RC5, 1 = NEC, 2 = RC6A, default)");
 
-static int invert; /* default = 0 */
+static bool invert; /* default = 0 */
 module_param(invert, bool, 0444);
 MODULE_PARM_DESC(invert, "Invert the signal from the IR receiver");
 
-static int txandrx; /* default = 0 */
+static bool txandrx; /* default = 0 */
 module_param(txandrx, bool, 0444);
 MODULE_PARM_DESC(invert, "Allow simultaneous TX and RX");
 
diff --git a/drivers/media/video/c-qcam.c b/drivers/media/video/c-qcam.c
index cd8ff0473184..fda32f52554a 100644
--- a/drivers/media/video/c-qcam.c
+++ b/drivers/media/video/c-qcam.c
@@ -72,7 +72,7 @@ struct qcam {
 
 static int parport[MAX_CAMS] = { [1 ... MAX_CAMS-1] = -1 };
 static int probe = 2;
-static int force_rgb;
+static bool force_rgb;
 static int video_nr = -1;
 
 /* FIXME: parport=auto would never have worked, surely? --RR */
diff --git a/drivers/media/video/cs5345.c b/drivers/media/video/cs5345.c
index 5909f2557ab4..1d64af9adf71 100644
--- a/drivers/media/video/cs5345.c
+++ b/drivers/media/video/cs5345.c
@@ -31,7 +31,7 @@ MODULE_DESCRIPTION("i2c device driver for cs5345 Audio ADC");
 MODULE_AUTHOR("Hans Verkuil");
 MODULE_LICENSE("GPL");
 
-static int debug;
+static bool debug;
 
 module_param(debug, bool, 0644);
 
diff --git a/drivers/media/video/cs53l32a.c b/drivers/media/video/cs53l32a.c
index d93e5ab45fd3..51c5b9ad67d8 100644
--- a/drivers/media/video/cs53l32a.c
+++ b/drivers/media/video/cs53l32a.c
@@ -35,7 +35,7 @@ MODULE_DESCRIPTION("i2c device driver for cs53l32a Audio ADC");
 MODULE_AUTHOR("Martin Vaughan");
 MODULE_LICENSE("GPL");
 
-static int debug;
+static bool debug;
 
 module_param(debug, bool, 0644);
 
diff --git a/drivers/media/video/cx18/cx18-driver.c b/drivers/media/video/cx18/cx18-driver.c
index c6ff32a6137c..349bd9c2aff5 100644
--- a/drivers/media/video/cx18/cx18-driver.c
+++ b/drivers/media/video/cx18/cx18-driver.c
@@ -75,7 +75,7 @@ static int radio[CX18_MAX_CARDS] = { -1, -1, -1, -1, -1, -1, -1, -1,
 				     -1, -1, -1, -1, -1, -1, -1, -1 };
 static unsigned cardtype_c = 1;
 static unsigned tuner_c = 1;
-static unsigned radio_c = 1;
+static bool radio_c = 1;
 static char pal[] = "--";
 static char secam[] = "--";
 static char ntsc[] = "-";
diff --git a/drivers/media/video/cx25821/cx25821-alsa.c b/drivers/media/video/cx25821/cx25821-alsa.c
index 09e99de5fd21..58be4f3bb3cb 100644
--- a/drivers/media/video/cx25821/cx25821-alsa.c
+++ b/drivers/media/video/cx25821/cx25821-alsa.c
@@ -102,7 +102,7 @@ struct cx25821_audio_dev {
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;	/* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;	/* ID for this card */
-static int enable[SNDRV_CARDS] = { 1, [1 ... (SNDRV_CARDS - 1)] = 1 };
+static bool enable[SNDRV_CARDS] = { 1, [1 ... (SNDRV_CARDS - 1)] = 1 };
 
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable cx25821 soundcard. default enabled.");
diff --git a/drivers/media/video/cx88/cx88-alsa.c b/drivers/media/video/cx88/cx88-alsa.c
index 68d1240f493c..04bf6627d362 100644
--- a/drivers/media/video/cx88/cx88-alsa.c
+++ b/drivers/media/video/cx88/cx88-alsa.c
@@ -96,7 +96,7 @@ typedef struct cx88_audio_dev snd_cx88_card_t;
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;	/* Index 0-MAX */
 static const char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;	/* ID for this card */
-static int enable[SNDRV_CARDS] = {1, [1 ... (SNDRV_CARDS - 1)] = 1};
+static bool enable[SNDRV_CARDS] = {1, [1 ... (SNDRV_CARDS - 1)] = 1};
 
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable cx88x soundcard. default enabled.");
diff --git a/drivers/media/video/gspca/m5602/m5602_core.c b/drivers/media/video/gspca/m5602/m5602_core.c
index 9fe3816b2aa0..0c4493675438 100644
--- a/drivers/media/video/gspca/m5602/m5602_core.c
+++ b/drivers/media/video/gspca/m5602/m5602_core.c
@@ -27,8 +27,8 @@
 
 /* Kernel module parameters */
 int force_sensor;
-static int dump_bridge;
-int dump_sensor;
+static bool dump_bridge;
+bool dump_sensor;
 
 static const struct usb_device_id m5602_table[] = {
 	{USB_DEVICE(0x0402, 0x5602)},
diff --git a/drivers/media/video/gspca/m5602/m5602_mt9m111.h b/drivers/media/video/gspca/m5602/m5602_mt9m111.h
index b1f0c492036a..8c672b5c8c6a 100644
--- a/drivers/media/video/gspca/m5602/m5602_mt9m111.h
+++ b/drivers/media/video/gspca/m5602/m5602_mt9m111.h
@@ -106,7 +106,7 @@
 
 /* Kernel module parameters */
 extern int force_sensor;
-extern int dump_sensor;
+extern bool dump_sensor;
 
 int mt9m111_probe(struct sd *sd);
 int mt9m111_init(struct sd *sd);
diff --git a/drivers/media/video/gspca/m5602/m5602_ov7660.h b/drivers/media/video/gspca/m5602/m5602_ov7660.h
index 2efd607987ec..2b6a13b508f7 100644
--- a/drivers/media/video/gspca/m5602/m5602_ov7660.h
+++ b/drivers/media/video/gspca/m5602/m5602_ov7660.h
@@ -86,7 +86,7 @@
 
 /* Kernel module parameters */
 extern int force_sensor;
-extern int dump_sensor;
+extern bool dump_sensor;
 
 int ov7660_probe(struct sd *sd);
 int ov7660_init(struct sd *sd);
diff --git a/drivers/media/video/gspca/m5602/m5602_ov9650.h b/drivers/media/video/gspca/m5602/m5602_ov9650.h
index da9a129b739d..f7aa5bf68983 100644
--- a/drivers/media/video/gspca/m5602/m5602_ov9650.h
+++ b/drivers/media/video/gspca/m5602/m5602_ov9650.h
@@ -135,7 +135,7 @@
 
 /* Kernel module parameters */
 extern int force_sensor;
-extern int dump_sensor;
+extern bool dump_sensor;
 
 int ov9650_probe(struct sd *sd);
 int ov9650_init(struct sd *sd);
diff --git a/drivers/media/video/gspca/m5602/m5602_po1030.h b/drivers/media/video/gspca/m5602/m5602_po1030.h
index 338359596398..81a2bcb88fe3 100644
--- a/drivers/media/video/gspca/m5602/m5602_po1030.h
+++ b/drivers/media/video/gspca/m5602/m5602_po1030.h
@@ -147,7 +147,7 @@
 
 /* Kernel module parameters */
 extern int force_sensor;
-extern int dump_sensor;
+extern bool dump_sensor;
 
 int po1030_probe(struct sd *sd);
 int po1030_init(struct sd *sd);
diff --git a/drivers/media/video/gspca/m5602/m5602_s5k4aa.h b/drivers/media/video/gspca/m5602/m5602_s5k4aa.h
index 8cc7a3f6da72..8e0035e731c7 100644
--- a/drivers/media/video/gspca/m5602/m5602_s5k4aa.h
+++ b/drivers/media/video/gspca/m5602/m5602_s5k4aa.h
@@ -65,7 +65,7 @@
 
 /* Kernel module parameters */
 extern int force_sensor;
-extern int dump_sensor;
+extern bool dump_sensor;
 
 int s5k4aa_probe(struct sd *sd);
 int s5k4aa_init(struct sd *sd);
diff --git a/drivers/media/video/gspca/m5602/m5602_s5k83a.h b/drivers/media/video/gspca/m5602/m5602_s5k83a.h
index 80a63a236e24..79952247b534 100644
--- a/drivers/media/video/gspca/m5602/m5602_s5k83a.h
+++ b/drivers/media/video/gspca/m5602/m5602_s5k83a.h
@@ -41,7 +41,7 @@
 
 /* Kernel module parameters */
 extern int force_sensor;
-extern int dump_sensor;
+extern bool dump_sensor;
 
 int s5k83a_probe(struct sd *sd);
 int s5k83a_init(struct sd *sd);
diff --git a/drivers/media/video/gspca/stv06xx/stv06xx.c b/drivers/media/video/gspca/stv06xx/stv06xx.c
index 0ab425fbea9a..6f878f6c6e99 100644
--- a/drivers/media/video/gspca/stv06xx/stv06xx.c
+++ b/drivers/media/video/gspca/stv06xx/stv06xx.c
@@ -36,8 +36,8 @@ MODULE_AUTHOR("Erik Andrén");
 MODULE_DESCRIPTION("STV06XX USB Camera Driver");
 MODULE_LICENSE("GPL");
 
-static int dump_bridge;
-static int dump_sensor;
+static bool dump_bridge;
+static bool dump_sensor;
 
 int stv06xx_write_bridge(struct sd *sd, u16 address, u16 i2c_data)
 {
diff --git a/drivers/media/video/hdpvr/hdpvr-core.c b/drivers/media/video/hdpvr/hdpvr-core.c
index 3f1a5b1beeba..e5eb56a5b618 100644
--- a/drivers/media/video/hdpvr/hdpvr-core.c
+++ b/drivers/media/video/hdpvr/hdpvr-core.c
@@ -49,7 +49,7 @@ module_param(default_audio_input, uint, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(default_audio_input, "default audio input: 0=RCA back / "
 		 "1=RCA front / 2=S/PDIF");
 
-static int boost_audio;
+static bool boost_audio;
 module_param(boost_audio, bool, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(boost_audio, "boost the audio signal");
 
diff --git a/drivers/media/video/ivtv/ivtv-driver.c b/drivers/media/video/ivtv/ivtv-driver.c
index 41108a9a195e..544af91cbdc1 100644
--- a/drivers/media/video/ivtv/ivtv-driver.c
+++ b/drivers/media/video/ivtv/ivtv-driver.c
@@ -99,7 +99,7 @@ static int i2c_clock_period[IVTV_MAX_CARDS] = { -1, -1, -1, -1, -1, -1, -1, -1,
 
 static unsigned int cardtype_c = 1;
 static unsigned int tuner_c = 1;
-static unsigned int radio_c = 1;
+static bool radio_c = 1;
 static unsigned int i2c_clock_period_c = 1;
 static char pal[] = "---";
 static char secam[] = "--";
diff --git a/drivers/media/video/ivtv/ivtvfb.c b/drivers/media/video/ivtv/ivtvfb.c
index 6b7c9c823330..d0fbfcf7133d 100644
--- a/drivers/media/video/ivtv/ivtvfb.c
+++ b/drivers/media/video/ivtv/ivtvfb.c
@@ -58,7 +58,7 @@
 /* card parameters */
 static int ivtvfb_card_id = -1;
 static int ivtvfb_debug = 0;
-static int osd_laced;
+static bool osd_laced;
 static int osd_depth;
 static int osd_upper;
 static int osd_left;
diff --git a/drivers/media/video/marvell-ccic/mcam-core.c b/drivers/media/video/marvell-ccic/mcam-core.c
index 80ec64d2d6d8..2c8fc0f6d690 100644
--- a/drivers/media/video/marvell-ccic/mcam-core.c
+++ b/drivers/media/video/marvell-ccic/mcam-core.c
@@ -51,7 +51,7 @@ static int delivered;
  * sense.
  */
 
-static int alloc_bufs_at_read;
+static bool alloc_bufs_at_read;
 module_param(alloc_bufs_at_read, bool, 0444);
 MODULE_PARM_DESC(alloc_bufs_at_read,
 		"Non-zero value causes DMA buffers to be allocated when the "
@@ -73,11 +73,11 @@ MODULE_PARM_DESC(dma_buf_size,
 		"parameters require larger buffers, an attempt to reallocate "
 		"will be made.");
 #else /* MCAM_MODE_VMALLOC */
-static const int alloc_bufs_at_read = 0;
+static const bool alloc_bufs_at_read = 0;
 static const int n_dma_bufs = 3;  /* Used by S/G_PARM */
 #endif /* MCAM_MODE_VMALLOC */
 
-static int flip;
+static bool flip;
 module_param(flip, bool, 0444);
 MODULE_PARM_DESC(flip,
 		"If set, the sensor will be instructed to flip the image "
diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c
index d0f538857285..d7cd0f633f63 100644
--- a/drivers/media/video/msp3400-driver.c
+++ b/drivers/media/video/msp3400-driver.c
@@ -69,12 +69,12 @@ MODULE_LICENSE("GPL");
 /* module parameters */
 static int opmode   = OPMODE_AUTO;
 int msp_debug;		 /* msp_debug output */
-int msp_once;		 /* no continuous stereo monitoring */
-int msp_amsound;	 /* hard-wire AM sound at 6.5 Hz (france),
+bool msp_once;		 /* no continuous stereo monitoring */
+bool msp_amsound;	 /* hard-wire AM sound at 6.5 Hz (france),
 			    the autoscan seems work well only with FM... */
 int msp_standard = 1;    /* Override auto detect of audio msp_standard,
 			    if needed. */
-int msp_dolby;
+bool msp_dolby;
 
 int msp_stereo_thresh = 0x190; /* a2 threshold for stereo/bilingual
 					(msp34xxg only) 0x00a0-0x03c0 */
diff --git a/drivers/media/video/msp3400-driver.h b/drivers/media/video/msp3400-driver.h
index 831e8db4368c..fbe5e0715f93 100644
--- a/drivers/media/video/msp3400-driver.h
+++ b/drivers/media/video/msp3400-driver.h
@@ -44,10 +44,10 @@
 
 /* module parameters */
 extern int msp_debug;
-extern int msp_once;
-extern int msp_amsound;
+extern bool msp_once;
+extern bool msp_amsound;
 extern int msp_standard;
-extern int msp_dolby;
+extern bool msp_dolby;
 extern int msp_stereo_thresh;
 
 struct msp_state {
diff --git a/drivers/media/video/omap/omap_vout.c b/drivers/media/video/omap/omap_vout.c
index ee0d0b39cd17..0de598bf66bb 100644
--- a/drivers/media/video/omap/omap_vout.c
+++ b/drivers/media/video/omap/omap_vout.c
@@ -70,9 +70,9 @@ static u32 video1_numbuffers = 3;
 static u32 video2_numbuffers = 3;
 static u32 video1_bufsize = OMAP_VOUT_MAX_BUF_SIZE;
 static u32 video2_bufsize = OMAP_VOUT_MAX_BUF_SIZE;
-static u32 vid1_static_vrfb_alloc;
-static u32 vid2_static_vrfb_alloc;
-static int debug;
+static bool vid1_static_vrfb_alloc;
+static bool vid2_static_vrfb_alloc;
+static bool debug;
 
 /* Module parameters */
 module_param(video1_numbuffers, uint, S_IRUGO);
diff --git a/drivers/media/video/omap/omap_vout_vrfb.c b/drivers/media/video/omap/omap_vout_vrfb.c
index ebebcac49225..4be26abf6cea 100644
--- a/drivers/media/video/omap/omap_vout_vrfb.c
+++ b/drivers/media/video/omap/omap_vout_vrfb.c
@@ -84,7 +84,7 @@ void omap_vout_free_vrfb_buffers(struct omap_vout_device *vout)
 }
 
 int omap_vout_setup_vrfb_bufs(struct platform_device *pdev, int vid_num,
-			u32 static_vrfb_allocation)
+			      bool static_vrfb_allocation)
 {
 	int ret = 0, i, j;
 	struct omap_vout_device *vout;
diff --git a/drivers/media/video/ov7670.c b/drivers/media/video/ov7670.c
index 8aa058531280..6a564964853a 100644
--- a/drivers/media/video/ov7670.c
+++ b/drivers/media/video/ov7670.c
@@ -25,7 +25,7 @@ MODULE_AUTHOR("Jonathan Corbet <corbet@lwn.net>");
 MODULE_DESCRIPTION("A low-level driver for OmniVision ov7670 sensors");
 MODULE_LICENSE("GPL");
 
-static int debug;
+static bool debug;
 module_param(debug, bool, 0644);
 MODULE_PARM_DESC(debug, "Debug level (0-1)");
 
diff --git a/drivers/media/video/saa7115.c b/drivers/media/video/saa7115.c
index 5cfdbc78b918..0ef5484696b6 100644
--- a/drivers/media/video/saa7115.c
+++ b/drivers/media/video/saa7115.c
@@ -57,7 +57,7 @@ MODULE_AUTHOR(  "Maxim Yevtyushkin, Kevin Thayer, Chris Kennedy, "
 		"Hans Verkuil, Mauro Carvalho Chehab");
 MODULE_LICENSE("GPL");
 
-static int debug;
+static bool debug;
 module_param(debug, bool, 0644);
 
 MODULE_PARM_DESC(debug, "Debug level (0-1)");
diff --git a/drivers/media/video/stk-webcam.c b/drivers/media/video/stk-webcam.c
index b7fb5a5cad7e..3c61aec517ac 100644
--- a/drivers/media/video/stk-webcam.c
+++ b/drivers/media/video/stk-webcam.c
@@ -38,11 +38,11 @@
 #include "stk-webcam.h"
 
 
-static int hflip = 1;
+static bool hflip = 1;
 module_param(hflip, bool, 0444);
 MODULE_PARM_DESC(hflip, "Horizontal image flip (mirror). Defaults to 1");
 
-static int vflip = 1;
+static bool vflip = 1;
 module_param(vflip, bool, 0444);
 MODULE_PARM_DESC(vflip, "Vertical image flip. Defaults to 1");
 
diff --git a/drivers/media/video/tm6000/tm6000-alsa.c b/drivers/media/video/tm6000/tm6000-alsa.c
index 7d675c72fd47..bb2047c10358 100644
--- a/drivers/media/video/tm6000/tm6000-alsa.c
+++ b/drivers/media/video/tm6000/tm6000-alsa.c
@@ -42,7 +42,7 @@
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;	/* Index 0-MAX */
 
-static int enable[SNDRV_CARDS] = {1, [1 ... (SNDRV_CARDS - 1)] = 1};
+static bool enable[SNDRV_CARDS] = {1, [1 ... (SNDRV_CARDS - 1)] = 1};
 
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable tm6000x soundcard. default enabled.");
diff --git a/drivers/media/video/tvp514x.c b/drivers/media/video/tvp514x.c
index 926f03931156..dd26cacd0556 100644
--- a/drivers/media/video/tvp514x.c
+++ b/drivers/media/video/tvp514x.c
@@ -52,7 +52,7 @@
 #define LOCK_RETRY_DELAY                (200)
 
 /* Debug functions */
-static int debug;
+static bool debug;
 module_param(debug, bool, 0644);
 MODULE_PARM_DESC(debug, "Debug level (0-1)");
 
diff --git a/drivers/media/video/tvp7002.c b/drivers/media/video/tvp7002.c
index 7875e80cb2ff..236c559d5f51 100644
--- a/drivers/media/video/tvp7002.c
+++ b/drivers/media/video/tvp7002.c
@@ -63,7 +63,7 @@ MODULE_LICENSE("GPL");
 #define TVP7002_CL_MASK		0x0f
 
 /* Debug functions */
-static int debug;
+static bool debug;
 module_param(debug, bool, 0644);
 MODULE_PARM_DESC(debug, "Debug level (0-2)");
 
diff --git a/drivers/media/video/upd64083.c b/drivers/media/video/upd64083.c
index 9bbe61700fd5..65d065aa6091 100644
--- a/drivers/media/video/upd64083.c
+++ b/drivers/media/video/upd64083.c
@@ -34,7 +34,7 @@ MODULE_DESCRIPTION("uPD64083 driver");
 MODULE_AUTHOR("T. Adachi, Takeru KOMORIYA, Hans Verkuil");
 MODULE_LICENSE("GPL");
 
-static int debug;
+static bool debug;
 module_param(debug, bool, 0644);
 
 MODULE_PARM_DESC(debug, "Debug level (0-1)");
diff --git a/drivers/media/video/via-camera.c b/drivers/media/video/via-camera.c
index cbf13d09b4ac..bfae41ba53c3 100644
--- a/drivers/media/video/via-camera.c
+++ b/drivers/media/video/via-camera.c
@@ -34,13 +34,13 @@ MODULE_AUTHOR("Jonathan Corbet <corbet@lwn.net>");
 MODULE_DESCRIPTION("VIA framebuffer-based camera controller driver");
 MODULE_LICENSE("GPL");
 
-static int flip_image;
+static bool flip_image;
 module_param(flip_image, bool, 0444);
 MODULE_PARM_DESC(flip_image,
 		"If set, the sensor will be instructed to flip the image "
 		"vertically.");
 
-static int override_serial;
+static bool override_serial;
 module_param(override_serial, bool, 0444);
 MODULE_PARM_DESC(override_serial,
 		"The camera driver will normally refuse to load if "
diff --git a/drivers/media/video/zoran/zoran_device.c b/drivers/media/video/zoran/zoran_device.c
index e8a27844bf39..e86173bd1327 100644
--- a/drivers/media/video/zoran/zoran_device.c
+++ b/drivers/media/video/zoran/zoran_device.c
@@ -57,7 +57,7 @@
 		   ZR36057_ISR_GIRQ1 | \
 		   ZR36057_ISR_JPEGRepIRQ )
 
-static int lml33dpath;		/* default = 0
+static bool lml33dpath;		/* default = 0
 				 * 1 will use digital path in capture
 				 * mode instead of analog. It can be
 				 * used for picture adjustments using
diff --git a/drivers/media/video/zoran/zr36060.c b/drivers/media/video/zoran/zr36060.c
index 5e4f57cbf314..f08546fe2234 100644
--- a/drivers/media/video/zoran/zr36060.c
+++ b/drivers/media/video/zoran/zr36060.c
@@ -50,7 +50,7 @@
 /* amount of chips attached via this driver */
 static int zr36060_codecs;
 
-static int low_bitrate;
+static bool low_bitrate;
 module_param(low_bitrate, bool, 0);
 MODULE_PARM_DESC(low_bitrate, "Buz compatibility option, halves bitrate");
 
diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c
index 6ce70e9615d3..5319e9b65847 100644
--- a/drivers/memstick/host/jmb38x_ms.c
+++ b/drivers/memstick/host/jmb38x_ms.c
@@ -21,7 +21,7 @@
 
 #define DRIVER_NAME "jmb38x_ms"
 
-static int no_dma;
+static bool no_dma;
 module_param(no_dma, bool, 0644);
 
 enum {
diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c
index 668f5c6a0399..29b2172ae18f 100644
--- a/drivers/memstick/host/r592.c
+++ b/drivers/memstick/host/r592.c
@@ -23,7 +23,7 @@
 #include <linux/swab.h>
 #include "r592.h"
 
-static int r592_enable_dma = 1;
+static bool r592_enable_dma = 1;
 static int debug;
 
 static const char *tpc_names[] = {
diff --git a/drivers/memstick/host/tifm_ms.c b/drivers/memstick/host/tifm_ms.c
index b7aacf47703a..6902b83eb1b4 100644
--- a/drivers/memstick/host/tifm_ms.c
+++ b/drivers/memstick/host/tifm_ms.c
@@ -22,7 +22,7 @@
 
 #define DRIVER_NAME "tifm_ms"
 
-static int no_dma;
+static bool no_dma;
 module_param(no_dma, bool, 0644);
 
 /*
diff --git a/drivers/misc/iwmc3200top/main.c b/drivers/misc/iwmc3200top/main.c
index b1f4563be9ae..701eb600b127 100644
--- a/drivers/misc/iwmc3200top/main.c
+++ b/drivers/misc/iwmc3200top/main.c
@@ -376,20 +376,20 @@ static int blocks;
 module_param(blocks, int, 0604);
 MODULE_PARM_DESC(blocks, "max_blocks_to_send");
 
-static int dump;
+static bool dump;
 module_param(dump, bool, 0604);
 MODULE_PARM_DESC(dump, "dump_hex_content");
 
-static int jump = 1;
+static bool jump = 1;
 module_param(jump, bool, 0604);
 
-static int direct = 1;
+static bool direct = 1;
 module_param(direct, bool, 0604);
 
-static int checksum = 1;
+static bool checksum = 1;
 module_param(checksum, bool, 0604);
 
-static int fw_download = 1;
+static bool fw_download = 1;
 module_param(fw_download, bool, 0604);
 
 static int block_size = IWMC_SDIO_BLK_SIZE;
@@ -398,7 +398,7 @@ module_param(block_size, int, 0404);
 static int download_trans_blks = IWMC_DEFAULT_TR_BLK;
 module_param(download_trans_blks, int, 0604);
 
-static int rubbish_barker;
+static bool rubbish_barker;
 module_param(rubbish_barker, bool, 0604);
 
 #ifdef CONFIG_IWMC3200TOP_DEBUG
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 950b97d7412a..75d7d7e17366 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -48,7 +48,7 @@ static struct workqueue_struct *workqueue;
  * performance cost, and for other reasons may not always be desired.
  * So we allow it it to be disabled.
  */
-int use_spi_crc = 1;
+bool use_spi_crc = 1;
 module_param(use_spi_crc, bool, 0);
 
 /*
@@ -58,9 +58,9 @@ module_param(use_spi_crc, bool, 0);
  * overridden if necessary.
  */
 #ifdef CONFIG_MMC_UNSAFE_RESUME
-int mmc_assume_removable;
+bool mmc_assume_removable;
 #else
-int mmc_assume_removable = 1;
+bool mmc_assume_removable = 1;
 #endif
 EXPORT_SYMBOL(mmc_assume_removable);
 module_param_named(removable, mmc_assume_removable, bool, 0644);
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 14664f1fb16f..afa6bd2b7b70 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -64,7 +64,7 @@ int mmc_attach_sd(struct mmc_host *host);
 int mmc_attach_sdio(struct mmc_host *host);
 
 /* Module parameters */
-extern int use_spi_crc;
+extern bool use_spi_crc;
 
 /* Debugfs information for hosts and cards */
 void mmc_add_host_debugfs(struct mmc_host *host);
diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c
index f70d04664cac..69d249f51d6a 100644
--- a/drivers/mmc/host/tifm_sd.c
+++ b/drivers/mmc/host/tifm_sd.c
@@ -22,8 +22,8 @@
 #define DRIVER_NAME "tifm_sd"
 #define DRIVER_VERSION "0.8"
 
-static int no_dma = 0;
-static int fixed_timeout = 0;
+static bool no_dma = 0;
+static bool fixed_timeout = 0;
 module_param(no_dma, bool, 0644);
 module_param(fixed_timeout, bool, 0644);
 
diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c
index 2ec978bc32ba..3135a1a5d75d 100644
--- a/drivers/mmc/host/vub300.c
+++ b/drivers/mmc/host/vub300.c
@@ -223,25 +223,25 @@ enum SD_RESPONSE_TYPE {
 #define FUN(c) (0x000007 & (c->arg>>28))
 #define REG(c) (0x01FFFF & (c->arg>>9))
 
-static int limit_speed_to_24_MHz;
+static bool limit_speed_to_24_MHz;
 module_param(limit_speed_to_24_MHz, bool, 0644);
 MODULE_PARM_DESC(limit_speed_to_24_MHz, "Limit Max SDIO Clock Speed to 24 MHz");
 
-static int pad_input_to_usb_pkt;
+static bool pad_input_to_usb_pkt;
 module_param(pad_input_to_usb_pkt, bool, 0644);
 MODULE_PARM_DESC(pad_input_to_usb_pkt,
 		 "Pad USB data input transfers to whole USB Packet");
 
-static int disable_offload_processing;
+static bool disable_offload_processing;
 module_param(disable_offload_processing, bool, 0644);
 MODULE_PARM_DESC(disable_offload_processing, "Disable Offload Processing");
 
-static int force_1_bit_data_xfers;
+static bool force_1_bit_data_xfers;
 module_param(force_1_bit_data_xfers, bool, 0644);
 MODULE_PARM_DESC(force_1_bit_data_xfers,
 		 "Force SDIO Data Transfers to 1-bit Mode");
 
-static int force_polling_for_irqs;
+static bool force_polling_for_irqs;
 module_param(force_polling_for_irqs, bool, 0644);
 MODULE_PARM_DESC(force_polling_for_irqs, "Force Polling for SDIO interrupts");
 
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 8544d6bf50a0..5c3d719c37e6 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -185,7 +185,7 @@ struct pxa3xx_nand_info {
 	uint32_t		ndcb2;
 };
 
-static int use_dma = 1;
+static bool use_dma = 1;
 module_param(use_dma, bool, 0444);
 MODULE_PARM_DESC(use_dma, "enable DMA for data transferring to/from NAND HW");
 
diff --git a/drivers/mtd/nand/r852.c b/drivers/mtd/nand/r852.c
index f20f393bfda6..769a4e096b3c 100644
--- a/drivers/mtd/nand/r852.c
+++ b/drivers/mtd/nand/r852.c
@@ -22,7 +22,7 @@
 #include "r852.h"
 
 
-static int r852_enable_dma = 1;
+static bool r852_enable_dma = 1;
 module_param(r852_enable_dma, bool, S_IRUGO);
 MODULE_PARM_DESC(r852_enable_dma, "Enable usage of the DMA (default)");
 
diff --git a/drivers/parport/parport_ip32.c b/drivers/parport/parport_ip32.c
index 0dc34f12f92e..d4716273651e 100644
--- a/drivers/parport/parport_ip32.c
+++ b/drivers/parport/parport_ip32.c
@@ -135,7 +135,7 @@
 #define PARPORT_IP32_ENABLE_EPP	(1U << 3)
 #define PARPORT_IP32_ENABLE_ECP	(1U << 4)
 static unsigned int features =	~0U;
-static int verbose_probing =	DEFAULT_VERBOSE_PROBING;
+static bool verbose_probing =	DEFAULT_VERBOSE_PROBING;
 
 /* We do not support more than one port. */
 static struct parport *this_port = NULL;
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index 095f29e13734..2a47e82821da 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -44,7 +44,7 @@
 #define	METHOD_NAME__SUN	"_SUN"
 #define	METHOD_NAME_OSHP	"OSHP"
 
-static int debug_acpi;
+static bool debug_acpi;
 
 static acpi_status
 decode_type0_hpx_record(union acpi_object *record, struct hotplug_params *hpx)
diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index efa9f2de51c1..aa41631e9e02 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -47,7 +47,7 @@
 /* name size which is used for entries in pcihpfs */
 #define SLOT_NAME_SIZE  21              /* {_SUN} */
 
-static int debug;
+static bool debug;
 int acpiphp_debug;
 
 /* local variables */
diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c
index e525263210ee..c35e8ad6db01 100644
--- a/drivers/pci/hotplug/acpiphp_ibm.c
+++ b/drivers/pci/hotplug/acpiphp_ibm.c
@@ -43,7 +43,7 @@
 #define DRIVER_AUTHOR	"Irene Zubarev <zubarev@us.ibm.com>, Vernon Mauery <vernux@us.ibm.com>"
 #define DRIVER_DESC	"ACPI Hot Plug PCI Controller Driver IBM extension"
 
-static int debug;
+static bool debug;
 
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/pci/hotplug/cpcihp_zt5550.c b/drivers/pci/hotplug/cpcihp_zt5550.c
index 41f6a8d79c81..6bf8d2ab164f 100644
--- a/drivers/pci/hotplug/cpcihp_zt5550.c
+++ b/drivers/pci/hotplug/cpcihp_zt5550.c
@@ -57,8 +57,8 @@
 #define warn(format, arg...) printk(KERN_WARNING "%s: " format "\n", MY_NAME , ## arg)
 
 /* local variables */
-static int debug;
-static int poll;
+static bool debug;
+static bool poll;
 static struct cpci_hp_controller_ops zt5550_hpc_ops;
 static struct cpci_hp_controller zt5550_hpc;
 
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index f1ce99cceac6..187a199da93c 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -57,8 +57,8 @@ struct irq_routing_table *cpqhp_routing_table;
 static void __iomem *smbios_table;
 static void __iomem *smbios_start;
 static void __iomem *cpqhp_rom_start;
-static int power_mode;
-static int debug;
+static bool power_mode;
+static bool debug;
 static int initialized;
 
 #define DRIVER_VERSION	"0.9.8"
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index d934dd4fa873..5506e0e8fbc0 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -49,7 +49,7 @@
 
 int ibmphp_debug;
 
-static int debug;
+static bool debug;
 module_param(debug, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC (debug, "Debugging mode enabled or not");
 MODULE_LICENSE ("GPL");
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index 6d2eea93298f..202f4a969eb5 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -51,7 +51,7 @@
 
 
 /* local variables */
-static int debug;
+static bool debug;
 
 #define DRIVER_VERSION	"0.5"
 #define DRIVER_AUTHOR	"Greg Kroah-Hartman <greg@kroah.com>, Scott Murray <scottm@somanetworks.com>"
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 9a33fdde2d16..4b7cce1de6ec 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -40,10 +40,10 @@
 
 #define MY_NAME	"pciehp"
 
-extern int pciehp_poll_mode;
+extern bool pciehp_poll_mode;
 extern int pciehp_poll_time;
-extern int pciehp_debug;
-extern int pciehp_force;
+extern bool pciehp_debug;
+extern bool pciehp_force;
 extern struct workqueue_struct *pciehp_wq;
 
 #define dbg(format, arg...)						\
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index b8c99d35ac97..365c6b96c642 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -38,10 +38,10 @@
 #include <linux/time.h>
 
 /* Global variables */
-int pciehp_debug;
-int pciehp_poll_mode;
+bool pciehp_debug;
+bool pciehp_poll_mode;
 int pciehp_poll_time;
-int pciehp_force;
+bool pciehp_force;
 struct workqueue_struct *pciehp_wq;
 
 #define DRIVER_VERSION	"0.4"
diff --git a/drivers/pci/hotplug/pcihp_skeleton.c b/drivers/pci/hotplug/pcihp_skeleton.c
index 5175d9b26f0b..b20ceaaa31f4 100644
--- a/drivers/pci/hotplug/pcihp_skeleton.c
+++ b/drivers/pci/hotplug/pcihp_skeleton.c
@@ -59,7 +59,7 @@ static LIST_HEAD(slot_list);
 #define warn(format, arg...) printk(KERN_WARNING "%s: " format "\n", MY_NAME , ## arg)
 
 /* local variables */
-static int debug;
+static bool debug;
 static int num_slots;
 
 #define DRIVER_VERSION	"0.3"
diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
index 419919a87b0f..df5677440a08 100644
--- a/drivers/pci/hotplug/rpaphp.h
+++ b/drivers/pci/hotplug/rpaphp.h
@@ -46,7 +46,7 @@
 #define PRESENT         1	/* Card in slot */
 
 #define MY_NAME "rpaphp"
-extern int rpaphp_debug;
+extern bool rpaphp_debug;
 #define dbg(format, arg...)					\
 	do {							\
 		if (rpaphp_debug)					\
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 758adb5f47fd..127d6e600185 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -37,7 +37,7 @@
 				/* and pci_do_scan_bus */
 #include "rpaphp.h"
 
-int rpaphp_debug;
+bool rpaphp_debug;
 LIST_HEAD(rpaphp_slot_head);
 
 #define DRIVER_VERSION	"0.1"
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h
index e0c90e643b5f..ca64932e658b 100644
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h
@@ -43,9 +43,9 @@
 	#define MY_NAME	THIS_MODULE->name
 #endif
 
-extern int shpchp_poll_mode;
+extern bool shpchp_poll_mode;
 extern int shpchp_poll_time;
-extern int shpchp_debug;
+extern bool shpchp_debug;
 extern struct workqueue_struct *shpchp_wq;
 extern struct workqueue_struct *shpchp_ordered_wq;
 
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index dd7e0c51a33e..7414fd9ad1d2 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -36,8 +36,8 @@
 #include "shpchp.h"
 
 /* Global variables */
-int shpchp_debug;
-int shpchp_poll_mode;
+bool shpchp_debug;
+bool shpchp_poll_mode;
 int shpchp_poll_time;
 struct workqueue_struct *shpchp_wq;
 struct workqueue_struct *shpchp_ordered_wq;
diff --git a/drivers/pci/pcie/aer/aer_inject.c b/drivers/pci/pcie/aer/aer_inject.c
index 95489cd9a555..52229863e9fe 100644
--- a/drivers/pci/pcie/aer/aer_inject.c
+++ b/drivers/pci/pcie/aer/aer_inject.c
@@ -28,7 +28,7 @@
 #include "aerdrv.h"
 
 /* Override the existing corrected and uncorrected error masks */
-static int aer_mask_override;
+static bool aer_mask_override;
 module_param(aer_mask_override, bool, 0);
 
 struct aer_error_inj {
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 9674e9f30d49..0ca053538146 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -27,8 +27,8 @@
 #include <linux/kfifo.h>
 #include "aerdrv.h"
 
-static int forceload;
-static int nosourceid;
+static bool forceload;
+static bool nosourceid;
 module_param(forceload, bool, 0);
 module_param(nosourceid, bool, 0);
 
diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c
index 9dc565c615bd..849c0c11d2af 100644
--- a/drivers/pcmcia/yenta_socket.c
+++ b/drivers/pcmcia/yenta_socket.c
@@ -24,15 +24,15 @@
 #include "yenta_socket.h"
 #include "i82365.h"
 
-static int disable_clkrun;
+static bool disable_clkrun;
 module_param(disable_clkrun, bool, 0444);
 MODULE_PARM_DESC(disable_clkrun, "If PC card doesn't function properly, please try this option");
 
-static int isa_probe = 1;
+static bool isa_probe = 1;
 module_param(isa_probe, bool, 0444);
 MODULE_PARM_DESC(isa_probe, "If set ISA interrupts are probed (default). Set to N to disable probing");
 
-static int pwr_irqs_off;
+static bool pwr_irqs_off;
 module_param(pwr_irqs_off, bool, 0644);
 MODULE_PARM_DESC(pwr_irqs_off, "Force IRQs off during power-on of slot. Use only when seeing IRQ storms!");
 
diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c
index 8877b836d27c..d96734478324 100644
--- a/drivers/platform/x86/compal-laptop.c
+++ b/drivers/platform/x86/compal-laptop.c
@@ -189,7 +189,7 @@ struct compal_data{
 /* =============== */
 /* General globals */
 /* =============== */
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Force driver load, ignore DMI data");
 
diff --git a/drivers/platform/x86/intel_oaktrail.c b/drivers/platform/x86/intel_oaktrail.c
index 7f88c7923fc6..6ee0b5c90933 100644
--- a/drivers/platform/x86/intel_oaktrail.c
+++ b/drivers/platform/x86/intel_oaktrail.c
@@ -95,7 +95,7 @@
 #define OT_EC_BL_CONTROL_ON_DATA	0x1A
 
 
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Force driver load, ignore DMI data");
 
diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c
index f204643c5052..bb5132128b33 100644
--- a/drivers/platform/x86/msi-laptop.c
+++ b/drivers/platform/x86/msi-laptop.c
@@ -89,7 +89,7 @@ static int msi_laptop_resume(struct platform_device *device);
 
 #define MSI_STANDARD_EC_DEVICES_EXISTS_ADDRESS	0x2f
 
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Force driver load, ignore DMI data");
 
diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c
index 09e26bfd4643..fd73ea89b857 100644
--- a/drivers/platform/x86/samsung-laptop.c
+++ b/drivers/platform/x86/samsung-laptop.c
@@ -228,12 +228,12 @@ static struct platform_device *sdev;
 static struct rfkill *rfk;
 static bool has_stepping_quirk;
 
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force,
 		"Disable the DMI check and forces the driver to be loaded");
 
-static int debug;
+static bool debug;
 module_param(debug, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(debug, "Debug enabled or not");
 
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 62533c105da4..ea0c6075b720 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -378,13 +378,13 @@ static unsigned int bright_maxlvl;	/* 0 = unknown */
 
 #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
 static int dbg_wlswemul;
-static int tpacpi_wlsw_emulstate;
+static bool tpacpi_wlsw_emulstate;
 static int dbg_bluetoothemul;
-static int tpacpi_bluetooth_emulstate;
+static bool tpacpi_bluetooth_emulstate;
 static int dbg_wwanemul;
-static int tpacpi_wwan_emulstate;
+static bool tpacpi_wwan_emulstate;
 static int dbg_uwbemul;
-static int tpacpi_uwb_emulstate;
+static bool tpacpi_uwb_emulstate;
 #endif
 
 
@@ -6444,7 +6444,7 @@ static struct ibm_struct brightness_driver_data = {
 
 static int alsa_index = ~((1 << (SNDRV_CARDS - 3)) - 1); /* last three slots */
 static char *alsa_id = "ThinkPadEC";
-static int alsa_enable = SNDRV_DEFAULT_ENABLE1;
+static bool alsa_enable = SNDRV_DEFAULT_ENABLE1;
 
 struct tpacpi_alsa_data {
 	struct snd_card *card;
@@ -6487,7 +6487,7 @@ static enum tpacpi_volume_access_mode volume_mode =
 	TPACPI_VOL_MODE_MAX;
 
 static enum tpacpi_volume_capabilities volume_capabilities;
-static int volume_control_allowed;
+static bool volume_control_allowed;
 
 /*
  * Used to syncronize writers to TP_EC_AUDIO and
@@ -7265,7 +7265,7 @@ enum fan_control_commands {
 						 * and also watchdog cmd */
 };
 
-static int fan_control_allowed;
+static bool fan_control_allowed;
 
 static enum fan_status_access_mode fan_status_access_mode;
 static enum fan_control_access_mode fan_control_access_mode;
@@ -8437,7 +8437,7 @@ static struct proc_dir_entry *proc_dir;
  * Module and infrastructure proble, init and exit handling
  */
 
-static int force_load;
+static bool force_load;
 
 #ifdef CONFIG_THINKPAD_ACPI_DEBUG
 static const char * __init str_supported(int is_supported)
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index a134c26870b0..42a4dcc25f92 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -82,12 +82,12 @@ struct wmi_block {
 #define ACPI_WMI_STRING      0x4	/* GUID takes & returns a string */
 #define ACPI_WMI_EVENT       0x8	/* GUID is an event */
 
-static int debug_event;
+static bool debug_event;
 module_param(debug_event, bool, 0444);
 MODULE_PARM_DESC(debug_event,
 		 "Log WMI Events [0/1]");
 
-static int debug_dump_wdg;
+static bool debug_dump_wdg;
 module_param(debug_dump_wdg, bool, 0444);
 MODULE_PARM_DESC(debug_dump_wdg,
 		 "Dump available WMI interfaces [0/1]");
diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index 545874b1df9e..076e211a40b7 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -64,7 +64,7 @@ static unsigned int cache_time = 1000;
 module_param(cache_time, uint, 0644);
 MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
 
-static unsigned int pmod_enabled;
+static bool pmod_enabled;
 module_param(pmod_enabled, bool, 0644);
 MODULE_PARM_DESC(pmod_enabled, "PMOD enable bit");
 
diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c
index e5cb9248a442..f3b8bb84faf2 100644
--- a/drivers/s390/char/raw3270.c
+++ b/drivers/s390/char/raw3270.c
@@ -75,7 +75,7 @@ static LIST_HEAD(raw3270_devices);
 static int raw3270_registered;
 
 /* Module parameters */
-static int tubxcorrect = 0;
+static bool tubxcorrect = 0;
 module_param(tubxcorrect, bool, 0);
 
 /*
diff --git a/drivers/s390/char/vmwatchdog.c b/drivers/s390/char/vmwatchdog.c
index 11312f401c70..2211277a1079 100644
--- a/drivers/s390/char/vmwatchdog.c
+++ b/drivers/s390/char/vmwatchdog.c
@@ -28,9 +28,9 @@
 #define MAX_CMDLEN 240
 #define MIN_INTERVAL 15
 static char vmwdt_cmd[MAX_CMDLEN] = "IPL";
-static int vmwdt_conceal;
+static bool vmwdt_conceal;
 
-static int vmwdt_nowayout = WATCHDOG_NOWAYOUT;
+static bool vmwdt_nowayout = WATCHDOG_NOWAYOUT;
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c
index 195823a51aab..ed119cedaae0 100644
--- a/drivers/scsi/aha1542.c
+++ b/drivers/scsi/aha1542.c
@@ -102,7 +102,7 @@ static int setup_dmaspeed[MAXBOARDS] __initdata = { -1, -1, -1, -1 };
  */
 
 #if defined(MODULE)
-static int isapnp = 0;
+static bool isapnp = 0;
 static int aha1542[] = {0x330, 11, 4, -1};
 module_param_array(aha1542, int, NULL, 0);
 module_param(isapnp, bool, 0);
diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c
index f5b718d3c31b..13aeca3d51f2 100644
--- a/drivers/scsi/dc395x.c
+++ b/drivers/scsi/dc395x.c
@@ -546,7 +546,7 @@ static struct ParameterData __devinitdata cfg_data[] = {
  * command line overrides will be used. If set to 1 then safe and
  * slow settings will be used.
  */
-static int use_safe_settings = 0;
+static bool use_safe_settings = 0;
 module_param_named(safe, use_safe_settings, bool, 0);
 MODULE_PARM_DESC(safe, "Use safe and slow settings only. Default: false");
 
diff --git a/drivers/scsi/nsp32.c b/drivers/scsi/nsp32.c
index f6a50c98c36f..002924963cd8 100644
--- a/drivers/scsi/nsp32.c
+++ b/drivers/scsi/nsp32.c
@@ -59,11 +59,11 @@ MODULE_PARM_DESC(trans_mode, "transfer mode (0: BIOS(default) 1: Async 2: Ultra2
 #define ASYNC_MODE    1
 #define ULTRA20M_MODE 2
 
-static int       auto_param = 0;	/* default: ON */
+static bool      auto_param = 0;	/* default: ON */
 module_param     (auto_param, bool, 0);
 MODULE_PARM_DESC(auto_param, "AutoParameter mode (0: ON(default) 1: OFF)");
 
-static int       disc_priv  = 1;	/* default: OFF */
+static bool      disc_priv  = 1;	/* default: OFF */
 module_param     (disc_priv, bool, 0);
 MODULE_PARM_DESC(disc_priv,  "disconnection privilege mode (0: ON 1: OFF(default))");
 
diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index ca86721a71b9..b61a753eb896 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -70,7 +70,7 @@ module_param(nsp_burst_mode, int, 0);
 MODULE_PARM_DESC(nsp_burst_mode, "Burst transfer mode (0=io8, 1=io32, 2=mem32(default))");
 
 /* Release IO ports after configuration? */
-static int       free_ports = 0;
+static bool       free_ports = 0;
 module_param(free_ports, bool, 0);
 MODULE_PARM_DESC(free_ports, "Release IO ports after configuration? (default: 0 (=no))");
 
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index 0d18d80bcd25..9bcf87ae4c00 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -61,7 +61,7 @@ EXPORT_SYMBOL(comedi_debug);
 module_param(comedi_debug, int, 0644);
 #endif
 
-int comedi_autoconfig = 1;
+bool comedi_autoconfig = 1;
 module_param(comedi_autoconfig, bool, 0444);
 
 static int comedi_num_legacy_minors;
diff --git a/drivers/staging/comedi/comedi_fops.h b/drivers/staging/comedi/comedi_fops.h
index da4b4f5553f5..006cf14c577a 100644
--- a/drivers/staging/comedi/comedi_fops.h
+++ b/drivers/staging/comedi/comedi_fops.h
@@ -1,10 +1,11 @@
 
 #ifndef _COMEDI_FOPS_H
 #define _COMEDI_FOPS_H
+#include <linux/types.h>
 
 extern struct class *comedi_class;
 extern const struct file_operations comedi_fops;
-extern int comedi_autoconfig;
+extern bool comedi_autoconfig;
 extern struct comedi_driver *comedi_drivers;
 
 #endif /* _COMEDI_FOPS_H */
diff --git a/drivers/staging/media/go7007/snd-go7007.c b/drivers/staging/media/go7007/snd-go7007.c
index deac938d8505..d071c838ac2a 100644
--- a/drivers/staging/media/go7007/snd-go7007.c
+++ b/drivers/staging/media/go7007/snd-go7007.c
@@ -38,7 +38,7 @@
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
-static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
+static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
 
 module_param_array(index, int, NULL, 0444);
 module_param_array(id, charp, NULL, 0444);
diff --git a/drivers/staging/media/lirc/lirc_bt829.c b/drivers/staging/media/lirc/lirc_bt829.c
index c5a0d27a02dc..4d20e9f74118 100644
--- a/drivers/staging/media/lirc/lirc_bt829.c
+++ b/drivers/staging/media/lirc/lirc_bt829.c
@@ -53,7 +53,7 @@ static unsigned char do_get_bits(void);
 
 #define DRIVER_NAME "lirc_bt829"
 
-static int debug;
+static bool debug;
 #define dprintk(fmt, args...)						 \
 	do {								 \
 		if (debug)						 \
diff --git a/drivers/staging/media/lirc/lirc_igorplugusb.c b/drivers/staging/media/lirc/lirc_igorplugusb.c
index 6cd4cd67a1dd..7a2501776679 100644
--- a/drivers/staging/media/lirc/lirc_igorplugusb.c
+++ b/drivers/staging/media/lirc/lirc_igorplugusb.c
@@ -62,9 +62,9 @@
 
 /* debugging support */
 #ifdef CONFIG_USB_DEBUG
-static int debug = 1;
+static bool debug = 1;
 #else
-static int debug;
+static bool debug;
 #endif
 
 #define dprintk(fmt, args...)					\
diff --git a/drivers/staging/media/lirc/lirc_parallel.c b/drivers/staging/media/lirc/lirc_parallel.c
index 02b07a6c1771..dd2bca7b56fa 100644
--- a/drivers/staging/media/lirc/lirc_parallel.c
+++ b/drivers/staging/media/lirc/lirc_parallel.c
@@ -63,8 +63,8 @@
 
 /*** Global Variables ***/
 
-static int debug;
-static int check_pselecd;
+static bool debug;
+static bool check_pselecd;
 
 unsigned int irq = LIRC_IRQ;
 unsigned int io = LIRC_PORT;
diff --git a/drivers/staging/media/lirc/lirc_serial.c b/drivers/staging/media/lirc/lirc_serial.c
index 8a060a8a7224..2aac67c98283 100644
--- a/drivers/staging/media/lirc/lirc_serial.c
+++ b/drivers/staging/media/lirc/lirc_serial.c
@@ -107,13 +107,13 @@ struct lirc_serial {
 static int type;
 static int io;
 static int irq;
-static int iommap;
+static bool iommap;
 static int ioshift;
-static int softcarrier = 1;
-static int share_irq;
-static int debug;
+static bool softcarrier = 1;
+static bool share_irq;
+static bool debug;
 static int sense = -1;	/* -1 = auto, 0 = active high, 1 = active low */
-static int txsense;	/* 0 = active high, 1 = active low */
+static bool txsense;	/* 0 = active high, 1 = active low */
 
 #define dprintk(fmt, args...)					\
 	do {							\
diff --git a/drivers/staging/media/lirc/lirc_sir.c b/drivers/staging/media/lirc/lirc_sir.c
index 6903d3992eca..c94382b917ac 100644
--- a/drivers/staging/media/lirc/lirc_sir.c
+++ b/drivers/staging/media/lirc/lirc_sir.c
@@ -173,7 +173,7 @@ static DEFINE_SPINLOCK(hardware_lock);
 static int rx_buf[RBUF_LEN];
 static unsigned int rx_tail, rx_head;
 
-static int debug;
+static bool debug;
 #define dprintk(fmt, args...)						\
 	do {								\
 		if (debug)						\
diff --git a/drivers/staging/media/lirc/lirc_zilog.c b/drivers/staging/media/lirc/lirc_zilog.c
index 0302d82a12f7..76ea4a8f2c75 100644
--- a/drivers/staging/media/lirc/lirc_zilog.c
+++ b/drivers/staging/media/lirc/lirc_zilog.c
@@ -155,8 +155,8 @@ static struct mutex tx_data_lock;
 #define zilog_info(s, args...) printk(KERN_INFO KBUILD_MODNAME ": " s, ## args)
 
 /* module parameters */
-static int debug;	/* debug output */
-static int tx_only;	/* only handle the IR Tx function */
+static bool debug;	/* debug output */
+static bool tx_only;	/* only handle the IR Tx function */
 static int minor = -1;	/* minor number */
 
 #define dprintk(fmt, args...)						\
diff --git a/drivers/staging/quatech_usb2/quatech_usb2.c b/drivers/staging/quatech_usb2/quatech_usb2.c
index 02fafecd4773..897a3a99c794 100644
--- a/drivers/staging/quatech_usb2/quatech_usb2.c
+++ b/drivers/staging/quatech_usb2/quatech_usb2.c
@@ -16,7 +16,7 @@
 #include <linux/usb/serial.h>
 #include <linux/uaccess.h>
 
-static int debug;
+static bool debug;
 
 /* Version Information */
 #define DRIVER_VERSION "v2.00"
diff --git a/drivers/staging/serqt_usb2/serqt_usb2.c b/drivers/staging/serqt_usb2/serqt_usb2.c
index c44e41af2880..1c5780f1571b 100644
--- a/drivers/staging/serqt_usb2/serqt_usb2.c
+++ b/drivers/staging/serqt_usb2/serqt_usb2.c
@@ -16,7 +16,7 @@
 #include <linux/usb/serial.h>
 #include <linux/uaccess.h>
 
-static int debug;
+static bool debug;
 
 /* Version Information */
 #define DRIVER_VERSION "v2.14"
diff --git a/drivers/staging/speakup/speakup.h b/drivers/staging/speakup/speakup.h
index 412b87947f66..e66579e6147a 100644
--- a/drivers/staging/speakup/speakup.h
+++ b/drivers/staging/speakup/speakup.h
@@ -116,7 +116,7 @@ extern int bleep_time, bell_pos;
 extern int spell_delay, key_echo;
 extern short punc_mask;
 extern short pitch_shift, synth_flags;
-extern int quiet_boot;
+extern bool quiet_boot;
 extern char *synth_name;
 extern struct bleep unprocessed_sound;
 
diff --git a/drivers/staging/speakup/synth.c b/drivers/staging/speakup/synth.c
index c241074a4b5e..2222d6919ef5 100644
--- a/drivers/staging/speakup/synth.c
+++ b/drivers/staging/speakup/synth.c
@@ -22,7 +22,7 @@ static struct spk_synth *synths[MAXSYNTHS];
 struct spk_synth *synth;
 char pitch_buff[32] = "";
 static int module_status;
-int quiet_boot;
+bool quiet_boot;
 
 struct speakup_info_t speakup_info = {
 	.spinlock = __SPIN_LOCK_UNLOCKED(speakup_info.spinlock),
diff --git a/drivers/staging/vme/bridges/vme_tsi148.c b/drivers/staging/vme/bridges/vme_tsi148.c
index 08a449b4abf9..f50582169b24 100644
--- a/drivers/staging/vme/bridges/vme_tsi148.c
+++ b/drivers/staging/vme/bridges/vme_tsi148.c
@@ -41,7 +41,7 @@ static void __exit tsi148_exit(void);
 
 
 /* Module parameter */
-static int err_chk;
+static bool err_chk;
 static int geoid;
 
 static const char driver_name[] = "vme_tsi148";
diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c
index 6a1241c7f841..de88aa5566e5 100644
--- a/drivers/tty/rocket.c
+++ b/drivers/tty/rocket.c
@@ -118,7 +118,7 @@ static unsigned long board2;
 static unsigned long board3;
 static unsigned long board4;
 static unsigned long controller;
-static int support_low_speed;
+static bool support_low_speed;
 static unsigned long modem1;
 static unsigned long modem2;
 static unsigned long modem3;
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index e67fb20490d2..ff8017f87914 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -850,7 +850,7 @@ static int mgsl_device_count;
  * .text section address and breakpoint on module load.
  * This is useful for use with gdb and add-symbol-file command.
  */
-static int break_on_load;
+static bool break_on_load;
 
 /*
  * Driver major number, defaults to zero to get auto
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index 0f6b796c95c5..a7efe538df00 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -456,7 +456,7 @@ static int synclinkmp_device_count = 0;
  * .text section address and breakpoint on module load.
  * This is useful for use with gdb and add-symbol-file command.
  */
-static int break_on_load = 0;
+static bool break_on_load = 0;
 
 /*
  * Driver major number, defaults to zero to get auto
diff --git a/drivers/usb/atm/speedtch.c b/drivers/usb/atm/speedtch.c
index b42092e1f164..98dd9e49b684 100644
--- a/drivers/usb/atm/speedtch.c
+++ b/drivers/usb/atm/speedtch.c
@@ -73,9 +73,9 @@ static const char speedtch_driver_name[] = "speedtch";
 #define DEFAULT_SW_BUFFERING	0
 
 static unsigned int altsetting = 0; /* zero means: use the default */
-static int dl_512_first = DEFAULT_DL_512_FIRST;
-static int enable_isoc = DEFAULT_ENABLE_ISOC;
-static int sw_buffering = DEFAULT_SW_BUFFERING;
+static bool dl_512_first = DEFAULT_DL_512_FIRST;
+static bool enable_isoc = DEFAULT_ENABLE_ISOC;
+static bool sw_buffering = DEFAULT_SW_BUFFERING;
 
 #define DEFAULT_B_MAX_DSL	8128
 #define DEFAULT_MODEM_MODE	11
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index 00f171a7a8a0..01ea5d7421d4 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -542,7 +542,7 @@ static int modem_index;
 static unsigned int debug;
 static unsigned int altsetting[NB_MODEM] = {
 				[0 ... (NB_MODEM - 1)] = FASTEST_ISO_INTF};
-static int sync_wait[NB_MODEM];
+static bool sync_wait[NB_MODEM];
 static char *cmv_file[NB_MODEM];
 static int annex[NB_MODEM];
 
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 3af5e2dd1d82..8df4b76465ac 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -93,7 +93,7 @@ struct async {
 	u8 bulk_status;
 };
 
-static int usbfs_snoop;
+static bool usbfs_snoop;
 module_param(usbfs_snoop, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(usbfs_snoop, "true to log all usbfs traffic");
 
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 79d339e2e700..a0613d8f9be7 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -102,7 +102,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khubd_wait);
 static struct task_struct *khubd_task;
 
 /* cycle leds on hubs that aren't blinking for attention */
-static int blinkenlights = 0;
+static bool blinkenlights = 0;
 module_param (blinkenlights, bool, S_IRUGO);
 MODULE_PARM_DESC (blinkenlights, "true to cycle leds on hubs");
 
@@ -131,12 +131,12 @@ MODULE_PARM_DESC(initial_descriptor_timeout,
  * otherwise the new scheme is used.  If that fails and "use_both_schemes"
  * is set, then the driver will make another attempt, using the other scheme.
  */
-static int old_scheme_first = 0;
+static bool old_scheme_first = 0;
 module_param(old_scheme_first, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(old_scheme_first,
 		 "start with the old device initialization scheme");
 
-static int use_both_schemes = 1;
+static bool use_both_schemes = 1;
 module_param(use_both_schemes, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(use_both_schemes,
 		"try the other device initialization scheme if the "
@@ -2026,7 +2026,7 @@ static unsigned hub_is_wusb(struct usb_hub *hub)
 #define SET_ADDRESS_TRIES	2
 #define GET_DESCRIPTOR_TRIES	2
 #define SET_CONFIG_TRIES	(2 * (use_both_schemes + 1))
-#define USE_NEW_SCHEME(i)	((i) / 2 == old_scheme_first)
+#define USE_NEW_SCHEME(i)	((i) / 2 == (int)old_scheme_first)
 
 #define HUB_ROOT_RESET_TIME	50	/* times are in msec */
 #define HUB_SHORT_RESET_TIME	10
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 1382c90d0834..8ca9f994a280 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -47,7 +47,7 @@
 
 const char *usbcore_name = "usbcore";
 
-static int nousb;	/* Disable USB when built into kernel image */
+static bool nousb;	/* Disable USB when built into kernel image */
 
 #ifdef	CONFIG_USB_SUSPEND
 static int usb_autosuspend_delay = 2;		/* Default delay value,
diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c
index e9a2c5c44454..c16ff55a74e8 100644
--- a/drivers/usb/gadget/amd5536udc.c
+++ b/drivers/usb/gadget/amd5536udc.c
@@ -152,15 +152,15 @@ static const char *ep_string[] = {
 };
 
 /* DMA usage flag */
-static int use_dma = 1;
+static bool use_dma = 1;
 /* packet per buffer dma */
-static int use_dma_ppb = 1;
+static bool use_dma_ppb = 1;
 /* with per descr. update */
-static int use_dma_ppb_du;
+static bool use_dma_ppb_du;
 /* buffer fill mode */
 static int use_dma_bufferfill_mode;
 /* full speed only mode */
-static int use_fullspeed;
+static bool use_fullspeed;
 /* tx buffer size for high speed */
 static unsigned long hs_tx_buf = UDC_EPIN_BUFF_SIZE;
 
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index 0cd764d59351..a28f6ffcd0f3 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -250,9 +250,9 @@ static struct usb_configuration rndis_config_driver = {
 /*-------------------------------------------------------------------------*/
 
 #ifdef CONFIG_USB_ETH_EEM
-static int use_eem = 1;
+static bool use_eem = 1;
 #else
-static int use_eem;
+static bool use_eem;
 #endif
 module_param(use_eem, bool, 0);
 MODULE_PARM_DESC(use_eem, "use CDC EEM mode");
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index e0f30fc70e45..47766f0e7caa 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -303,16 +303,16 @@ MODULE_LICENSE("Dual BSD/GPL");
 static struct {
 	char		*file[FSG_MAX_LUNS];
 	char		*serial;
-	int		ro[FSG_MAX_LUNS];
-	int		nofua[FSG_MAX_LUNS];
+	bool		ro[FSG_MAX_LUNS];
+	bool		nofua[FSG_MAX_LUNS];
 	unsigned int	num_filenames;
 	unsigned int	num_ros;
 	unsigned int	num_nofuas;
 	unsigned int	nluns;
 
-	int		removable;
-	int		can_stall;
-	int		cdrom;
+	bool		removable;
+	bool		can_stall;
+	bool		cdrom;
 
 	char		*transport_parm;
 	char		*protocol_parm;
diff --git a/drivers/usb/gadget/net2272.c b/drivers/usb/gadget/net2272.c
index 4c81d540bc26..7322d293213e 100644
--- a/drivers/usb/gadget/net2272.c
+++ b/drivers/usb/gadget/net2272.c
@@ -69,7 +69,7 @@ static const char * const ep_name[] = {
  *
  * If use_dma is disabled, pio will be used instead.
  */
-static int use_dma = 0;
+static bool use_dma = 0;
 module_param(use_dma, bool, 0644);
 
 /*
diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c
index cf1f36454d08..cdedd1336745 100644
--- a/drivers/usb/gadget/net2280.c
+++ b/drivers/usb/gadget/net2280.c
@@ -90,8 +90,8 @@ static const char *const ep_name [] = {
  * Some gadget drivers work better with the dma support here than others.
  * These two parameters let you use PIO or more aggressive DMA.
  */
-static int use_dma = 1;
-static int use_dma_chaining = 0;
+static bool use_dma = 1;
+static bool use_dma_chaining = 0;
 
 /* "modprobe net2280 use_dma=n" etc */
 module_param (use_dma, bool, S_IRUGO);
@@ -112,7 +112,7 @@ module_param (fifo_mode, ushort, 0644);
  * USB suspend requests will be ignored.  This is acceptable for
  * self-powered devices
  */
-static int enable_suspend = 0;
+static bool enable_suspend = 0;
 
 /* "modprobe net2280 enable_suspend=1" etc */
 module_param (enable_suspend, bool, S_IRUGO);
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index 7db5bbe6251b..576cd8578b45 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -98,7 +98,7 @@ module_param (fifo_mode, uint, 0);
 MODULE_PARM_DESC (fifo_mode, "endpoint configuration");
 
 #ifdef	USE_DMA
-static unsigned use_dma = 1;
+static bool use_dma = 1;
 
 /* "modprobe omap_udc use_dma=y", or else as a kernel
  * boot parameter "omap_udc:use_dma=y"
diff --git a/drivers/usb/gadget/pch_udc.c b/drivers/usb/gadget/pch_udc.c
index dd2313cce1d3..a3fcaae4bc2a 100644
--- a/drivers/usb/gadget/pch_udc.c
+++ b/drivers/usb/gadget/pch_udc.c
@@ -359,7 +359,7 @@ struct pch_udc_dev {
 static const char	ep0_string[] = "ep0in";
 static DEFINE_SPINLOCK(udc_stall_spinlock);	/* stall spin lock */
 struct pch_udc_dev *pch_udc;		/* pointer to device object */
-static int speed_fs;
+static bool speed_fs;
 module_param_named(speed_fs, speed_fs, bool, S_IRUGO);
 MODULE_PARM_DESC(speed_fs, "true for Full speed operation");
 
diff --git a/drivers/usb/gadget/serial.c b/drivers/usb/gadget/serial.c
index ed1b816e58d8..ad9e5b2df642 100644
--- a/drivers/usb/gadget/serial.c
+++ b/drivers/usb/gadget/serial.c
@@ -123,11 +123,11 @@ MODULE_AUTHOR("Al Borchers");
 MODULE_AUTHOR("David Brownell");
 MODULE_LICENSE("GPL");
 
-static int use_acm = true;
+static bool use_acm = true;
 module_param(use_acm, bool, 0);
 MODULE_PARM_DESC(use_acm, "Use CDC ACM, default=yes");
 
-static int use_obex = false;
+static bool use_obex = false;
 module_param(use_obex, bool, 0);
 MODULE_PARM_DESC(use_obex, "Use CDC OBEX, default=no");
 
diff --git a/drivers/usb/gadget/zero.c b/drivers/usb/gadget/zero.c
index 20697cc132d1..31d34832907e 100644
--- a/drivers/usb/gadget/zero.c
+++ b/drivers/usb/gadget/zero.c
@@ -81,7 +81,7 @@ module_param(buflen, uint, 0);
  * work better with hosts where config changes are problematic or
  * controllers (like original superh) that only support one config.
  */
-static int loopdefault = 0;
+static bool loopdefault = 0;
 module_param(loopdefault, bool, S_IRUGO|S_IWUSR);
 
 /*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index e311a511529b..a007a9fe0f87 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -112,7 +112,7 @@ module_param (park, uint, S_IRUGO);
 MODULE_PARM_DESC (park, "park setting; 1-3 back-to-back async packets");
 
 /* for flakey hardware, ignore overcurrent indicators */
-static int ignore_oc = 0;
+static bool ignore_oc = 0;
 module_param (ignore_oc, bool, S_IRUGO);
 MODULE_PARM_DESC (ignore_oc, "ignore bogus hardware overcurrent indications");
 
diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index 5f5a63241436..34b9edd86651 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -115,13 +115,13 @@ static inline void sb800_prefetch(struct ohci_hcd *ohci, int on)
 
 
 /* Some boards misreport power switching/overcurrent */
-static int distrust_firmware = 1;
+static bool distrust_firmware = 1;
 module_param (distrust_firmware, bool, 0);
 MODULE_PARM_DESC (distrust_firmware,
 	"true to distrust firmware power/overcurrent setup");
 
 /* Some boards leave IR set wrongly, since they fail BIOS/SMM handshakes */
-static int no_handshake = 0;
+static bool no_handshake = 0;
 module_param (no_handshake, bool, 0);
 MODULE_PARM_DESC (no_handshake, "true (not default) disables BIOS handshake");
 
diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c
index 6f62de5c6e35..015c7c62ed49 100644
--- a/drivers/usb/host/oxu210hp-hcd.c
+++ b/drivers/usb/host/oxu210hp-hcd.c
@@ -233,7 +233,7 @@ module_param(park, uint, S_IRUGO);
 MODULE_PARM_DESC(park, "park setting; 1-3 back-to-back async packets");
 
 /* For flakey hardware, ignore overcurrent indicators */
-static int ignore_oc;
+static bool ignore_oc;
 module_param(ignore_oc, bool, S_IRUGO);
 MODULE_PARM_DESC(ignore_oc, "ignore bogus hardware overcurrent indications");
 
diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c
index 533d12cca371..16dd6a6abf00 100644
--- a/drivers/usb/host/u132-hcd.c
+++ b/drivers/usb/host/u132-hcd.c
@@ -74,7 +74,7 @@ MODULE_LICENSE("GPL");
 #define INT_MODULE_PARM(n, v) static int n = v;module_param(n, int, 0444)
 INT_MODULE_PARM(testing, 0);
 /* Some boards misreport power switching/overcurrent*/
-static int distrust_firmware = 1;
+static bool distrust_firmware = 1;
 module_param(distrust_firmware, bool, 0);
 MODULE_PARM_DESC(distrust_firmware, "true to distrust firmware power/overcurren"
 	"t setup");
diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
index c8ae199cfbb8..6b5eb1017e2c 100644
--- a/drivers/usb/host/uhci-hcd.c
+++ b/drivers/usb/host/uhci-hcd.c
@@ -59,7 +59,7 @@
 #define DRIVER_DESC "USB Universal Host Controller Interface driver"
 
 /* for flakey hardware, ignore overcurrent indicators */
-static int ignore_oc;
+static bool ignore_oc;
 module_param(ignore_oc, bool, S_IRUGO);
 MODULE_PARM_DESC(ignore_oc, "ignore hardware overcurrent indications");
 
diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c
index 2dbe600fbc11..a4a3c7cd4a11 100644
--- a/drivers/usb/misc/ftdi-elan.c
+++ b/drivers/usb/misc/ftdi-elan.c
@@ -53,7 +53,7 @@ MODULE_AUTHOR("Tony Olech");
 MODULE_DESCRIPTION("FTDI ELAN driver");
 MODULE_LICENSE("GPL");
 #define INT_MODULE_PARM(n, v) static int n = v;module_param(n, int, 0444)
-static int distrust_firmware = 1;
+static bool distrust_firmware = 1;
 module_param(distrust_firmware, bool, 0);
 MODULE_PARM_DESC(distrust_firmware, "true to distrust firmware power/overcurren"
         "t setup");
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index 2453a39b4794..4fd0dc835ae5 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -62,7 +62,7 @@ MODULE_LICENSE("GPL");
 
 /* Module parameters */
 static DEFINE_MUTEX(iowarrior_mutex);
-static int debug = 0;
+static bool debug = 0;
 module_param(debug, bool, 0644);
 MODULE_PARM_DESC(debug, "debug=1 enables debugging messages");
 
diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c
index 53be7aef6308..66bc376005d2 100644
--- a/drivers/usb/musb/cppi_dma.c
+++ b/drivers/usb/musb/cppi_dma.c
@@ -750,7 +750,7 @@ cppi_next_tx_segment(struct musb *musb, struct cppi_channel *tx)
  * So this module parameter lets the heuristic be disabled.  When using
  * gadgetfs, the heuristic will probably need to be disabled.
  */
-static int cppi_rx_rndis = 1;
+static bool cppi_rx_rndis = 1;
 
 module_param(cppi_rx_rndis, bool, 0);
 MODULE_PARM_DESC(cppi_rx_rndis, "enable/disable RX RNDIS heuristic");
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index f6ff7923048b..56cf0243979e 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -1586,7 +1586,7 @@ irqreturn_t musb_interrupt(struct musb *musb)
 EXPORT_SYMBOL_GPL(musb_interrupt);
 
 #ifndef CONFIG_MUSB_PIO_ONLY
-static int __initdata use_dma = 1;
+static bool __initdata use_dma = 1;
 
 /* "modprobe ... use_dma=0" etc */
 module_param(use_dma, bool, 0);
diff --git a/drivers/usb/serial/aircable.c b/drivers/usb/serial/aircable.c
index b43d07df4c44..123bf9155339 100644
--- a/drivers/usb/serial/aircable.c
+++ b/drivers/usb/serial/aircable.c
@@ -52,7 +52,7 @@
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
 
-static int debug;
+static bool debug;
 
 /* Vendor and Product ID */
 #define AIRCABLE_VID		0x16CA
diff --git a/drivers/usb/serial/ark3116.c b/drivers/usb/serial/ark3116.c
index 18e875b92e00..69328dcfd91a 100644
--- a/drivers/usb/serial/ark3116.c
+++ b/drivers/usb/serial/ark3116.c
@@ -37,7 +37,7 @@
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 
-static int debug;
+static bool debug;
 /*
  * Version information
  */
diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c
index f9f29b289f2f..29ffeb6279c7 100644
--- a/drivers/usb/serial/belkin_sa.c
+++ b/drivers/usb/serial/belkin_sa.c
@@ -37,7 +37,7 @@
 #include <linux/usb/serial.h>
 #include "belkin_sa.h"
 
-static int debug;
+static bool debug;
 
 /*
  * Version Information
diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 0e77511060c0..5e53cc59e652 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -70,7 +70,7 @@
 #define CH341_NBREAK_BITS_REG2 0x40
 
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x4348, 0x5523) },
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index adfe660ed008..fba1147ed916 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -49,7 +49,7 @@ static void cp210x_break_ctl(struct tty_struct *, int);
 static int cp210x_startup(struct usb_serial *);
 static void cp210x_dtr_rts(struct usb_serial_port *p, int on);
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x045B, 0x0053) }, /* Renesas RX610 RX-Stick */
diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c
index 98bf83349838..6bc3802a581a 100644
--- a/drivers/usb/serial/cyberjack.c
+++ b/drivers/usb/serial/cyberjack.c
@@ -43,7 +43,7 @@
 
 #define CYBERJACK_LOCAL_BUF_SIZE 32
 
-static int debug;
+static bool debug;
 
 /*
  * Version Information
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index 07680d6b792b..3bdeafa29c24 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -46,10 +46,10 @@
 #include "cypress_m8.h"
 
 
-static int debug;
-static int stats;
+static bool debug;
+static bool stats;
 static int interval;
-static int unstable_bauds;
+static bool unstable_bauds;
 
 /*
  * Version Information
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index 6d26a77d0f2a..b23bebd721a1 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -251,7 +251,7 @@ static int digi_read_oob_callback(struct urb *urb);
 
 /* Statics */
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table_combined[] = {
 	{ USB_DEVICE(DIGI_VENDOR_ID, DIGI_2_ID) },
diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c
index 504b5585ea45..aced6817bf95 100644
--- a/drivers/usb/serial/empeg.c
+++ b/drivers/usb/serial/empeg.c
@@ -28,7 +28,7 @@
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
 
-static int debug;
+static bool debug;
 
 /*
  * Version Information
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index c290df97108e..01b6404df395 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -55,7 +55,7 @@
 #define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>, Bill Ryder <bryder@sgi.com>, Kuba Ober <kuba@mareimbrium.org>, Andreas Mohr, Johan Hovold <jhovold@gmail.com>"
 #define DRIVER_DESC "USB FTDI Serial Converters Driver"
 
-static int debug;
+static bool debug;
 static __u16 vendor = FTDI_VID;
 static __u16 product;
 
diff --git a/drivers/usb/serial/funsoft.c b/drivers/usb/serial/funsoft.c
index e21ce9ddfc63..5d4b099dcf8b 100644
--- a/drivers/usb/serial/funsoft.c
+++ b/drivers/usb/serial/funsoft.c
@@ -16,7 +16,7 @@
 #include <linux/usb/serial.h>
 #include <linux/uaccess.h>
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x1404, 0xcddc) },
diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c
index bf12565f8e87..21343378c322 100644
--- a/drivers/usb/serial/garmin_gps.c
+++ b/drivers/usb/serial/garmin_gps.c
@@ -42,7 +42,7 @@
 static int initial_mode = 1;
 
 /* debug flag */
-static int debug;
+static bool debug;
 
 #define GARMIN_VENDOR_ID             0x091E
 
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index abd2ee2b2f99..0497575e4799 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -191,7 +191,7 @@ static const struct divisor_table_entry divisor_table[] = {
 };
 
 /* local variables */
-static int debug;
+static bool debug;
 
 static atomic_t CmdUrbs;	/* Number of outstanding Command Write Urbs */
 
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index e44d375edaad..65bf06aa591a 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -210,10 +210,10 @@ static unsigned char OperationalMajorVersion;
 static unsigned char OperationalMinorVersion;
 static unsigned short OperationalBuildNumber;
 
-static int debug;
+static bool debug;
 
 static int closing_wait = EDGE_CLOSING_WAIT;
-static int ignore_cpu_rev;
+static bool ignore_cpu_rev;
 static int default_uart_mode;		/* RS232 */
 
 static void edge_tty_recv(struct device *dev, struct tty_struct *tty,
diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c
index 36f5cbe90485..06053a920dd8 100644
--- a/drivers/usb/serial/ipaq.c
+++ b/drivers/usb/serial/ipaq.c
@@ -34,7 +34,7 @@
 #define DRIVER_DESC "USB PocketPC PDA driver"
 
 static __u16 product, vendor;
-static int debug;
+static bool debug;
 static int connect_retries = KP_RETRIES;
 static int initial_wait;
 
diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c
index 5170799d6e94..6f9356f3f99e 100644
--- a/drivers/usb/serial/ipw.c
+++ b/drivers/usb/serial/ipw.c
@@ -147,7 +147,7 @@ static struct usb_driver usb_ipw_driver = {
 	.no_dynamic_id = 	1,
 };
 
-static int debug;
+static bool debug;
 
 static int ipw_open(struct tty_struct *tty, struct usb_serial_port *port)
 {
diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index 0c537da0d3cd..84a396e83671 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -45,7 +45,7 @@
 #define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>, Johan Hovold <jhovold@gmail.com>"
 #define DRIVER_DESC "USB IR Dongle driver"
 
-static int debug;
+static bool debug;
 
 /* if overridden by the user, then use their value for the size of the read and
  * write urbs */
diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 64d0ffd4440b..3077a4436976 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -34,9 +34,9 @@
 
 
 #ifdef CONFIG_USB_SERIAL_DEBUG
-static int debug = 1;
+static bool debug = 1;
 #else
-static int debug;
+static bool debug;
 #endif
 
 /*
@@ -65,7 +65,7 @@ static int clockmode = 1;
 static int cdmode = 1;
 static int iuu_cardin;
 static int iuu_cardout;
-static int xmas;
+static bool xmas;
 static int vcc_default = 5;
 
 static void read_rxcmd_callback(struct urb *urb);
diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
index bc8dc203e818..4cc36c761801 100644
--- a/drivers/usb/serial/keyspan.c
+++ b/drivers/usb/serial/keyspan.c
@@ -45,7 +45,7 @@
 #include <linux/usb/serial.h>
 #include "keyspan.h"
 
-static int debug;
+static bool debug;
 
 /*
  * Version Information
diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index a40615674a68..7c62a7048302 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -31,7 +31,7 @@
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
 
-static int debug;
+static bool debug;
 
 /* make a simple define to handle if we are compiling keyspan_pda or xircom support */
 #if defined(CONFIG_USB_SERIAL_KEYSPAN_PDA) || defined(CONFIG_USB_SERIAL_KEYSPAN_PDA_MODULE)
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index 19373cb7c5bf..fc064e1442ca 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -49,7 +49,7 @@
 #include <linux/usb/serial.h>
 #include "kl5kusb105.h"
 
-static int debug;
+static bool debug;
 
 /*
  * Version Information
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index a975bb80303f..27fa9c8a77b0 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -45,7 +45,7 @@
 #define DRIVER_AUTHOR "Wolfgang Grandegger <wolfgang@ces.ch>"
 #define DRIVER_DESC "Magic Control Technology USB-RS232 converter driver"
 
-static int debug;
+static bool debug;
 
 /*
  * Function prototypes
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index 19d112f51b97..4554ee49e635 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -71,7 +71,7 @@ struct moschip_port {
 	struct urb		*write_urb_pool[NUM_URBS];
 };
 
-static int debug;
+static bool debug;
 
 static struct usb_serial_driver moschip7720_2port_driver;
 
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index 55cfd6265b98..03b5e249e95e 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -263,7 +263,7 @@ struct moschip_port {
 };
 
 
-static int debug;
+static bool debug;
 
 /*
  * mos7840_set_reg_sync
diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c
index 1f00f243c26c..b28f1db0195f 100644
--- a/drivers/usb/serial/navman.c
+++ b/drivers/usb/serial/navman.c
@@ -21,7 +21,7 @@
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x0a99, 0x0001) },	/* Talon Technology device */
diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index 45a8c55881d3..8b8d58a2ac12 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -23,7 +23,7 @@
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
 
-static int debug;
+static bool debug;
 
 /*
  * Version Information
diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c
index 691f57a9d712..262ded9e076b 100644
--- a/drivers/usb/serial/opticon.c
+++ b/drivers/usb/serial/opticon.c
@@ -32,7 +32,7 @@
  * an examples of 1D barcode types are EAN, UPC, Code39, IATA etc.. */
 #define DRIVER_DESC	"Opticon USB barcode to serial driver (1D)"
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x065a, 0x0009) },
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index c96b6b6509fb..420d9857394a 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1234,7 +1234,7 @@ static struct usb_serial_driver option_1port_device = {
 #endif
 };
 
-static int debug;
+static bool debug;
 
 /* per port private data */
 
diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c
index 2161d1c3c089..e287fd32682c 100644
--- a/drivers/usb/serial/oti6858.c
+++ b/drivers/usb/serial/oti6858.c
@@ -74,7 +74,7 @@ static struct usb_driver oti6858_driver = {
 	.no_dynamic_id = 	1,
 };
 
-static int debug;
+static bool debug;
 
 /* requests */
 #define	OTI6858_REQ_GET_STATUS		(USB_DIR_IN | USB_TYPE_VENDOR | 0x00)
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 329295615d06..3d8cda57ce7a 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -36,7 +36,7 @@
  */
 #define DRIVER_DESC "Prolific PL2303 USB to serial adaptor driver"
 
-static int debug;
+static bool debug;
 
 #define PL2303_CLOSING_WAIT	(30*HZ)
 
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index aa9367f5b421..1d5deee3be52 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -22,7 +22,7 @@
 #define DRIVER_AUTHOR "Qualcomm Inc"
 #define DRIVER_DESC "Qualcomm USB Serial driver"
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table[] = {
 	{USB_DEVICE(0x05c6, 0x9211)},	/* Acer Gobi QDL device */
diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c
index a36e2313eed0..d074b3740dcb 100644
--- a/drivers/usb/serial/safe_serial.c
+++ b/drivers/usb/serial/safe_serial.c
@@ -81,9 +81,9 @@
 #define CONFIG_USB_SERIAL_SAFE_PADDED 0
 #endif
 
-static int debug;
-static int safe = 1;
-static int padded = CONFIG_USB_SERIAL_SAFE_PADDED;
+static bool debug;
+static bool safe = 1;
+static bool padded = CONFIG_USB_SERIAL_SAFE_PADDED;
 
 #define DRIVER_VERSION "v0.1"
 #define DRIVER_AUTHOR "sl@lineo.com, tbr@lineo.com, Johan Hovold <jhovold@gmail.com>"
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index f2485429172f..fdae0a4407cb 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -46,8 +46,8 @@
    allocations > PAGE_SIZE and the number of packets in a page
    is an integer 512 is the largest possible packet on EHCI */
 
-static int debug;
-static int nmea;
+static bool debug;
+static bool nmea;
 
 /* Used in interface blacklisting */
 struct sierra_iface_info {
diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c
index 180ea6c7911c..d7f5eee18f00 100644
--- a/drivers/usb/serial/spcp8x5.c
+++ b/drivers/usb/serial/spcp8x5.c
@@ -33,7 +33,7 @@
 #define DRIVER_VERSION	"v0.10"
 #define DRIVER_DESC 	"SPCP8x5 USB to serial adaptor driver"
 
-static int debug;
+static bool debug;
 
 #define SPCP8x5_007_VID		0x04FC
 #define SPCP8x5_007_PID		0x0201
diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c
index 87362e48796e..7697858d8858 100644
--- a/drivers/usb/serial/ssu100.c
+++ b/drivers/usb/serial/ssu100.c
@@ -46,7 +46,7 @@
 #define FULLPWRBIT          0x00000080
 #define NEXT_BOARD_POWER_BIT        0x00000004
 
-static int debug;
+static bool debug;
 
 /* Version Information */
 #define DRIVER_VERSION "v0.1"
diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c
index c70cc012d03f..50651cf4fc61 100644
--- a/drivers/usb/serial/symbolserial.c
+++ b/drivers/usb/serial/symbolserial.c
@@ -20,7 +20,7 @@
 #include <linux/usb/serial.h>
 #include <linux/uaccess.h>
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x05e0, 0x0600) },
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 4af21f46096e..8468eb769a29 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -150,7 +150,7 @@ static int ti_download_firmware(struct ti_device *tdev);
 /* Data */
 
 /* module parameters */
-static int debug;
+static bool debug;
 static int closing_wait = TI_DEFAULT_CLOSING_WAIT;
 static ushort vendor_3410[TI_EXTRA_VID_PID_COUNT];
 static unsigned int vendor_3410_count;
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index ce6c1a65a544..611b206591cb 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -61,7 +61,7 @@ static struct usb_driver usb_serial_driver = {
    drivers depend on it.
 */
 
-static int debug;
+static bool debug;
 /* initially all NULL */
 static struct usb_serial *serial_table[SERIAL_TTY_MINORS];
 static DEFINE_MUTEX(table_lock);
diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c
index d555ca9567b8..c88657dd31c8 100644
--- a/drivers/usb/serial/usb_wwan.c
+++ b/drivers/usb/serial/usb_wwan.c
@@ -37,7 +37,7 @@
 #include <linux/serial.h>
 #include "usb-wwan.h"
 
-static int debug;
+static bool debug;
 
 void usb_wwan_dtr_rts(struct usb_serial_port *port, int on)
 {
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index 1c11959a7d58..210e4b10dc11 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -52,7 +52,7 @@ static int palm_os_4_probe(struct usb_serial *serial,
 					const struct usb_device_id *id);
 
 /* Parameters that may be passed into the module. */
-static int debug;
+static bool debug;
 static __u16 vendor;
 static __u16 product;
 
diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index 11af903cb09f..7e0acf5c8e38 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -36,7 +36,7 @@
 #include <linux/ihex.h>
 #include "whiteheat.h"			/* WhiteHEAT specific commands */
 
-static int debug;
+static bool debug;
 
 #ifndef CMSPAR
 #define CMSPAR 0
diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index 44bdce4242ad..622f12b62a47 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -301,9 +301,9 @@ static struct fb_ops atyfb_ops = {
 	.fb_sync	= atyfb_sync,
 };
 
-static int noaccel;
+static bool noaccel;
 #ifdef CONFIG_MTRR
-static int nomtrr;
+static bool nomtrr;
 #endif
 static int vram;
 static int pll;
diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c
index 150684882ef7..ce1506b75adf 100644
--- a/drivers/video/aty/radeon_base.c
+++ b/drivers/video/aty/radeon_base.c
@@ -263,19 +263,19 @@ static reg_val common_regs[] = {
         
 static char *mode_option;
 static char *monitor_layout;
-static int noaccel = 0;
+static bool noaccel = 0;
 static int default_dynclk = -2;
-static int nomodeset = 0;
-static int ignore_edid = 0;
-static int mirror = 0;
+static bool nomodeset = 0;
+static bool ignore_edid = 0;
+static bool mirror = 0;
 static int panel_yres = 0;
-static int force_dfp = 0;
-static int force_measure_pll = 0;
+static bool force_dfp = 0;
+static bool force_measure_pll = 0;
 #ifdef CONFIG_MTRR
-static int nomtrr = 0;
+static bool nomtrr = 0;
 #endif
-static int force_sleep;
-static int ignore_devlist;
+static bool force_sleep;
+static bool ignore_devlist;
 #ifdef CONFIG_PMAC_BACKLIGHT
 static int backlight = 1;
 #else
diff --git a/drivers/video/cirrusfb.c b/drivers/video/cirrusfb.c
index 6df7c54db0a3..6fb499e7678f 100644
--- a/drivers/video/cirrusfb.c
+++ b/drivers/video/cirrusfb.c
@@ -350,7 +350,7 @@ struct cirrusfb_info {
 	void (*unmap)(struct fb_info *info);
 };
 
-static int noaccel __devinitdata;
+static bool noaccel __devinitdata;
 static char *mode_option __devinitdata = "640x480@60";
 
 /****************************************************************************/
diff --git a/drivers/video/hgafb.c b/drivers/video/hgafb.c
index 4394389caf68..c645f9282650 100644
--- a/drivers/video/hgafb.c
+++ b/drivers/video/hgafb.c
@@ -133,7 +133,7 @@ static struct fb_fix_screeninfo hga_fix __devinitdata = {
 /* Don't assume that tty1 will be the initial current console. */
 static int release_io_port = 0;
 static int release_io_ports = 0;
-static int nologo = 0;
+static bool nologo = 0;
 
 /* -------------------------------------------------------------------------
  *
diff --git a/drivers/video/intelfb/intelfbdrv.c b/drivers/video/intelfb/intelfbdrv.c
index 5ba399991050..c94c91fd8668 100644
--- a/drivers/video/intelfb/intelfbdrv.c
+++ b/drivers/video/intelfb/intelfbdrv.c
@@ -230,15 +230,15 @@ MODULE_DESCRIPTION("Framebuffer driver for Intel(R) " SUPPORTED_CHIPSETS
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DEVICE_TABLE(pci, intelfb_pci_table);
 
-static int accel        = 1;
+static bool accel       = 1;
 static int vram         = 4;
-static int hwcursor     = 0;
-static int mtrr         = 1;
-static int fixed        = 0;
-static int noinit       = 0;
-static int noregister   = 0;
-static int probeonly    = 0;
-static int idonly       = 0;
+static bool hwcursor    = 0;
+static bool mtrr        = 1;
+static bool fixed       = 0;
+static bool noinit      = 0;
+static bool noregister  = 0;
+static bool probeonly   = 0;
+static bool idonly      = 0;
 static int bailearly    = 0;
 static int voffset	= 48;
 static char *mode       = NULL;
diff --git a/drivers/video/logo/logo.c b/drivers/video/logo/logo.c
index ea7a8ccc830c..080c35b34bbb 100644
--- a/drivers/video/logo/logo.c
+++ b/drivers/video/logo/logo.c
@@ -21,7 +21,7 @@
 #include <asm/bootinfo.h>
 #endif
 
-static int nologo;
+static bool nologo;
 module_param(nologo, bool, 0);
 MODULE_PARM_DESC(nologo, "Disables startup logo");
 
diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c
index feea7b1dc386..fb3f67391105 100644
--- a/drivers/video/neofb.c
+++ b/drivers/video/neofb.c
@@ -84,11 +84,11 @@
 
 /* --------------------------------------------------------------------- */
 
-static int internal;
-static int external;
-static int libretto;
-static int nostretch;
-static int nopciburst;
+static bool internal;
+static bool external;
+static bool libretto;
+static bool nostretch;
+static bool nopciburst;
 static char *mode_option __devinitdata = NULL;
 
 #ifdef MODULE
diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c
index 25d8e5103193..b291bfaac80e 100644
--- a/drivers/video/omap/omapfb_main.c
+++ b/drivers/video/omap/omapfb_main.c
@@ -47,9 +47,9 @@ static unsigned int	def_rotate;
 static unsigned int	def_mirror;
 
 #ifdef CONFIG_FB_OMAP_MANUAL_UPDATE
-static int		manual_update = 1;
+static bool		manual_update = 1;
 #else
-static int		manual_update;
+static bool		manual_update;
 #endif
 
 static struct platform_device	*fbdev_pdev;
diff --git a/drivers/video/omap2/dss/core.c b/drivers/video/omap2/dss/core.c
index 86ec12e16c7c..da7b18576549 100644
--- a/drivers/video/omap2/dss/core.c
+++ b/drivers/video/omap2/dss/core.c
@@ -50,7 +50,7 @@ module_param_named(def_disp, def_disp_name, charp, 0);
 MODULE_PARM_DESC(def_disp, "default display name");
 
 #ifdef DEBUG
-unsigned int dss_debug;
+bool dss_debug;
 module_param_named(debug, dss_debug, bool, 0644);
 #endif
 
diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c
index 5abf8e7e7456..46f37883e499 100644
--- a/drivers/video/omap2/dss/dsi.c
+++ b/drivers/video/omap2/dss/dsi.c
@@ -340,8 +340,8 @@ struct dsi_packet_sent_handler_data {
 static struct platform_device *dsi_pdev_map[MAX_NUM_DSI];
 
 #ifdef DEBUG
-static unsigned int dsi_perf;
-module_param_named(dsi_perf, dsi_perf, bool, 0644);
+static bool dsi_perf;
+module_param(dsi_perf, bool, 0644);
 #endif
 
 static inline struct dsi_data *dsi_get_dsidrv_data(struct platform_device *dsidev)
diff --git a/drivers/video/omap2/dss/dss.h b/drivers/video/omap2/dss/dss.h
index 6308fc59fc9e..57a52eecee91 100644
--- a/drivers/video/omap2/dss/dss.h
+++ b/drivers/video/omap2/dss/dss.h
@@ -28,7 +28,7 @@
 #endif
 
 #ifdef DEBUG
-extern unsigned int dss_debug;
+extern bool dss_debug;
 #ifdef DSS_SUBSYS_NAME
 #define DSSDBG(format, ...) \
 	if (dss_debug) \
diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c
index 70aa47de7146..68ba1f800082 100644
--- a/drivers/video/omap2/omapfb/omapfb-main.c
+++ b/drivers/video/omap2/omapfb/omapfb-main.c
@@ -43,18 +43,18 @@
 
 static char *def_mode;
 static char *def_vram;
-static int def_vrfb;
+static bool def_vrfb;
 static int def_rotate;
-static int def_mirror;
+static bool def_mirror;
 static bool auto_update;
 static unsigned int auto_update_freq;
 module_param(auto_update, bool, 0);
 module_param(auto_update_freq, uint, 0644);
 
 #ifdef DEBUG
-unsigned int omapfb_debug;
+bool omapfb_debug;
 module_param_named(debug, omapfb_debug, bool, 0644);
-static unsigned int omapfb_test_pattern;
+static bool omapfb_test_pattern;
 module_param_named(test, omapfb_test_pattern, bool, 0644);
 #endif
 
diff --git a/drivers/video/omap2/omapfb/omapfb.h b/drivers/video/omap2/omapfb/omapfb.h
index fdf0edeccf4e..e12d384ea520 100644
--- a/drivers/video/omap2/omapfb/omapfb.h
+++ b/drivers/video/omap2/omapfb/omapfb.h
@@ -32,7 +32,7 @@
 #include <video/omapdss.h>
 
 #ifdef DEBUG
-extern unsigned int omapfb_debug;
+extern bool omapfb_debug;
 #define DBG(format, ...) \
 	do { \
 		if (omapfb_debug) \
diff --git a/drivers/video/pm2fb.c b/drivers/video/pm2fb.c
index dc7bfa91e57a..df31a24a5026 100644
--- a/drivers/video/pm2fb.c
+++ b/drivers/video/pm2fb.c
@@ -78,12 +78,12 @@ static char *mode_option __devinitdata;
  * these flags allow the user to specify that requests for +ve sync
  * should be silently turned in -ve sync.
  */
-static int lowhsync;
-static int lowvsync;
-static int noaccel __devinitdata;
+static bool lowhsync;
+static bool lowvsync;
+static bool noaccel __devinitdata;
 /* mtrr option */
 #ifdef CONFIG_MTRR
-static int nomtrr __devinitdata;
+static bool nomtrr __devinitdata;
 #endif
 
 /*
diff --git a/drivers/video/pm3fb.c b/drivers/video/pm3fb.c
index 6632ee5ecb7e..055e527a8e45 100644
--- a/drivers/video/pm3fb.c
+++ b/drivers/video/pm3fb.c
@@ -57,11 +57,11 @@
  */
 static int hwcursor = 1;
 static char *mode_option __devinitdata;
-static int noaccel __devinitdata;
+static bool noaccel __devinitdata;
 
 /* mtrr option */
 #ifdef CONFIG_MTRR
-static int nomtrr __devinitdata;
+static bool nomtrr __devinitdata;
 #endif
 
 /*
diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c
index d8ab7be4fd6b..2f58cf9c813b 100644
--- a/drivers/video/riva/fbdev.c
+++ b/drivers/video/riva/fbdev.c
@@ -207,9 +207,9 @@ MODULE_DEVICE_TABLE(pci, rivafb_pci_tbl);
 /* command line data, set in rivafb_setup() */
 static int flatpanel __devinitdata = -1; /* Autodetect later */
 static int forceCRTC __devinitdata = -1;
-static int noaccel   __devinitdata = 0;
+static bool noaccel  __devinitdata = 0;
 #ifdef CONFIG_MTRR
-static int nomtrr __devinitdata = 0;
+static bool nomtrr __devinitdata = 0;
 #endif
 #ifdef CONFIG_PMAC_BACKLIGHT
 static int backlight __devinitdata = 1;
@@ -218,7 +218,7 @@ static int backlight __devinitdata = 0;
 #endif
 
 static char *mode_option __devinitdata = NULL;
-static int  strictmode       = 0;
+static bool strictmode       = 0;
 
 static struct fb_fix_screeninfo __devinitdata rivafb_fix = {
 	.type		= FB_TYPE_PACKED_PIXELS,
diff --git a/drivers/video/smscufx.c b/drivers/video/smscufx.c
index 3c22994ea31a..ccbfef5e828f 100644
--- a/drivers/video/smscufx.c
+++ b/drivers/video/smscufx.c
@@ -130,8 +130,8 @@ static struct usb_device_id id_table[] = {
 MODULE_DEVICE_TABLE(usb, id_table);
 
 /* module options */
-static int console;   /* Optionally allow fbcon to consume first framebuffer */
-static int fb_defio = true;  /* Optionally enable fb_defio mmap support */
+static bool console;   /* Optionally allow fbcon to consume first framebuffer */
+static bool fb_defio = true;  /* Optionally enable fb_defio mmap support */
 
 /* ufx keeps a list of urbs for efficient bulk transfers */
 static void ufx_urb_completion(struct urb *urb);
diff --git a/drivers/video/sstfb.c b/drivers/video/sstfb.c
index 2301c275d63a..111fb32e8769 100644
--- a/drivers/video/sstfb.c
+++ b/drivers/video/sstfb.c
@@ -93,11 +93,11 @@
 
 /* initialized by setup */
 
-static int vgapass;		/* enable VGA passthrough cable */
+static bool vgapass;		/* enable VGA passthrough cable */
 static int mem;			/* mem size in MB, 0 = autodetect */
-static int clipping = 1;	/* use clipping (slower, safer) */
+static bool clipping = 1;	/* use clipping (slower, safer) */
 static int gfxclk;		/* force FBI freq in Mhz . Dangerous */
-static int slowpci;		/* slow PCI settings */
+static bool slowpci;		/* slow PCI settings */
 
 /*
   Possible default video modes: 800x600@60, 640x480@75, 1024x768@76, 640x480@60
diff --git a/drivers/video/tdfxfb.c b/drivers/video/tdfxfb.c
index a99b994c9b6b..e026724a3a56 100644
--- a/drivers/video/tdfxfb.c
+++ b/drivers/video/tdfxfb.c
@@ -169,7 +169,7 @@ static int nowrap = 1;      /* not implemented (yet) */
 static int hwcursor = 1;
 static char *mode_option __devinitdata;
 /* mtrr option */
-static int nomtrr __devinitdata;
+static bool nomtrr __devinitdata;
 
 /* -------------------------------------------------------------------------
  *			Hardware-specific funcions
diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c
index 1f868d0187a2..a19773149bd7 100644
--- a/drivers/video/udlfb.c
+++ b/drivers/video/udlfb.c
@@ -69,9 +69,9 @@ static struct usb_device_id id_table[] = {
 MODULE_DEVICE_TABLE(usb, id_table);
 
 /* module options */
-static int console = 1; /* Allow fbcon to open framebuffer */
-static int fb_defio = 1;  /* Detect mmap writes using page faults */
-static int shadow = 1; /* Optionally disable shadow framebuffer */
+static bool console = 1; /* Allow fbcon to open framebuffer */
+static bool fb_defio = 1;  /* Detect mmap writes using page faults */
+static bool shadow = 1; /* Optionally disable shadow framebuffer */
 
 /* dlfb keeps a list of urbs for efficient bulk transfers */
 static void dlfb_urb_completion(struct urb *urb);
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index 7f8472cc993b..e7f69ef572dc 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -44,11 +44,11 @@ static struct fb_fix_screeninfo uvesafb_fix __devinitdata = {
 };
 
 static int mtrr		__devinitdata = 3; /* enable mtrr by default */
-static int blank	= 1;		   /* enable blanking by default */
+static bool blank	= 1;		   /* enable blanking by default */
 static int ypan		= 1; 		 /* 0: scroll, 1: ypan, 2: ywrap */
 static bool pmi_setpal	__devinitdata = true; /* use PMI for palette changes */
-static int nocrtc	__devinitdata; /* ignore CRTC settings */
-static int noedid	__devinitdata; /* don't try DDC transfers */
+static bool nocrtc	__devinitdata; /* ignore CRTC settings */
+static bool noedid	__devinitdata; /* don't try DDC transfers */
 static int vram_remap	__devinitdata; /* set amt. of memory to be used */
 static int vram_total	__devinitdata; /* set total amount of memory */
 static u16 maxclk	__devinitdata; /* maximum pixel clock */
diff --git a/drivers/video/vfb.c b/drivers/video/vfb.c
index bf2f78065cf9..501a922aa9dc 100644
--- a/drivers/video/vfb.c
+++ b/drivers/video/vfb.c
@@ -110,7 +110,7 @@ static struct fb_fix_screeninfo vfb_fix __devinitdata = {
 	.accel =	FB_ACCEL_NONE,
 };
 
-static int vfb_enable __initdata = 0;	/* disabled by default */
+static bool vfb_enable __initdata = 0;	/* disabled by default */
 module_param(vfb_enable, bool, 0);
 
 static int vfb_check_var(struct fb_var_screeninfo *var,
diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c
index d4d8d1fdccc4..e45ca2b4bfbe 100644
--- a/drivers/watchdog/f71808e_wdt.c
+++ b/drivers/watchdog/f71808e_wdt.c
@@ -100,7 +100,7 @@ MODULE_PARM_DESC(f71862fg_pin,
 	"Watchdog f71862fg reset output pin configuration. Choose pin 56 or 63"
 			" (default=" __MODULE_STRING(WATCHDOG_F71862FG_PIN)")");
 
-static int nowayout = WATCHDOG_NOWAYOUT;
+static bool nowayout = WATCHDOG_NOWAYOUT;
 module_param(nowayout, bool, 0444);
 MODULE_PARM_DESC(nowayout, "Disable watchdog shutdown on close");
 
diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c
index eed5436ffb51..20feb4d3d791 100644
--- a/drivers/watchdog/mpc8xxx_wdt.c
+++ b/drivers/watchdog/mpc8xxx_wdt.c
@@ -55,7 +55,7 @@ module_param(timeout, ushort, 0);
 MODULE_PARM_DESC(timeout,
 	"Watchdog timeout in ticks. (0<timeout<65536, default=65535)");
 
-static int reset = 1;
+static bool reset = 1;
 module_param(reset, bool, 0);
 MODULE_PARM_DESC(reset,
 	"Watchdog Interrupt/Reset Mode. 0 = interrupt, 1 = reset");
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index 52fed16d8701..30d7be026c18 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -16,7 +16,7 @@
 #include "conf_space.h"
 #include "conf_space_quirks.h"
 
-static int permissive;
+static bool permissive;
 module_param(permissive, bool, 0644);
 
 /* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word,
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 8e1c44d8ab46..d5dcf8d5d3d9 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -16,7 +16,7 @@
 #define INVALID_EVTCHN_IRQ  (-1)
 struct workqueue_struct *xen_pcibk_wq;
 
-static int __read_mostly passthrough;
+static bool __read_mostly passthrough;
 module_param(passthrough, bool, S_IRUGO);
 MODULE_PARM_DESC(passthrough,
 	"Option to specify how to export PCI topology to guest:\n"\
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 23d7451b2938..65ba36b80a9e 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -55,7 +55,7 @@ static				DEFINE_SPINLOCK(nsm_lock);
  * Local NSM state
  */
 u32	__read_mostly		nsm_local_state;
-int	__read_mostly		nsm_use_hostnames;
+bool	__read_mostly		nsm_use_hostnames;
 
 static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
 {
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 277dfaf2e99a..31778f74357d 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -84,7 +84,7 @@ retry:
 /*
  * Turn off NFSv4 uid/gid mapping when using AUTH_SYS
  */
-static int nfs4_disable_idmapping = 1;
+static bool nfs4_disable_idmapping = true;
 
 /*
  * RPC cruft for NFS
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 25c3bfad7953..f649fba8c384 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -57,7 +57,7 @@
 #define NFS_64_BIT_INODE_NUMBERS_ENABLED	1
 
 /* Default is to see 64-bit inode numbers */
-static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
+static bool enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
 
 static void nfs_invalidate_inode(struct inode *);
 static int nfs_update_inode(struct inode *, struct nfs_fattr *);
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index f554a9313b43..7762bc2d8404 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -66,7 +66,7 @@ extern u8 acpi_gbl_create_osi_method;
 extern u8 acpi_gbl_use_default_register_widths;
 extern acpi_name acpi_gbl_trace_method_name;
 extern u32 acpi_gbl_trace_flags;
-extern u32 acpi_gbl_enable_aml_debug_object;
+extern bool acpi_gbl_enable_aml_debug_object;
 extern u8 acpi_gbl_copy_dsdt_locally;
 extern u8 acpi_gbl_truncate_io_addresses;
 extern u8 acpi_gbl_disable_auto_repair;
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index 51a527d24a8a..04f349d8da73 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -16,10 +16,10 @@
 
 #ifdef __KERNEL__
 
-extern int hest_disable;
+extern bool hest_disable;
 extern int erst_disable;
 #ifdef CONFIG_ACPI_APEI_GHES
-extern int ghes_disable;
+extern bool ghes_disable;
 #else
 #define ghes_disable 1
 #endif
diff --git a/include/linux/console.h b/include/linux/console.h
index 7453cfd593c8..7201ce4280ca 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -152,7 +152,7 @@ extern int braille_register_console(struct console *, int index,
 		char *console_options, char *braille_options);
 extern int braille_unregister_console(struct console *);
 extern void console_sysfs_notify(void);
-extern int console_suspend_enabled;
+extern bool console_suspend_enabled;
 
 /* Suspend and resume console messages over PM events */
 extern void suspend_console(void);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 90b0656a869e..88a114fce477 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -195,7 +195,7 @@ extern struct svc_procedure	nlmsvc_procedures4[];
 #endif
 extern int			nlmsvc_grace_period;
 extern unsigned long		nlmsvc_timeout;
-extern int			nsm_use_hostnames;
+extern bool			nsm_use_hostnames;
 extern u32			nsm_local_state;
 
 /*
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index a3ac9c48e5de..8ef7894a48d0 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -396,7 +396,7 @@ static inline void mmc_set_disable_delay(struct mmc_host *host,
 }
 
 /* Module parameter */
-extern int mmc_assume_removable;
+extern bool mmc_assume_removable;
 
 static inline int mmc_card_is_removable(struct mmc_host *host)
 {
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index 38ccaea08204..df3649560818 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -21,11 +21,11 @@
 
 /* Control parameters settable through module/boot flags */
 extern enum audit_mode aa_g_audit;
-extern int aa_g_audit_header;
-extern int aa_g_debug;
-extern int aa_g_lock_policy;
-extern int aa_g_logsyscall;
-extern int aa_g_paranoid_load;
+extern bool aa_g_audit_header;
+extern bool aa_g_debug;
+extern bool aa_g_lock_policy;
+extern bool aa_g_logsyscall;
+extern bool aa_g_paranoid_load;
 extern unsigned int aa_g_path_max;
 
 /*
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index d7f06f8b2837..68d50c54e431 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -708,7 +708,7 @@ module_param_call(mode, param_set_mode, param_get_mode,
 		  &aa_g_profile_mode, S_IRUSR | S_IWUSR);
 
 /* Debug mode */
-int aa_g_debug;
+bool aa_g_debug;
 module_param_named(debug, aa_g_debug, aabool, S_IRUSR | S_IWUSR);
 
 /* Audit mode */
@@ -719,7 +719,7 @@ module_param_call(audit, param_set_audit, param_get_audit,
 /* Determines if audit header is included in audited messages.  This
  * provides more context if the audit daemon is not running
  */
-int aa_g_audit_header = 1;
+bool aa_g_audit_header = 1;
 module_param_named(audit_header, aa_g_audit_header, aabool,
 		   S_IRUSR | S_IWUSR);
 
@@ -727,12 +727,12 @@ module_param_named(audit_header, aa_g_audit_header, aabool,
  * TODO: add in at boot loading of policy, which is the only way to
  *       load policy, if lock_policy is set
  */
-int aa_g_lock_policy;
+bool aa_g_lock_policy;
 module_param_named(lock_policy, aa_g_lock_policy, aalockpolicy,
 		   S_IRUSR | S_IWUSR);
 
 /* Syscall logging mode */
-int aa_g_logsyscall;
+bool aa_g_logsyscall;
 module_param_named(logsyscall, aa_g_logsyscall, aabool, S_IRUSR | S_IWUSR);
 
 /* Maximum pathname length before accesses will start getting rejected */
@@ -742,12 +742,12 @@ module_param_named(path_max, aa_g_path_max, aauint, S_IRUSR | S_IWUSR);
 /* Determines how paranoid loading of policy is and how much verification
  * on the loaded policy is done.
  */
-int aa_g_paranoid_load = 1;
+bool aa_g_paranoid_load = 1;
 module_param_named(paranoid_load, aa_g_paranoid_load, aabool,
 		   S_IRUSR | S_IWUSR);
 
 /* Boot time disable flag */
-static unsigned int apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE;
+static bool apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE;
 module_param_named(enabled, apparmor_enabled, aabool, S_IRUSR);
 
 static int __init apparmor_enabled_setup(char *str)
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 0fb448e6a1a3..a457d2138f49 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -32,7 +32,7 @@
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
-static int allow_unsafe_assigned_interrupts;
+static bool allow_unsafe_assigned_interrupts;
 module_param_named(allow_unsafe_assigned_interrupts,
 		   allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
-- 
cgit v1.2.3


From 72db395ffadb1d33233fd123c2bf87ba0198c6c1 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 13 Jan 2012 09:32:28 +1030
Subject: module_param: check that bool parameters really are bool.

module_param(bool) used to counter-intuitively take an int.  In
fddd5201 (mid-2009) we allowed bool or int/unsigned int using a messy
trick.

This tightens the check (you'll get a warning about incompatible
return type) but still allows it.  Next kernel version, we'll remove
it.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleparam.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 6bdde0c3bcca..c47f4d60db0b 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -350,17 +350,11 @@ extern int param_set_charp(const char *val, const struct kernel_param *kp);
 extern int param_get_charp(char *buffer, const struct kernel_param *kp);
 #define param_check_charp(name, p) __param_check(name, p, char *)
 
-/* For historical reasons "bool" parameters can be (unsigned) "int". */
+/* We used to allow int as well as bool.  We're taking that away! */
 extern struct kernel_param_ops param_ops_bool;
 extern int param_set_bool(const char *val, const struct kernel_param *kp);
 extern int param_get_bool(char *buffer, const struct kernel_param *kp);
-#define param_check_bool(name, p)					\
-	static inline void __check_##name(void)				\
-	{								\
-		BUILD_BUG_ON(!__same_type((p), bool *) &&		\
-			     !__same_type((p), unsigned int *) &&	\
-			     !__same_type((p), int *));			\
-	}
+#define param_check_bool(name, p) __param_check(name, p, bool)
 
 extern struct kernel_param_ops param_ops_invbool;
 extern int param_set_invbool(const char *val, const struct kernel_param *kp);
-- 
cgit v1.2.3


From eb8a54a78e974e1af3e17fa38bb74d3747c5c1bd Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Thu, 12 Jan 2012 15:23:04 -0800
Subject: phylib: introduce mdiobus_alloc_size()

Introduce function mdiobus_alloc_size() as an alternative to mdiobus_alloc().
Most callers of mdiobus_alloc() also allocate a private data structure, and
then manually point bus->priv to this object.  mdiobus_alloc_size()
combines the two operations into one, which simplifies memory management.

The original mdiobus_alloc() now just calls mdiobus_alloc_size(0).

Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c | 24 +++++++++++++++++++-----
 include/linux/phy.h        |  7 ++++++-
 2 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 6c58da2b882c..88cc5db9affd 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -37,22 +37,36 @@
 #include <asm/uaccess.h>
 
 /**
- * mdiobus_alloc - allocate a mii_bus structure
+ * mdiobus_alloc_size - allocate a mii_bus structure
  *
  * Description: called by a bus driver to allocate an mii_bus
  * structure to fill in.
+ *
+ * 'size' is an an extra amount of memory to allocate for private storage.
+ * If non-zero, then bus->priv is points to that memory.
  */
-struct mii_bus *mdiobus_alloc(void)
+struct mii_bus *mdiobus_alloc_size(size_t size)
 {
 	struct mii_bus *bus;
+	size_t aligned_size = ALIGN(sizeof(*bus), NETDEV_ALIGN);
+	size_t alloc_size;
+
+	/* If we alloc extra space, it should be aligned */
+	if (size)
+		alloc_size = aligned_size + size;
+	else
+		alloc_size = sizeof(*bus);
 
-	bus = kzalloc(sizeof(*bus), GFP_KERNEL);
-	if (bus != NULL)
+	bus = kzalloc(alloc_size, GFP_KERNEL);
+	if (bus) {
 		bus->state = MDIOBUS_ALLOCATED;
+		if (size)
+			bus->priv = (void *)bus + aligned_size;
+	}
 
 	return bus;
 }
-EXPORT_SYMBOL(mdiobus_alloc);
+EXPORT_SYMBOL(mdiobus_alloc_size);
 
 /**
  * mdiobus_release - mii_bus device release callback
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 79f337c47388..c599f7eca1e7 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -129,7 +129,12 @@ struct mii_bus {
 };
 #define to_mii_bus(d) container_of(d, struct mii_bus, dev)
 
-struct mii_bus *mdiobus_alloc(void);
+struct mii_bus *mdiobus_alloc_size(size_t);
+static inline struct mii_bus *mdiobus_alloc(void)
+{
+	return mdiobus_alloc_size(0);
+}
+
 int mdiobus_register(struct mii_bus *bus);
 void mdiobus_unregister(struct mii_bus *bus);
 void mdiobus_free(struct mii_bus *bus);
-- 
cgit v1.2.3


From ddecf0f4db44ef94847a62d6ecf74456b4dcc66f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 6 Jan 2012 06:31:44 +0000
Subject: net_sched: sfq: add optional RED on top of SFQ

Adds an optional Random Early Detection on each SFQ flow queue.

Traditional SFQ limits count of packets, while RED permits to also
control number of bytes per flow, and adds ECN capability as well.

1) We dont handle the idle time management in this RED implementation,
since each 'new flow' begins with a null qavg. We really want to address
backlogged flows.

2) if headdrop is selected, we try to ecn mark first packet instead of
currently enqueued packet. This gives faster feedback for tcp flows
compared to traditional RED [ marking the last packet in queue ]

Example of use :

tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 4sec sfq \
	limit 3000 headdrop flows 512 divisor 16384 \
	redflowlimit 100000 min 8000 max 60000 probability 0.20 ecn

qdisc sfq 10: parent 1:1 limit 3000p quantum 1514b depth 127 headdrop
flows 512/16384 divisor 16384
 ewma 6 min 8000b max 60000b probability 0.2 ecn
 prob_mark 0 prob_mark_head 4876 prob_drop 6131
 forced_mark 0 forced_mark_head 0 forced_drop 0
 Sent 1175211782 bytes 777537 pkt (dropped 6131, overlimits 11007
requeues 0)
 rate 99483Kbit 8219pps backlog 689392b 456p requeues 0

In this test, with 64 netperf TCP_STREAM sessions, 50% using ECN enabled
flows, we can see number of packets CE marked is smaller than number of
drops (for non ECN flows)

If same test is run, without RED, we can check backlog is much bigger.

qdisc sfq 10: parent 1:1 limit 3000p quantum 1514b depth 127 headdrop
flows 512/16384 divisor 16384
 Sent 1148683617 bytes 795006 pkt (dropped 0, overlimits 0 requeues 0)
 rate 98429Kbit 8521pps backlog 1221290b 841p requeues 0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Stephen Hemminger <shemminger@vyatta.com>
CC: Dave Taht <dave.taht@gmail.com>
Tested-by: Dave Taht <dave.taht@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h |  20 +++++++
 include/net/red.h         |   3 +-
 net/sched/sch_sfq.c       | 146 +++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 152 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 8f1b928f777c..0d5b79365d03 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -162,10 +162,30 @@ struct tc_sfq_qopt {
 	unsigned	flows;		/* Maximal number of flows  */
 };
 
+struct tc_sfqred_stats {
+	__u32           prob_drop;      /* Early drops, below max threshold */
+	__u32           forced_drop;	/* Early drops, after max threshold */
+	__u32           prob_mark;      /* Marked packets, below max threshold */
+	__u32           forced_mark;    /* Marked packets, after max threshold */
+	__u32           prob_mark_head; /* Marked packets, below max threshold */
+	__u32           forced_mark_head;/* Marked packets, after max threshold */
+};
+
 struct tc_sfq_qopt_v1 {
 	struct tc_sfq_qopt v0;
 	unsigned int	depth;		/* max number of packets per flow */
 	unsigned int	headdrop;
+/* SFQRED parameters */
+	__u32		limit;		/* HARD maximal flow queue length (bytes) */
+	__u32		qth_min;	/* Min average length threshold (bytes) */
+	__u32		qth_max;	/* Max average length threshold (bytes) */
+	unsigned char   Wlog;		/* log(W)		*/
+	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
+	unsigned char   Scell_log;	/* cell size for idle damping */
+	unsigned char	flags;
+	__u32		max_P;		/* probability, high resolution */
+/* SFQRED stats */
+	struct tc_sfqred_stats stats;
 };
 
 
diff --git a/include/net/red.h b/include/net/red.h
index baab385a4736..28068ec614b2 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -199,7 +199,8 @@ static inline void red_set_parms(struct red_parms *p,
 	p->Scell_log	= Scell_log;
 	p->Scell_max	= (255 << Scell_log);
 
-	memcpy(p->Stab, stab, sizeof(p->Stab));
+	if (stab)
+		memcpy(p->Stab, stab, sizeof(p->Stab));
 }
 
 static inline int red_is_idling(const struct red_vars *v)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 0a7964009e8c..67494aef9acf 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -24,6 +24,7 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/flow_keys.h>
+#include <net/red.h>
 
 
 /*	Stochastic Fairness Queuing algorithm.
@@ -108,24 +109,30 @@ struct sfq_slot {
 	struct sfq_head dep; /* anchor in dep[] chains */
 	unsigned short	hash; /* hash value (index in ht[]) */
 	short		allot; /* credit for this slot */
+
+	unsigned int    backlog;
+	struct red_vars vars;
 };
 
 struct sfq_sched_data {
 /* frequently used fields */
 	int		limit;		/* limit of total number of packets in this qdisc */
 	unsigned int	divisor;	/* number of slots in hash table */
-	unsigned int	maxflows;	/* number of flows in flows array */
-	int		headdrop;
-	int		maxdepth;	/* limit of packets per flow */
+	u8		headdrop;
+	u8		maxdepth;	/* limit of packets per flow */
 
 	u32		perturbation;
-	struct tcf_proto *filter_list;
-	sfq_index	cur_depth;	/* depth of longest slot */
+	u8		cur_depth;	/* depth of longest slot */
+	u8		flags;
 	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
-	struct sfq_slot *tail;		/* current slot in round */
+	struct tcf_proto *filter_list;
 	sfq_index	*ht;		/* Hash table ('divisor' slots) */
 	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */
 
+	struct red_parms *red_parms;
+	struct tc_sfqred_stats stats;
+	struct sfq_slot *tail;		/* current slot in round */
+
 	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
 					/* Linked lists of slots, indexed by depth
 					 * dep[0] : list of unused flows
@@ -133,6 +140,7 @@ struct sfq_sched_data {
 					 * dep[X] : list of flows with X packets
 					 */
 
+	unsigned int	maxflows;	/* number of flows in flows array */
 	int		perturb_period;
 	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
 	struct timer_list perturb_timer;
@@ -321,6 +329,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 drop:
 		skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
 		len = qdisc_pkt_len(skb);
+		slot->backlog -= len;
 		sfq_dec(q, x);
 		kfree_skb(skb);
 		sch->q.qlen--;
@@ -341,6 +350,23 @@ drop:
 	return 0;
 }
 
+/* Is ECN parameter configured */
+static int sfq_prob_mark(const struct sfq_sched_data *q)
+{
+	return q->flags & TC_RED_ECN;
+}
+
+/* Should packets over max threshold just be marked */
+static int sfq_hard_mark(const struct sfq_sched_data *q)
+{
+	return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
+}
+
+static int sfq_headdrop(const struct sfq_sched_data *q)
+{
+	return q->headdrop;
+}
+
 static int
 sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
@@ -349,6 +375,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	sfq_index x, qlen;
 	struct sfq_slot *slot;
 	int uninitialized_var(ret);
+	struct sk_buff *head;
+	int delta;
 
 	hash = sfq_classify(skb, sch, &ret);
 	if (hash == 0) {
@@ -368,24 +396,75 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->ht[hash] = x;
 		slot = &q->slots[x];
 		slot->hash = hash;
+		slot->backlog = 0; /* should already be 0 anyway... */
+		red_set_vars(&slot->vars);
+		goto enqueue;
 	}
+	if (q->red_parms) {
+		slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
+							&slot->vars,
+							slot->backlog);
+		switch (red_action(q->red_parms,
+				   &slot->vars,
+				   slot->vars.qavg)) {
+		case RED_DONT_MARK:
+			break;
 
-	if (slot->qlen >= q->maxdepth) {
-		struct sk_buff *head;
+		case RED_PROB_MARK:
+			sch->qstats.overlimits++;
+			if (sfq_prob_mark(q)) {
+				/* We know we have at least one packet in queue */
+				if (sfq_headdrop(q) &&
+				    INET_ECN_set_ce(slot->skblist_next)) {
+					q->stats.prob_mark_head++;
+					break;
+				}
+				if (INET_ECN_set_ce(skb)) {
+					q->stats.prob_mark++;
+					break;
+				}
+			}
+			q->stats.prob_drop++;
+			goto congestion_drop;
+
+		case RED_HARD_MARK:
+			sch->qstats.overlimits++;
+			if (sfq_hard_mark(q)) {
+				/* We know we have at least one packet in queue */
+				if (sfq_headdrop(q) &&
+				    INET_ECN_set_ce(slot->skblist_next)) {
+					q->stats.forced_mark_head++;
+					break;
+				}
+				if (INET_ECN_set_ce(skb)) {
+					q->stats.forced_mark++;
+					break;
+				}
+			}
+			q->stats.forced_drop++;
+			goto congestion_drop;
+		}
+	}
 
-		if (!q->headdrop)
+	if (slot->qlen >= q->maxdepth) {
+congestion_drop:
+		if (!sfq_headdrop(q))
 			return qdisc_drop(skb, sch);
 
+		/* We know we have at least one packet in queue */
 		head = slot_dequeue_head(slot);
-		sch->qstats.backlog -= qdisc_pkt_len(head);
+		delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
+		sch->qstats.backlog -= delta;
+		slot->backlog -= delta;
 		qdisc_drop(head, sch);
 
-		sch->qstats.backlog += qdisc_pkt_len(skb);
 		slot_queue_add(slot, skb);
 		return NET_XMIT_CN;
 	}
 
+enqueue:
 	sch->qstats.backlog += qdisc_pkt_len(skb);
+	slot->backlog += qdisc_pkt_len(skb);
 	slot_queue_add(slot, skb);
 	sfq_inc(q, x);
 	if (slot->qlen == 1) {		/* The flow is new */
@@ -396,6 +475,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			slot->next = q->tail->next;
 			q->tail->next = x;
 		}
+		/* We could use a bigger initial quantum for new flows */
 		slot->allot = q->scaled_quantum;
 	}
 	if (++sch->q.qlen <= q->limit)
@@ -439,7 +519,7 @@ next_slot:
 	qdisc_bstats_update(sch, skb);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
-
+	slot->backlog -= qdisc_pkt_len(skb);
 	/* Is the slot empty? */
 	if (slot->qlen == 0) {
 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
@@ -490,6 +570,8 @@ static void sfq_rehash(struct Qdisc *sch)
 			sfq_dec(q, i);
 			__skb_queue_tail(&list, skb);
 		}
+		slot->backlog = 0;
+		red_set_vars(&slot->vars);
 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
 	}
 	q->tail = NULL;
@@ -514,6 +596,11 @@ drop:				sch->qstats.backlog -= qdisc_pkt_len(skb);
 		if (slot->qlen >= q->maxdepth)
 			goto drop;
 		slot_queue_add(slot, skb);
+		if (q->red_parms)
+			slot->vars.qavg = red_calc_qavg(q->red_parms,
+							&slot->vars,
+							slot->backlog);
+		slot->backlog += qdisc_pkt_len(skb);
 		sfq_inc(q, x);
 		if (slot->qlen == 1) {		/* The flow is new */
 			if (q->tail == NULL) {	/* It is the first flow */
@@ -552,6 +639,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	struct tc_sfq_qopt *ctl = nla_data(opt);
 	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
 	unsigned int qlen;
+	struct red_parms *p = NULL;
 
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
@@ -560,7 +648,11 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	if (ctl->divisor &&
 	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
 		return -EINVAL;
-
+	if (ctl_v1 && ctl_v1->qth_min) {
+		p = kmalloc(sizeof(*p), GFP_KERNEL);
+		if (!p)
+			return -ENOMEM;
+	}
 	sch_tree_lock(sch);
 	if (ctl->quantum) {
 		q->quantum = ctl->quantum;
@@ -576,6 +668,16 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	if (ctl_v1) {
 		if (ctl_v1->depth)
 			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
+		if (p) {
+			swap(q->red_parms, p);
+			red_set_parms(q->red_parms,
+				      ctl_v1->qth_min, ctl_v1->qth_max,
+				      ctl_v1->Wlog,
+				      ctl_v1->Plog, ctl_v1->Scell_log,
+				      NULL,
+				      ctl_v1->max_P);
+		}
+		q->flags = ctl_v1->flags;
 		q->headdrop = ctl_v1->headdrop;
 	}
 	if (ctl->limit) {
@@ -594,6 +696,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 		q->perturbation = net_random();
 	}
 	sch_tree_unlock(sch);
+	kfree(p);
 	return 0;
 }
 
@@ -625,6 +728,7 @@ static void sfq_destroy(struct Qdisc *sch)
 	del_timer_sync(&q->perturb_timer);
 	sfq_free(q->ht);
 	sfq_free(q->slots);
+	kfree(q->red_parms);
 }
 
 static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
@@ -683,6 +787,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_sfq_qopt_v1 opt;
+	struct red_parms *p = q->red_parms;
 
 	memset(&opt, 0, sizeof(opt));
 	opt.v0.quantum	= q->quantum;
@@ -693,6 +798,17 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	opt.depth	= q->maxdepth;
 	opt.headdrop	= q->headdrop;
 
+	if (p) {
+		opt.qth_min	= p->qth_min >> p->Wlog;
+		opt.qth_max	= p->qth_max >> p->Wlog;
+		opt.Wlog	= p->Wlog;
+		opt.Plog	= p->Plog;
+		opt.Scell_log	= p->Scell_log;
+		opt.max_P	= p->max_P;
+	}
+	memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
+	opt.flags	= q->flags;
+
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
 	return skb->len;
@@ -747,15 +863,13 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	sfq_index idx = q->ht[cl - 1];
 	struct gnet_stats_queue qs = { 0 };
 	struct tc_sfq_xstats xstats = { 0 };
-	struct sk_buff *skb;
 
 	if (idx != SFQ_EMPTY_SLOT) {
 		const struct sfq_slot *slot = &q->slots[idx];
 
 		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
 		qs.qlen = slot->qlen;
-		slot_queue_walk(slot, skb)
-			qs.backlog += qdisc_pkt_len(skb);
+		qs.backlog = slot->backlog;
 	}
 	if (gnet_stats_copy_queue(d, &qs) < 0)
 		return -1;
-- 
cgit v1.2.3


From 4da47859956cebdc4c58c38a931e21847458d744 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 12 Jan 2012 17:17:13 -0800
Subject: kernel.h: neaten panic prototype

Use __printf macro.
Convert NORET_AND to ATTRIB_NORET.
Use the normal kernel style for pointer arguments.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d0a7a0c71661..60934395e36c 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -185,8 +185,9 @@ static inline void might_fault(void)
 
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
-NORET_TYPE void panic(const char * fmt, ...)
-	__attribute__ ((NORET_AND format (printf, 1, 2))) __cold;
+NORET_TYPE __printf(1, 2)
+void panic(const char *fmt, ...)
+	ATTRIB_NORET __cold;
 extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
-- 
cgit v1.2.3


From 80bf007f20b16272f210e0803f739f5606cff59d Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 12 Jan 2012 17:17:14 -0800
Subject: include/linux/linkage.h: remove unused NORET_AND macro

The only use in kernel.h is gone so remove the macro.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/linkage.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 3f46aedea42f..c75074cb8ad4 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -90,6 +90,5 @@
 
 #define NORET_TYPE    /**/
 #define ATTRIB_NORET  __attribute__((noreturn))
-#define NORET_AND     noreturn,
 
 #endif
-- 
cgit v1.2.3


From 9402c95f34a66e81eba473a2f7267bbae5a1dee2 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 12 Jan 2012 17:17:17 -0800
Subject: treewide: remove useless NORET_TYPE macro and uses

It's a very old and now unused prototype marking so just delete it.

Neaten panic pointer argument style to keep checkpatch quiet.

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/avr32/include/asm/system.h        | 2 +-
 arch/avr32/kernel/traps.c              | 2 +-
 arch/ia64/kernel/machine_kexec.c       | 2 +-
 arch/m68k/amiga/config.c               | 2 +-
 arch/mips/include/asm/ptrace.h         | 2 +-
 arch/mips/kernel/traps.c               | 2 +-
 arch/powerpc/kernel/machine_kexec_32.c | 2 +-
 arch/powerpc/kernel/machine_kexec_64.c | 6 +++---
 arch/s390/kernel/nmi.c                 | 2 +-
 arch/tile/kernel/machine_kexec.c       | 4 ++--
 include/linux/kernel.h                 | 6 +++---
 include/linux/linkage.h                | 1 -
 include/linux/sched.h                  | 2 +-
 kernel/exit.c                          | 6 +++---
 kernel/panic.c                         | 2 +-
 15 files changed, 21 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/avr32/include/asm/system.h b/arch/avr32/include/asm/system.h
index 9702c2213e1e..62d9ded01635 100644
--- a/arch/avr32/include/asm/system.h
+++ b/arch/avr32/include/asm/system.h
@@ -169,7 +169,7 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
 #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
 
 struct pt_regs;
-void NORET_TYPE die(const char *str, struct pt_regs *regs, long err);
+void die(const char *str, struct pt_regs *regs, long err);
 void _exception(long signr, struct pt_regs *regs, int code,
 		unsigned long addr);
 
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
index 7aa25756412f..3d760c06f024 100644
--- a/arch/avr32/kernel/traps.c
+++ b/arch/avr32/kernel/traps.c
@@ -24,7 +24,7 @@
 
 static DEFINE_SPINLOCK(die_lock);
 
-void NORET_TYPE die(const char *str, struct pt_regs *regs, long err)
+void die(const char *str, struct pt_regs *regs, long err)
 {
 	static int die_counter;
 
diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 3d3aeef46947..581a16d5e85b 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -27,7 +27,7 @@
 #include <asm/sal.h>
 #include <asm/mca.h>
 
-typedef NORET_TYPE void (*relocate_new_kernel_t)(
+typedef void (*relocate_new_kernel_t)(
 					unsigned long indirection_page,
 					unsigned long start_address,
 					struct ia64_boot_param *boot_param,
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 82a4bb51d5d8..a3b0558328b6 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -511,7 +511,7 @@ static unsigned long amiga_gettimeoffset(void)
 	return ticks + offset;
 }
 
-static NORET_TYPE void amiga_reset(void)
+static void amiga_reset(void)
     ATTRIB_NORET;
 
 static void amiga_reset(void)
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index de39b1f343ea..3d913259e507 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -144,7 +144,7 @@ extern int ptrace_set_watch_regs(struct task_struct *child,
 extern asmlinkage void syscall_trace_enter(struct pt_regs *regs);
 extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
 
-extern NORET_TYPE void die(const char *, struct pt_regs *) ATTRIB_NORET;
+extern void die(const char *, struct pt_regs *) ATTRIB_NORET;
 
 static inline void die_if_kernel(const char *str, struct pt_regs *regs)
 {
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 5c8a49d55054..725e9a5ca966 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1340,7 +1340,7 @@ void ejtag_exception_handler(struct pt_regs *regs)
 /*
  * NMI exception handler.
  */
-NORET_TYPE void ATTRIB_NORET nmi_exception_handler(struct pt_regs *regs)
+void ATTRIB_NORET nmi_exception_handler(struct pt_regs *regs)
 {
 	bust_spinlocks(1);
 	printk("NMI taken!!!!\n");
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c
index e63f2e7d2efb..026e7f153949 100644
--- a/arch/powerpc/kernel/machine_kexec_32.c
+++ b/arch/powerpc/kernel/machine_kexec_32.c
@@ -16,7 +16,7 @@
 #include <asm/hw_irq.h>
 #include <asm/io.h>
 
-typedef NORET_TYPE void (*relocate_new_kernel_t)(
+typedef void (*relocate_new_kernel_t)(
 				unsigned long indirection_page,
 				unsigned long reboot_code_buffer,
 				unsigned long start_address) ATTRIB_NORET;
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 26ccbf77dd41..5fbbf814923a 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -307,9 +307,9 @@ static union thread_union kexec_stack __init_task_data =
 struct paca_struct kexec_paca;
 
 /* Our assembly helper, in kexec_stub.S */
-extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
-					void *image, void *control,
-					void (*clear_all)(void)) ATTRIB_NORET;
+extern void kexec_sequence(void *newstack, unsigned long start,
+			   void *image, void *control,
+			   void (*clear_all)(void)) ATTRIB_NORET;
 
 /* too late to fail here */
 void default_machine_kexec(struct kimage *image)
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index fab88431a06f..0fd2e863e114 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -30,7 +30,7 @@ struct mcck_struct {
 
 static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
 
-static NORET_TYPE void s390_handle_damage(char *msg)
+static void s390_handle_damage(char *msg)
 {
 	smp_send_stop();
 	disabled_wait((unsigned long) __builtin_return_address(0));
diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c
index e00d7179989e..b0c907059067 100644
--- a/arch/tile/kernel/machine_kexec.c
+++ b/arch/tile/kernel/machine_kexec.c
@@ -248,10 +248,10 @@ static void setup_quasi_va_is_pa(void)
 }
 
 
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
 {
 	void *reboot_code_buffer;
-	NORET_TYPE void (*rnk)(unsigned long, void *, unsigned long)
+	void (*rnk)(unsigned long, void *, unsigned long)
 		ATTRIB_NORET;
 
 	/* Mask all interrupts before starting to reboot. */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 60934395e36c..aaf1753dd2b3 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -185,16 +185,16 @@ static inline void might_fault(void)
 
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
-NORET_TYPE __printf(1, 2)
+__printf(1, 2)
 void panic(const char *fmt, ...)
 	ATTRIB_NORET __cold;
 extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
 extern int oops_may_print(void);
-NORET_TYPE void do_exit(long error_code)
+void do_exit(long error_code)
 	ATTRIB_NORET;
-NORET_TYPE void complete_and_exit(struct completion *, long)
+void complete_and_exit(struct completion *, long)
 	ATTRIB_NORET;
 
 /* Internal, do not use. */
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index c75074cb8ad4..6a8f252e49ee 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -88,7 +88,6 @@
 
 #endif
 
-#define NORET_TYPE    /**/
 #define ATTRIB_NORET  __attribute__((noreturn))
 
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 21cd0303af51..4032ec1cf836 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2275,7 +2275,7 @@ extern void __cleanup_sighand(struct sighand_struct *);
 extern void exit_itimers(struct signal_struct *);
 extern void flush_itimer_signals(void);
 
-extern NORET_TYPE void do_group_exit(int);
+extern void do_group_exit(int);
 
 extern void daemonize(const char *, ...);
 extern int allow_signal(int);
diff --git a/kernel/exit.c b/kernel/exit.c
index 94ed6e20bb53..c44738267be7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -887,7 +887,7 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
-NORET_TYPE void do_exit(long code)
+void do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
@@ -1051,7 +1051,7 @@ NORET_TYPE void do_exit(long code)
 
 EXPORT_SYMBOL_GPL(do_exit);
 
-NORET_TYPE void complete_and_exit(struct completion *comp, long code)
+void complete_and_exit(struct completion *comp, long code)
 {
 	if (comp)
 		complete(comp);
@@ -1070,7 +1070,7 @@ SYSCALL_DEFINE1(exit, int, error_code)
  * Take down every thread in the group.  This is called by fatal signals
  * as well as by sys_exit_group (below).
  */
-NORET_TYPE void
+void
 do_group_exit(int exit_code)
 {
 	struct signal_struct *sig = current->signal;
diff --git a/kernel/panic.c b/kernel/panic.c
index 3458469eb7c3..6fd09ed6fd90 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -57,7 +57,7 @@ EXPORT_SYMBOL(panic_blink);
  *
  *	This function never returns.
  */
-NORET_TYPE void panic(const char * fmt, ...)
+void panic(const char *fmt, ...)
 {
 	static char buf[1024];
 	va_list args;
-- 
cgit v1.2.3


From ff2d8b19a3a62559afba1c53360c8577a7697714 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 12 Jan 2012 17:17:21 -0800
Subject: treewide: convert uses of ATTRIB_NORETURN to __noreturn

Use the more commonly used __noreturn instead of ATTRIB_NORETURN.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/machine_kexec.c       | 2 +-
 arch/m68k/amiga/config.c               | 3 +--
 arch/mips/include/asm/ptrace.h         | 2 +-
 arch/mips/kernel/traps.c               | 2 +-
 arch/mn10300/include/asm/exceptions.h  | 2 +-
 arch/powerpc/kernel/machine_kexec_32.c | 2 +-
 arch/powerpc/kernel/machine_kexec_64.c | 2 +-
 arch/s390/include/asm/processor.h      | 2 +-
 arch/sh/kernel/process_32.c            | 2 +-
 arch/sh/kernel/process_64.c            | 2 +-
 arch/tile/kernel/machine_kexec.c       | 2 +-
 include/linux/kernel.h                 | 6 +++---
 12 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 581a16d5e85b..4eed35814994 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -31,7 +31,7 @@ typedef void (*relocate_new_kernel_t)(
 					unsigned long indirection_page,
 					unsigned long start_address,
 					struct ia64_boot_param *boot_param,
-					unsigned long pal_addr) ATTRIB_NORET;
+					unsigned long pal_addr) __noreturn;
 
 struct kimage *ia64_kimage;
 
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index a3b0558328b6..b95a451b1c3a 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -511,8 +511,7 @@ static unsigned long amiga_gettimeoffset(void)
 	return ticks + offset;
 }
 
-static void amiga_reset(void)
-    ATTRIB_NORET;
+static void amiga_reset(void)  __noreturn;
 
 static void amiga_reset(void)
 {
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index 3d913259e507..7b99c670e478 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -144,7 +144,7 @@ extern int ptrace_set_watch_regs(struct task_struct *child,
 extern asmlinkage void syscall_trace_enter(struct pt_regs *regs);
 extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
 
-extern void die(const char *, struct pt_regs *) ATTRIB_NORET;
+extern void die(const char *, struct pt_regs *) __noreturn;
 
 static inline void die_if_kernel(const char *str, struct pt_regs *regs)
 {
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 725e9a5ca966..bbddb86c1fa1 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1340,7 +1340,7 @@ void ejtag_exception_handler(struct pt_regs *regs)
 /*
  * NMI exception handler.
  */
-void ATTRIB_NORET nmi_exception_handler(struct pt_regs *regs)
+void __noreturn nmi_exception_handler(struct pt_regs *regs)
 {
 	bust_spinlocks(1);
 	printk("NMI taken!!!!\n");
diff --git a/arch/mn10300/include/asm/exceptions.h b/arch/mn10300/include/asm/exceptions.h
index ca3e20508c77..95a4d42c3a06 100644
--- a/arch/mn10300/include/asm/exceptions.h
+++ b/arch/mn10300/include/asm/exceptions.h
@@ -110,7 +110,7 @@ extern asmlinkage void nmi_handler(void);
 extern asmlinkage void misalignment(struct pt_regs *, enum exception_code);
 
 extern void die(const char *, struct pt_regs *, enum exception_code)
-	ATTRIB_NORET;
+	__noreturn;
 
 extern int die_if_no_fixup(const char *, struct pt_regs *, enum exception_code);
 
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c
index 026e7f153949..affe5dcce7f4 100644
--- a/arch/powerpc/kernel/machine_kexec_32.c
+++ b/arch/powerpc/kernel/machine_kexec_32.c
@@ -19,7 +19,7 @@
 typedef void (*relocate_new_kernel_t)(
 				unsigned long indirection_page,
 				unsigned long reboot_code_buffer,
-				unsigned long start_address) ATTRIB_NORET;
+				unsigned long start_address) __noreturn;
 
 /*
  * This is a generic machine_kexec function suitable at least for
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 5fbbf814923a..d7f609086a99 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -309,7 +309,7 @@ struct paca_struct kexec_paca;
 /* Our assembly helper, in kexec_stub.S */
 extern void kexec_sequence(void *newstack, unsigned long start,
 			   void *image, void *control,
-			   void (*clear_all)(void)) ATTRIB_NORET;
+			   void (*clear_all)(void)) __noreturn;
 
 /* too late to fail here */
 void default_machine_kexec(struct kimage *image)
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 27272f6a14c2..d25843a6a915 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -236,7 +236,7 @@ static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
 /*
  * Function to drop a processor into disabled wait state
  */
-static inline void ATTRIB_NORET disabled_wait(unsigned long code)
+static inline void __noreturn disabled_wait(unsigned long code)
 {
         unsigned long ctl_buf;
         psw_t dw_psw;
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index aaf6d59c2012..7ec665178125 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -70,7 +70,7 @@ void show_regs(struct pt_regs * regs)
 /*
  * Create a kernel thread
  */
-ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *))
+__noreturn void kernel_thread_helper(void *arg, int (*fn)(void *))
 {
 	do_exit(fn(arg));
 }
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 210c1cabcb7f..cbd4e4bb9fc5 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -285,7 +285,7 @@ void show_regs(struct pt_regs *regs)
 /*
  * Create a kernel thread
  */
-ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *))
+__noreturn void kernel_thread_helper(void *arg, int (*fn)(void *))
 {
 	do_exit(fn(arg));
 }
diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c
index b0c907059067..6255f2eab112 100644
--- a/arch/tile/kernel/machine_kexec.c
+++ b/arch/tile/kernel/machine_kexec.c
@@ -252,7 +252,7 @@ void machine_kexec(struct kimage *image)
 {
 	void *reboot_code_buffer;
 	void (*rnk)(unsigned long, void *, unsigned long)
-		ATTRIB_NORET;
+		__noreturn;
 
 	/* Mask all interrupts before starting to reboot. */
 	interrupt_mask_set_mask(~0ULL);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index aaf1753dd2b3..e8343422240a 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -187,15 +187,15 @@ extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
 __printf(1, 2)
 void panic(const char *fmt, ...)
-	ATTRIB_NORET __cold;
+	__noreturn __cold;
 extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
 extern int oops_may_print(void);
 void do_exit(long error_code)
-	ATTRIB_NORET;
+	__noreturn;
 void complete_and_exit(struct completion *, long)
-	ATTRIB_NORET;
+	__noreturn;
 
 /* Internal, do not use. */
 int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
-- 
cgit v1.2.3


From 0d259cf8190b9c446eefd5225ffcc3941e76a432 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 12 Jan 2012 17:17:25 -0800
Subject: include/linux/linkage.h: remove unused ATTRIB_NORET macro

The uses have been renamed so delete the unused macro.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/linkage.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 6a8f252e49ee..807f1e533226 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -88,6 +88,4 @@
 
 #endif
 
-#define ATTRIB_NORET  __attribute__((noreturn))
-
 #endif
-- 
cgit v1.2.3


From 43570fd2f47ba518145e9289f54cde3dba4c8b25 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 12 Jan 2012 17:17:27 -0800
Subject: mm,slub,x86: decouple size of struct page from CONFIG_CMPXCHG_LOCAL

While implementing cmpxchg_double() on s390 I realized that we don't set
CONFIG_CMPXCHG_LOCAL despite the fact that we have support for it.

However setting that option will increase the size of struct page by
eight bytes on 64 bit, which we certainly do not want.  Also, it doesn't
make sense that a present cpu feature should increase the size of struct
page.

Besides that it looks like the dependency to CMPXCHG_LOCAL is wrong and
that it should depend on CMPXCHG_DOUBLE instead.

This patch:

If an architecture supports CMPXCHG_LOCAL this shouldn't result
automatically in larger struct pages if the SLUB allocator is used.
Instead introduce a new config option "HAVE_ALIGNED_STRUCT_PAGE" which
can be selected if a double word aligned struct page is required.  Also
update x86 Kconfig so that it should work as before.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig             | 8 ++++++++
 arch/x86/Kconfig         | 1 +
 include/linux/mm_types.h | 9 ++++-----
 mm/slub.c                | 6 +++---
 4 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 2505740b81d2..a2c5c077c32d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -185,4 +185,12 @@ config HAVE_RCU_TABLE_FREE
 config ARCH_HAVE_NMI_SAFE_CMPXCHG
 	bool
 
+config HAVE_ALIGNED_STRUCT_PAGE
+	bool
+	help
+	  This makes sure that struct pages are double word aligned and that
+	  e.g. the SLUB allocator can perform double word atomic operations
+	  on a struct page for better performance. However selecting this
+	  might increase the size of a struct page by a word.
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a150f4c35e94..5201a2c27239 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -60,6 +60,7 @@ config X86
 	select PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
 	select ANON_INODES
+	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
 	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5b42f1b34eb7..3cc3062b3767 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -151,12 +151,11 @@ struct page {
 #endif
 }
 /*
- * If another subsystem starts using the double word pairing for atomic
- * operations on struct page then it must change the #if to ensure
- * proper alignment of the page struct.
+ * The struct page can be forced to be double word aligned so that atomic ops
+ * on double words work. The SLUB allocator can make use of such a feature.
  */
-#if defined(CONFIG_SLUB) && defined(CONFIG_CMPXCHG_LOCAL)
-	__attribute__((__aligned__(2*sizeof(unsigned long))))
+#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
+	__aligned(2 * sizeof(unsigned long))
 #endif
 ;
 
diff --git a/mm/slub.c b/mm/slub.c
index 5d37b5e44140..72aa84134609 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -366,7 +366,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
 		const char *n)
 {
 	VM_BUG_ON(!irqs_disabled());
-#ifdef CONFIG_CMPXCHG_DOUBLE
+#if defined(CONFIG_CMPXCHG_DOUBLE) && defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
 	if (s->flags & __CMPXCHG_DOUBLE) {
 		if (cmpxchg_double(&page->freelist, &page->counters,
 			freelist_old, counters_old,
@@ -400,7 +400,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
 		void *freelist_new, unsigned long counters_new,
 		const char *n)
 {
-#ifdef CONFIG_CMPXCHG_DOUBLE
+#if defined(CONFIG_CMPXCHG_DOUBLE) && defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
 	if (s->flags & __CMPXCHG_DOUBLE) {
 		if (cmpxchg_double(&page->freelist, &page->counters,
 			freelist_old, counters_old,
@@ -3014,7 +3014,7 @@ static int kmem_cache_open(struct kmem_cache *s,
 		}
 	}
 
-#ifdef CONFIG_CMPXCHG_DOUBLE
+#if defined(CONFIG_CMPXCHG_DOUBLE) && defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
 	if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
 		/* Enable fast mode */
 		s->flags |= __CMPXCHG_DOUBLE;
-- 
cgit v1.2.3


From 28d82dc1c4edbc352129f97f4ca22624d1fe61de Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Thu, 12 Jan 2012 17:17:43 -0800
Subject: epoll: limit paths

The current epoll code can be tickled to run basically indefinitely in
both loop detection path check (on ep_insert()), and in the wakeup paths.
The programs that tickle this behavior set up deeply linked networks of
epoll file descriptors that cause the epoll algorithms to traverse them
indefinitely.  A couple of these sample programs have been previously
posted in this thread: https://lkml.org/lkml/2011/2/25/297.

To fix the loop detection path check algorithms, I simply keep track of
the epoll nodes that have been already visited.  Thus, the loop detection
becomes proportional to the number of epoll file descriptor and links.
This dramatically decreases the run-time of the loop check algorithm.  In
one diabolical case I tried it reduced the run-time from 15 mintues (all
in kernel time) to .3 seconds.

Fixing the wakeup paths could be done at wakeup time in a similar manner
by keeping track of nodes that have already been visited, but the
complexity is harder, since there can be multiple wakeups on different
cpus...Thus, I've opted to limit the number of possible wakeup paths when
the paths are created.

This is accomplished, by noting that the end file descriptor points that
are found during the loop detection pass (from the newly added link), are
actually the sources for wakeup events.  I keep a list of these file
descriptors and limit the number and length of these paths that emanate
from these 'source file descriptors'.  In the current implemetation I
allow 1000 paths of length 1, 500 of length 2, 100 of length 3, 50 of
length 4 and 10 of length 5.  Note that it is sufficient to check the
'source file descriptors' reachable from the newly added link, since no
other 'source file descriptors' will have newly added links.  This allows
us to check only the wakeup paths that may have gotten too long, and not
re-check all possible wakeup paths on the system.

In terms of the path limit selection, I think its first worth noting that
the most common case for epoll, is probably the model where you have 1
epoll file descriptor that is monitoring n number of 'source file
descriptors'.  In this case, each 'source file descriptor' has a 1 path of
length 1.  Thus, I believe that the limits I'm proposing are quite
reasonable and in fact may be too generous.  Thus, I'm hoping that the
proposed limits will not prevent any workloads that currently work to
fail.

In terms of locking, I have extended the use of the 'epmutex' to all
epoll_ctl add and remove operations.  Currently its only used in a subset
of the add paths.  I need to hold the epmutex, so that we can correctly
traverse a coherent graph, to check the number of paths.  I believe that
this additional locking is probably ok, since its in the setup/teardown
paths, and doesn't affect the running paths, but it certainly is going to
add some extra overhead.  Also, worth noting is that the epmuex was
recently added to the ep_ctl add operations in the initial path loop
detection code using the argument that it was not on a critical path.

Another thing to note here, is the length of epoll chains that is allowed.
Currently, eventpoll.c defines:

/* Maximum number of nesting allowed inside epoll sets */
#define EP_MAX_NESTS 4

This basically means that I am limited to a graph depth of 5 (EP_MAX_NESTS
+ 1).  However, this limit is currently only enforced during the loop
check detection code, and only when the epoll file descriptors are added
in a certain order.  Thus, this limit is currently easily bypassed.  The
newly added check for wakeup paths, stricly limits the wakeup paths to a
length of 5, regardless of the order in which ep's are linked together.
Thus, a side-effect of the new code is a more consistent enforcement of
the graph depth.

Thus far, I've tested this, using the sample programs previously
mentioned, which now either return quickly or return -EINVAL.  I've also
testing using the piptest.c epoll tester, which showed no difference in
performance.  I've also created a number of different epoll networks and
tested that they behave as expectded.

I believe this solves the original diabolical test cases, while still
preserving the sane epoll nesting.

Signed-off-by: Jason Baron <jbaron@redhat.com>
Cc: Nelson Elhage <nelhage@ksplice.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventpoll.c            | 234 +++++++++++++++++++++++++++++++++++++++++-----
 include/linux/eventpoll.h |   1 +
 include/linux/fs.h        |   1 +
 3 files changed, 211 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 828e750af23a..aabdfc38cf24 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -197,6 +197,12 @@ struct eventpoll {
 
 	/* The user that created the eventpoll descriptor */
 	struct user_struct *user;
+
+	struct file *file;
+
+	/* used to optimize loop detection check */
+	int visited;
+	struct list_head visited_list_link;
 };
 
 /* Wait structure used by the poll hooks */
@@ -255,6 +261,15 @@ static struct kmem_cache *epi_cache __read_mostly;
 /* Slab cache used to allocate "struct eppoll_entry" */
 static struct kmem_cache *pwq_cache __read_mostly;
 
+/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
+static LIST_HEAD(visited_list);
+
+/*
+ * List of files with newly added links, where we may need to limit the number
+ * of emanating paths. Protected by the epmutex.
+ */
+static LIST_HEAD(tfile_check_list);
+
 #ifdef CONFIG_SYSCTL
 
 #include <linux/sysctl.h>
@@ -276,6 +291,12 @@ ctl_table epoll_table[] = {
 };
 #endif /* CONFIG_SYSCTL */
 
+static const struct file_operations eventpoll_fops;
+
+static inline int is_file_epoll(struct file *f)
+{
+	return f->f_op == &eventpoll_fops;
+}
 
 /* Setup the structure that is used as key for the RB tree */
 static inline void ep_set_ffd(struct epoll_filefd *ffd,
@@ -711,12 +732,6 @@ static const struct file_operations eventpoll_fops = {
 	.llseek		= noop_llseek,
 };
 
-/* Fast test to see if the file is an eventpoll file */
-static inline int is_file_epoll(struct file *f)
-{
-	return f->f_op == &eventpoll_fops;
-}
-
 /*
  * This is called from eventpoll_release() to unlink files from the eventpoll
  * interface. We need to have this facility to cleanup correctly files that are
@@ -926,6 +941,99 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
 	rb_insert_color(&epi->rbn, &ep->rbr);
 }
 
+
+
+#define PATH_ARR_SIZE 5
+/*
+ * These are the number paths of length 1 to 5, that we are allowing to emanate
+ * from a single file of interest. For example, we allow 1000 paths of length
+ * 1, to emanate from each file of interest. This essentially represents the
+ * potential wakeup paths, which need to be limited in order to avoid massive
+ * uncontrolled wakeup storms. The common use case should be a single ep which
+ * is connected to n file sources. In this case each file source has 1 path
+ * of length 1. Thus, the numbers below should be more than sufficient. These
+ * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify
+ * and delete can't add additional paths. Protected by the epmutex.
+ */
+static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 };
+static int path_count[PATH_ARR_SIZE];
+
+static int path_count_inc(int nests)
+{
+	if (++path_count[nests] > path_limits[nests])
+		return -1;
+	return 0;
+}
+
+static void path_count_init(void)
+{
+	int i;
+
+	for (i = 0; i < PATH_ARR_SIZE; i++)
+		path_count[i] = 0;
+}
+
+static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
+{
+	int error = 0;
+	struct file *file = priv;
+	struct file *child_file;
+	struct epitem *epi;
+
+	list_for_each_entry(epi, &file->f_ep_links, fllink) {
+		child_file = epi->ep->file;
+		if (is_file_epoll(child_file)) {
+			if (list_empty(&child_file->f_ep_links)) {
+				if (path_count_inc(call_nests)) {
+					error = -1;
+					break;
+				}
+			} else {
+				error = ep_call_nested(&poll_loop_ncalls,
+							EP_MAX_NESTS,
+							reverse_path_check_proc,
+							child_file, child_file,
+							current);
+			}
+			if (error != 0)
+				break;
+		} else {
+			printk(KERN_ERR "reverse_path_check_proc: "
+				"file is not an ep!\n");
+		}
+	}
+	return error;
+}
+
+/**
+ * reverse_path_check - The tfile_check_list is list of file *, which have
+ *                      links that are proposed to be newly added. We need to
+ *                      make sure that those added links don't add too many
+ *                      paths such that we will spend all our time waking up
+ *                      eventpoll objects.
+ *
+ * Returns: Returns zero if the proposed links don't create too many paths,
+ *	    -1 otherwise.
+ */
+static int reverse_path_check(void)
+{
+	int length = 0;
+	int error = 0;
+	struct file *current_file;
+
+	/* let's call this for all tfiles */
+	list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
+		length++;
+		path_count_init();
+		error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+					reverse_path_check_proc, current_file,
+					current_file, current);
+		if (error)
+			break;
+	}
+	return error;
+}
+
 /*
  * Must be called with "mtx" held.
  */
@@ -987,6 +1095,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	 */
 	ep_rbtree_insert(ep, epi);
 
+	/* now check if we've created too many backpaths */
+	error = -EINVAL;
+	if (reverse_path_check())
+		goto error_remove_epi;
+
 	/* We have to drop the new item inside our item list to keep track of it */
 	spin_lock_irqsave(&ep->lock, flags);
 
@@ -1011,6 +1124,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 
 	return 0;
 
+error_remove_epi:
+	spin_lock(&tfile->f_lock);
+	if (ep_is_linked(&epi->fllink))
+		list_del_init(&epi->fllink);
+	spin_unlock(&tfile->f_lock);
+
+	rb_erase(&epi->rbn, &ep->rbr);
+
 error_unregister:
 	ep_unregister_pollwait(ep, epi);
 
@@ -1275,18 +1396,36 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
 	int error = 0;
 	struct file *file = priv;
 	struct eventpoll *ep = file->private_data;
+	struct eventpoll *ep_tovisit;
 	struct rb_node *rbp;
 	struct epitem *epi;
 
 	mutex_lock_nested(&ep->mtx, call_nests + 1);
+	ep->visited = 1;
+	list_add(&ep->visited_list_link, &visited_list);
 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		epi = rb_entry(rbp, struct epitem, rbn);
 		if (unlikely(is_file_epoll(epi->ffd.file))) {
+			ep_tovisit = epi->ffd.file->private_data;
+			if (ep_tovisit->visited)
+				continue;
 			error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
-					       ep_loop_check_proc, epi->ffd.file,
-					       epi->ffd.file->private_data, current);
+					ep_loop_check_proc, epi->ffd.file,
+					ep_tovisit, current);
 			if (error != 0)
 				break;
+		} else {
+			/*
+			 * If we've reached a file that is not associated with
+			 * an ep, then we need to check if the newly added
+			 * links are going to add too many wakeup paths. We do
+			 * this by adding it to the tfile_check_list, if it's
+			 * not already there, and calling reverse_path_check()
+			 * during ep_insert().
+			 */
+			if (list_empty(&epi->ffd.file->f_tfile_llink))
+				list_add(&epi->ffd.file->f_tfile_llink,
+					 &tfile_check_list);
 		}
 	}
 	mutex_unlock(&ep->mtx);
@@ -1307,8 +1446,31 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
  */
 static int ep_loop_check(struct eventpoll *ep, struct file *file)
 {
-	return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+	int ret;
+	struct eventpoll *ep_cur, *ep_next;
+
+	ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
 			      ep_loop_check_proc, file, ep, current);
+	/* clear visited list */
+	list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
+							visited_list_link) {
+		ep_cur->visited = 0;
+		list_del(&ep_cur->visited_list_link);
+	}
+	return ret;
+}
+
+static void clear_tfile_check_list(void)
+{
+	struct file *file;
+
+	/* first clear the tfile_check_list */
+	while (!list_empty(&tfile_check_list)) {
+		file = list_first_entry(&tfile_check_list, struct file,
+					f_tfile_llink);
+		list_del_init(&file->f_tfile_llink);
+	}
+	INIT_LIST_HEAD(&tfile_check_list);
 }
 
 /*
@@ -1316,8 +1478,9 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file)
  */
 SYSCALL_DEFINE1(epoll_create1, int, flags)
 {
-	int error;
+	int error, fd;
 	struct eventpoll *ep = NULL;
+	struct file *file;
 
 	/* Check the EPOLL_* constant for consistency.  */
 	BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
@@ -1334,11 +1497,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
 	 * Creates all the items needed to setup an eventpoll file. That is,
 	 * a file structure and a free file descriptor.
 	 */
-	error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
+	fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
+	if (fd < 0) {
+		error = fd;
+		goto out_free_ep;
+	}
+	file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
 				 O_RDWR | (flags & O_CLOEXEC));
-	if (error < 0)
-		ep_free(ep);
-
+	if (IS_ERR(file)) {
+		error = PTR_ERR(file);
+		goto out_free_fd;
+	}
+	fd_install(fd, file);
+	ep->file = file;
+	return fd;
+
+out_free_fd:
+	put_unused_fd(fd);
+out_free_ep:
+	ep_free(ep);
 	return error;
 }
 
@@ -1404,21 +1581,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	/*
 	 * When we insert an epoll file descriptor, inside another epoll file
 	 * descriptor, there is the change of creating closed loops, which are
-	 * better be handled here, than in more critical paths.
+	 * better be handled here, than in more critical paths. While we are
+	 * checking for loops we also determine the list of files reachable
+	 * and hang them on the tfile_check_list, so we can check that we
+	 * haven't created too many possible wakeup paths.
 	 *
-	 * We hold epmutex across the loop check and the insert in this case, in
-	 * order to prevent two separate inserts from racing and each doing the
-	 * insert "at the same time" such that ep_loop_check passes on both
-	 * before either one does the insert, thereby creating a cycle.
+	 * We need to hold the epmutex across both ep_insert and ep_remove
+	 * b/c we want to make sure we are looking at a coherent view of
+	 * epoll network.
 	 */
-	if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+	if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
 		mutex_lock(&epmutex);
 		did_lock_epmutex = 1;
-		error = -ELOOP;
-		if (ep_loop_check(ep, tfile) != 0)
-			goto error_tgt_fput;
 	}
-
+	if (op == EPOLL_CTL_ADD) {
+		if (is_file_epoll(tfile)) {
+			error = -ELOOP;
+			if (ep_loop_check(ep, tfile) != 0)
+				goto error_tgt_fput;
+		} else
+			list_add(&tfile->f_tfile_llink, &tfile_check_list);
+	}
 
 	mutex_lock_nested(&ep->mtx, 0);
 
@@ -1437,6 +1620,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 			error = ep_insert(ep, &epds, tfile, fd);
 		} else
 			error = -EEXIST;
+		clear_tfile_check_list();
 		break;
 	case EPOLL_CTL_DEL:
 		if (epi)
@@ -1455,7 +1639,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	mutex_unlock(&ep->mtx);
 
 error_tgt_fput:
-	if (unlikely(did_lock_epmutex))
+	if (did_lock_epmutex)
 		mutex_unlock(&epmutex);
 
 	fput(tfile);
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index f362733186a5..657ab55beda0 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -61,6 +61,7 @@ struct file;
 static inline void eventpoll_init_file(struct file *file)
 {
 	INIT_LIST_HEAD(&file->f_ep_links);
+	INIT_LIST_HEAD(&file->f_tfile_llink);
 }
 
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7aacf31418fe..a7409bc157e0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1001,6 +1001,7 @@ struct file {
 #ifdef CONFIG_EPOLL
 	/* Used by fs/eventpoll.c to link all the hooks to this file */
 	struct list_head	f_ep_links;
+	struct list_head	f_tfile_llink;
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
 #ifdef CONFIG_DEBUG_WRITECOUNT
-- 
cgit v1.2.3


From ab936cbcd02072a34b60d268f94440fd5cf1970b Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 12 Jan 2012 17:17:44 -0800
Subject: memcg: add mem_cgroup_replace_page_cache() to fix LRU issue

Commit ef6a3c6311 ("mm: add replace_page_cache_page() function") added a
function replace_page_cache_page().  This function replaces a page in the
radix-tree with a new page.  WHen doing this, memory cgroup needs to fix
up the accounting information.  memcg need to check PCG_USED bit etc.

In some(many?) cases, 'newpage' is on LRU before calling
replace_page_cache().  So, memcg's LRU accounting information should be
fixed, too.

This patch adds mem_cgroup_replace_page_cache() and removes the old hooks.
 In that function, old pages will be unaccounted without touching
res_counter and new page will be accounted to the memcg (of old page).
WHen overwriting pc->mem_cgroup of newpage, take zone->lru_lock and avoid
races with LRU handling.

Background:
  replace_page_cache_page() is called by FUSE code in its splice() handling.
  Here, 'newpage' is replacing oldpage but this newpage is not a newly allocated
  page and may be on LRU. LRU mis-accounting will be critical for memory cgroup
  because rmdir() checks the whole LRU is empty and there is no account leak.
  If a page is on the other LRU than it should be, rmdir() will fail.

This bug was added in March 2011, but no bug report yet.  I guess there
are not many people who use memcg and FUSE at the same time with upstream
kernels.

The result of this bug is that admin cannot destroy a memcg because of
account leak.  So, no panic, no deadlock.  And, even if an active cgroup
exist, umount can succseed.  So no problem at shutdown.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  6 ++++++
 mm/filemap.c               | 18 ++----------------
 mm/memcontrol.c            | 44 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f944591765eb..3558a5e268cf 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -122,6 +122,8 @@ struct zone_reclaim_stat*
 mem_cgroup_get_reclaim_stat_from_page(struct page *page);
 extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 					struct task_struct *p);
+extern void mem_cgroup_replace_page_cache(struct page *oldpage,
+					struct page *newpage);
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 extern int do_swap_account;
@@ -369,6 +371,10 @@ static inline
 void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
 {
 }
+static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
+				struct page *newpage)
+{
+}
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)
diff --git a/mm/filemap.c b/mm/filemap.c
index c4ee2e918bea..97f49ed35bd2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -393,24 +393,11 @@ EXPORT_SYMBOL(filemap_write_and_wait_range);
 int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 {
 	int error;
-	struct mem_cgroup *memcg = NULL;
 
 	VM_BUG_ON(!PageLocked(old));
 	VM_BUG_ON(!PageLocked(new));
 	VM_BUG_ON(new->mapping);
 
-	/*
-	 * This is not page migration, but prepare_migration and
-	 * end_migration does enough work for charge replacement.
-	 *
-	 * In the longer term we probably want a specialized function
-	 * for moving the charge from old to new in a more efficient
-	 * manner.
-	 */
-	error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask);
-	if (error)
-		return error;
-
 	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (!error) {
 		struct address_space *mapping = old->mapping;
@@ -432,13 +419,12 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 		if (PageSwapBacked(new))
 			__inc_zone_page_state(new, NR_SHMEM);
 		spin_unlock_irq(&mapping->tree_lock);
+		/* mem_cgroup codes must not be called under tree_lock */
+		mem_cgroup_replace_page_cache(old, new);
 		radix_tree_preload_end();
 		if (freepage)
 			freepage(old);
 		page_cache_release(old);
-		mem_cgroup_end_migration(memcg, old, new, true);
-	} else {
-		mem_cgroup_end_migration(memcg, old, new, false);
 	}
 
 	return error;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d87aa3510c5e..0b2d4036f1cd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3432,6 +3432,50 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	cgroup_release_and_wakeup_rmdir(&memcg->css);
 }
 
+/*
+ * At replace page cache, newpage is not under any memcg but it's on
+ * LRU. So, this function doesn't touch res_counter but handles LRU
+ * in correct way. Both pages are locked so we cannot race with uncharge.
+ */
+void mem_cgroup_replace_page_cache(struct page *oldpage,
+				  struct page *newpage)
+{
+	struct mem_cgroup *memcg;
+	struct page_cgroup *pc;
+	struct zone *zone;
+	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	unsigned long flags;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	pc = lookup_page_cgroup(oldpage);
+	/* fix accounting on old pages */
+	lock_page_cgroup(pc);
+	memcg = pc->mem_cgroup;
+	mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -1);
+	ClearPageCgroupUsed(pc);
+	unlock_page_cgroup(pc);
+
+	if (PageSwapBacked(oldpage))
+		type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+
+	zone = page_zone(newpage);
+	pc = lookup_page_cgroup(newpage);
+	/*
+	 * Even if newpage->mapping was NULL before starting replacement,
+	 * the newpage may be on LRU(or pagevec for LRU) already. We lock
+	 * LRU while we overwrite pc->mem_cgroup.
+	 */
+	spin_lock_irqsave(&zone->lru_lock, flags);
+	if (PageLRU(newpage))
+		del_page_from_lru_list(zone, newpage, page_lru(newpage));
+	__mem_cgroup_commit_charge(memcg, newpage, 1, pc, type);
+	if (PageLRU(newpage))
+		add_page_to_lru_list(zone, newpage, page_lru(newpage));
+	spin_unlock_irqrestore(&zone->lru_lock, flags);
+}
+
 #ifdef CONFIG_DEBUG_VM
 static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
 {
-- 
cgit v1.2.3


From 5660048ccac8735d9bc0a46325a02e6a6518b5b2 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Thu, 12 Jan 2012 17:17:59 -0800
Subject: mm: move memcg hierarchy reclaim to generic reclaim code

Memory cgroup limit reclaim and traditional global pressure reclaim will
soon share the same code to reclaim from a hierarchical tree of memory
cgroups.

In preparation of this, move the two right next to each other in
shrink_zone().

The mem_cgroup_hierarchical_reclaim() polymath is split into a soft
limit reclaim function, which still does hierarchy walking on its own,
and a limit (shrinking) reclaim function, which relies on generic
reclaim code to walk the hierarchy.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ying Han <yinghan@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  24 +++++++
 mm/memcontrol.c            | 169 +++++++++++++++++++++++----------------------
 mm/vmscan.c                |  43 ++++++++++--
 3 files changed, 148 insertions(+), 88 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3558a5e268cf..3b99dce85293 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -40,6 +40,12 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct mem_cgroup *mem_cont,
 					int active, int file);
 
+struct mem_cgroup_reclaim_cookie {
+	struct zone *zone;
+	int priority;
+	unsigned int generation;
+};
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 /*
  * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -106,6 +112,11 @@ mem_cgroup_prepare_migration(struct page *page,
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	struct page *oldpage, struct page *newpage, bool migration_ok);
 
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
+				   struct mem_cgroup *,
+				   struct mem_cgroup_reclaim_cookie *);
+void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
+
 /*
  * For memory reclaim.
  */
@@ -281,6 +292,19 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 {
 }
 
+static inline struct mem_cgroup *
+mem_cgroup_iter(struct mem_cgroup *root,
+		struct mem_cgroup *prev,
+		struct mem_cgroup_reclaim_cookie *reclaim)
+{
+	return NULL;
+}
+
+static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
+					 struct mem_cgroup *prev)
+{
+}
+
 static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *memcg)
 {
 	return 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bec451da7def..750ed1449955 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -370,8 +370,6 @@ enum charge_type {
 #define MEM_CGROUP_RECLAIM_NOSWAP	(1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
 #define MEM_CGROUP_RECLAIM_SHRINK_BIT	0x1
 #define MEM_CGROUP_RECLAIM_SHRINK	(1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
-#define MEM_CGROUP_RECLAIM_SOFT_BIT	0x2
-#define MEM_CGROUP_RECLAIM_SOFT		(1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
 
 static void mem_cgroup_get(struct mem_cgroup *memcg);
 static void mem_cgroup_put(struct mem_cgroup *memcg);
@@ -857,20 +855,33 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return memcg;
 }
 
-struct mem_cgroup_reclaim_cookie {
-	struct zone *zone;
-	int priority;
-	unsigned int generation;
-};
-
-static struct mem_cgroup *
-mem_cgroup_iter(struct mem_cgroup *root,
-		struct mem_cgroup *prev,
-		struct mem_cgroup_reclaim_cookie *reclaim)
+/**
+ * mem_cgroup_iter - iterate over memory cgroup hierarchy
+ * @root: hierarchy root
+ * @prev: previously returned memcg, NULL on first invocation
+ * @reclaim: cookie for shared reclaim walks, NULL for full walks
+ *
+ * Returns references to children of the hierarchy below @root, or
+ * @root itself, or %NULL after a full round-trip.
+ *
+ * Caller must pass the return value in @prev on subsequent
+ * invocations for reference counting, or use mem_cgroup_iter_break()
+ * to cancel a hierarchy walk before the round-trip is complete.
+ *
+ * Reclaimers can specify a zone and a priority level in @reclaim to
+ * divide up the memcgs in the hierarchy among all concurrent
+ * reclaimers operating on the same zone and priority.
+ */
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim)
 {
 	struct mem_cgroup *memcg = NULL;
 	int id = 0;
 
+	if (mem_cgroup_disabled())
+		return NULL;
+
 	if (!root)
 		root = root_mem_cgroup;
 
@@ -926,8 +937,13 @@ mem_cgroup_iter(struct mem_cgroup *root,
 	return memcg;
 }
 
-static void mem_cgroup_iter_break(struct mem_cgroup *root,
-				  struct mem_cgroup *prev)
+/**
+ * mem_cgroup_iter_break - abort a hierarchy walk prematurely
+ * @root: hierarchy root
+ * @prev: last visited hierarchy member as returned by mem_cgroup_iter()
+ */
+void mem_cgroup_iter_break(struct mem_cgroup *root,
+			   struct mem_cgroup *prev)
 {
 	if (!root)
 		root = root_mem_cgroup;
@@ -1555,6 +1571,42 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 	return min(limit, memsw);
 }
 
+static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
+					gfp_t gfp_mask,
+					unsigned long flags)
+{
+	unsigned long total = 0;
+	bool noswap = false;
+	int loop;
+
+	if (flags & MEM_CGROUP_RECLAIM_NOSWAP)
+		noswap = true;
+	if (!(flags & MEM_CGROUP_RECLAIM_SHRINK) && memcg->memsw_is_minimum)
+		noswap = true;
+
+	for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) {
+		if (loop)
+			drain_all_stock_async(memcg);
+		total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
+		/*
+		 * Allow limit shrinkers, which are triggered directly
+		 * by userspace, to catch signals and stop reclaim
+		 * after minimal progress, regardless of the margin.
+		 */
+		if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
+			break;
+		if (mem_cgroup_margin(memcg))
+			break;
+		/*
+		 * If nothing was reclaimed after two attempts, there
+		 * may be no reclaimable pages in this hierarchy.
+		 */
+		if (loop && !total)
+			break;
+	}
+	return total;
+}
+
 /**
  * test_mem_cgroup_node_reclaimable
  * @mem: the target memcg
@@ -1692,30 +1744,14 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 }
 #endif
 
-/*
- * Scan the hierarchy if needed to reclaim memory. We remember the last child
- * we reclaimed from, so that we don't end up penalizing one child extensively
- * based on its position in the children list.
- *
- * root_memcg is the original ancestor that we've been reclaim from.
- *
- * We give up and return to the caller when we visit root_memcg twice.
- * (other groups can be removed while we're walking....)
- *
- * If shrink==true, for avoiding to free too much, this returns immedieately.
- */
-static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
-						struct zone *zone,
-						gfp_t gfp_mask,
-						unsigned long reclaim_options,
-						unsigned long *total_scanned)
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+				   struct zone *zone,
+				   gfp_t gfp_mask,
+				   unsigned long *total_scanned)
 {
 	struct mem_cgroup *victim = NULL;
-	int ret, total = 0;
+	int total = 0;
 	int loop = 0;
-	bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
-	bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
-	bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
 	unsigned long excess;
 	unsigned long nr_scanned;
 	struct mem_cgroup_reclaim_cookie reclaim = {
@@ -1725,29 +1761,17 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 
 	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
 
-	/* If memsw_is_minimum==1, swap-out is of-no-use. */
-	if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
-		noswap = true;
-
 	while (1) {
 		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
 		if (!victim) {
 			loop++;
-			/*
-			 * We are not draining per cpu cached charges during
-			 * soft limit reclaim  because global reclaim doesn't
-			 * care about charges. It tries to free some memory and
-			 * charges will not give any.
-			 */
-			if (!check_soft && loop >= 1)
-				drain_all_stock_async(root_memcg);
 			if (loop >= 2) {
 				/*
 				 * If we have not been able to reclaim
 				 * anything, it might because there are
 				 * no reclaimable pages under this hierarchy
 				 */
-				if (!check_soft || !total)
+				if (!total)
 					break;
 				/*
 				 * We want to do more targeted reclaim.
@@ -1761,30 +1785,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 			}
 			continue;
 		}
-		if (!mem_cgroup_reclaimable(victim, noswap)) {
-			/* this cgroup's local usage == 0 */
+		if (!mem_cgroup_reclaimable(victim, false))
 			continue;
-		}
-		/* we use swappiness of local cgroup */
-		if (check_soft) {
-			ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
-				noswap, zone, &nr_scanned);
-			*total_scanned += nr_scanned;
-		} else
-			ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
-						noswap);
-		total += ret;
-		/*
-		 * At shrinking usage, we can't check we should stop here or
-		 * reclaim more. It's depends on callers. last_scanned_child
-		 * will work enough for keeping fairness under tree.
-		 */
-		if (shrink)
-			break;
-		if (check_soft) {
-			if (!res_counter_soft_limit_excess(&root_memcg->res))
-				break;
-		} else if (mem_cgroup_margin(root_memcg))
+		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+						     zone, &nr_scanned);
+		*total_scanned += nr_scanned;
+		if (!res_counter_soft_limit_excess(&root_memcg->res))
 			break;
 	}
 	mem_cgroup_iter_break(root_memcg, victim);
@@ -2281,8 +2287,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	if (!(gfp_mask & __GFP_WAIT))
 		return CHARGE_WOULDBLOCK;
 
-	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
-					      gfp_mask, flags, NULL);
+	ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
 		return CHARGE_RETRY;
 	/*
@@ -3559,9 +3564,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 		/* Usage is reduced ? */
   		if (curusage >= oldusage)
@@ -3619,10 +3623,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_NOSWAP |
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_NOSWAP |
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3665,10 +3668,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 			break;
 
 		nr_scanned = 0;
-		reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
-						gfp_mask,
-						MEM_CGROUP_RECLAIM_SOFT,
-						&nr_scanned);
+		reclaimed = mem_cgroup_soft_reclaim(mz->mem, zone,
+						    gfp_mask, &nr_scanned);
 		nr_reclaimed += reclaimed;
 		*total_scanned += nr_scanned;
 		spin_lock(&mctz->lock);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e0627d07c3ac..136c7eb0ad88 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2104,12 +2104,43 @@ restart:
 static void shrink_zone(int priority, struct zone *zone,
 			struct scan_control *sc)
 {
-	struct mem_cgroup_zone mz = {
-		.mem_cgroup = sc->target_mem_cgroup,
+	struct mem_cgroup *root = sc->target_mem_cgroup;
+	struct mem_cgroup_reclaim_cookie reclaim = {
 		.zone = zone,
+		.priority = priority,
 	};
+	struct mem_cgroup *memcg;
+
+	if (global_reclaim(sc)) {
+		struct mem_cgroup_zone mz = {
+			.mem_cgroup = NULL,
+			.zone = zone,
+		};
+
+		shrink_mem_cgroup_zone(priority, &mz, sc);
+		return;
+	}
+
+	memcg = mem_cgroup_iter(root, NULL, &reclaim);
+	do {
+		struct mem_cgroup_zone mz = {
+			.mem_cgroup = memcg,
+			.zone = zone,
+		};
 
-	shrink_mem_cgroup_zone(priority, &mz, sc);
+		shrink_mem_cgroup_zone(priority, &mz, sc);
+		/*
+		 * Limit reclaim has historically picked one memcg and
+		 * scanned it with decreasing priority levels until
+		 * nr_to_reclaim had been reclaimed.  This priority
+		 * cycle is thus over after a single memcg.
+		 */
+		if (!global_reclaim(sc)) {
+			mem_cgroup_iter_break(root, memcg);
+			break;
+		}
+		memcg = mem_cgroup_iter(root, memcg, &reclaim);
+	} while (memcg);
 }
 
 /*
@@ -2374,6 +2405,10 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 		.order = 0,
 		.target_mem_cgroup = mem,
 	};
+	struct mem_cgroup_zone mz = {
+		.mem_cgroup = mem,
+		.zone = zone,
+	};
 
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2389,7 +2424,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 	 * will pick up pages from other mem cgroup's as well. We hack
 	 * the priority and make it zero.
 	 */
-	shrink_zone(0, zone, &sc);
+	shrink_mem_cgroup_zone(0, &mz, &sc);
 
 	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
-- 
cgit v1.2.3


From 6290df545814990ca2663baf6e894669132d5f73 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Thu, 12 Jan 2012 17:18:10 -0800
Subject: mm: collect LRU list heads into struct lruvec

Having a unified structure with a LRU list set for both global zones and
per-memcg zones allows to keep that code simple which deals with LRU
lists and does not care about the container itself.

Once the per-memcg LRU lists directly link struct pages, the isolation
function and all other list manipulations are shared between the memcg
case and the global LRU case.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ying Han <yinghan@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_inline.h |  2 +-
 include/linux/mmzone.h    | 10 ++++++----
 mm/memcontrol.c           | 17 +++++++----------
 mm/page_alloc.c           |  2 +-
 mm/swap.c                 | 11 +++++------
 mm/vmscan.c               | 10 +++++-----
 6 files changed, 25 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 8f7d24712dc1..e6a7ffe16d31 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -33,7 +33,7 @@ __add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l,
 static inline void
 add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
-	__add_page_to_lru_list(zone, page, l, &zone->lru[l].list);
+	__add_page_to_lru_list(zone, page, l, &zone->lruvec.lists[l]);
 }
 
 static inline void
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ca6ca92418a6..42e544cd4c9f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -159,6 +159,10 @@ static inline int is_unevictable_lru(enum lru_list l)
 	return (l == LRU_UNEVICTABLE);
 }
 
+struct lruvec {
+	struct list_head lists[NR_LRU_LISTS];
+};
+
 /* Mask used at gathering information at once (see memcontrol.c) */
 #define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))
 #define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
@@ -364,10 +368,8 @@ struct zone {
 	ZONE_PADDING(_pad1_)
 
 	/* Fields commonly accessed by the page reclaim scanner */
-	spinlock_t		lru_lock;	
-	struct zone_lru {
-		struct list_head list;
-	} lru[NR_LRU_LISTS];
+	spinlock_t		lru_lock;
+	struct lruvec		lruvec;
 
 	struct zone_reclaim_stat reclaim_stat;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ad7f36f676ff..6e7f849a1a9e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -134,10 +134,7 @@ struct mem_cgroup_reclaim_iter {
  * per-zone information in memory controller.
  */
 struct mem_cgroup_per_zone {
-	/*
-	 * spin_lock to protect the per cgroup LRU
-	 */
-	struct list_head	lists[NR_LRU_LISTS];
+	struct lruvec		lruvec;
 	unsigned long		count[NR_LRU_LISTS];
 
 	struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
@@ -1061,7 +1058,7 @@ void mem_cgroup_rotate_reclaimable_page(struct page *page)
 	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
 	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	list_move_tail(&pc->lru, &mz->lists[lru]);
+	list_move_tail(&pc->lru, &mz->lruvec.lists[lru]);
 }
 
 void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
@@ -1079,7 +1076,7 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
 	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
 	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	list_move(&pc->lru, &mz->lists[lru]);
+	list_move(&pc->lru, &mz->lruvec.lists[lru]);
 }
 
 void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
@@ -1109,7 +1106,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 	/* huge page split is done under lru_lock. so, we have no races. */
 	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
 	SetPageCgroupAcctLRU(pc);
-	list_add(&pc->lru, &mz->lists[lru]);
+	list_add(&pc->lru, &mz->lruvec.lists[lru]);
 }
 
 /*
@@ -1307,7 +1304,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 
 	BUG_ON(!mem_cont);
 	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
-	src = &mz->lists[lru];
+	src = &mz->lruvec.lists[lru];
 
 	scan = 0;
 	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
@@ -3738,7 +3735,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 
 	zone = &NODE_DATA(node)->node_zones[zid];
 	mz = mem_cgroup_zoneinfo(memcg, node, zid);
-	list = &mz->lists[lru];
+	list = &mz->lruvec.lists[lru];
 
 	loop = MEM_CGROUP_ZSTAT(mz, lru);
 	/* give some margin against EBUSY etc...*/
@@ -4864,7 +4861,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
 		mz = &pn->zoneinfo[zone];
 		for_each_lru(l)
-			INIT_LIST_HEAD(&mz->lists[l]);
+			INIT_LIST_HEAD(&mz->lruvec.lists[l]);
 		mz->usage_in_excess = 0;
 		mz->on_tree = false;
 		mz->mem = memcg;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 794e6715c226..25c248eb7d5f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4288,7 +4288,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 
 		zone_pcp_init(zone);
 		for_each_lru(l)
-			INIT_LIST_HEAD(&zone->lru[l].list);
+			INIT_LIST_HEAD(&zone->lruvec.lists[l]);
 		zone->reclaim_stat.recent_rotated[0] = 0;
 		zone->reclaim_stat.recent_rotated[1] = 0;
 		zone->reclaim_stat.recent_scanned[0] = 0;
diff --git a/mm/swap.c b/mm/swap.c
index 67a09a633a09..76ef79d3857c 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -236,7 +236,7 @@ static void pagevec_move_tail_fn(struct page *page, void *arg)
 
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		enum lru_list lru = page_lru_base_type(page);
-		list_move_tail(&page->lru, &zone->lru[lru].list);
+		list_move_tail(&page->lru, &zone->lruvec.lists[lru]);
 		mem_cgroup_rotate_reclaimable_page(page);
 		(*pgmoved)++;
 	}
@@ -480,7 +480,7 @@ static void lru_deactivate_fn(struct page *page, void *arg)
 		 * The page's writeback ends up during pagevec
 		 * We moves tha page into tail of inactive.
 		 */
-		list_move_tail(&page->lru, &zone->lru[lru].list);
+		list_move_tail(&page->lru, &zone->lruvec.lists[lru]);
 		mem_cgroup_rotate_reclaimable_page(page);
 		__count_vm_event(PGROTATED);
 	}
@@ -654,7 +654,6 @@ void lru_add_page_tail(struct zone* zone,
 	int active;
 	enum lru_list lru;
 	const int file = 0;
-	struct list_head *head;
 
 	VM_BUG_ON(!PageHead(page));
 	VM_BUG_ON(PageCompound(page_tail));
@@ -674,10 +673,10 @@ void lru_add_page_tail(struct zone* zone,
 		}
 		update_page_reclaim_stat(zone, page_tail, file, active);
 		if (likely(PageLRU(page)))
-			head = page->lru.prev;
+			__add_page_to_lru_list(zone, page_tail, lru,
+					       page->lru.prev);
 		else
-			head = &zone->lru[lru].list;
-		__add_page_to_lru_list(zone, page_tail, lru, head);
+			add_page_to_lru_list(zone, page_tail, lru);
 	} else {
 		SetPageUnevictable(page_tail);
 		add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 024168cfdcb0..93cdc44a1693 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1250,8 +1250,8 @@ static unsigned long isolate_pages_global(unsigned long nr,
 		lru += LRU_ACTIVE;
 	if (file)
 		lru += LRU_FILE;
-	return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
-								mode, file);
+	return isolate_lru_pages(nr, &z->lruvec.lists[lru], dst,
+				 scanned, order, mode, file);
 }
 
 /*
@@ -1630,7 +1630,7 @@ static void move_active_pages_to_lru(struct zone *zone,
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
 
-		list_move(&page->lru, &zone->lru[lru].list);
+		list_move(&page->lru, &zone->lruvec.lists[lru]);
 		mem_cgroup_add_lru_list(page, lru);
 		pgmoved += hpage_nr_pages(page);
 
@@ -3448,7 +3448,7 @@ retry:
 		enum lru_list l = page_lru_base_type(page);
 
 		__dec_zone_state(zone, NR_UNEVICTABLE);
-		list_move(&page->lru, &zone->lru[l].list);
+		list_move(&page->lru, &zone->lruvec.lists[l]);
 		mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
 		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
 		__count_vm_event(UNEVICTABLE_PGRESCUED);
@@ -3457,7 +3457,7 @@ retry:
 		 * rotate unevictable list
 		 */
 		SetPageUnevictable(page);
-		list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
+		list_move(&page->lru, &zone->lruvec.lists[LRU_UNEVICTABLE]);
 		mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
 		if (page_evictable(page, NULL))
 			goto retry;
-- 
cgit v1.2.3


From 925b7673cce39116ce61e7a06683a4a0dad1e72a Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Thu, 12 Jan 2012 17:18:15 -0800
Subject: mm: make per-memcg LRU lists exclusive

Now that all code that operated on global per-zone LRU lists is
converted to operate on per-memory cgroup LRU lists instead, there is no
reason to keep the double-LRU scheme around any longer.

The pc->lru member is removed and page->lru is linked directly to the
per-memory cgroup LRU lists, which removes two pointers from a
descriptor that exists for every page frame in the system.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Ying Han <yinghan@google.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h  |  51 ++++----
 include/linux/mm_inline.h   |  21 ++-
 include/linux/page_cgroup.h |   1 -
 mm/memcontrol.c             | 311 +++++++++++++++++++++-----------------------
 mm/page_cgroup.c            |   1 -
 mm/swap.c                   |  23 ++--
 mm/vmscan.c                 |  64 +++++----
 7 files changed, 225 insertions(+), 247 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3b99dce85293..e2f8e7caf04b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -32,14 +32,6 @@ enum mem_cgroup_page_stat_item {
 	MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */
 };
 
-extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
-					struct list_head *dst,
-					unsigned long *scanned, int order,
-					isolate_mode_t mode,
-					struct zone *z,
-					struct mem_cgroup *mem_cont,
-					int active, int file);
-
 struct mem_cgroup_reclaim_cookie {
 	struct zone *zone;
 	int priority;
@@ -69,13 +61,14 @@ extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
 
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
-extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru);
-extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru);
-extern void mem_cgroup_rotate_reclaimable_page(struct page *page);
-extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru);
-extern void mem_cgroup_del_lru(struct page *page);
-extern void mem_cgroup_move_lists(struct page *page,
-				  enum lru_list from, enum lru_list to);
+
+struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
+struct lruvec *mem_cgroup_lru_add_list(struct zone *, struct page *,
+				       enum lru_list);
+void mem_cgroup_lru_del_list(struct page *, enum lru_list);
+void mem_cgroup_lru_del(struct page *);
+struct lruvec *mem_cgroup_lru_move_lists(struct zone *, struct page *,
+					 enum lru_list, enum lru_list);
 
 /* For coalescing uncharge for reducing memcg' overhead*/
 extern void mem_cgroup_uncharge_start(void);
@@ -223,33 +216,33 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 }
 
-static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
-{
-}
-
-static inline void mem_cgroup_del_lru_list(struct page *page, int lru)
+static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
+						    struct mem_cgroup *memcg)
 {
-	return ;
+	return &zone->lruvec;
 }
 
-static inline void mem_cgroup_rotate_reclaimable_page(struct page *page)
+static inline struct lruvec *mem_cgroup_lru_add_list(struct zone *zone,
+						     struct page *page,
+						     enum lru_list lru)
 {
-	return ;
+	return &zone->lruvec;
 }
 
-static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru)
+static inline void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 {
-	return ;
 }
 
-static inline void mem_cgroup_del_lru(struct page *page)
+static inline void mem_cgroup_lru_del(struct page *page)
 {
-	return ;
 }
 
-static inline void
-mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to)
+static inline struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
+						       struct page *page,
+						       enum lru_list from,
+						       enum lru_list to)
 {
+	return &zone->lruvec;
 }
 
 static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index e6a7ffe16d31..4e3478e71926 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -21,27 +21,22 @@ static inline int page_is_file_cache(struct page *page)
 	return !PageSwapBacked(page);
 }
 
-static inline void
-__add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l,
-		       struct list_head *head)
-{
-	list_add(&page->lru, head);
-	__mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
-	mem_cgroup_add_lru_list(page, l);
-}
-
 static inline void
 add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
-	__add_page_to_lru_list(zone, page, l, &zone->lruvec.lists[l]);
+	struct lruvec *lruvec;
+
+	lruvec = mem_cgroup_lru_add_list(zone, page, l);
+	list_add(&page->lru, &lruvec->lists[l]);
+	__mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
 }
 
 static inline void
 del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
+	mem_cgroup_lru_del_list(page, l);
 	list_del(&page->lru);
 	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
-	mem_cgroup_del_lru_list(page, l);
 }
 
 /**
@@ -64,7 +59,6 @@ del_page_from_lru(struct zone *zone, struct page *page)
 {
 	enum lru_list l;
 
-	list_del(&page->lru);
 	if (PageUnevictable(page)) {
 		__ClearPageUnevictable(page);
 		l = LRU_UNEVICTABLE;
@@ -75,8 +69,9 @@ del_page_from_lru(struct zone *zone, struct page *page)
 			l += LRU_ACTIVE;
 		}
 	}
+	mem_cgroup_lru_del_list(page, l);
+	list_del(&page->lru);
 	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
-	mem_cgroup_del_lru_list(page, l);
 }
 
 /**
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 961ecc7d30bc..5bae7535c202 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -31,7 +31,6 @@ enum {
 struct page_cgroup {
 	unsigned long flags;
 	struct mem_cgroup *mem_cgroup;
-	struct list_head lru;		/* per cgroup LRU list */
 };
 
 void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6e7f849a1a9e..972878b648c2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -995,6 +995,27 @@ out:
 }
 EXPORT_SYMBOL(mem_cgroup_count_vm_event);
 
+/**
+ * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
+ * @zone: zone of the wanted lruvec
+ * @mem: memcg of the wanted lruvec
+ *
+ * Returns the lru list vector holding pages for the given @zone and
+ * @mem.  This can be the global zone lruvec, if the memory controller
+ * is disabled.
+ */
+struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
+				      struct mem_cgroup *memcg)
+{
+	struct mem_cgroup_per_zone *mz;
+
+	if (mem_cgroup_disabled())
+		return &zone->lruvec;
+
+	mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone));
+	return &mz->lruvec;
+}
+
 /*
  * Following LRU functions are allowed to be used without PCG_LOCK.
  * Operations are called by routine of global LRU independently from memcg.
@@ -1009,104 +1030,123 @@ EXPORT_SYMBOL(mem_cgroup_count_vm_event);
  * When moving account, the page is not on LRU. It's isolated.
  */
 
-void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
+/**
+ * mem_cgroup_lru_add_list - account for adding an lru page and return lruvec
+ * @zone: zone of the page
+ * @page: the page
+ * @lru: current lru
+ *
+ * This function accounts for @page being added to @lru, and returns
+ * the lruvec for the given @zone and the memcg @page is charged to.
+ *
+ * The callsite is then responsible for physically linking the page to
+ * the returned lruvec->lists[@lru].
+ */
+struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
+				       enum lru_list lru)
 {
-	struct page_cgroup *pc;
 	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup *memcg;
+	struct page_cgroup *pc;
 
 	if (mem_cgroup_disabled())
-		return;
+		return &zone->lruvec;
+
 	pc = lookup_page_cgroup(page);
-	/* can happen while we handle swapcache. */
-	if (!TestClearPageCgroupAcctLRU(pc))
-		return;
-	VM_BUG_ON(!pc->mem_cgroup);
+	VM_BUG_ON(PageCgroupAcctLRU(pc));
 	/*
-	 * We don't check PCG_USED bit. It's cleared when the "page" is finally
-	 * removed from global LRU.
+	 * putback:				charge:
+	 * SetPageLRU				SetPageCgroupUsed
+	 * smp_mb				smp_mb
+	 * PageCgroupUsed && add to memcg LRU	PageLRU && add to memcg LRU
+	 *
+	 * Ensure that one of the two sides adds the page to the memcg
+	 * LRU during a race.
 	 */
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	/* huge page split is done under lru_lock. so, we have no races. */
-	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
-	VM_BUG_ON(list_empty(&pc->lru));
-	list_del_init(&pc->lru);
-}
-
-void mem_cgroup_del_lru(struct page *page)
-{
-	mem_cgroup_del_lru_list(page, page_lru(page));
+	smp_mb();
+	/*
+	 * If the page is uncharged, it may be freed soon, but it
+	 * could also be swap cache (readahead, swapoff) that needs to
+	 * be reclaimable in the future.  root_mem_cgroup will babysit
+	 * it for the time being.
+	 */
+	if (PageCgroupUsed(pc)) {
+		/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+		smp_rmb();
+		memcg = pc->mem_cgroup;
+		SetPageCgroupAcctLRU(pc);
+	} else
+		memcg = root_mem_cgroup;
+	mz = page_cgroup_zoneinfo(memcg, page);
+	/* compound_order() is stabilized through lru_lock */
+	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
+	return &mz->lruvec;
 }
 
-/*
- * Writeback is about to end against a page which has been marked for immediate
- * reclaim.  If it still appears to be reclaimable, move it to the tail of the
- * inactive list.
+/**
+ * mem_cgroup_lru_del_list - account for removing an lru page
+ * @page: the page
+ * @lru: target lru
+ *
+ * This function accounts for @page being removed from @lru.
+ *
+ * The callsite is then responsible for physically unlinking
+ * @page->lru.
  */
-void mem_cgroup_rotate_reclaimable_page(struct page *page)
+void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 {
 	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup *memcg;
 	struct page_cgroup *pc;
-	enum lru_list lru = page_lru(page);
 
 	if (mem_cgroup_disabled())
 		return;
 
 	pc = lookup_page_cgroup(page);
-	/* unused page is not rotated. */
-	if (!PageCgroupUsed(pc))
-		return;
-	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-	smp_rmb();
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	list_move_tail(&pc->lru, &mz->lruvec.lists[lru]);
+	/*
+	 * root_mem_cgroup babysits uncharged LRU pages, but
+	 * PageCgroupUsed is cleared when the page is about to get
+	 * freed.  PageCgroupAcctLRU remembers whether the
+	 * LRU-accounting happened against pc->mem_cgroup or
+	 * root_mem_cgroup.
+	 */
+	if (TestClearPageCgroupAcctLRU(pc)) {
+		VM_BUG_ON(!pc->mem_cgroup);
+		memcg = pc->mem_cgroup;
+	} else
+		memcg = root_mem_cgroup;
+	mz = page_cgroup_zoneinfo(memcg, page);
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
 }
 
-void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
+void mem_cgroup_lru_del(struct page *page)
 {
-	struct mem_cgroup_per_zone *mz;
-	struct page_cgroup *pc;
-
-	if (mem_cgroup_disabled())
-		return;
-
-	pc = lookup_page_cgroup(page);
-	/* unused page is not rotated. */
-	if (!PageCgroupUsed(pc))
-		return;
-	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-	smp_rmb();
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	list_move(&pc->lru, &mz->lruvec.lists[lru]);
+	mem_cgroup_lru_del_list(page, page_lru(page));
 }
 
-void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
+/**
+ * mem_cgroup_lru_move_lists - account for moving a page between lrus
+ * @zone: zone of the page
+ * @page: the page
+ * @from: current lru
+ * @to: target lru
+ *
+ * This function accounts for @page being moved between the lrus @from
+ * and @to, and returns the lruvec for the given @zone and the memcg
+ * @page is charged to.
+ *
+ * The callsite is then responsible for physically relinking
+ * @page->lru to the returned lruvec->lists[@to].
+ */
+struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
+					 struct page *page,
+					 enum lru_list from,
+					 enum lru_list to)
 {
-	struct page_cgroup *pc;
-	struct mem_cgroup_per_zone *mz;
-
-	if (mem_cgroup_disabled())
-		return;
-	pc = lookup_page_cgroup(page);
-	VM_BUG_ON(PageCgroupAcctLRU(pc));
-	/*
-	 * putback:				charge:
-	 * SetPageLRU				SetPageCgroupUsed
-	 * smp_mb				smp_mb
-	 * PageCgroupUsed && add to memcg LRU	PageLRU && add to memcg LRU
-	 *
-	 * Ensure that one of the two sides adds the page to the memcg
-	 * LRU during a race.
-	 */
-	smp_mb();
-	if (!PageCgroupUsed(pc))
-		return;
-	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-	smp_rmb();
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	/* huge page split is done under lru_lock. so, we have no races. */
-	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
-	SetPageCgroupAcctLRU(pc);
-	list_add(&pc->lru, &mz->lruvec.lists[lru]);
+	/* XXX: Optimize this, especially for @from == @to */
+	mem_cgroup_lru_del_list(page, from);
+	return mem_cgroup_lru_add_list(zone, page, to);
 }
 
 /*
@@ -1117,6 +1157,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
  */
 static void mem_cgroup_lru_del_before_commit(struct page *page)
 {
+	enum lru_list lru;
 	unsigned long flags;
 	struct zone *zone = page_zone(page);
 	struct page_cgroup *pc = lookup_page_cgroup(page);
@@ -1133,17 +1174,28 @@ static void mem_cgroup_lru_del_before_commit(struct page *page)
 		return;
 
 	spin_lock_irqsave(&zone->lru_lock, flags);
+	lru = page_lru(page);
 	/*
-	 * Forget old LRU when this page_cgroup is *not* used. This Used bit
-	 * is guarded by lock_page() because the page is SwapCache.
+	 * The uncharged page could still be registered to the LRU of
+	 * the stale pc->mem_cgroup.
+	 *
+	 * As pc->mem_cgroup is about to get overwritten, the old LRU
+	 * accounting needs to be taken care of.  Let root_mem_cgroup
+	 * babysit the page until the new memcg is responsible for it.
+	 *
+	 * The PCG_USED bit is guarded by lock_page() as the page is
+	 * swapcache/pagecache.
 	 */
-	if (!PageCgroupUsed(pc))
-		mem_cgroup_del_lru_list(page, page_lru(page));
+	if (PageLRU(page) && PageCgroupAcctLRU(pc) && !PageCgroupUsed(pc)) {
+		del_page_from_lru_list(zone, page, lru);
+		add_page_to_lru_list(zone, page, lru);
+	}
 	spin_unlock_irqrestore(&zone->lru_lock, flags);
 }
 
 static void mem_cgroup_lru_add_after_commit(struct page *page)
 {
+	enum lru_list lru;
 	unsigned long flags;
 	struct zone *zone = page_zone(page);
 	struct page_cgroup *pc = lookup_page_cgroup(page);
@@ -1161,22 +1213,22 @@ static void mem_cgroup_lru_add_after_commit(struct page *page)
 	if (likely(!PageLRU(page)))
 		return;
 	spin_lock_irqsave(&zone->lru_lock, flags);
-	/* link when the page is linked to LRU but page_cgroup isn't */
-	if (PageLRU(page) && !PageCgroupAcctLRU(pc))
-		mem_cgroup_add_lru_list(page, page_lru(page));
+	lru = page_lru(page);
+	/*
+	 * If the page is not on the LRU, someone will soon put it
+	 * there.  If it is, and also already accounted for on the
+	 * memcg-side, it must be on the right lruvec as setting
+	 * pc->mem_cgroup and PageCgroupUsed is properly ordered.
+	 * Otherwise, root_mem_cgroup has been babysitting the page
+	 * during the charge.  Move it to the new memcg now.
+	 */
+	if (PageLRU(page) && !PageCgroupAcctLRU(pc)) {
+		del_page_from_lru_list(zone, page, lru);
+		add_page_to_lru_list(zone, page, lru);
+	}
 	spin_unlock_irqrestore(&zone->lru_lock, flags);
 }
 
-
-void mem_cgroup_move_lists(struct page *page,
-			   enum lru_list from, enum lru_list to)
-{
-	if (mem_cgroup_disabled())
-		return;
-	mem_cgroup_del_lru_list(page, from);
-	mem_cgroup_add_lru_list(page, to);
-}
-
 /*
  * Checks whether given mem is same or in the root_mem_cgroup's
  * hierarchy subtree
@@ -1282,68 +1334,6 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 	return &mz->reclaim_stat;
 }
 
-unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
-					struct list_head *dst,
-					unsigned long *scanned, int order,
-					isolate_mode_t mode,
-					struct zone *z,
-					struct mem_cgroup *mem_cont,
-					int active, int file)
-{
-	unsigned long nr_taken = 0;
-	struct page *page;
-	unsigned long scan;
-	LIST_HEAD(pc_list);
-	struct list_head *src;
-	struct page_cgroup *pc, *tmp;
-	int nid = zone_to_nid(z);
-	int zid = zone_idx(z);
-	struct mem_cgroup_per_zone *mz;
-	int lru = LRU_FILE * file + active;
-	int ret;
-
-	BUG_ON(!mem_cont);
-	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
-	src = &mz->lruvec.lists[lru];
-
-	scan = 0;
-	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
-		if (scan >= nr_to_scan)
-			break;
-
-		if (unlikely(!PageCgroupUsed(pc)))
-			continue;
-
-		page = lookup_cgroup_page(pc);
-
-		if (unlikely(!PageLRU(page)))
-			continue;
-
-		scan++;
-		ret = __isolate_lru_page(page, mode, file);
-		switch (ret) {
-		case 0:
-			list_move(&page->lru, dst);
-			mem_cgroup_del_lru(page);
-			nr_taken += hpage_nr_pages(page);
-			break;
-		case -EBUSY:
-			/* we don't affect global LRU but rotate in our LRU */
-			mem_cgroup_rotate_lru_list(page, page_lru(page));
-			break;
-		default:
-			break;
-		}
-	}
-
-	*scanned = scan;
-
-	trace_mm_vmscan_memcg_isolate(0, nr_to_scan, scan, nr_taken,
-				      0, 0, 0, mode);
-
-	return nr_taken;
-}
-
 #define mem_cgroup_from_res_counter(counter, member)	\
 	container_of(counter, struct mem_cgroup, member)
 
@@ -3726,11 +3716,11 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 				int node, int zid, enum lru_list lru)
 {
-	struct zone *zone;
 	struct mem_cgroup_per_zone *mz;
-	struct page_cgroup *pc, *busy;
 	unsigned long flags, loop;
 	struct list_head *list;
+	struct page *busy;
+	struct zone *zone;
 	int ret = 0;
 
 	zone = &NODE_DATA(node)->node_zones[zid];
@@ -3742,6 +3732,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 	loop += 256;
 	busy = NULL;
 	while (loop--) {
+		struct page_cgroup *pc;
 		struct page *page;
 
 		ret = 0;
@@ -3750,16 +3741,16 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 			spin_unlock_irqrestore(&zone->lru_lock, flags);
 			break;
 		}
-		pc = list_entry(list->prev, struct page_cgroup, lru);
-		if (busy == pc) {
-			list_move(&pc->lru, list);
+		page = list_entry(list->prev, struct page, lru);
+		if (busy == page) {
+			list_move(&page->lru, list);
 			busy = NULL;
 			spin_unlock_irqrestore(&zone->lru_lock, flags);
 			continue;
 		}
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
-		page = lookup_cgroup_page(pc);
+		pc = lookup_page_cgroup(page);
 
 		ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
 		if (ret == -ENOMEM)
@@ -3767,7 +3758,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 
 		if (ret == -EBUSY || ret == -EINVAL) {
 			/* found lock contention or "pc" is obsolete. */
-			busy = pc;
+			busy = page;
 			cond_resched();
 		} else
 			busy = NULL;
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 2d123f94a8df..f59405a8d752 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -16,7 +16,6 @@ static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
 	pc->flags = 0;
 	set_page_cgroup_array_id(pc, id);
 	pc->mem_cgroup = NULL;
-	INIT_LIST_HEAD(&pc->lru);
 }
 static unsigned long total_usage;
 
diff --git a/mm/swap.c b/mm/swap.c
index 76ef79d3857c..126da2919f60 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -232,12 +232,14 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
 static void pagevec_move_tail_fn(struct page *page, void *arg)
 {
 	int *pgmoved = arg;
-	struct zone *zone = page_zone(page);
 
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		enum lru_list lru = page_lru_base_type(page);
-		list_move_tail(&page->lru, &zone->lruvec.lists[lru]);
-		mem_cgroup_rotate_reclaimable_page(page);
+		struct lruvec *lruvec;
+
+		lruvec = mem_cgroup_lru_move_lists(page_zone(page),
+						   page, lru, lru);
+		list_move_tail(&page->lru, &lruvec->lists[lru]);
 		(*pgmoved)++;
 	}
 }
@@ -476,12 +478,13 @@ static void lru_deactivate_fn(struct page *page, void *arg)
 		 */
 		SetPageReclaim(page);
 	} else {
+		struct lruvec *lruvec;
 		/*
 		 * The page's writeback ends up during pagevec
 		 * We moves tha page into tail of inactive.
 		 */
-		list_move_tail(&page->lru, &zone->lruvec.lists[lru]);
-		mem_cgroup_rotate_reclaimable_page(page);
+		lruvec = mem_cgroup_lru_move_lists(zone, page, lru, lru);
+		list_move_tail(&page->lru, &lruvec->lists[lru]);
 		__count_vm_event(PGROTATED);
 	}
 
@@ -663,6 +666,8 @@ void lru_add_page_tail(struct zone* zone,
 	SetPageLRU(page_tail);
 
 	if (page_evictable(page_tail, NULL)) {
+		struct lruvec *lruvec;
+
 		if (PageActive(page)) {
 			SetPageActive(page_tail);
 			active = 1;
@@ -672,11 +677,13 @@ void lru_add_page_tail(struct zone* zone,
 			lru = LRU_INACTIVE_ANON;
 		}
 		update_page_reclaim_stat(zone, page_tail, file, active);
+		lruvec = mem_cgroup_lru_add_list(zone, page_tail, lru);
 		if (likely(PageLRU(page)))
-			__add_page_to_lru_list(zone, page_tail, lru,
-					       page->lru.prev);
+			list_add(&page_tail->lru, page->lru.prev);
 		else
-			add_page_to_lru_list(zone, page_tail, lru);
+			list_add(&page_tail->lru, &lruvec->lists[lru]);
+		__mod_zone_page_state(zone, NR_LRU_BASE + lru,
+				      hpage_nr_pages(page_tail));
 	} else {
 		SetPageUnevictable(page_tail);
 		add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 93cdc44a1693..813aae820a27 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1139,15 +1139,14 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 		switch (__isolate_lru_page(page, mode, file)) {
 		case 0:
+			mem_cgroup_lru_del(page);
 			list_move(&page->lru, dst);
-			mem_cgroup_del_lru(page);
 			nr_taken += hpage_nr_pages(page);
 			break;
 
 		case -EBUSY:
 			/* else it is being freed elsewhere */
 			list_move(&page->lru, src);
-			mem_cgroup_rotate_lru_list(page, page_lru(page));
 			continue;
 
 		default:
@@ -1197,8 +1196,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 				break;
 
 			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
+				mem_cgroup_lru_del(cursor_page);
 				list_move(&cursor_page->lru, dst);
-				mem_cgroup_del_lru(cursor_page);
 				nr_taken += hpage_nr_pages(cursor_page);
 				nr_lumpy_taken++;
 				if (PageDirty(cursor_page))
@@ -1239,18 +1238,20 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	return nr_taken;
 }
 
-static unsigned long isolate_pages_global(unsigned long nr,
-					struct list_head *dst,
-					unsigned long *scanned, int order,
-					isolate_mode_t mode,
-					struct zone *z,	int active, int file)
+static unsigned long isolate_pages(unsigned long nr, struct mem_cgroup_zone *mz,
+				   struct list_head *dst,
+				   unsigned long *scanned, int order,
+				   isolate_mode_t mode, int active, int file)
 {
+	struct lruvec *lruvec;
 	int lru = LRU_BASE;
+
+	lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
 	if (active)
 		lru += LRU_ACTIVE;
 	if (file)
 		lru += LRU_FILE;
-	return isolate_lru_pages(nr, &z->lruvec.lists[lru], dst,
+	return isolate_lru_pages(nr, &lruvec->lists[lru], dst,
 				 scanned, order, mode, file);
 }
 
@@ -1518,14 +1519,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 
 	spin_lock_irq(&zone->lru_lock);
 
-	if (scanning_global_lru(mz)) {
-		nr_taken = isolate_pages_global(nr_to_scan, &page_list,
-			&nr_scanned, sc->order, reclaim_mode, zone, 0, file);
-	} else {
-		nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list,
-			&nr_scanned, sc->order, reclaim_mode, zone,
-			mz->mem_cgroup, 0, file);
-	}
+	nr_taken = isolate_pages(nr_to_scan, mz, &page_list,
+				 &nr_scanned, sc->order,
+				 reclaim_mode, 0, file);
 	if (global_reclaim(sc)) {
 		zone->pages_scanned += nr_scanned;
 		if (current_is_kswapd())
@@ -1625,13 +1621,15 @@ static void move_active_pages_to_lru(struct zone *zone,
 	pagevec_init(&pvec, 1);
 
 	while (!list_empty(list)) {
+		struct lruvec *lruvec;
+
 		page = lru_to_page(list);
 
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
 
-		list_move(&page->lru, &zone->lruvec.lists[lru]);
-		mem_cgroup_add_lru_list(page, lru);
+		lruvec = mem_cgroup_lru_add_list(zone, page, lru);
+		list_move(&page->lru, &lruvec->lists[lru]);
 		pgmoved += hpage_nr_pages(page);
 
 		if (!pagevec_add(&pvec, page) || list_empty(list)) {
@@ -1672,17 +1670,10 @@ static void shrink_active_list(unsigned long nr_pages,
 		reclaim_mode |= ISOLATE_CLEAN;
 
 	spin_lock_irq(&zone->lru_lock);
-	if (scanning_global_lru(mz)) {
-		nr_taken = isolate_pages_global(nr_pages, &l_hold,
-						&pgscanned, sc->order,
-						reclaim_mode, zone,
-						1, file);
-	} else {
-		nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
-						&pgscanned, sc->order,
-						reclaim_mode, zone,
-						mz->mem_cgroup, 1, file);
-	}
+
+	nr_taken = isolate_pages(nr_pages, mz, &l_hold,
+				 &pgscanned, sc->order,
+				 reclaim_mode, 1, file);
 
 	if (global_reclaim(sc))
 		zone->pages_scanned += pgscanned;
@@ -3440,16 +3431,18 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
  */
 static void check_move_unevictable_page(struct page *page, struct zone *zone)
 {
-	VM_BUG_ON(PageActive(page));
+	struct lruvec *lruvec;
 
+	VM_BUG_ON(PageActive(page));
 retry:
 	ClearPageUnevictable(page);
 	if (page_evictable(page, NULL)) {
 		enum lru_list l = page_lru_base_type(page);
 
 		__dec_zone_state(zone, NR_UNEVICTABLE);
-		list_move(&page->lru, &zone->lruvec.lists[l]);
-		mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
+		lruvec = mem_cgroup_lru_move_lists(zone, page,
+						   LRU_UNEVICTABLE, l);
+		list_move(&page->lru, &lruvec->lists[l]);
 		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
 		__count_vm_event(UNEVICTABLE_PGRESCUED);
 	} else {
@@ -3457,8 +3450,9 @@ retry:
 		 * rotate unevictable list
 		 */
 		SetPageUnevictable(page);
-		list_move(&page->lru, &zone->lruvec.lists[LRU_UNEVICTABLE]);
-		mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
+		lruvec = mem_cgroup_lru_move_lists(zone, page, LRU_UNEVICTABLE,
+						   LRU_UNEVICTABLE);
+		list_move(&page->lru, &lruvec->lists[LRU_UNEVICTABLE]);
 		if (page_evictable(page, NULL))
 			goto retry;
 	}
-- 
cgit v1.2.3


From 6b208e3f6e35aa76d254c395bdcd984b17c6b626 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Thu, 12 Jan 2012 17:18:18 -0800
Subject: mm: memcg: remove unused node/section info from pc->flags

To find the page corresponding to a certain page_cgroup, the pc->flags
encoded the node or section ID with the base array to compare the pc
pointer to.

Now that the per-memory cgroup LRU lists link page descriptors directly,
there is no longer any code that knows the struct page_cgroup of a PFN
but not the struct page.

[hughd@google.com: remove unused node/section info from pc->flags fix]
Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ying Han <yinghan@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_cgroup.h | 33 -------------------------
 mm/page_cgroup.c            | 59 ++++++---------------------------------------
 2 files changed, 7 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 5bae7535c202..aaa60da8783c 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -121,39 +121,6 @@ static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
 	local_irq_restore(*flags);
 }
 
-#ifdef CONFIG_SPARSEMEM
-#define PCG_ARRAYID_WIDTH	SECTIONS_SHIFT
-#else
-#define PCG_ARRAYID_WIDTH	NODES_SHIFT
-#endif
-
-#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS)
-#error Not enough space left in pc->flags to store page_cgroup array IDs
-#endif
-
-/* pc->flags: ARRAY-ID | FLAGS */
-
-#define PCG_ARRAYID_MASK	((1UL << PCG_ARRAYID_WIDTH) - 1)
-
-#define PCG_ARRAYID_OFFSET	(BITS_PER_LONG - PCG_ARRAYID_WIDTH)
-/*
- * Zero the shift count for non-existent fields, to prevent compiler
- * warnings and ensure references are optimized away.
- */
-#define PCG_ARRAYID_SHIFT	(PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0))
-
-static inline void set_page_cgroup_array_id(struct page_cgroup *pc,
-					    unsigned long id)
-{
-	pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT);
-	pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT;
-}
-
-static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc)
-{
-	return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK;
-}
-
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct page_cgroup;
 
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index f59405a8d752..f0559e049e00 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -11,12 +11,6 @@
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
 
-static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
-{
-	pc->flags = 0;
-	set_page_cgroup_array_id(pc, id);
-	pc->mem_cgroup = NULL;
-}
 static unsigned long total_usage;
 
 #if !defined(CONFIG_SPARSEMEM)
@@ -41,28 +35,13 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	return base + offset;
 }
 
-struct page *lookup_cgroup_page(struct page_cgroup *pc)
-{
-	unsigned long pfn;
-	struct page *page;
-	pg_data_t *pgdat;
-
-	pgdat = NODE_DATA(page_cgroup_array_id(pc));
-	pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
-	page = pfn_to_page(pfn);
-	VM_BUG_ON(pc != lookup_page_cgroup(page));
-	return page;
-}
-
 static int __init alloc_node_page_cgroup(int nid)
 {
-	struct page_cgroup *base, *pc;
+	struct page_cgroup *base;
 	unsigned long table_size;
-	unsigned long start_pfn, nr_pages, index;
+	unsigned long nr_pages;
 
-	start_pfn = NODE_DATA(nid)->node_start_pfn;
 	nr_pages = NODE_DATA(nid)->node_spanned_pages;
-
 	if (!nr_pages)
 		return 0;
 
@@ -72,10 +51,6 @@ static int __init alloc_node_page_cgroup(int nid)
 			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
 	if (!base)
 		return -ENOMEM;
-	for (index = 0; index < nr_pages; index++) {
-		pc = base + index;
-		init_page_cgroup(pc, nid);
-	}
 	NODE_DATA(nid)->node_page_cgroup = base;
 	total_usage += table_size;
 	return 0;
@@ -116,23 +91,10 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	return section->page_cgroup + pfn;
 }
 
-struct page *lookup_cgroup_page(struct page_cgroup *pc)
-{
-	struct mem_section *section;
-	struct page *page;
-	unsigned long nr;
-
-	nr = page_cgroup_array_id(pc);
-	section = __nr_to_section(nr);
-	page = pfn_to_page(pc - section->page_cgroup);
-	VM_BUG_ON(pc != lookup_page_cgroup(page));
-	return page;
-}
-
 static void *__meminit alloc_page_cgroup(size_t size, int nid)
 {
+	gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN;
 	void *addr = NULL;
-	gfp_t flags = GFP_KERNEL | __GFP_NOWARN;
 
 	addr = alloc_pages_exact_nid(nid, size, flags);
 	if (addr) {
@@ -141,9 +103,9 @@ static void *__meminit alloc_page_cgroup(size_t size, int nid)
 	}
 
 	if (node_state(nid, N_HIGH_MEMORY))
-		addr = vmalloc_node(size, nid);
+		addr = vzalloc_node(size, nid);
 	else
-		addr = vmalloc(size);
+		addr = vzalloc(size);
 
 	return addr;
 }
@@ -166,14 +128,11 @@ static void free_page_cgroup(void *addr)
 
 static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
 {
-	struct page_cgroup *base, *pc;
 	struct mem_section *section;
+	struct page_cgroup *base;
 	unsigned long table_size;
-	unsigned long nr;
-	int index;
 
-	nr = pfn_to_section_nr(pfn);
-	section = __nr_to_section(nr);
+	section = __pfn_to_section(pfn);
 
 	if (section->page_cgroup)
 		return 0;
@@ -193,10 +152,6 @@ static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
 		return -ENOMEM;
 	}
 
-	for (index = 0; index < PAGES_PER_SECTION; index++) {
-		pc = base + index;
-		init_page_cgroup(pc, nr);
-	}
 	/*
 	 * The passed "pfn" may not be aligned to SECTION.  For the calculation
 	 * we need to apply a mask.
-- 
cgit v1.2.3


From e94c8a9cbce1aee4af9e1285802785481b7f93c5 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 12 Jan 2012 17:18:20 -0800
Subject: memcg: make mem_cgroup_split_huge_fixup() more efficient

In split_huge_page(), mem_cgroup_split_huge_fixup() is called to handle
page_cgroup modifcations.  It takes move_lock_page_cgroup() and modifies
page_cgroup and LRU accounting jobs and called HPAGE_PMD_SIZE - 1 times.

But thinking again,
  - compound_lock() is held at move_accout...then, it's not necessary
    to take move_lock_page_cgroup().
  - LRU is locked and all tail pages will go into the same LRU as
    head is now on.
  - page_cgroup is contiguous in huge page range.

This patch fixes mem_cgroup_split_huge_fixup() as to be called once per
hugepage and reduce costs for spliting.

[akpm@linux-foundation.org: fix typo, per Michal]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  5 ++---
 mm/huge_memory.c           |  3 ++-
 mm/memcontrol.c            | 34 +++++++++++++++++-----------------
 3 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e2f8e7caf04b..cee3761666f0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -163,7 +163,7 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg);
 
 void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail);
+void mem_cgroup_split_huge_fixup(struct page *head);
 #endif
 
 #ifdef CONFIG_DEBUG_VM
@@ -379,8 +379,7 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 	return 0;
 }
 
-static inline void mem_cgroup_split_huge_fixup(struct page *head,
-						struct page *tail)
+static inline void mem_cgroup_split_huge_fixup(struct page *head)
 {
 }
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 36b3d988b4ef..db522e160cca 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1207,6 +1207,8 @@ static void __split_huge_page_refcount(struct page *page)
 	/* prevent PageLRU to go away from under us, and freeze lru stats */
 	spin_lock_irq(&zone->lru_lock);
 	compound_lock(page);
+	/* complete memcg works before add pages to LRU */
+	mem_cgroup_split_huge_fixup(page);
 
 	for (i = 1; i < HPAGE_PMD_NR; i++) {
 		struct page *page_tail = page + i;
@@ -1278,7 +1280,6 @@ static void __split_huge_page_refcount(struct page *page)
 		BUG_ON(!PageDirty(page_tail));
 		BUG_ON(!PageSwapBacked(page_tail));
 
-		mem_cgroup_split_huge_fixup(page, page_tail);
 
 		lru_add_page_tail(zone, page, page_tail);
 	}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 972878b648c2..42174612cc0b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2553,39 +2553,39 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 			(1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
 /*
  * Because tail pages are not marked as "used", set it. We're under
- * zone->lru_lock, 'splitting on pmd' and compund_lock.
+ * zone->lru_lock, 'splitting on pmd' and compound_lock.
+ * charge/uncharge will be never happen and move_account() is done under
+ * compound_lock(), so we don't have to take care of races.
  */
-void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
+void mem_cgroup_split_huge_fixup(struct page *head)
 {
 	struct page_cgroup *head_pc = lookup_page_cgroup(head);
-	struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
-	unsigned long flags;
+	struct page_cgroup *pc;
+	int i;
 
 	if (mem_cgroup_disabled())
 		return;
-	/*
-	 * We have no races with charge/uncharge but will have races with
-	 * page state accounting.
-	 */
-	move_lock_page_cgroup(head_pc, &flags);
+	for (i = 1; i < HPAGE_PMD_NR; i++) {
+		pc = head_pc + i;
+		pc->mem_cgroup = head_pc->mem_cgroup;
+		smp_wmb();/* see __commit_charge() */
+		/*
+		 * LRU flags cannot be copied because we need to add tail
+		 * page to LRU by generic call and our hooks will be called.
+		 */
+		pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+	}
 
-	tail_pc->mem_cgroup = head_pc->mem_cgroup;
-	smp_wmb(); /* see __commit_charge() */
 	if (PageCgroupAcctLRU(head_pc)) {
 		enum lru_list lru;
 		struct mem_cgroup_per_zone *mz;
-
 		/*
-		 * LRU flags cannot be copied because we need to add tail
-		 *.page to LRU by generic call and our hook will be called.
 		 * We hold lru_lock, then, reduce counter directly.
 		 */
 		lru = page_lru(head);
 		mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
-		MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+		MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
 	}
-	tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
-	move_unlock_page_cgroup(head_pc, &flags);
 }
 #endif
 
-- 
cgit v1.2.3


From 72835c86ca15d0126354b73d5f29ce9194931c9b Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Thu, 12 Jan 2012 17:18:32 -0800
Subject: mm: unify remaining mem_cont, mem, etc. variable names to memcg

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 16 +++++++-------
 include/linux/oom.h        |  2 +-
 include/linux/rmap.h       |  4 ++--
 mm/memcontrol.c            | 52 ++++++++++++++++++++++++----------------------
 mm/oom_kill.c              | 38 ++++++++++++++++-----------------
 mm/rmap.c                  | 20 +++++++++---------
 mm/swapfile.c              |  9 ++++----
 mm/vmscan.c                | 12 +++++------
 8 files changed, 78 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index cee3761666f0..b80de520670b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,10 +54,10 @@ extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
 /* for swap handling */
 extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-		struct page *page, gfp_t mask, struct mem_cgroup **ptr);
+		struct page *page, gfp_t mask, struct mem_cgroup **memcgp);
 extern void mem_cgroup_commit_charge_swapin(struct page *page,
-					struct mem_cgroup *ptr);
-extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
+					struct mem_cgroup *memcg);
+extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg);
 
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
@@ -101,7 +101,7 @@ extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
 
 extern int
 mem_cgroup_prepare_migration(struct page *page,
-	struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask);
+	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask);
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	struct page *oldpage, struct page *newpage, bool migration_ok);
 
@@ -186,17 +186,17 @@ static inline int mem_cgroup_cache_charge(struct page *page,
 }
 
 static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-		struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr)
+		struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp)
 {
 	return 0;
 }
 
 static inline void mem_cgroup_commit_charge_swapin(struct page *page,
-					  struct mem_cgroup *ptr)
+					  struct mem_cgroup *memcg)
 {
 }
 
-static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr)
+static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
 {
 }
 
@@ -275,7 +275,7 @@ static inline struct cgroup_subsys_state
 
 static inline int
 mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
-	struct mem_cgroup **ptr, gfp_t gfp_mask)
+	struct mem_cgroup **memcgp, gfp_t gfp_mask)
 {
 	return 0;
 }
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 6f9d04a85336..552fba9c7d5a 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -43,7 +43,7 @@ enum oom_constraint {
 extern void compare_swap_oom_score_adj(int old_val, int new_val);
 extern int test_set_oom_score_adj(int new_val);
 
-extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
+extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 			const nodemask_t *nodemask, unsigned long totalpages);
 extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 1afb9954bbf1..1cdd62a2788a 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -158,7 +158,7 @@ static inline void page_dup_rmap(struct page *page)
  * Called from mm/vmscan.c to handle paging out
  */
 int page_referenced(struct page *, int is_locked,
-			struct mem_cgroup *cnt, unsigned long *vm_flags);
+			struct mem_cgroup *memcg, unsigned long *vm_flags);
 int page_referenced_one(struct page *, struct vm_area_struct *,
 	unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
 
@@ -236,7 +236,7 @@ int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
 #define anon_vma_link(vma)	do {} while (0)
 
 static inline int page_referenced(struct page *page, int is_locked,
-				  struct mem_cgroup *cnt,
+				  struct mem_cgroup *memcg,
 				  unsigned long *vm_flags)
 {
 	*vm_flags = 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index abb66a2cba65..aeb23933a052 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2844,12 +2844,12 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
  */
 int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 				 struct page *page,
-				 gfp_t mask, struct mem_cgroup **ptr)
+				 gfp_t mask, struct mem_cgroup **memcgp)
 {
 	struct mem_cgroup *memcg;
 	int ret;
 
-	*ptr = NULL;
+	*memcgp = NULL;
 
 	if (mem_cgroup_disabled())
 		return 0;
@@ -2867,27 +2867,27 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 	memcg = try_get_mem_cgroup_from_page(page);
 	if (!memcg)
 		goto charge_cur_mm;
-	*ptr = memcg;
-	ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true);
+	*memcgp = memcg;
+	ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
 	css_put(&memcg->css);
 	return ret;
 charge_cur_mm:
 	if (unlikely(!mm))
 		mm = &init_mm;
-	return __mem_cgroup_try_charge(mm, mask, 1, ptr, true);
+	return __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
 }
 
 static void
-__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
+__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
 					enum charge_type ctype)
 {
 	if (mem_cgroup_disabled())
 		return;
-	if (!ptr)
+	if (!memcg)
 		return;
-	cgroup_exclude_rmdir(&ptr->css);
+	cgroup_exclude_rmdir(&memcg->css);
 
-	__mem_cgroup_commit_charge_lrucare(page, ptr, ctype);
+	__mem_cgroup_commit_charge_lrucare(page, memcg, ctype);
 	/*
 	 * Now swap is on-memory. This means this page may be
 	 * counted both as mem and swap....double count.
@@ -2897,21 +2897,22 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 	 */
 	if (do_swap_account && PageSwapCache(page)) {
 		swp_entry_t ent = {.val = page_private(page)};
+		struct mem_cgroup *swap_memcg;
 		unsigned short id;
-		struct mem_cgroup *memcg;
 
 		id = swap_cgroup_record(ent, 0);
 		rcu_read_lock();
-		memcg = mem_cgroup_lookup(id);
-		if (memcg) {
+		swap_memcg = mem_cgroup_lookup(id);
+		if (swap_memcg) {
 			/*
 			 * This recorded memcg can be obsolete one. So, avoid
 			 * calling css_tryget
 			 */
-			if (!mem_cgroup_is_root(memcg))
-				res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
-			mem_cgroup_swap_statistics(memcg, false);
-			mem_cgroup_put(memcg);
+			if (!mem_cgroup_is_root(swap_memcg))
+				res_counter_uncharge(&swap_memcg->memsw,
+						     PAGE_SIZE);
+			mem_cgroup_swap_statistics(swap_memcg, false);
+			mem_cgroup_put(swap_memcg);
 		}
 		rcu_read_unlock();
 	}
@@ -2920,13 +2921,14 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 	 * So, rmdir()->pre_destroy() can be called while we do this charge.
 	 * In that case, we need to call pre_destroy() again. check it here.
 	 */
-	cgroup_release_and_wakeup_rmdir(&ptr->css);
+	cgroup_release_and_wakeup_rmdir(&memcg->css);
 }
 
-void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
+void mem_cgroup_commit_charge_swapin(struct page *page,
+				     struct mem_cgroup *memcg)
 {
-	__mem_cgroup_commit_charge_swapin(page, ptr,
-					MEM_CGROUP_CHARGE_TYPE_MAPPED);
+	__mem_cgroup_commit_charge_swapin(page, memcg,
+					  MEM_CGROUP_CHARGE_TYPE_MAPPED);
 }
 
 void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
@@ -3255,14 +3257,14 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
  * page belongs to.
  */
 int mem_cgroup_prepare_migration(struct page *page,
-	struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask)
+	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask)
 {
 	struct mem_cgroup *memcg = NULL;
 	struct page_cgroup *pc;
 	enum charge_type ctype;
 	int ret = 0;
 
-	*ptr = NULL;
+	*memcgp = NULL;
 
 	VM_BUG_ON(PageTransHuge(page));
 	if (mem_cgroup_disabled())
@@ -3313,10 +3315,10 @@ int mem_cgroup_prepare_migration(struct page *page,
 	if (!memcg)
 		return 0;
 
-	*ptr = memcg;
-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false);
+	*memcgp = memcg;
+	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
 	css_put(&memcg->css);/* drop extra refcnt */
-	if (ret || *ptr == NULL) {
+	if (ret || *memcgp == NULL) {
 		if (PageAnon(page)) {
 			lock_page_cgroup(pc);
 			ClearPageCgroupMigration(pc);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 05c0f27d4ed1..2958fd8e7c9a 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -152,7 +152,7 @@ struct task_struct *find_lock_task_mm(struct task_struct *p)
 
 /* return true if the task is not adequate as candidate victim task. */
 static bool oom_unkillable_task(struct task_struct *p,
-		const struct mem_cgroup *mem, const nodemask_t *nodemask)
+		const struct mem_cgroup *memcg, const nodemask_t *nodemask)
 {
 	if (is_global_init(p))
 		return true;
@@ -160,7 +160,7 @@ static bool oom_unkillable_task(struct task_struct *p,
 		return true;
 
 	/* When mem_cgroup_out_of_memory() and p is not member of the group */
-	if (mem && !task_in_mem_cgroup(p, mem))
+	if (memcg && !task_in_mem_cgroup(p, memcg))
 		return true;
 
 	/* p may not have freeable memory in nodemask */
@@ -179,12 +179,12 @@ static bool oom_unkillable_task(struct task_struct *p,
  * predictable as possible.  The goal is to return the highest value for the
  * task consuming the most memory to avoid subsequent oom failures.
  */
-unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
+unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 		      const nodemask_t *nodemask, unsigned long totalpages)
 {
 	long points;
 
-	if (oom_unkillable_task(p, mem, nodemask))
+	if (oom_unkillable_task(p, memcg, nodemask))
 		return 0;
 
 	p = find_lock_task_mm(p);
@@ -308,7 +308,7 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
  * (not docbooked, we don't want this one cluttering up the manual)
  */
 static struct task_struct *select_bad_process(unsigned int *ppoints,
-		unsigned long totalpages, struct mem_cgroup *mem,
+		unsigned long totalpages, struct mem_cgroup *memcg,
 		const nodemask_t *nodemask)
 {
 	struct task_struct *g, *p;
@@ -320,7 +320,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 
 		if (p->exit_state)
 			continue;
-		if (oom_unkillable_task(p, mem, nodemask))
+		if (oom_unkillable_task(p, memcg, nodemask))
 			continue;
 
 		/*
@@ -364,7 +364,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 			}
 		}
 
-		points = oom_badness(p, mem, nodemask, totalpages);
+		points = oom_badness(p, memcg, nodemask, totalpages);
 		if (points > *ppoints) {
 			chosen = p;
 			*ppoints = points;
@@ -387,14 +387,14 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
  *
  * Call with tasklist_lock read-locked.
  */
-static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask)
+static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask)
 {
 	struct task_struct *p;
 	struct task_struct *task;
 
 	pr_info("[ pid ]   uid  tgid total_vm      rss cpu oom_adj oom_score_adj name\n");
 	for_each_process(p) {
-		if (oom_unkillable_task(p, mem, nodemask))
+		if (oom_unkillable_task(p, memcg, nodemask))
 			continue;
 
 		task = find_lock_task_mm(p);
@@ -417,7 +417,7 @@ static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask)
 }
 
 static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
-			struct mem_cgroup *mem, const nodemask_t *nodemask)
+			struct mem_cgroup *memcg, const nodemask_t *nodemask)
 {
 	task_lock(current);
 	pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
@@ -427,10 +427,10 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 	cpuset_print_task_mems_allowed(current);
 	task_unlock(current);
 	dump_stack();
-	mem_cgroup_print_oom_info(mem, p);
+	mem_cgroup_print_oom_info(memcg, p);
 	show_mem(SHOW_MEM_FILTER_NODES);
 	if (sysctl_oom_dump_tasks)
-		dump_tasks(mem, nodemask);
+		dump_tasks(memcg, nodemask);
 }
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
@@ -484,7 +484,7 @@ static int oom_kill_task(struct task_struct *p)
 
 static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			    unsigned int points, unsigned long totalpages,
-			    struct mem_cgroup *mem, nodemask_t *nodemask,
+			    struct mem_cgroup *memcg, nodemask_t *nodemask,
 			    const char *message)
 {
 	struct task_struct *victim = p;
@@ -493,7 +493,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	unsigned int victim_points = 0;
 
 	if (printk_ratelimit())
-		dump_header(p, gfp_mask, order, mem, nodemask);
+		dump_header(p, gfp_mask, order, memcg, nodemask);
 
 	/*
 	 * If the task is already exiting, don't alarm the sysadmin or kill
@@ -524,7 +524,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			/*
 			 * oom_badness() returns 0 if the thread is unkillable
 			 */
-			child_points = oom_badness(child, mem, nodemask,
+			child_points = oom_badness(child, memcg, nodemask,
 								totalpages);
 			if (child_points > victim_points) {
 				victim = child;
@@ -561,7 +561,7 @@ static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
 }
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
-void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
+void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask)
 {
 	unsigned long limit;
 	unsigned int points = 0;
@@ -578,14 +578,14 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
 	}
 
 	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0, NULL);
-	limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT;
+	limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT;
 	read_lock(&tasklist_lock);
 retry:
-	p = select_bad_process(&points, limit, mem, NULL);
+	p = select_bad_process(&points, limit, memcg, NULL);
 	if (!p || PTR_ERR(p) == -1UL)
 		goto out;
 
-	if (oom_kill_process(p, gfp_mask, 0, points, limit, mem, NULL,
+	if (oom_kill_process(p, gfp_mask, 0, points, limit, memcg, NULL,
 				"Memory cgroup out of memory"))
 		goto retry;
 out:
diff --git a/mm/rmap.c b/mm/rmap.c
index a2e5ce1fa081..c8454e06b6c8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -773,7 +773,7 @@ out:
 }
 
 static int page_referenced_anon(struct page *page,
-				struct mem_cgroup *mem_cont,
+				struct mem_cgroup *memcg,
 				unsigned long *vm_flags)
 {
 	unsigned int mapcount;
@@ -796,7 +796,7 @@ static int page_referenced_anon(struct page *page,
 		 * counting on behalf of references from different
 		 * cgroups
 		 */
-		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
+		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
 			continue;
 		referenced += page_referenced_one(page, vma, address,
 						  &mapcount, vm_flags);
@@ -811,7 +811,7 @@ static int page_referenced_anon(struct page *page,
 /**
  * page_referenced_file - referenced check for object-based rmap
  * @page: the page we're checking references on.
- * @mem_cont: target memory controller
+ * @memcg: target memory control group
  * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
  *
  * For an object-based mapped page, find all the places it is mapped and
@@ -822,7 +822,7 @@ static int page_referenced_anon(struct page *page,
  * This function is only called from page_referenced for object-based pages.
  */
 static int page_referenced_file(struct page *page,
-				struct mem_cgroup *mem_cont,
+				struct mem_cgroup *memcg,
 				unsigned long *vm_flags)
 {
 	unsigned int mapcount;
@@ -864,7 +864,7 @@ static int page_referenced_file(struct page *page,
 		 * counting on behalf of references from different
 		 * cgroups
 		 */
-		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
+		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
 			continue;
 		referenced += page_referenced_one(page, vma, address,
 						  &mapcount, vm_flags);
@@ -880,7 +880,7 @@ static int page_referenced_file(struct page *page,
  * page_referenced - test if the page was referenced
  * @page: the page to test
  * @is_locked: caller holds lock on the page
- * @mem_cont: target memory controller
+ * @memcg: target memory cgroup
  * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
  *
  * Quick test_and_clear_referenced for all mappings to a page,
@@ -888,7 +888,7 @@ static int page_referenced_file(struct page *page,
  */
 int page_referenced(struct page *page,
 		    int is_locked,
-		    struct mem_cgroup *mem_cont,
+		    struct mem_cgroup *memcg,
 		    unsigned long *vm_flags)
 {
 	int referenced = 0;
@@ -904,13 +904,13 @@ int page_referenced(struct page *page,
 			}
 		}
 		if (unlikely(PageKsm(page)))
-			referenced += page_referenced_ksm(page, mem_cont,
+			referenced += page_referenced_ksm(page, memcg,
 								vm_flags);
 		else if (PageAnon(page))
-			referenced += page_referenced_anon(page, mem_cont,
+			referenced += page_referenced_anon(page, memcg,
 								vm_flags);
 		else if (page->mapping)
-			referenced += page_referenced_file(page, mem_cont,
+			referenced += page_referenced_file(page, memcg,
 								vm_flags);
 		if (we_locked)
 			unlock_page(page);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 9520592d4231..d999f090dfda 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -847,12 +847,13 @@ unsigned int count_swap_pages(int type, int free)
 static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, swp_entry_t entry, struct page *page)
 {
-	struct mem_cgroup *ptr;
+	struct mem_cgroup *memcg;
 	spinlock_t *ptl;
 	pte_t *pte;
 	int ret = 1;
 
-	if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) {
+	if (mem_cgroup_try_charge_swapin(vma->vm_mm, page,
+					 GFP_KERNEL, &memcg)) {
 		ret = -ENOMEM;
 		goto out_nolock;
 	}
@@ -860,7 +861,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
 		if (ret > 0)
-			mem_cgroup_cancel_charge_swapin(ptr);
+			mem_cgroup_cancel_charge_swapin(memcg);
 		ret = 0;
 		goto out;
 	}
@@ -871,7 +872,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	set_pte_at(vma->vm_mm, addr, pte,
 		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
 	page_add_anon_rmap(page, vma, addr);
-	mem_cgroup_commit_charge_swapin(page, ptr);
+	mem_cgroup_commit_charge_swapin(page, memcg);
 	swap_free(entry);
 	/*
 	 * Move the page to the active list so it is not
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 813aae820a27..e16ca8384ef7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2376,7 +2376,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
-unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
 						gfp_t gfp_mask, bool noswap,
 						struct zone *zone,
 						unsigned long *nr_scanned)
@@ -2388,10 +2388,10 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 		.may_unmap = 1,
 		.may_swap = !noswap,
 		.order = 0,
-		.target_mem_cgroup = mem,
+		.target_mem_cgroup = memcg,
 	};
 	struct mem_cgroup_zone mz = {
-		.mem_cgroup = mem,
+		.mem_cgroup = memcg,
 		.zone = zone,
 	};
 
@@ -2417,7 +2417,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 	return sc.nr_reclaimed;
 }
 
-unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
+unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 					   gfp_t gfp_mask,
 					   bool noswap)
 {
@@ -2430,7 +2430,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 		.may_swap = !noswap,
 		.nr_to_reclaim = SWAP_CLUSTER_MAX,
 		.order = 0,
-		.target_mem_cgroup = mem_cont,
+		.target_mem_cgroup = memcg,
 		.nodemask = NULL, /* we don't care the placement */
 		.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 				(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
@@ -2444,7 +2444,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 	 * take care of from where we get pages. So the node where we start the
 	 * scan does not need to be the current node.
 	 */
-	nid = mem_cgroup_select_victim_node(mem_cont);
+	nid = mem_cgroup_select_victim_node(memcg);
 
 	zonelist = NODE_DATA(nid)->node_zonelists;
 
-- 
cgit v1.2.3


From 9fb4b7cc0724f178d4b24a2a566ea1e7cb120b82 Mon Sep 17 00:00:00 2001
From: Bob Liu <lliubbo@gmail.com>
Date: Thu, 12 Jan 2012 17:18:48 -0800
Subject: page_cgroup: add helper function to get swap_cgroup

There are multiple places which need to get the swap_cgroup address, so
add a helper function:

  static struct swap_cgroup *swap_cgroup_getsc(swp_entry_t ent,
                                struct swap_cgroup_ctrl **ctrl);

to simplify the code.

Signed-off-by: Bob Liu <lliubbo@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_cgroup.h |  4 ++--
 mm/memcontrol.c             |  4 ++--
 mm/page_cgroup.c            | 56 ++++++++++++++++-----------------------------
 3 files changed, 24 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index aaa60da8783c..1153095ee457 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -149,7 +149,7 @@ static inline void __init page_cgroup_init_flatmem(void)
 extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
 					unsigned short old, unsigned short new);
 extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
-extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
+extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
 extern int swap_cgroup_swapon(int type, unsigned long max_pages);
 extern void swap_cgroup_swapoff(int type);
 #else
@@ -161,7 +161,7 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 }
 
 static inline
-unsigned short lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
 {
 	return 0;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 71a9774e6ead..4c53e971749e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2474,7 +2474,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 			memcg = NULL;
 	} else if (PageSwapCache(page)) {
 		ent.val = page_private(page);
-		id = lookup_swap_cgroup(ent);
+		id = lookup_swap_cgroup_id(ent);
 		rcu_read_lock();
 		memcg = mem_cgroup_lookup(id);
 		if (memcg && !css_tryget(&memcg->css))
@@ -5264,7 +5264,7 @@ static int is_target_pte_for_mc(struct vm_area_struct *vma,
 	}
 	/* There is a swap entry and a page doesn't exist or isn't charged */
 	if (ent.val && !ret &&
-			css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
+			css_id(&mc.from->css) == lookup_swap_cgroup_id(ent)) {
 		ret = MC_TARGET_SWAP;
 		if (target)
 			target->ent = ent;
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index e910524e5a08..b99d19edf89b 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -334,7 +334,6 @@ struct swap_cgroup {
 	unsigned short		id;
 };
 #define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
-#define SC_POS_MASK	(SC_PER_PAGE - 1)
 
 /*
  * SwapCgroup implements "lookup" and "exchange" operations.
@@ -376,6 +375,21 @@ not_enough_page:
 	return -ENOMEM;
 }
 
+static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
+					struct swap_cgroup_ctrl **ctrlp)
+{
+	pgoff_t offset = swp_offset(ent);
+	struct swap_cgroup_ctrl *ctrl;
+	struct page *mappage;
+
+	ctrl = &swap_cgroup_ctrl[swp_type(ent)];
+	if (ctrlp)
+		*ctrlp = ctrl;
+
+	mappage = ctrl->map[offset / SC_PER_PAGE];
+	return page_address(mappage) + offset % SC_PER_PAGE;
+}
+
 /**
  * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
  * @end: swap entry to be cmpxchged
@@ -388,21 +402,13 @@ not_enough_page:
 unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
 					unsigned short old, unsigned short new)
 {
-	int type = swp_type(ent);
-	unsigned long offset = swp_offset(ent);
-	unsigned long idx = offset / SC_PER_PAGE;
-	unsigned long pos = offset & SC_POS_MASK;
 	struct swap_cgroup_ctrl *ctrl;
-	struct page *mappage;
 	struct swap_cgroup *sc;
 	unsigned long flags;
 	unsigned short retval;
 
-	ctrl = &swap_cgroup_ctrl[type];
+	sc = lookup_swap_cgroup(ent, &ctrl);
 
-	mappage = ctrl->map[idx];
-	sc = page_address(mappage);
-	sc += pos;
 	spin_lock_irqsave(&ctrl->lock, flags);
 	retval = sc->id;
 	if (retval == old)
@@ -423,21 +429,13 @@ unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
  */
 unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 {
-	int type = swp_type(ent);
-	unsigned long offset = swp_offset(ent);
-	unsigned long idx = offset / SC_PER_PAGE;
-	unsigned long pos = offset & SC_POS_MASK;
 	struct swap_cgroup_ctrl *ctrl;
-	struct page *mappage;
 	struct swap_cgroup *sc;
 	unsigned short old;
 	unsigned long flags;
 
-	ctrl = &swap_cgroup_ctrl[type];
+	sc = lookup_swap_cgroup(ent, &ctrl);
 
-	mappage = ctrl->map[idx];
-	sc = page_address(mappage);
-	sc += pos;
 	spin_lock_irqsave(&ctrl->lock, flags);
 	old = sc->id;
 	sc->id = id;
@@ -447,28 +445,14 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 }
 
 /**
- * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
+ * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry
  * @ent: swap entry to be looked up.
  *
  * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
  */
-unsigned short lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
 {
-	int type = swp_type(ent);
-	unsigned long offset = swp_offset(ent);
-	unsigned long idx = offset / SC_PER_PAGE;
-	unsigned long pos = offset & SC_POS_MASK;
-	struct swap_cgroup_ctrl *ctrl;
-	struct page *mappage;
-	struct swap_cgroup *sc;
-	unsigned short ret;
-
-	ctrl = &swap_cgroup_ctrl[type];
-	mappage = ctrl->map[idx];
-	sc = page_address(mappage);
-	sc += pos;
-	ret = sc->id;
-	return ret;
+	return lookup_swap_cgroup(ent, NULL)->id;
 }
 
 int swap_cgroup_swapon(int type, unsigned long max_pages)
-- 
cgit v1.2.3


From 4e5f01c2b9b94321992acb09c35d34f5ee5bb274 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 12 Jan 2012 17:18:58 -0800
Subject: memcg: clear pc->mem_cgroup if necessary.

This is a preparation before removing a flag PCG_ACCT_LRU in page_cgroup
and reducing atomic ops/complexity in memcg LRU handling.

In some cases, pages are added to lru before charge to memcg and pages
are not classfied to memory cgroup at lru addtion.  Now, the lru where
the page should be added is determined a bit in page_cgroup->flags and
pc->mem_cgroup.  I'd like to remove the check of flag.

To handle the case pc->mem_cgroup may contain stale pointers if pages
are added to LRU before classification.  This patch resets
pc->mem_cgroup to root_mem_cgroup before lru additions.

[akpm@linux-foundation.org: fix CONFIG_CGROUP_MEM_CONT=n build]
[hughd@google.com: fix CONFIG_CGROUP_MEM_RES_CTLR=y CONFIG_CGROUP_MEM_RES_CTLR_SWAP=n build]
[akpm@linux-foundation.org: ksm.c needs memcontrol.h, per Michal]
[hughd@google.com: stop oops in mem_cgroup_reset_owner()]
[hughd@google.com: fix page migration to reset_owner]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  5 +++++
 mm/ksm.c                   | 11 +++++++++++
 mm/memcontrol.c            | 17 +++++++++++++++++
 mm/migrate.c               |  2 ++
 mm/swap_state.c            | 10 ++++++++++
 5 files changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b80de520670b..4d34356fe644 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -129,6 +129,7 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern void mem_cgroup_replace_page_cache(struct page *oldpage,
 					struct page *newpage);
 
+extern void mem_cgroup_reset_owner(struct page *page);
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 extern int do_swap_account;
 #endif
@@ -391,6 +392,10 @@ static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
 				struct page *newpage)
 {
 }
+
+static inline void mem_cgroup_reset_owner(struct page *page)
+{
+}
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)
diff --git a/mm/ksm.c b/mm/ksm.c
index 310544a379ae..1925ffbfb27f 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -28,6 +28,7 @@
 #include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/slab.h>
+#include <linux/memcontrol.h>
 #include <linux/rbtree.h>
 #include <linux/memory.h>
 #include <linux/mmu_notifier.h>
@@ -1571,6 +1572,16 @@ struct page *ksm_does_need_to_copy(struct page *page,
 
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 	if (new_page) {
+		/*
+		 * The memcg-specific accounting when moving
+		 * pages around the LRU lists relies on the
+		 * page's owner (memcg) to be valid.  Usually,
+		 * pages are assigned to a new owner before
+		 * being put on the LRU list, but since this
+		 * is not the case here, the stale owner from
+		 * a previous allocation cycle must be reset.
+		 */
+		mem_cgroup_reset_owner(new_page);
 		copy_user_highpage(new_page, page, address, vma);
 
 		SetPageDirty(new_page);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d58bb5fa4403..c74102d6eb5a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3050,6 +3050,23 @@ void mem_cgroup_uncharge_end(void)
 	batch->memcg = NULL;
 }
 
+/*
+ * A function for resetting pc->mem_cgroup for newly allocated pages.
+ * This function should be called if the newpage will be added to LRU
+ * before start accounting.
+ */
+void mem_cgroup_reset_owner(struct page *newpage)
+{
+	struct page_cgroup *pc;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	pc = lookup_page_cgroup(newpage);
+	VM_BUG_ON(PageCgroupUsed(pc));
+	pc->mem_cgroup = root_mem_cgroup;
+}
+
 #ifdef CONFIG_SWAP
 /*
  * called after __delete_from_swap_cache() and drop "page" account.
diff --git a/mm/migrate.c b/mm/migrate.c
index 89ea0854332e..fc391985899f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -777,6 +777,8 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	if (!newpage)
 		return -ENOMEM;
 
+	mem_cgroup_reset_owner(newpage);
+
 	if (page_count(page) == 1) {
 		/* page was freed from under us. So we are done. */
 		goto out;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index ea6b32d61873..470038a91873 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -300,6 +300,16 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			new_page = alloc_page_vma(gfp_mask, vma, addr);
 			if (!new_page)
 				break;		/* Out of memory */
+			/*
+			 * The memcg-specific accounting when moving
+			 * pages around the LRU lists relies on the
+			 * page's owner (memcg) to be valid.  Usually,
+			 * pages are assigned to a new owner before
+			 * being put on the LRU list, but since this
+			 * is not the case here, the stale owner from
+			 * a previous allocation cycle must be reset.
+			 */
+			mem_cgroup_reset_owner(new_page);
 		}
 
 		/*
-- 
cgit v1.2.3


From 38c5d72f3ebe5ddd57d2f08dc035070fc6c9a287 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 12 Jan 2012 17:19:01 -0800
Subject: memcg: simplify LRU handling by new rule

Now, at LRU handling, memory cgroup needs to do complicated works to see
valid pc->mem_cgroup, which may be overwritten.

This patch is for relaxing the protocol. This patch guarantees
   - when pc->mem_cgroup is overwritten, page must not be on LRU.

By this, LRU routine can believe pc->mem_cgroup and don't need to check
bits on pc->flags.  This new rule may adds small overheads to swapin.  But
in most case, lru handling gets faster.

After this patch, PCG_ACCT_LRU bit is obsolete and removed.

[akpm@linux-foundation.org: remove unneeded VM_BUG_ON(), restore hannes's christmas tree]
[akpm@linux-foundation.org: clean up code comment]
[hughd@google.com: fix NULL mem_cgroup_try_charge]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_cgroup.h |   8 ---
 mm/memcontrol.c             | 123 +++++++++++++++++++-------------------------
 2 files changed, 54 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 1153095ee457..a2d11771c84b 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -10,8 +10,6 @@ enum {
 	/* flags for mem_cgroup and file and I/O status */
 	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
 	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
-	/* No lock in page_cgroup */
-	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
 	__NR_PCG_FLAGS,
 };
 
@@ -75,12 +73,6 @@ TESTPCGFLAG(Used, USED)
 CLEARPCGFLAG(Used, USED)
 SETPCGFLAG(Used, USED)
 
-SETPCGFLAG(AcctLRU, ACCT_LRU)
-CLEARPCGFLAG(AcctLRU, ACCT_LRU)
-TESTPCGFLAG(AcctLRU, ACCT_LRU)
-TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU)
-
-
 SETPCGFLAG(FileMapped, FILE_MAPPED)
 CLEARPCGFLAG(FileMapped, FILE_MAPPED)
 TESTPCGFLAG(FileMapped, FILE_MAPPED)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c74102d6eb5a..ff051ee8fb4b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1040,30 +1040,7 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
 		return &zone->lruvec;
 
 	pc = lookup_page_cgroup(page);
-	VM_BUG_ON(PageCgroupAcctLRU(pc));
-	/*
-	 * putback:				charge:
-	 * SetPageLRU				SetPageCgroupUsed
-	 * smp_mb				smp_mb
-	 * PageCgroupUsed && add to memcg LRU	PageLRU && add to memcg LRU
-	 *
-	 * Ensure that one of the two sides adds the page to the memcg
-	 * LRU during a race.
-	 */
-	smp_mb();
-	/*
-	 * If the page is uncharged, it may be freed soon, but it
-	 * could also be swap cache (readahead, swapoff) that needs to
-	 * be reclaimable in the future.  root_mem_cgroup will babysit
-	 * it for the time being.
-	 */
-	if (PageCgroupUsed(pc)) {
-		/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-		smp_rmb();
-		memcg = pc->mem_cgroup;
-		SetPageCgroupAcctLRU(pc);
-	} else
-		memcg = root_mem_cgroup;
+	memcg = pc->mem_cgroup;
 	mz = page_cgroup_zoneinfo(memcg, page);
 	/* compound_order() is stabilized through lru_lock */
 	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
@@ -1090,18 +1067,8 @@ void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 		return;
 
 	pc = lookup_page_cgroup(page);
-	/*
-	 * root_mem_cgroup babysits uncharged LRU pages, but
-	 * PageCgroupUsed is cleared when the page is about to get
-	 * freed.  PageCgroupAcctLRU remembers whether the
-	 * LRU-accounting happened against pc->mem_cgroup or
-	 * root_mem_cgroup.
-	 */
-	if (TestClearPageCgroupAcctLRU(pc)) {
-		VM_BUG_ON(!pc->mem_cgroup);
-		memcg = pc->mem_cgroup;
-	} else
-		memcg = root_mem_cgroup;
+	memcg = pc->mem_cgroup;
+	VM_BUG_ON(!memcg);
 	mz = page_cgroup_zoneinfo(memcg, page);
 	/* huge page split is done under lru_lock. so, we have no races. */
 	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
@@ -2217,8 +2184,25 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 }
 
 /*
- * Unlike exported interface, "oom" parameter is added. if oom==true,
- * oom-killer can be invoked.
+ * __mem_cgroup_try_charge() does
+ * 1. detect memcg to be charged against from passed *mm and *ptr,
+ * 2. update res_counter
+ * 3. call memory reclaim if necessary.
+ *
+ * In some special case, if the task is fatal, fatal_signal_pending() or
+ * has TIF_MEMDIE, this function returns -EINTR while writing root_mem_cgroup
+ * to *ptr. There are two reasons for this. 1: fatal threads should quit as soon
+ * as possible without any hazards. 2: all pages should have a valid
+ * pc->mem_cgroup. If mm is NULL and the caller doesn't pass a valid memcg
+ * pointer, that is treated as a charge to root_mem_cgroup.
+ *
+ * So __mem_cgroup_try_charge() will return
+ *  0       ...  on success, filling *ptr with a valid memcg pointer.
+ *  -ENOMEM ...  charge failure because of resource limits.
+ *  -EINTR  ...  if thread is fatal. *ptr is filled with root_mem_cgroup.
+ *
+ * Unlike the exported interface, an "oom" parameter is added. if oom==true,
+ * the oom-killer can be invoked.
  */
 static int __mem_cgroup_try_charge(struct mm_struct *mm,
 				   gfp_t gfp_mask,
@@ -2247,7 +2231,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
 	if (!*ptr && !mm)
-		goto bypass;
+		*ptr = root_mem_cgroup;
 again:
 	if (*ptr) { /* css should be a valid one */
 		memcg = *ptr;
@@ -2273,7 +2257,9 @@ again:
 		 * task-struct. So, mm->owner can be NULL.
 		 */
 		memcg = mem_cgroup_from_task(p);
-		if (!memcg || mem_cgroup_is_root(memcg)) {
+		if (!memcg)
+			memcg = root_mem_cgroup;
+		if (mem_cgroup_is_root(memcg)) {
 			rcu_read_unlock();
 			goto done;
 		}
@@ -2348,8 +2334,8 @@ nomem:
 	*ptr = NULL;
 	return -ENOMEM;
 bypass:
-	*ptr = NULL;
-	return 0;
+	*ptr = root_mem_cgroup;
+	return -EINTR;
 }
 
 /*
@@ -2457,6 +2443,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 
 	mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
 	unlock_page_cgroup(pc);
+	WARN_ON_ONCE(PageLRU(page));
 	/*
 	 * "charge_statistics" updated event counter. Then, check it.
 	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
@@ -2468,7 +2455,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
 #define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
-			(1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
+			(1 << PCG_MIGRATION))
 /*
  * Because tail pages are not marked as "used", set it. We're under
  * zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -2478,7 +2465,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 void mem_cgroup_split_huge_fixup(struct page *head)
 {
 	struct page_cgroup *head_pc = lookup_page_cgroup(head);
+	struct mem_cgroup_per_zone *mz;
 	struct page_cgroup *pc;
+	enum lru_list lru;
 	int i;
 
 	if (mem_cgroup_disabled())
@@ -2487,23 +2476,15 @@ void mem_cgroup_split_huge_fixup(struct page *head)
 		pc = head_pc + i;
 		pc->mem_cgroup = head_pc->mem_cgroup;
 		smp_wmb();/* see __commit_charge() */
-		/*
-		 * LRU flags cannot be copied because we need to add tail
-		 * page to LRU by generic call and our hooks will be called.
-		 */
 		pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
 	}
-
-	if (PageCgroupAcctLRU(head_pc)) {
-		enum lru_list lru;
-		struct mem_cgroup_per_zone *mz;
-		/*
-		 * We hold lru_lock, then, reduce counter directly.
-		 */
-		lru = page_lru(head);
-		mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
-		MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
-	}
+	/*
+	 * Tail pages will be added to LRU.
+	 * We hold lru_lock,then,reduce counter directly.
+	 */
+	lru = page_lru(head);
+	mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
+	MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
 }
 #endif
 
@@ -2620,7 +2601,7 @@ static int mem_cgroup_move_parent(struct page *page,
 
 	parent = mem_cgroup_from_cont(pcg);
 	ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false);
-	if (ret || !parent)
+	if (ret)
 		goto put_back;
 
 	if (nr_pages > 1)
@@ -2667,9 +2648,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 
 	pc = lookup_page_cgroup(page);
 	ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
-	if (ret || !memcg)
+	if (ret == -ENOMEM)
 		return ret;
-
 	__mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
 	return 0;
 }
@@ -2736,10 +2716,9 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 	if (!page_is_file_cache(page))
 		type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
 
-	if (!PageSwapCache(page)) {
+	if (!PageSwapCache(page))
 		ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
-		WARN_ON_ONCE(PageLRU(page));
-	} else { /* page is swapcache/shmem */
+	else { /* page is swapcache/shmem */
 		ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg);
 		if (!ret)
 			__mem_cgroup_commit_charge_swapin(page, memcg, type);
@@ -2781,11 +2760,16 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 	*memcgp = memcg;
 	ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
 	css_put(&memcg->css);
+	if (ret == -EINTR)
+		ret = 0;
 	return ret;
 charge_cur_mm:
 	if (unlikely(!mm))
 		mm = &init_mm;
-	return __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
+	ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
+	if (ret == -EINTR)
+		ret = 0;
+	return ret;
 }
 
 static void
@@ -3245,7 +3229,7 @@ int mem_cgroup_prepare_migration(struct page *page,
 	*memcgp = memcg;
 	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
 	css_put(&memcg->css);/* drop extra refcnt */
-	if (ret || *memcgp == NULL) {
+	if (ret) {
 		if (PageAnon(page)) {
 			lock_page_cgroup(pc);
 			ClearPageCgroupMigration(pc);
@@ -3255,6 +3239,7 @@ int mem_cgroup_prepare_migration(struct page *page,
 			 */
 			mem_cgroup_uncharge_page(page);
 		}
+		/* we'll need to revisit this error code (we have -EINTR) */
 		return -ENOMEM;
 	}
 	/*
@@ -3674,7 +3659,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 		pc = lookup_page_cgroup(page);
 
 		ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
-		if (ret == -ENOMEM)
+		if (ret == -ENOMEM || ret == -EINTR)
 			break;
 
 		if (ret == -EBUSY || ret == -EINVAL) {
@@ -5065,9 +5050,9 @@ one_by_one:
 		}
 		ret = __mem_cgroup_try_charge(NULL,
 					GFP_KERNEL, 1, &memcg, false);
-		if (ret || !memcg)
+		if (ret)
 			/* mem_cgroup_clear_mc() will do uncharge later */
-			return -ENOMEM;
+			return ret;
 		mc.precharge++;
 	}
 	return ret;
-- 
cgit v1.2.3


From f21760b15dcd091e5afd38d0b97197b45f7ef2ea Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Thu, 12 Jan 2012 17:19:16 -0800
Subject: thp: add tlb_remove_pmd_tlb_entry

We have tlb_remove_tlb_entry to indicate a pte tlb flush entry should be
flushed, but not a corresponding API for pmd entry.  This isn't a
problem so far because THP is only for x86 currently and tlb_flush()
under x86 will flush entire TLB.  But this is confusion and could be
missed if thp is ported to other arch.

Also convert tlb->need_flush = 1 to a VM_BUG_ON(!tlb->need_flush) in
__tlb_remove_page() as suggested by Andrea Arcangeli.  The
__tlb_remove_page() function is supposed to be called after
tlb_remove_xxx_tlb_entry() and we can catch any misuse.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/tlb.h | 14 ++++++++++++++
 include/linux/huge_mm.h   |  2 +-
 mm/huge_memory.c          |  3 ++-
 mm/memory.c               |  4 ++--
 4 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index e58fa777fa09..f96a5b58a975 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -139,6 +139,20 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 		__tlb_remove_tlb_entry(tlb, ptep, address);	\
 	} while (0)
 
+/**
+ * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation
+ * This is a nop so far, because only x86 needs it.
+ */
+#ifndef __tlb_remove_pmd_tlb_entry
+#define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0)
+#endif
+
+#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)		\
+	do {							\
+		tlb->need_flush = 1;				\
+		__tlb_remove_pmd_tlb_entry(tlb, pmdp, address);	\
+	} while (0)
+
 #define pte_free_tlb(tlb, ptep, address)			\
 	do {							\
 		tlb->need_flush = 1;				\
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a9ace9c32507..1b921299abc4 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -18,7 +18,7 @@ extern struct page *follow_trans_huge_pmd(struct mm_struct *mm,
 					  unsigned int flags);
 extern int zap_huge_pmd(struct mmu_gather *tlb,
 			struct vm_area_struct *vma,
-			pmd_t *pmd);
+			pmd_t *pmd, unsigned long addr);
 extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, unsigned long end,
 			unsigned char *vec);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 964fc5a2edd2..5a595554bd8c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1026,7 +1026,7 @@ out:
 }
 
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
-		 pmd_t *pmd)
+		 pmd_t *pmd, unsigned long addr)
 {
 	int ret = 0;
 
@@ -1042,6 +1042,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			pgtable = get_pmd_huge_pte(tlb->mm);
 			page = pmd_page(*pmd);
 			pmd_clear(pmd);
+			tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
 			page_remove_rmap(page);
 			VM_BUG_ON(page_mapcount(page) < 0);
 			add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
diff --git a/mm/memory.c b/mm/memory.c
index 829d43735402..5e30583c2605 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -293,7 +293,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	struct mmu_gather_batch *batch;
 
-	tlb->need_flush = 1;
+	VM_BUG_ON(!tlb->need_flush);
 
 	if (tlb_fast_mode(tlb)) {
 		free_page_and_swap_cache(page);
@@ -1231,7 +1231,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 			if (next-addr != HPAGE_PMD_SIZE) {
 				VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
 				split_huge_page_pmd(vma->vm_mm, pmd);
-			} else if (zap_huge_pmd(tlb, vma, pmd))
+			} else if (zap_huge_pmd(tlb, vma, pmd, addr))
 				continue;
 			/* fall through */
 		}
-- 
cgit v1.2.3


From b969c4ab9f182a6e1b2a0848be349f99714947b0 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 12 Jan 2012 17:19:34 -0800
Subject: mm: compaction: determine if dirty pages can be migrated without
 blocking within ->migratepage

Asynchronous compaction is used when allocating transparent hugepages to
avoid blocking for long periods of time.  Due to reports of stalling,
there was a debate on disabling synchronous compaction but this severely
impacted allocation success rates.  Part of the reason was that many dirty
pages are skipped in asynchronous compaction by the following check;

	if (PageDirty(page) && !sync &&
		mapping->a_ops->migratepage != migrate_page)
			rc = -EBUSY;

This skips over all mapping aops using buffer_migrate_page() even though
it is possible to migrate some of these pages without blocking.  This
patch updates the ->migratepage callback with a "sync" parameter.  It is
the responsibility of the callback to fail gracefully if migration would
block.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andy Isaacson <adi@hexapodia.org>
Cc: Nai Xia <nai.xia@gmail.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/btrfs/disk-io.c      |   4 +-
 fs/hugetlbfs/inode.c    |   3 +-
 fs/nfs/internal.h       |   2 +-
 fs/nfs/write.c          |   4 +-
 include/linux/fs.h      |   9 ++--
 include/linux/migrate.h |   2 +-
 mm/migrate.c            | 129 ++++++++++++++++++++++++++++++++++--------------
 7 files changed, 106 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f99a099a7747..1375494c8cb6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -872,7 +872,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
 #ifdef CONFIG_MIGRATION
 static int btree_migratepage(struct address_space *mapping,
-			struct page *newpage, struct page *page)
+			struct page *newpage, struct page *page, bool sync)
 {
 	/*
 	 * we can't safely write a btree page from here,
@@ -887,7 +887,7 @@ static int btree_migratepage(struct address_space *mapping,
 	if (page_has_private(page) &&
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
 #endif
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e425ad9d0490..06fd4608a990 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -583,7 +583,8 @@ static int hugetlbfs_set_page_dirty(struct page *page)
 }
 
 static int hugetlbfs_migrate_page(struct address_space *mapping,
-				struct page *newpage, struct page *page)
+				struct page *newpage, struct page *page,
+				bool sync)
 {
 	int rc;
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5ee92538b063..114398a15830 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -332,7 +332,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data);
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
-		struct page *, struct page *);
+		struct page *, struct page *, bool);
 #else
 #define nfs_migrate_page NULL
 #endif
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0c3885255f97..889e98bc5a21 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1688,7 +1688,7 @@ out_error:
 
 #ifdef CONFIG_MIGRATION
 int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
-		struct page *page)
+		struct page *page, bool sync)
 {
 	/*
 	 * If PagePrivate is set, then the page is currently associated with
@@ -1703,7 +1703,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 
 	nfs_fscache_release_page(page, GFP_KERNEL);
 
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
 #endif
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a7409bc157e0..b92b73d0b2b9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -609,9 +609,12 @@ struct address_space_operations {
 			loff_t offset, unsigned long nr_segs);
 	int (*get_xip_mem)(struct address_space *, pgoff_t, int,
 						void **, unsigned long *);
-	/* migrate the contents of a page to the specified target */
+	/*
+	 * migrate the contents of a page to the specified target. If sync
+	 * is false, it must not block.
+	 */
 	int (*migratepage) (struct address_space *,
-			struct page *, struct page *);
+			struct page *, struct page *, bool);
 	int (*launder_page) (struct page *);
 	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
 					unsigned long);
@@ -2537,7 +2540,7 @@ extern int generic_check_addressable(unsigned, u64);
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
-				struct page *, struct page *);
+				struct page *, struct page *, bool);
 #else
 #define buffer_migrate_page NULL
 #endif
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index e39aeecfe9a2..14e6d2a88475 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -11,7 +11,7 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
 extern void putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
-			struct page *, struct page *);
+			struct page *, struct page *, bool);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
 			bool sync);
diff --git a/mm/migrate.c b/mm/migrate.c
index fc391985899f..4e86f3bacb85 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -216,6 +216,55 @@ out:
 	pte_unmap_unlock(ptep, ptl);
 }
 
+#ifdef CONFIG_BLOCK
+/* Returns true if all buffers are successfully locked */
+static bool buffer_migrate_lock_buffers(struct buffer_head *head, bool sync)
+{
+	struct buffer_head *bh = head;
+
+	/* Simple case, sync compaction */
+	if (sync) {
+		do {
+			get_bh(bh);
+			lock_buffer(bh);
+			bh = bh->b_this_page;
+
+		} while (bh != head);
+
+		return true;
+	}
+
+	/* async case, we cannot block on lock_buffer so use trylock_buffer */
+	do {
+		get_bh(bh);
+		if (!trylock_buffer(bh)) {
+			/*
+			 * We failed to lock the buffer and cannot stall in
+			 * async migration. Release the taken locks
+			 */
+			struct buffer_head *failed_bh = bh;
+			put_bh(failed_bh);
+			bh = head;
+			while (bh != failed_bh) {
+				unlock_buffer(bh);
+				put_bh(bh);
+				bh = bh->b_this_page;
+			}
+			return false;
+		}
+
+		bh = bh->b_this_page;
+	} while (bh != head);
+	return true;
+}
+#else
+static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
+								bool sync)
+{
+	return true;
+}
+#endif /* CONFIG_BLOCK */
+
 /*
  * Replace the page in the mapping.
  *
@@ -225,7 +274,8 @@ out:
  * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
  */
 static int migrate_page_move_mapping(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page,
+		struct buffer_head *head, bool sync)
 {
 	int expected_count;
 	void **pslot;
@@ -254,6 +304,19 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 		return -EAGAIN;
 	}
 
+	/*
+	 * In the async migration case of moving a page with buffers, lock the
+	 * buffers using trylock before the mapping is moved. If the mapping
+	 * was moved, we later failed to lock the buffers and could not move
+	 * the mapping back due to an elevated page count, we would have to
+	 * block waiting on other references to be dropped.
+	 */
+	if (!sync && head && !buffer_migrate_lock_buffers(head, sync)) {
+		page_unfreeze_refs(page, expected_count);
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
 	/*
 	 * Now we know that no one else is looking at the page.
 	 */
@@ -409,13 +472,13 @@ EXPORT_SYMBOL(fail_migrate_page);
  * Pages are locked upon entry and exit.
  */
 int migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page, bool sync)
 {
 	int rc;
 
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	rc = migrate_page_move_mapping(mapping, newpage, page);
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, sync);
 
 	if (rc)
 		return rc;
@@ -432,28 +495,28 @@ EXPORT_SYMBOL(migrate_page);
  * exist.
  */
 int buffer_migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page, bool sync)
 {
 	struct buffer_head *bh, *head;
 	int rc;
 
 	if (!page_has_buffers(page))
-		return migrate_page(mapping, newpage, page);
+		return migrate_page(mapping, newpage, page, sync);
 
 	head = page_buffers(page);
 
-	rc = migrate_page_move_mapping(mapping, newpage, page);
+	rc = migrate_page_move_mapping(mapping, newpage, page, head, sync);
 
 	if (rc)
 		return rc;
 
-	bh = head;
-	do {
-		get_bh(bh);
-		lock_buffer(bh);
-		bh = bh->b_this_page;
-
-	} while (bh != head);
+	/*
+	 * In the async case, migrate_page_move_mapping locked the buffers
+	 * with an IRQ-safe spinlock held. In the sync case, the buffers
+	 * need to be locked now
+	 */
+	if (sync)
+		BUG_ON(!buffer_migrate_lock_buffers(head, sync));
 
 	ClearPagePrivate(page);
 	set_page_private(newpage, page_private(page));
@@ -530,10 +593,13 @@ static int writeout(struct address_space *mapping, struct page *page)
  * Default handling if a filesystem does not provide a migration function.
  */
 static int fallback_migrate_page(struct address_space *mapping,
-	struct page *newpage, struct page *page)
+	struct page *newpage, struct page *page, bool sync)
 {
-	if (PageDirty(page))
+	if (PageDirty(page)) {
+		if (!sync)
+			return -EBUSY;
 		return writeout(mapping, page);
+	}
 
 	/*
 	 * Buffers may be managed in a filesystem specific way.
@@ -543,7 +609,7 @@ static int fallback_migrate_page(struct address_space *mapping,
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
 
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
 
 /*
@@ -579,29 +645,18 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 
 	mapping = page_mapping(page);
 	if (!mapping)
-		rc = migrate_page(mapping, newpage, page);
-	else {
+		rc = migrate_page(mapping, newpage, page, sync);
+	else if (mapping->a_ops->migratepage)
 		/*
-		 * Do not writeback pages if !sync and migratepage is
-		 * not pointing to migrate_page() which is nonblocking
-		 * (swapcache/tmpfs uses migratepage = migrate_page).
+		 * Most pages have a mapping and most filesystems provide a
+		 * migratepage callback. Anonymous pages are part of swap
+		 * space which also has its own migratepage callback. This
+		 * is the most common path for page migration.
 		 */
-		if (PageDirty(page) && !sync &&
-		    mapping->a_ops->migratepage != migrate_page)
-			rc = -EBUSY;
-		else if (mapping->a_ops->migratepage)
-			/*
-			 * Most pages have a mapping and most filesystems
-			 * should provide a migration function. Anonymous
-			 * pages are part of swap space which also has its
-			 * own migration function. This is the most common
-			 * path for page migration.
-			 */
-			rc = mapping->a_ops->migratepage(mapping,
-							newpage, page);
-		else
-			rc = fallback_migrate_page(mapping, newpage, page);
-	}
+		rc = mapping->a_ops->migratepage(mapping,
+						newpage, page, sync);
+	else
+		rc = fallback_migrate_page(mapping, newpage, page, sync);
 
 	if (rc) {
 		newpage->mapping = NULL;
-- 
cgit v1.2.3


From c82449352854ff09e43062246af86bdeb628f0c3 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 12 Jan 2012 17:19:38 -0800
Subject: mm: compaction: make isolate_lru_page() filter-aware again

Commit 39deaf85 ("mm: compaction: make isolate_lru_page() filter-aware")
noted that compaction does not migrate dirty or writeback pages and that
is was meaningless to pick the page and re-add it to the LRU list.  This
had to be partially reverted because some dirty pages can be migrated by
compaction without blocking.

This patch updates "mm: compaction: make isolate_lru_page" by skipping
over pages that migration has no possibility of migrating to minimise LRU
disruption.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel<riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Cc: Dave Jones <davej@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andy Isaacson <adi@hexapodia.org>
Cc: Nai Xia <nai.xia@gmail.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  2 ++
 mm/compaction.c        |  3 +++
 mm/vmscan.c            | 35 +++++++++++++++++++++++++++++++++--
 3 files changed, 38 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 42e544cd4c9f..2038b90ca6e3 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -177,6 +177,8 @@ struct lruvec {
 #define ISOLATE_CLEAN		((__force isolate_mode_t)0x4)
 /* Isolate unmapped file */
 #define ISOLATE_UNMAPPED	((__force isolate_mode_t)0x8)
+/* Isolate for asynchronous migration */
+#define ISOLATE_ASYNC_MIGRATE	((__force isolate_mode_t)0x10)
 
 /* LRU Isolation modes. */
 typedef unsigned __bitwise__ isolate_mode_t;
diff --git a/mm/compaction.c b/mm/compaction.c
index d31e64becb38..fb291585e1bf 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -349,6 +349,9 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 			continue;
 		}
 
+		if (!cc->sync)
+			mode |= ISOLATE_ASYNC_MIGRATE;
+
 		/* Try isolate the page */
 		if (__isolate_lru_page(page, mode, 0) != 0)
 			continue;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cb68c53db4ec..efbcab1c8f54 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1075,8 +1075,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
 
 	ret = -EBUSY;
 
-	if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page)))
-		return ret;
+	/*
+	 * To minimise LRU disruption, the caller can indicate that it only
+	 * wants to isolate pages it will be able to operate on without
+	 * blocking - clean pages for the most part.
+	 *
+	 * ISOLATE_CLEAN means that only clean pages should be isolated. This
+	 * is used by reclaim when it is cannot write to backing storage
+	 *
+	 * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages
+	 * that it is possible to migrate without blocking
+	 */
+	if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
+		/* All the caller can do on PageWriteback is block */
+		if (PageWriteback(page))
+			return ret;
+
+		if (PageDirty(page)) {
+			struct address_space *mapping;
+
+			/* ISOLATE_CLEAN means only clean pages */
+			if (mode & ISOLATE_CLEAN)
+				return ret;
+
+			/*
+			 * Only pages without mappings or that have a
+			 * ->migratepage callback are possible to migrate
+			 * without blocking
+			 */
+			mapping = page_mapping(page);
+			if (mapping && !mapping->a_ops->migratepage)
+				return ret;
+		}
+	}
 
 	if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
 		return ret;
-- 
cgit v1.2.3


From a6bc32b899223a877f595ef9ddc1e89ead5072b8 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 12 Jan 2012 17:19:43 -0800
Subject: mm: compaction: introduce sync-light migration for use by compaction

This patch adds a lightweight sync migrate operation MIGRATE_SYNC_LIGHT
mode that avoids writing back pages to backing storage.  Async compaction
maps to MIGRATE_ASYNC while sync compaction maps to MIGRATE_SYNC_LIGHT.
For other migrate_pages users such as memory hotplug, MIGRATE_SYNC is
used.

This avoids sync compaction stalling for an excessive length of time,
particularly when copying files to a USB stick where there might be a
large number of dirty pages backed by a filesystem that does not support
->writepages.

[aarcange@redhat.com: This patch is heavily based on Andrea's work]
[akpm@linux-foundation.org: fix fs/nfs/write.c build]
[akpm@linux-foundation.org: fix fs/btrfs/disk-io.c build]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andy Isaacson <adi@hexapodia.org>
Cc: Nai Xia <nai.xia@gmail.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/btrfs/disk-io.c      |  5 ++--
 fs/hugetlbfs/inode.c    |  2 +-
 fs/nfs/internal.h       |  2 +-
 fs/nfs/write.c          |  4 +--
 include/linux/fs.h      |  6 ++--
 include/linux/migrate.h | 23 +++++++++++----
 mm/compaction.c         |  2 +-
 mm/memory-failure.c     |  2 +-
 mm/memory_hotplug.c     |  2 +-
 mm/mempolicy.c          |  2 +-
 mm/migrate.c            | 78 +++++++++++++++++++++++++++----------------------
 11 files changed, 76 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1375494c8cb6..d8525662ca7a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -872,7 +872,8 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
 #ifdef CONFIG_MIGRATION
 static int btree_migratepage(struct address_space *mapping,
-			struct page *newpage, struct page *page, bool sync)
+			struct page *newpage, struct page *page,
+			enum migrate_mode mode)
 {
 	/*
 	 * we can't safely write a btree page from here,
@@ -887,7 +888,7 @@ static int btree_migratepage(struct address_space *mapping,
 	if (page_has_private(page) &&
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
-	return migrate_page(mapping, newpage, page, sync);
+	return migrate_page(mapping, newpage, page, mode);
 }
 #endif
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 06fd4608a990..1e85a7ac0217 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -584,7 +584,7 @@ static int hugetlbfs_set_page_dirty(struct page *page)
 
 static int hugetlbfs_migrate_page(struct address_space *mapping,
 				struct page *newpage, struct page *page,
-				bool sync)
+				enum migrate_mode mode)
 {
 	int rc;
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 114398a15830..8102db9b926c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -332,7 +332,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data);
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
-		struct page *, struct page *, bool);
+		struct page *, struct page *, enum migrate_mode);
 #else
 #define nfs_migrate_page NULL
 #endif
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 889e98bc5a21..834f0fe96f89 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1688,7 +1688,7 @@ out_error:
 
 #ifdef CONFIG_MIGRATION
 int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
-		struct page *page, bool sync)
+		struct page *page, enum migrate_mode mode)
 {
 	/*
 	 * If PagePrivate is set, then the page is currently associated with
@@ -1703,7 +1703,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 
 	nfs_fscache_release_page(page, GFP_KERNEL);
 
-	return migrate_page(mapping, newpage, page, sync);
+	return migrate_page(mapping, newpage, page, mode);
 }
 #endif
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b92b73d0b2b9..e694bd4434a4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -525,6 +525,7 @@ enum positive_aop_returns {
 struct page;
 struct address_space;
 struct writeback_control;
+enum migrate_mode;
 
 struct iov_iter {
 	const struct iovec *iov;
@@ -614,7 +615,7 @@ struct address_space_operations {
 	 * is false, it must not block.
 	 */
 	int (*migratepage) (struct address_space *,
-			struct page *, struct page *, bool);
+			struct page *, struct page *, enum migrate_mode);
 	int (*launder_page) (struct page *);
 	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
 					unsigned long);
@@ -2540,7 +2541,8 @@ extern int generic_check_addressable(unsigned, u64);
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
-				struct page *, struct page *, bool);
+				struct page *, struct page *,
+				enum migrate_mode);
 #else
 #define buffer_migrate_page NULL
 #endif
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 14e6d2a88475..eaf867412f7a 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -6,18 +6,31 @@
 
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
+/*
+ * MIGRATE_ASYNC means never block
+ * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking
+ *	on most operations but not ->writepage as the potential stall time
+ *	is too significant
+ * MIGRATE_SYNC will block when migrating pages
+ */
+enum migrate_mode {
+	MIGRATE_ASYNC,
+	MIGRATE_SYNC_LIGHT,
+	MIGRATE_SYNC,
+};
+
 #ifdef CONFIG_MIGRATION
 #define PAGE_MIGRATION 1
 
 extern void putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
-			struct page *, struct page *, bool);
+			struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
-			bool sync);
+			enum migrate_mode mode);
 extern int migrate_huge_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
-			bool sync);
+			enum migrate_mode mode);
 
 extern int fail_migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -36,10 +49,10 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 static inline void putback_lru_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
-		bool sync) { return -ENOSYS; }
+		enum migrate_mode mode) { return -ENOSYS; }
 static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
-		bool sync) { return -ENOSYS; }
+		enum migrate_mode mode) { return -ENOSYS; }
 
 static inline int migrate_prep(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }
diff --git a/mm/compaction.c b/mm/compaction.c
index fb291585e1bf..71a58f67f481 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -557,7 +557,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 		nr_migrate = cc->nr_migratepages;
 		err = migrate_pages(&cc->migratepages, compaction_alloc,
 				(unsigned long)cc, false,
-				cc->sync);
+				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
 		update_nr_listpages(cc);
 		nr_remaining = cc->nr_migratepages;
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 06d3479513aa..56080ea36140 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1557,7 +1557,7 @@ int soft_offline_page(struct page *page, int flags)
 					    page_is_file_cache(page));
 		list_add(&page->lru, &pagelist);
 		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
-								0, true);
+							0, MIGRATE_SYNC);
 		if (ret) {
 			putback_lru_pages(&pagelist);
 			pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2168489c0bc9..6629fafd6ce4 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -809,7 +809,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		}
 		/* this function returns # of failed pages */
 		ret = migrate_pages(&source, hotremove_migrate_alloc, 0,
-								true, true);
+							true, MIGRATE_SYNC);
 		if (ret)
 			putback_lru_pages(&source);
 	}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e3d58f088466..06b145fb64ab 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -942,7 +942,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_node_page, dest,
-								false, true);
+							false, MIGRATE_SYNC);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
diff --git a/mm/migrate.c b/mm/migrate.c
index 4e86f3bacb85..9871a56d82c3 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -218,12 +218,13 @@ out:
 
 #ifdef CONFIG_BLOCK
 /* Returns true if all buffers are successfully locked */
-static bool buffer_migrate_lock_buffers(struct buffer_head *head, bool sync)
+static bool buffer_migrate_lock_buffers(struct buffer_head *head,
+							enum migrate_mode mode)
 {
 	struct buffer_head *bh = head;
 
 	/* Simple case, sync compaction */
-	if (sync) {
+	if (mode != MIGRATE_ASYNC) {
 		do {
 			get_bh(bh);
 			lock_buffer(bh);
@@ -259,7 +260,7 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head, bool sync)
 }
 #else
 static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
-								bool sync)
+							enum migrate_mode mode)
 {
 	return true;
 }
@@ -275,7 +276,7 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
  */
 static int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page,
-		struct buffer_head *head, bool sync)
+		struct buffer_head *head, enum migrate_mode mode)
 {
 	int expected_count;
 	void **pslot;
@@ -311,7 +312,8 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	 * the mapping back due to an elevated page count, we would have to
 	 * block waiting on other references to be dropped.
 	 */
-	if (!sync && head && !buffer_migrate_lock_buffers(head, sync)) {
+	if (mode == MIGRATE_ASYNC && head &&
+			!buffer_migrate_lock_buffers(head, mode)) {
 		page_unfreeze_refs(page, expected_count);
 		spin_unlock_irq(&mapping->tree_lock);
 		return -EAGAIN;
@@ -472,13 +474,14 @@ EXPORT_SYMBOL(fail_migrate_page);
  * Pages are locked upon entry and exit.
  */
 int migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page, bool sync)
+		struct page *newpage, struct page *page,
+		enum migrate_mode mode)
 {
 	int rc;
 
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, sync);
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
 
 	if (rc)
 		return rc;
@@ -495,17 +498,17 @@ EXPORT_SYMBOL(migrate_page);
  * exist.
  */
 int buffer_migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page, bool sync)
+		struct page *newpage, struct page *page, enum migrate_mode mode)
 {
 	struct buffer_head *bh, *head;
 	int rc;
 
 	if (!page_has_buffers(page))
-		return migrate_page(mapping, newpage, page, sync);
+		return migrate_page(mapping, newpage, page, mode);
 
 	head = page_buffers(page);
 
-	rc = migrate_page_move_mapping(mapping, newpage, page, head, sync);
+	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
 
 	if (rc)
 		return rc;
@@ -515,8 +518,8 @@ int buffer_migrate_page(struct address_space *mapping,
 	 * with an IRQ-safe spinlock held. In the sync case, the buffers
 	 * need to be locked now
 	 */
-	if (sync)
-		BUG_ON(!buffer_migrate_lock_buffers(head, sync));
+	if (mode != MIGRATE_ASYNC)
+		BUG_ON(!buffer_migrate_lock_buffers(head, mode));
 
 	ClearPagePrivate(page);
 	set_page_private(newpage, page_private(page));
@@ -593,10 +596,11 @@ static int writeout(struct address_space *mapping, struct page *page)
  * Default handling if a filesystem does not provide a migration function.
  */
 static int fallback_migrate_page(struct address_space *mapping,
-	struct page *newpage, struct page *page, bool sync)
+	struct page *newpage, struct page *page, enum migrate_mode mode)
 {
 	if (PageDirty(page)) {
-		if (!sync)
+		/* Only writeback pages in full synchronous migration */
+		if (mode != MIGRATE_SYNC)
 			return -EBUSY;
 		return writeout(mapping, page);
 	}
@@ -609,7 +613,7 @@ static int fallback_migrate_page(struct address_space *mapping,
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
 
-	return migrate_page(mapping, newpage, page, sync);
+	return migrate_page(mapping, newpage, page, mode);
 }
 
 /*
@@ -624,7 +628,7 @@ static int fallback_migrate_page(struct address_space *mapping,
  *  == 0 - success
  */
 static int move_to_new_page(struct page *newpage, struct page *page,
-					int remap_swapcache, bool sync)
+				int remap_swapcache, enum migrate_mode mode)
 {
 	struct address_space *mapping;
 	int rc;
@@ -645,7 +649,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 
 	mapping = page_mapping(page);
 	if (!mapping)
-		rc = migrate_page(mapping, newpage, page, sync);
+		rc = migrate_page(mapping, newpage, page, mode);
 	else if (mapping->a_ops->migratepage)
 		/*
 		 * Most pages have a mapping and most filesystems provide a
@@ -654,9 +658,9 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 		 * is the most common path for page migration.
 		 */
 		rc = mapping->a_ops->migratepage(mapping,
-						newpage, page, sync);
+						newpage, page, mode);
 	else
-		rc = fallback_migrate_page(mapping, newpage, page, sync);
+		rc = fallback_migrate_page(mapping, newpage, page, mode);
 
 	if (rc) {
 		newpage->mapping = NULL;
@@ -671,7 +675,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 }
 
 static int __unmap_and_move(struct page *page, struct page *newpage,
-				int force, bool offlining, bool sync)
+			int force, bool offlining, enum migrate_mode mode)
 {
 	int rc = -EAGAIN;
 	int remap_swapcache = 1;
@@ -680,7 +684,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 	struct anon_vma *anon_vma = NULL;
 
 	if (!trylock_page(page)) {
-		if (!force || !sync)
+		if (!force || mode == MIGRATE_ASYNC)
 			goto out;
 
 		/*
@@ -726,10 +730,12 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 
 	if (PageWriteback(page)) {
 		/*
-		 * For !sync, there is no point retrying as the retry loop
-		 * is expected to be too short for PageWriteback to be cleared
+		 * Only in the case of a full syncronous migration is it
+		 * necessary to wait for PageWriteback. In the async case,
+		 * the retry loop is too short and in the sync-light case,
+		 * the overhead of stalling is too much
 		 */
-		if (!sync) {
+		if (mode != MIGRATE_SYNC) {
 			rc = -EBUSY;
 			goto uncharge;
 		}
@@ -800,7 +806,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 
 skip_unmap:
 	if (!page_mapped(page))
-		rc = move_to_new_page(newpage, page, remap_swapcache, sync);
+		rc = move_to_new_page(newpage, page, remap_swapcache, mode);
 
 	if (rc && remap_swapcache)
 		remove_migration_ptes(page, page);
@@ -823,7 +829,8 @@ out:
  * to the newly allocated page in newpage.
  */
 static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force, bool offlining, bool sync)
+			struct page *page, int force, bool offlining,
+			enum migrate_mode mode)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -843,7 +850,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		if (unlikely(split_huge_page(page)))
 			goto out;
 
-	rc = __unmap_and_move(page, newpage, force, offlining, sync);
+	rc = __unmap_and_move(page, newpage, force, offlining, mode);
 out:
 	if (rc != -EAGAIN) {
 		/*
@@ -891,7 +898,8 @@ out:
  */
 static int unmap_and_move_huge_page(new_page_t get_new_page,
 				unsigned long private, struct page *hpage,
-				int force, bool offlining, bool sync)
+				int force, bool offlining,
+				enum migrate_mode mode)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -904,7 +912,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	rc = -EAGAIN;
 
 	if (!trylock_page(hpage)) {
-		if (!force || !sync)
+		if (!force || mode != MIGRATE_SYNC)
 			goto out;
 		lock_page(hpage);
 	}
@@ -915,7 +923,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
 
 	if (!page_mapped(hpage))
-		rc = move_to_new_page(new_hpage, hpage, 1, sync);
+		rc = move_to_new_page(new_hpage, hpage, 1, mode);
 
 	if (rc)
 		remove_migration_ptes(hpage, hpage);
@@ -958,7 +966,7 @@ out:
  */
 int migrate_pages(struct list_head *from,
 		new_page_t get_new_page, unsigned long private, bool offlining,
-		bool sync)
+		enum migrate_mode mode)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -979,7 +987,7 @@ int migrate_pages(struct list_head *from,
 
 			rc = unmap_and_move(get_new_page, private,
 						page, pass > 2, offlining,
-						sync);
+						mode);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -1009,7 +1017,7 @@ out:
 
 int migrate_huge_pages(struct list_head *from,
 		new_page_t get_new_page, unsigned long private, bool offlining,
-		bool sync)
+		enum migrate_mode mode)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -1026,7 +1034,7 @@ int migrate_huge_pages(struct list_head *from,
 
 			rc = unmap_and_move_huge_page(get_new_page,
 					private, page, pass > 2, offlining,
-					sync);
+					mode);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -1155,7 +1163,7 @@ set_status:
 	err = 0;
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm, 0, true);
+				(unsigned long)pm, 0, MIGRATE_SYNC);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
-- 
cgit v1.2.3


From 2bcf887963812c075f80a14e1fad8ec7e1c67acf Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 12 Jan 2012 17:19:56 -0800
Subject: mm: take pagevecs off reclaim stack

Replace pagevecs in putback_lru_pages() and move_active_pages_to_lru()
by lists of pages_to_free: then apply Konstantin Khlebnikov's
free_hot_cold_page_list() to them instead of pagevec_release().

Which simplifies the flow (no need to drop and retake lock whenever
pagevec fills up) and reduces stale addresses in stack backtraces
(which often showed through the pagevecs); but more importantly,
removes another 120 bytes from the deepest stacks in page reclaim.
Although I've not recently seen an actual stack overflow here with
a vanilla kernel, move_active_pages_to_lru() has often featured in
deep backtraces.

However, free_hot_cold_page_list() does not handle compound pages
(nor need it: a Transparent HugePage would have been split by the
time it reaches the call in shrink_page_list()), but it is possible
for putback_lru_pages() or move_active_pages_to_lru() to be left
holding the last reference on a THP, so must exclude the unlikely
compound case before putting on pages_to_free.

Remove pagevec_strip(), its work now done in move_active_pages_to_lru().
The pagevec in scan_mapping_unevictable_pages() remains in mm/vmscan.c,
but that is never on the reclaim path, and cannot be replaced by a list.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagevec.h |  2 --
 mm/swap.c               | 19 ----------------
 mm/vmscan.c             | 58 ++++++++++++++++++++++++++++++++++---------------
 3 files changed, 40 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index ed17024d2ebe..9def9121f8a2 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -22,7 +22,6 @@ struct pagevec {
 
 void __pagevec_release(struct pagevec *pvec);
 void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
-void pagevec_strip(struct pagevec *pvec);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
 		pgoff_t start, unsigned nr_pages);
 unsigned pagevec_lookup_tag(struct pagevec *pvec,
@@ -59,7 +58,6 @@ static inline unsigned pagevec_add(struct pagevec *pvec, struct page *page)
 	return pagevec_space(pvec);
 }
 
-
 static inline void pagevec_release(struct pagevec *pvec)
 {
 	if (pagevec_count(pvec))
diff --git a/mm/swap.c b/mm/swap.c
index db6defaf2e55..79c22a649a3e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -23,7 +23,6 @@
 #include <linux/init.h>
 #include <linux/export.h>
 #include <linux/mm_inline.h>
-#include <linux/buffer_head.h>	/* for try_to_release_page() */
 #include <linux/percpu_counter.h>
 #include <linux/percpu.h>
 #include <linux/cpu.h>
@@ -730,24 +729,6 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 
 EXPORT_SYMBOL(____pagevec_lru_add);
 
-/*
- * Try to drop buffers from the pages in a pagevec
- */
-void pagevec_strip(struct pagevec *pvec)
-{
-	int i;
-
-	for (i = 0; i < pagevec_count(pvec); i++) {
-		struct page *page = pvec->pages[i];
-
-		if (page_has_private(page) && trylock_page(page)) {
-			if (page_has_private(page))
-				try_to_release_page(page, 0);
-			unlock_page(page);
-		}
-	}
-}
-
 /**
  * pagevec_lookup - gang pagecache lookup
  * @pvec:	Where the resulting pages are placed
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 25f90383b391..7724fb8e7498 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1398,12 +1398,10 @@ putback_lru_pages(struct mem_cgroup_zone *mz, struct scan_control *sc,
 		  struct list_head *page_list)
 {
 	struct page *page;
-	struct pagevec pvec;
+	LIST_HEAD(pages_to_free);
 	struct zone *zone = mz->zone;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 
-	pagevec_init(&pvec, 1);
-
 	/*
 	 * Put back any unfreeable pages.
 	 */
@@ -1427,17 +1425,24 @@ putback_lru_pages(struct mem_cgroup_zone *mz, struct scan_control *sc,
 			int numpages = hpage_nr_pages(page);
 			reclaim_stat->recent_rotated[file] += numpages;
 		}
-		if (!pagevec_add(&pvec, page)) {
-			spin_unlock_irq(&zone->lru_lock);
-			__pagevec_release(&pvec);
-			spin_lock_irq(&zone->lru_lock);
+		if (put_page_testzero(page)) {
+			__ClearPageLRU(page);
+			__ClearPageActive(page);
+			del_page_from_lru_list(zone, page, lru);
+
+			if (unlikely(PageCompound(page))) {
+				spin_unlock_irq(&zone->lru_lock);
+				(*get_compound_page_dtor(page))(page);
+				spin_lock_irq(&zone->lru_lock);
+			} else
+				list_add(&page->lru, &pages_to_free);
 		}
 	}
 	__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
 	__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
 
 	spin_unlock_irq(&zone->lru_lock);
-	pagevec_release(&pvec);
+	free_hot_cold_page_list(&pages_to_free, 1);
 }
 
 static noinline_for_stack void
@@ -1647,13 +1652,23 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 
 static void move_active_pages_to_lru(struct zone *zone,
 				     struct list_head *list,
+				     struct list_head *pages_to_free,
 				     enum lru_list lru)
 {
 	unsigned long pgmoved = 0;
-	struct pagevec pvec;
 	struct page *page;
 
-	pagevec_init(&pvec, 1);
+	if (buffer_heads_over_limit) {
+		spin_unlock_irq(&zone->lru_lock);
+		list_for_each_entry(page, list, lru) {
+			if (page_has_private(page) && trylock_page(page)) {
+				if (page_has_private(page))
+					try_to_release_page(page, 0);
+				unlock_page(page);
+			}
+		}
+		spin_lock_irq(&zone->lru_lock);
+	}
 
 	while (!list_empty(list)) {
 		struct lruvec *lruvec;
@@ -1667,12 +1682,17 @@ static void move_active_pages_to_lru(struct zone *zone,
 		list_move(&page->lru, &lruvec->lists[lru]);
 		pgmoved += hpage_nr_pages(page);
 
-		if (!pagevec_add(&pvec, page) || list_empty(list)) {
-			spin_unlock_irq(&zone->lru_lock);
-			if (buffer_heads_over_limit)
-				pagevec_strip(&pvec);
-			__pagevec_release(&pvec);
-			spin_lock_irq(&zone->lru_lock);
+		if (put_page_testzero(page)) {
+			__ClearPageLRU(page);
+			__ClearPageActive(page);
+			del_page_from_lru_list(zone, page, lru);
+
+			if (unlikely(PageCompound(page))) {
+				spin_unlock_irq(&zone->lru_lock);
+				(*get_compound_page_dtor(page))(page);
+				spin_lock_irq(&zone->lru_lock);
+			} else
+				list_add(&page->lru, pages_to_free);
 		}
 	}
 	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
@@ -1766,12 +1786,14 @@ static void shrink_active_list(unsigned long nr_pages,
 	 */
 	reclaim_stat->recent_rotated[file] += nr_rotated;
 
-	move_active_pages_to_lru(zone, &l_active,
+	move_active_pages_to_lru(zone, &l_active, &l_hold,
 						LRU_ACTIVE + file * LRU_FILE);
-	move_active_pages_to_lru(zone, &l_inactive,
+	move_active_pages_to_lru(zone, &l_inactive, &l_hold,
 						LRU_BASE   + file * LRU_FILE);
 	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
 	spin_unlock_irq(&zone->lru_lock);
+
+	free_hot_cold_page_list(&l_hold, 1);
 }
 
 #ifdef CONFIG_SWAP
-- 
cgit v1.2.3


From 5095ae83759f035c823fb375c6ed2de99c81d5ec Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 12 Jan 2012 17:19:58 -0800
Subject: mm: fewer underscores in ____pagevec_lru_add

What's so special about ____pagevec_lru_add() that it needs four leading
underscores?  Nothing, it just helped to distinguish from
__pagevec_lru_add() in 2.6.28 development.  Cut two leading underscores.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagevec.h | 10 +++++-----
 mm/swap.c               | 12 ++++++------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 9def9121f8a2..2aa12b8499c0 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -21,7 +21,7 @@ struct pagevec {
 };
 
 void __pagevec_release(struct pagevec *pvec);
-void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
+void __pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
 		pgoff_t start, unsigned nr_pages);
 unsigned pagevec_lookup_tag(struct pagevec *pvec,
@@ -66,22 +66,22 @@ static inline void pagevec_release(struct pagevec *pvec)
 
 static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
+	__pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
 }
 
 static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
+	__pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
 }
 
 static inline void __pagevec_lru_add_file(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
+	__pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
 }
 
 static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
+	__pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
 }
 
 static inline void pagevec_lru_add_file(struct pagevec *pvec)
diff --git a/mm/swap.c b/mm/swap.c
index 79c22a649a3e..e1cd623d9b2b 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -378,7 +378,7 @@ void __lru_cache_add(struct page *page, enum lru_list lru)
 
 	page_cache_get(page);
 	if (!pagevec_add(pvec, page))
-		____pagevec_lru_add(pvec, lru);
+		__pagevec_lru_add(pvec, lru);
 	put_cpu_var(lru_add_pvecs);
 }
 EXPORT_SYMBOL(__lru_cache_add);
@@ -506,7 +506,7 @@ static void drain_cpu_pagevecs(int cpu)
 	for_each_lru(lru) {
 		pvec = &pvecs[lru - LRU_BASE];
 		if (pagevec_count(pvec))
-			____pagevec_lru_add(pvec, lru);
+			__pagevec_lru_add(pvec, lru);
 	}
 
 	pvec = &per_cpu(lru_rotate_pvecs, cpu);
@@ -698,7 +698,7 @@ void lru_add_page_tail(struct zone* zone,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static void ____pagevec_lru_add_fn(struct page *page, void *arg)
+static void __pagevec_lru_add_fn(struct page *page, void *arg)
 {
 	enum lru_list lru = (enum lru_list)arg;
 	struct zone *zone = page_zone(page);
@@ -720,14 +720,14 @@ static void ____pagevec_lru_add_fn(struct page *page, void *arg)
  * Add the passed pages to the LRU, then drop the caller's refcount
  * on them.  Reinitialises the caller's pagevec.
  */
-void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
+void __pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 {
 	VM_BUG_ON(is_unevictable_lru(lru));
 
-	pagevec_lru_move_fn(pvec, ____pagevec_lru_add_fn, (void *)lru);
+	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, (void *)lru);
 }
 
-EXPORT_SYMBOL(____pagevec_lru_add);
+EXPORT_SYMBOL(__pagevec_lru_add);
 
 /**
  * pagevec_lookup - gang pagecache lookup
-- 
cgit v1.2.3


From 4111304dab198c687bc60f2e235a9f7ee92c47c8 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 12 Jan 2012 17:20:01 -0800
Subject: mm: enum lru_list lru

Mostly we use "enum lru_list lru": change those few "l"s to "lru"s.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_inline.h | 26 +++++++++++++-------------
 include/linux/mmzone.h    | 16 ++++++++--------
 mm/page_alloc.c           |  6 +++---
 mm/vmscan.c               | 22 +++++++++++-----------
 4 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 4e3478e71926..8f84d2e53d0f 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -22,21 +22,21 @@ static inline int page_is_file_cache(struct page *page)
 }
 
 static inline void
-add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list lru)
 {
 	struct lruvec *lruvec;
 
-	lruvec = mem_cgroup_lru_add_list(zone, page, l);
-	list_add(&page->lru, &lruvec->lists[l]);
-	__mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
+	lruvec = mem_cgroup_lru_add_list(zone, page, lru);
+	list_add(&page->lru, &lruvec->lists[lru]);
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, hpage_nr_pages(page));
 }
 
 static inline void
-del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list lru)
 {
-	mem_cgroup_lru_del_list(page, l);
+	mem_cgroup_lru_del_list(page, lru);
 	list_del(&page->lru);
-	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, -hpage_nr_pages(page));
 }
 
 /**
@@ -57,21 +57,21 @@ static inline enum lru_list page_lru_base_type(struct page *page)
 static inline void
 del_page_from_lru(struct zone *zone, struct page *page)
 {
-	enum lru_list l;
+	enum lru_list lru;
 
 	if (PageUnevictable(page)) {
 		__ClearPageUnevictable(page);
-		l = LRU_UNEVICTABLE;
+		lru = LRU_UNEVICTABLE;
 	} else {
-		l = page_lru_base_type(page);
+		lru = page_lru_base_type(page);
 		if (PageActive(page)) {
 			__ClearPageActive(page);
-			l += LRU_ACTIVE;
+			lru += LRU_ACTIVE;
 		}
 	}
-	mem_cgroup_lru_del_list(page, l);
+	mem_cgroup_lru_del_list(page, lru);
 	list_del(&page->lru);
-	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, -hpage_nr_pages(page));
 }
 
 /**
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2038b90ca6e3..650ba2fb3301 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -140,23 +140,23 @@ enum lru_list {
 	NR_LRU_LISTS
 };
 
-#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
+#define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++)
 
-#define for_each_evictable_lru(l) for (l = 0; l <= LRU_ACTIVE_FILE; l++)
+#define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++)
 
-static inline int is_file_lru(enum lru_list l)
+static inline int is_file_lru(enum lru_list lru)
 {
-	return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
+	return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE);
 }
 
-static inline int is_active_lru(enum lru_list l)
+static inline int is_active_lru(enum lru_list lru)
 {
-	return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
+	return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
 }
 
-static inline int is_unevictable_lru(enum lru_list l)
+static inline int is_unevictable_lru(enum lru_list lru)
 {
-	return (l == LRU_UNEVICTABLE);
+	return (lru == LRU_UNEVICTABLE);
 }
 
 struct lruvec {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cb5723c491f0..0027d8f4a1bb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4262,7 +4262,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize, memmap_pages;
-		enum lru_list l;
+		enum lru_list lru;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
 		realsize = size - zone_absent_pages_in_node(nid, j,
@@ -4312,8 +4312,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone->zone_pgdat = pgdat;
 
 		zone_pcp_init(zone);
-		for_each_lru(l)
-			INIT_LIST_HEAD(&zone->lruvec.lists[l]);
+		for_each_lru(lru)
+			INIT_LIST_HEAD(&zone->lruvec.lists[lru]);
 		zone->reclaim_stat.recent_rotated[0] = 0;
 		zone->reclaim_stat.recent_rotated[1] = 0;
 		zone->reclaim_stat.recent_scanned[0] = 0;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7724fb8e7498..01466bf783fd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1920,7 +1920,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
 	unsigned long ap, fp;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 	u64 fraction[2], denominator;
-	enum lru_list l;
+	enum lru_list lru;
 	int noswap = 0;
 	bool force_scan = false;
 
@@ -2010,18 +2010,18 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
 	fraction[1] = fp;
 	denominator = ap + fp + 1;
 out:
-	for_each_evictable_lru(l) {
-		int file = is_file_lru(l);
+	for_each_evictable_lru(lru) {
+		int file = is_file_lru(lru);
 		unsigned long scan;
 
-		scan = zone_nr_lru_pages(mz, l);
+		scan = zone_nr_lru_pages(mz, lru);
 		if (priority || noswap) {
 			scan >>= priority;
 			if (!scan && force_scan)
 				scan = SWAP_CLUSTER_MAX;
 			scan = div64_u64(scan * fraction[file], denominator);
 		}
-		nr[l] = scan;
+		nr[lru] = scan;
 	}
 }
 
@@ -2097,7 +2097,7 @@ static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
 {
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
-	enum lru_list l;
+	enum lru_list lru;
 	unsigned long nr_reclaimed, nr_scanned;
 	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
 	struct blk_plug plug;
@@ -2110,13 +2110,13 @@ restart:
 	blk_start_plug(&plug);
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 					nr[LRU_INACTIVE_FILE]) {
-		for_each_evictable_lru(l) {
-			if (nr[l]) {
+		for_each_evictable_lru(lru) {
+			if (nr[lru]) {
 				nr_to_scan = min_t(unsigned long,
-						   nr[l], SWAP_CLUSTER_MAX);
-				nr[l] -= nr_to_scan;
+						   nr[lru], SWAP_CLUSTER_MAX);
+				nr[lru] -= nr_to_scan;
 
-				nr_reclaimed += shrink_list(l, nr_to_scan,
+				nr_reclaimed += shrink_list(lru, nr_to_scan,
 							    mz, sc, priority);
 			}
 		}
-- 
cgit v1.2.3


From 1c1c53d43b387d02174911ecb42ce846577b0ea0 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 12 Jan 2012 17:20:04 -0800
Subject: mm: remove del_page_from_lru, add page_off_lru

del_page_from_lru() repeats del_page_from_lru_list(), also working out
which LRU the page was on, clearing the relevant bits.  Decouple those
functions: remove del_page_from_lru() and add page_off_lru().

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_inline.h | 15 +++++++++------
 mm/swap.c                 |  4 ++--
 2 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 8f84d2e53d0f..227fd3e9a9c9 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -54,8 +54,14 @@ static inline enum lru_list page_lru_base_type(struct page *page)
 	return LRU_INACTIVE_ANON;
 }
 
-static inline void
-del_page_from_lru(struct zone *zone, struct page *page)
+/**
+ * page_off_lru - which LRU list was page on? clearing its lru flags.
+ * @page: the page to test
+ *
+ * Returns the LRU list a page was on, as an index into the array of LRU
+ * lists; and clears its Unevictable or Active flags, ready for freeing.
+ */
+static inline enum lru_list page_off_lru(struct page *page)
 {
 	enum lru_list lru;
 
@@ -69,9 +75,7 @@ del_page_from_lru(struct zone *zone, struct page *page)
 			lru += LRU_ACTIVE;
 		}
 	}
-	mem_cgroup_lru_del_list(page, lru);
-	list_del(&page->lru);
-	__mod_zone_page_state(zone, NR_LRU_BASE + lru, -hpage_nr_pages(page));
+	return lru;
 }
 
 /**
@@ -92,7 +96,6 @@ static inline enum lru_list page_lru(struct page *page)
 		if (PageActive(page))
 			lru += LRU_ACTIVE;
 	}
-
 	return lru;
 }
 
diff --git a/mm/swap.c b/mm/swap.c
index 0d1b24b3fa87..b0f529b38979 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -53,7 +53,7 @@ static void __page_cache_release(struct page *page)
 		spin_lock_irqsave(&zone->lru_lock, flags);
 		VM_BUG_ON(!PageLRU(page));
 		__ClearPageLRU(page);
-		del_page_from_lru(zone, page);
+		del_page_from_lru_list(zone, page, page_off_lru(page));
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
 }
@@ -617,7 +617,7 @@ void release_pages(struct page **pages, int nr, int cold)
 			}
 			VM_BUG_ON(!PageLRU(page));
 			__ClearPageLRU(page);
-			del_page_from_lru(zone, page);
+			del_page_from_lru_list(zone, page, page_off_lru(page));
 		}
 
 		list_add(&page->lru, &pages_to_free);
-- 
cgit v1.2.3


From a3dd3323058d281abd584b15ad4c5b65064d7a61 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 12 Jan 2012 17:20:11 -0800
Subject: kexec: remove KMSG_DUMP_KEXEC

KMSG_DUMP_KEXEC is useless because we already save kernel messages inside
/proc/vmcore, and it is unsafe to allow modules to do other stuffs in a
crash dump scenario.

[akpm@linux-foundation.org: fix powerpc build]
Signed-off-by: WANG Cong <xiyou.wangcong@gmail.com>
Reported-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Jarod Wilson <jarod@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/platforms/pseries/nvram.c | 1 -
 drivers/char/ramoops.c                 | 3 +--
 drivers/mtd/mtdoops.c                  | 3 +--
 include/linux/kmsg_dump.h              | 1 -
 kernel/kexec.c                         | 3 ---
 5 files changed, 2 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 330a57b7c17c..36f957f31842 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -638,7 +638,6 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
 		/* These are almost always orderly shutdowns. */
 		return;
 	case KMSG_DUMP_OOPS:
-	case KMSG_DUMP_KEXEC:
 		break;
 	case KMSG_DUMP_PANIC:
 		panicking = true;
diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c
index 7c7f42a1f880..feda90cdac9f 100644
--- a/drivers/char/ramoops.c
+++ b/drivers/char/ramoops.c
@@ -83,8 +83,7 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper,
 	struct timeval timestamp;
 
 	if (reason != KMSG_DUMP_OOPS &&
-	    reason != KMSG_DUMP_PANIC &&
-	    reason != KMSG_DUMP_KEXEC)
+	    reason != KMSG_DUMP_PANIC)
 		return;
 
 	/* Only dump oopses if dump_oops is set */
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index db8e8272d69b..3ce99e00a49e 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -315,8 +315,7 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper,
 	char *dst;
 
 	if (reason != KMSG_DUMP_OOPS &&
-	    reason != KMSG_DUMP_PANIC &&
-	    reason != KMSG_DUMP_KEXEC)
+	    reason != KMSG_DUMP_PANIC)
 		return;
 
 	/* Only dump oopses if dump_oops is set */
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index ee0c952188de..fee66317e071 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -18,7 +18,6 @@
 enum kmsg_dump_reason {
 	KMSG_DUMP_OOPS,
 	KMSG_DUMP_PANIC,
-	KMSG_DUMP_KEXEC,
 	KMSG_DUMP_RESTART,
 	KMSG_DUMP_HALT,
 	KMSG_DUMP_POWEROFF,
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 090ee10d9604..20ed47ae252f 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -32,7 +32,6 @@
 #include <linux/console.h>
 #include <linux/vmalloc.h>
 #include <linux/swap.h>
-#include <linux/kmsg_dump.h>
 #include <linux/syscore_ops.h>
 
 #include <asm/page.h>
@@ -1094,8 +1093,6 @@ void crash_kexec(struct pt_regs *regs)
 		if (kexec_crash_image) {
 			struct pt_regs fixed_regs;
 
-			kmsg_dump(KMSG_DUMP_KEXEC);
-
 			crash_setup_regs(&fixed_regs, regs);
 			crash_save_vmcoreinfo();
 			machine_crash_shutdown(&fixed_regs);
-- 
cgit v1.2.3


From 1f536b9e9f85456df93614b3c2f6a1a2b7d7cb9b Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Thu, 12 Jan 2012 17:20:20 -0800
Subject: include/linux/crash_dump.h needs elf.h

Building an ARM target we get the following warnings:

  CC      arch/arm/kernel/setup.o
  In file included from arch/arm/kernel/setup.c:39:
  arch/arm/include/asm/elf.h:102:1: warning: "vmcore_elf64_check_arch" redefined
  In file included from arch/arm/kernel/setup.c:24:
  include/linux/crash_dump.h:30:1: warning: this is the location of the previous definition

Quoting Russell King:

"linux/crash_dump.h makes no attempt to include asm/elf.h, but it depends
on stuff in asm/elf.h to determine how stuff inside this file is defined
at parse time.

So, if asm/elf.h is included after linux/crash_dump.h or not at all, you
get a different result from the situation where asm/elf.h is included
before."

So add elf.h header to crash_dump.h to avoid this problem.

The original discussion about this can be found at:
http://www.spinics.net/lists/arm-kernel/msg154113.html

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: <stable@vger.kernel.org>	[3.2.1]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/crash_dump.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 5c4abce94ad1..b936763f2236 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -5,6 +5,7 @@
 #include <linux/kexec.h>
 #include <linux/device.h>
 #include <linux/proc_fs.h>
+#include <linux/elf.h>
 
 #define ELFCORE_ADDR_MAX	(-1ULL)
 #define ELFCORE_ADDR_ERR	(-2ULL)
-- 
cgit v1.2.3


From 87192a2a49c475cf322cb143e0fa63b0102d8567 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 12 Jan 2012 17:20:34 -0800
Subject: vfs: cache request_queue in struct block_device

This makes it possible to get from the inode to the request_queue with one
less cache miss.  Used in followon optimization.

The livetime of the pointer is the same as the gendisk.

This assumes that the queue will always stay the same in the gendisk while
it's visible to block_devices.  I think that's safe correct?

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/block_dev.c     | 3 +++
 include/linux/fs.h | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index afe74dda632b..0e575d1304b4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1139,6 +1139,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
+		bdev->bd_queue = disk->queue;
 		bdev->bd_contains = bdev;
 		if (!partno) {
 			struct backing_dev_info *bdi;
@@ -1159,6 +1160,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 					disk_put_part(bdev->bd_part);
 					bdev->bd_part = NULL;
 					bdev->bd_disk = NULL;
+					bdev->bd_queue = NULL;
 					mutex_unlock(&bdev->bd_mutex);
 					disk_unblock_events(disk);
 					put_disk(disk);
@@ -1232,6 +1234,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	disk_put_part(bdev->bd_part);
 	bdev->bd_disk = NULL;
 	bdev->bd_part = NULL;
+	bdev->bd_queue = NULL;
 	bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
 	if (bdev != bdev->bd_contains)
 		__blkdev_put(bdev->bd_contains, mode, 1);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e694bd4434a4..4bc8169fb5a1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -660,6 +660,7 @@ struct address_space {
 	 * must be enforced here for CRIS, to let the least significant bit
 	 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
 	 */
+struct request_queue;
 
 struct block_device {
 	dev_t			bd_dev;  /* not a kdev_t - it's a search key */
@@ -682,6 +683,7 @@ struct block_device {
 	unsigned		bd_part_count;
 	int			bd_invalidated;
 	struct gendisk *	bd_disk;
+	struct request_queue *  bd_queue;
 	struct list_head	bd_list;
 	/*
 	 * Private data.  You must have bd_claim'ed the block_device
-- 
cgit v1.2.3


From 928da837aca77a9d3cb5076bf07b3224b1ba293b Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Thu, 12 Jan 2012 17:20:39 -0800
Subject: radix_tree: remove radix_tree_indirect_to_ptr()

It is not used anymore, remove it

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 9d4539c52e53..07e360b1b282 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -49,9 +49,6 @@
 #define RADIX_TREE_EXCEPTIONAL_ENTRY	2
 #define RADIX_TREE_EXCEPTIONAL_SHIFT	2
 
-#define radix_tree_indirect_to_ptr(ptr) \
-	radix_tree_indirect_to_ptr((void __force *)(ptr))
-
 static inline int radix_tree_is_indirect_ptr(void *ptr)
 {
 	return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR);
-- 
cgit v1.2.3


From 028ee4be34a09a6d48bdf30ab991ae933a7bc036 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 12 Jan 2012 17:20:55 -0800
Subject: c/r: prctl: add PR_SET_MM codes to set up mm_struct entries

When we restore a task we need to set up text, data and data heap sizes
from userspace to the values a task had at checkpoint time.  This patch
adds auxilary prctl codes for that.

While most of them have a statistical nature (their values are involved
into calculation of /proc/<pid>/statm output) the start_brk and brk values
are used to compute an allowed size of program data segment expansion.
Which means an arbitrary changes of this values might be dangerous
operation.  So to restrict access the following requirements applied to
prctl calls:

 - The process has to have CAP_SYS_ADMIN capability granted.
 - For all opcodes except start_brk/brk members an appropriate
   VMA area must exist and should fit certain VMA flags,
   such as:
   - code segment must be executable but not writable;
   - data segment must not be executable.

start_brk/brk values must not intersect with data segment and must not
exceed RLIMIT_DATA resource limit.

Still the main guard is CAP_SYS_ADMIN capability check.

Note the kernel should be compiled with CONFIG_CHECKPOINT_RESTORE support
otherwise these prctl calls will return -EINVAL.

[akpm@linux-foundation.org: cache current->mm in a local, saving 200 bytes text]
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Vasiliy Kulikov <segoon@openwall.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/prctl.h |  12 +++++
 kernel/sys.c          | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index a3baeb2c2161..7ddc7f1b480f 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -102,4 +102,16 @@
 
 #define PR_MCE_KILL_GET 34
 
+/*
+ * Tune up process memory map specifics.
+ */
+#define PR_SET_MM		35
+# define PR_SET_MM_START_CODE		1
+# define PR_SET_MM_END_CODE		2
+# define PR_SET_MM_START_DATA		3
+# define PR_SET_MM_END_DATA		4
+# define PR_SET_MM_START_STACK		5
+# define PR_SET_MM_START_BRK		6
+# define PR_SET_MM_BRK			7
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index ddf8155bf3f8..40701538fbd1 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1692,6 +1692,124 @@ SYSCALL_DEFINE1(umask, int, mask)
 	return mask;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static int prctl_set_mm(int opt, unsigned long addr,
+			unsigned long arg4, unsigned long arg5)
+{
+	unsigned long rlim = rlimit(RLIMIT_DATA);
+	unsigned long vm_req_flags;
+	unsigned long vm_bad_flags;
+	struct vm_area_struct *vma;
+	int error = 0;
+	struct mm_struct *mm = current->mm;
+
+	if (arg4 | arg5)
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (addr >= TASK_SIZE)
+		return -EINVAL;
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, addr);
+
+	if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) {
+		/* It must be existing VMA */
+		if (!vma || vma->vm_start > addr)
+			goto out;
+	}
+
+	error = -EINVAL;
+	switch (opt) {
+	case PR_SET_MM_START_CODE:
+	case PR_SET_MM_END_CODE:
+		vm_req_flags = VM_READ | VM_EXEC;
+		vm_bad_flags = VM_WRITE | VM_MAYSHARE;
+
+		if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
+		    (vma->vm_flags & vm_bad_flags))
+			goto out;
+
+		if (opt == PR_SET_MM_START_CODE)
+			mm->start_code = addr;
+		else
+			mm->end_code = addr;
+		break;
+
+	case PR_SET_MM_START_DATA:
+	case PR_SET_MM_END_DATA:
+		vm_req_flags = VM_READ | VM_WRITE;
+		vm_bad_flags = VM_EXEC | VM_MAYSHARE;
+
+		if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
+		    (vma->vm_flags & vm_bad_flags))
+			goto out;
+
+		if (opt == PR_SET_MM_START_DATA)
+			mm->start_data = addr;
+		else
+			mm->end_data = addr;
+		break;
+
+	case PR_SET_MM_START_STACK:
+
+#ifdef CONFIG_STACK_GROWSUP
+		vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP;
+#else
+		vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
+#endif
+		if ((vma->vm_flags & vm_req_flags) != vm_req_flags)
+			goto out;
+
+		mm->start_stack = addr;
+		break;
+
+	case PR_SET_MM_START_BRK:
+		if (addr <= mm->end_data)
+			goto out;
+
+		if (rlim < RLIM_INFINITY &&
+		    (mm->brk - addr) +
+		    (mm->end_data - mm->start_data) > rlim)
+			goto out;
+
+		mm->start_brk = addr;
+		break;
+
+	case PR_SET_MM_BRK:
+		if (addr <= mm->end_data)
+			goto out;
+
+		if (rlim < RLIM_INFINITY &&
+		    (addr - mm->start_brk) +
+		    (mm->end_data - mm->start_data) > rlim)
+			goto out;
+
+		mm->brk = addr;
+		break;
+
+	default:
+		error = -EINVAL;
+		goto out;
+	}
+
+	error = 0;
+
+out:
+	up_read(&mm->mmap_sem);
+
+	return error;
+}
+#else /* CONFIG_CHECKPOINT_RESTORE */
+static int prctl_set_mm(int opt, unsigned long addr,
+			unsigned long arg4, unsigned long arg5)
+{
+	return -EINVAL;
+}
+#endif
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		unsigned long, arg4, unsigned long, arg5)
 {
@@ -1841,6 +1959,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			else
 				error = PR_MCE_KILL_DEFAULT;
 			break;
+		case PR_SET_MM:
+			error = prctl_set_mm(arg2, arg3, arg4, arg5);
+			break;
 		default:
 			error = -EINVAL;
 			break;
-- 
cgit v1.2.3


From 6898e3bd11cc9a931ef115eee9000ac9d8f8c3cf Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 13 Jan 2012 08:15:33 +0100
Subject: block: Stop using macro stubs for the bio data integrity calls

Replace preprocessor macro stubs with real function declarations to
prevent warnings when CONFIG_BLK_DEV_INTEGRITY is disabled.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 66 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 53 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 847994aef0e9..129a9c097958 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -515,24 +515,64 @@ extern void bio_integrity_init(void);
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
-#define bio_integrity(a)		(0)
-#define bioset_integrity_create(a, b)	(0)
-#define bio_integrity_prep(a)		(0)
-#define bio_integrity_enabled(a)	(0)
+static inline int bio_integrity(struct bio *bio)
+{
+	return 0;
+}
+
+static inline int bio_integrity_enabled(struct bio *bio)
+{
+	return 0;
+}
+
+static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
+{
+	return 0;
+}
+
+static inline void bioset_integrity_free (struct bio_set *bs)
+{
+	return;
+}
+
+static inline int bio_integrity_prep(struct bio *bio)
+{
+	return 0;
+}
+
+static inline void bio_integrity_free(struct bio *bio, struct bio_set *bs)
+{
+	return;
+}
+
 static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
 				      gfp_t gfp_mask, struct bio_set *bs)
 {
 	return 0;
 }
-#define bioset_integrity_free(a)	do { } while (0)
-#define bio_integrity_free(a, b)	do { } while (0)
-#define bio_integrity_endio(a, b)	do { } while (0)
-#define bio_integrity_advance(a, b)	do { } while (0)
-#define bio_integrity_trim(a, b, c)	do { } while (0)
-#define bio_integrity_split(a, b, c)	do { } while (0)
-#define bio_integrity_set_tag(a, b, c)	do { } while (0)
-#define bio_integrity_get_tag(a, b, c)	do { } while (0)
-#define bio_integrity_init(a)		do { } while (0)
+
+static inline void bio_integrity_split(struct bio *bio, struct bio_pair *bp,
+				       int sectors)
+{
+	return;
+}
+
+static inline void bio_integrity_advance(struct bio *bio,
+					 unsigned int bytes_done)
+{
+	return;
+}
+
+static inline void bio_integrity_trim(struct bio *bio, unsigned int offset,
+				      unsigned int sectors)
+{
+	return;
+}
+
+static inline void bio_integrity_init(void)
+{
+	return;
+}
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
-- 
cgit v1.2.3


From 7c7c7f01cc5e3e423120a4848a73dd5e4586f2f9 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 11 Jan 2012 19:30:38 +0000
Subject: vhost-net: add module alias (v2.1)

By adding some module aliases, programs (or users) won't have to explicitly
call modprobe. Vhost-net will always be available if built into the kernel.
It does require assigning a permanent minor number for depmod to work.

Also:
  - use C99 style initialization.
  - add missing entry in documentation for loop-control

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-By: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devices.txt  | 3 +++
 drivers/vhost/net.c        | 8 +++++---
 include/linux/miscdevice.h | 1 +
 3 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devices.txt b/Documentation/devices.txt
index cec8864ce4e8..00383186d8fb 100644
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -447,6 +447,9 @@ Your cooperation is appreciated.
 		234 = /dev/btrfs-control	Btrfs control device
 		235 = /dev/autofs	Autofs control device
 		236 = /dev/mapper/control	Device-Mapper control device
+		237 = /dev/loop-control Loopback control device
+		238 = /dev/vhost-net	Host kernel accelerator for virtio net
+
 		240-254			Reserved for local use
 		255			Reserved for MISC_DYNAMIC_MINOR
 
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 882a51fe7b3c..9dab1f51dd43 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -856,9 +856,9 @@ static const struct file_operations vhost_net_fops = {
 };
 
 static struct miscdevice vhost_net_misc = {
-	MISC_DYNAMIC_MINOR,
-	"vhost-net",
-	&vhost_net_fops,
+	.minor = VHOST_NET_MINOR,
+	.name = "vhost-net",
+	.fops = &vhost_net_fops,
 };
 
 static int vhost_net_init(void)
@@ -879,3 +879,5 @@ MODULE_VERSION("0.0.1");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Michael S. Tsirkin");
 MODULE_DESCRIPTION("Host kernel accelerator for virtio net");
+MODULE_ALIAS_MISCDEV(VHOST_NET_MINOR);
+MODULE_ALIAS("devname:vhost-net");
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 32085249e9cb..0549d2115507 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -42,6 +42,7 @@
 #define AUTOFS_MINOR		235
 #define MAPPER_CTRL_MINOR	236
 #define LOOP_CTRL_MINOR		237
+#define VHOST_NET_MINOR		238
 #define MISC_DYNAMIC_MINOR	255
 
 struct device;
-- 
cgit v1.2.3


From 577ebb374c78314ac4617242f509e2f5e7156649 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 12 Jan 2012 16:01:27 +0100
Subject: block: add and use scsi_blk_cmd_ioctl

Introduce a wrapper around scsi_cmd_ioctl that takes a block device.

The function will then be enhanced to detect partition block devices
and, in that case, subject the ioctls to whitelisting.

Cc: linux-scsi@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Cc: James Bottomley <JBottomley@parallels.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 block/scsi_ioctl.c             | 7 +++++++
 drivers/block/cciss.c          | 6 +++---
 drivers/block/ub.c             | 3 +--
 drivers/block/virtio_blk.c     | 4 ++--
 drivers/cdrom/cdrom.c          | 3 +--
 drivers/ide/ide-floppy_ioctl.c | 3 +--
 drivers/scsi/sd.c              | 2 +-
 include/linux/blkdev.h         | 2 ++
 8 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index fbdf0d802ec4..a2c11f330872 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -690,6 +690,13 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
 }
 EXPORT_SYMBOL(scsi_cmd_ioctl);
 
+int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
+		       unsigned int cmd, void __user *arg)
+{
+	return scsi_cmd_ioctl(bd->bd_disk->queue, bd->bd_disk, mode, cmd, arg);
+}
+EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
+
 static int __init blk_scsi_ioctl_init(void)
 {
 	blk_set_cmd_filter_defaults(&blk_default_cmd_filter);
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 587cce57adae..b0f553b26d0f 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1735,7 +1735,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 	case CCISS_BIG_PASSTHRU:
 		return cciss_bigpassthru(h, argp);
 
-	/* scsi_cmd_ioctl handles these, below, though some are not */
+	/* scsi_cmd_blk_ioctl handles these, below, though some are not */
 	/* very meaningful for cciss.  SG_IO is the main one people want. */
 
 	case SG_GET_VERSION_NUM:
@@ -1746,9 +1746,9 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 	case SG_EMULATED_HOST:
 	case SG_IO:
 	case SCSI_IOCTL_SEND_COMMAND:
-		return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
+		return scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
 
-	/* scsi_cmd_ioctl would normally handle these, below, but */
+	/* scsi_cmd_blk_ioctl would normally handle these, below, but */
 	/* they aren't a good fit for cciss, as CD-ROMs are */
 	/* not supported, and we don't have any bus/target/lun */
 	/* which we present to the kernel. */
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 0e376d46bdd1..7333b9e44411 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -1744,12 +1744,11 @@ static int ub_bd_release(struct gendisk *disk, fmode_t mode)
 static int ub_bd_ioctl(struct block_device *bdev, fmode_t mode,
     unsigned int cmd, unsigned long arg)
 {
-	struct gendisk *disk = bdev->bd_disk;
 	void __user *usermem = (void __user *) arg;
 	int ret;
 
 	mutex_lock(&ub_mutex);
-	ret = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, usermem);
+	ret = scsi_cmd_blk_ioctl(bdev, mode, cmd, usermem);
 	mutex_unlock(&ub_mutex);
 
 	return ret;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index ffd5ca919295..c4a60badf252 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -250,8 +250,8 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
 		return -ENOTTY;
 
-	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
-			      (void __user *)data);
+	return scsi_cmd_blk_ioctl(bdev, mode, cmd,
+				  (void __user *)data);
 }
 
 /* We provide getgeo only to please some old bootloader/partitioning tools */
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 1bbf7645a97c..55eaf474d32c 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2746,12 +2746,11 @@ int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
 {
 	void __user *argp = (void __user *)arg;
 	int ret;
-	struct gendisk *disk = bdev->bd_disk;
 
 	/*
 	 * Try the generic SCSI command ioctl's first.
 	 */
-	ret = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
+	ret = scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
 	if (ret != -ENOTTY)
 		return ret;
 
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index d267b7affad6..a22ca8467010 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -292,8 +292,7 @@ int ide_floppy_ioctl(ide_drive_t *drive, struct block_device *bdev,
 	 * and CDROM_SEND_PACKET (legacy) ioctls
 	 */
 	if (cmd != CDROM_SEND_PACKET && cmd != SCSI_IOCTL_SEND_COMMAND)
-		err = scsi_cmd_ioctl(bdev->bd_disk->queue, bdev->bd_disk,
-				mode, cmd, argp);
+		err = scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
 
 	if (err == -ENOTTY)
 		err = generic_ide_ioctl(drive, bdev, cmd, arg);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 7b3f8075e2a5..b4d57bb04c72 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1097,7 +1097,7 @@ static int sd_ioctl(struct block_device *bdev, fmode_t mode,
 			error = scsi_ioctl(sdp, cmd, p);
 			break;
 		default:
-			error = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, p);
+			error = scsi_cmd_blk_ioctl(bdev, mode, cmd, p);
 			if (error != -ENOTTY)
 				break;
 			error = scsi_ioctl(sdp, cmd, p);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 94acd8172b5b..ca7b869508c7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -675,6 +675,8 @@ extern int blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
+extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
+			      unsigned int, void __user *);
 extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 			  unsigned int, void __user *);
 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
-- 
cgit v1.2.3


From 0bfc96cb77224736dfa35c3c555d37b3646ef35e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 12 Jan 2012 16:01:28 +0100
Subject: block: fail SCSI passthrough ioctls on partition devices

Linux allows executing the SG_IO ioctl on a partition or LVM volume, and
will pass the command to the underlying block device.  This is
well-known, but it is also a large security problem when (via Unix
permissions, ACLs, SELinux or a combination thereof) a program or user
needs to be granted access only to part of the disk.

This patch lets partitions forward a small set of harmless ioctls;
others are logged with printk so that we can see which ioctls are
actually sent.  In my tests only CDROM_GET_CAPABILITY actually occurred.
Of course it was being sent to a (partition on a) hard disk, so it would
have failed with ENOTTY and the patch isn't changing anything in
practice.  Still, I'm treating it specially to avoid spamming the logs.

In principle, this restriction should include programs running with
CAP_SYS_RAWIO.  If for example I let a program access /dev/sda2 and
/dev/sdb, it still should not be able to read/write outside the
boundaries of /dev/sda2 independent of the capabilities.  However, for
now programs with CAP_SYS_RAWIO will still be allowed to send the
ioctls.  Their actions will still be logged.

This patch does not affect the non-libata IDE driver.  That driver
however already tests for bd != bd->bd_contains before issuing some
ioctl; it could be restricted further to forbid these ioctls even for
programs running with CAP_SYS_ADMIN/CAP_SYS_RAWIO.

Cc: linux-scsi@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Cc: James Bottomley <JBottomley@parallels.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[ Make it also print the command name when warning - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 block/scsi_ioctl.c     | 45 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/sd.c      | 11 +++++++++--
 include/linux/blkdev.h |  1 +
 3 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index a2c11f330872..260fa80ef575 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -24,6 +24,7 @@
 #include <linux/capability.h>
 #include <linux/completion.h>
 #include <linux/cdrom.h>
+#include <linux/ratelimit.h>
 #include <linux/slab.h>
 #include <linux/times.h>
 #include <asm/uaccess.h>
@@ -690,9 +691,53 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
 }
 EXPORT_SYMBOL(scsi_cmd_ioctl);
 
+int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd)
+{
+	if (bd && bd == bd->bd_contains)
+		return 0;
+
+	/* Actually none of these is particularly useful on a partition,
+	 * but they are safe.
+	 */
+	switch (cmd) {
+	case SCSI_IOCTL_GET_IDLUN:
+	case SCSI_IOCTL_GET_BUS_NUMBER:
+	case SCSI_IOCTL_GET_PCI:
+	case SCSI_IOCTL_PROBE_HOST:
+	case SG_GET_VERSION_NUM:
+	case SG_SET_TIMEOUT:
+	case SG_GET_TIMEOUT:
+	case SG_GET_RESERVED_SIZE:
+	case SG_SET_RESERVED_SIZE:
+	case SG_EMULATED_HOST:
+		return 0;
+	case CDROM_GET_CAPABILITY:
+		/* Keep this until we remove the printk below.  udev sends it
+		 * and we do not want to spam dmesg about it.   CD-ROMs do
+		 * not have partitions, so we get here only for disks.
+		 */
+		return -ENOIOCTLCMD;
+	default:
+		break;
+	}
+
+	/* In particular, rule out all resets and host-specific ioctls.  */
+	printk_ratelimited(KERN_WARNING
+			   "%s: sending ioctl %x to a partition!\n", current->comm, cmd);
+
+	return capable(CAP_SYS_RAWIO) ? 0 : -ENOIOCTLCMD;
+}
+EXPORT_SYMBOL(scsi_verify_blk_ioctl);
+
 int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
 		       unsigned int cmd, void __user *arg)
 {
+	int ret;
+
+	ret = scsi_verify_blk_ioctl(bd, cmd);
+	if (ret < 0)
+		return ret;
+
 	return scsi_cmd_ioctl(bd->bd_disk->queue, bd->bd_disk, mode, cmd, arg);
 }
 EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b4d57bb04c72..c691fb50e6cb 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1075,6 +1075,10 @@ static int sd_ioctl(struct block_device *bdev, fmode_t mode,
 	SCSI_LOG_IOCTL(1, sd_printk(KERN_INFO, sdkp, "sd_ioctl: disk=%s, "
 				    "cmd=0x%x\n", disk->disk_name, cmd));
 
+	error = scsi_verify_blk_ioctl(bdev, cmd);
+	if (error < 0)
+		return error;
+
 	/*
 	 * If we are in the middle of error recovery, don't let anyone
 	 * else try and use this device.  Also, if error recovery fails, it
@@ -1267,6 +1271,11 @@ static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode,
 			   unsigned int cmd, unsigned long arg)
 {
 	struct scsi_device *sdev = scsi_disk(bdev->bd_disk)->device;
+	int ret;
+
+	ret = scsi_verify_blk_ioctl(bdev, cmd);
+	if (ret < 0)
+		return ret;
 
 	/*
 	 * If we are in the middle of error recovery, don't let anyone
@@ -1278,8 +1287,6 @@ static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode,
 		return -ENODEV;
 	       
 	if (sdev->host->hostt->compat_ioctl) {
-		int ret;
-
 		ret = sdev->host->hostt->compat_ioctl(sdev, cmd, (void __user *)arg);
 
 		return ret;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ca7b869508c7..0ed1eb062313 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -675,6 +675,7 @@ extern int blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
+extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
 extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
 			      unsigned int, void __user *);
 extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
-- 
cgit v1.2.3


From 9bf04646b0b41c5438ed8a27c5f8dbe0ff40d756 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 15 Jan 2012 16:57:12 +0100
Subject: netfilter: revert user-space expectation helper support

This patch partially reverts:
3d058d7 netfilter: rework user-space expectation helper support
that was applied during the 3.2 development cycle.

After this patch, the tree remains just like before patch bc01bef,
that initially added the preliminary infrastructure.

I decided to partially revert this patch because the approach
that I proposed to resolve this problem is broken in NAT setups.
Moreover, a new infrastructure will be submitted for the 3.3.x
development cycle that resolve the existing issues while
providing a neat solution.

Since nobody has been seriously using this infrastructure in
user-space, the removal of this feature should affect any know
FOSS project (to my knowledge).

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h |  4 ----
 include/linux/netfilter/xt_CT.h               |  3 +--
 net/netfilter/nf_conntrack_helper.c           | 12 ------------
 net/netfilter/nf_conntrack_netlink.c          |  4 ----
 net/netfilter/xt_CT.c                         |  8 +++-----
 5 files changed, 4 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 9e3a2838291b..0d3dd66322ec 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -83,10 +83,6 @@ enum ip_conntrack_status {
 	/* Conntrack is a fake untracked entry */
 	IPS_UNTRACKED_BIT = 12,
 	IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
-
-	/* Conntrack has a userspace helper. */
-	IPS_USERSPACE_HELPER_BIT = 13,
-	IPS_USERSPACE_HELPER = (1 << IPS_USERSPACE_HELPER_BIT),
 };
 
 /* Connection tracking event types */
diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h
index 6390f0992f36..b56e76811c04 100644
--- a/include/linux/netfilter/xt_CT.h
+++ b/include/linux/netfilter/xt_CT.h
@@ -3,8 +3,7 @@
 
 #include <linux/types.h>
 
-#define XT_CT_NOTRACK		0x1
-#define XT_CT_USERSPACE_HELPER	0x2
+#define XT_CT_NOTRACK	0x1
 
 struct xt_ct_target_info {
 	__u16 flags;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 299fec91f741..bbe23baa19b6 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -121,18 +121,6 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
 	int ret = 0;
 
 	if (tmpl != NULL) {
-		/* we've got a userspace helper. */
-		if (tmpl->status & IPS_USERSPACE_HELPER) {
-			help = nf_ct_helper_ext_add(ct, flags);
-			if (help == NULL) {
-				ret = -ENOMEM;
-				goto out;
-			}
-			rcu_assign_pointer(help->helper, NULL);
-			__set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
-			ret = 0;
-			goto out;
-		}
 		help = nfct_help(tmpl);
 		if (help != NULL)
 			helper = help->helper;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2a4834b83332..9307b033c0c9 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2042,10 +2042,6 @@ ctnetlink_create_expect(struct net *net, u16 zone,
 	}
 	help = nfct_help(ct);
 	if (!help) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
-	if (test_bit(IPS_USERSPACE_HELPER_BIT, &ct->status)) {
 		if (!cda[CTA_EXPECT_TIMEOUT]) {
 			err = -EINVAL;
 			goto out;
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 8e87123f1373..0221d10de75a 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -62,8 +62,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 	int ret = 0;
 	u8 proto;
 
-	if (info->flags & ~(XT_CT_NOTRACK | XT_CT_USERSPACE_HELPER))
-		return -EOPNOTSUPP;
+	if (info->flags & ~XT_CT_NOTRACK)
+		return -EINVAL;
 
 	if (info->flags & XT_CT_NOTRACK) {
 		ct = nf_ct_untracked_get();
@@ -92,9 +92,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 				  GFP_KERNEL))
 		goto err3;
 
-	if (info->flags & XT_CT_USERSPACE_HELPER) {
-		__set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
-	} else if (info->helper[0]) {
+	if (info->helper[0]) {
 		ret = -ENOENT;
 		proto = xt_ct_find_proto(par);
 		if (!proto) {
-- 
cgit v1.2.3


From 7061ca3b6c99fc78115560b9a10227c8c5fafc45 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 20 Dec 2011 08:20:46 -0800
Subject: sched: Add "const" to is_idle_task() parameter

This patch fixes a build warning in -next due to a const pointer being
passed to is_idle_task().  Because is_idle_task() does not modify anything,
this commit adds the "const" to is_idle_task()'s argument declaration.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4a7e4d333a27..56fa25a5b1eb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2074,7 +2074,7 @@ extern struct task_struct *idle_task(int cpu);
  * is_idle_task - is the specified task an idle task?
  * @tsk: the task in question.
  */
-static inline bool is_idle_task(struct task_struct *p)
+static inline bool is_idle_task(const struct task_struct *p)
 {
 	return p->pid == 0;
 }
-- 
cgit v1.2.3


From b54ac6d2a25084667da781c7ca2cebef52a2bcdd Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Thu, 8 Dec 2011 11:25:49 +0800
Subject: ACPI, Record ACPI NVS regions

Some firmware will access memory in ACPI NVS region via APEI.  That
is, instructions in APEI ERST/EINJ table will read/write ACPI NVS
region.  The original resource conflict checking in APEI code will
check memory/ioport accessed by APEI via general resource management
mechanism.  But ACPI NVS region is marked as busy already, so that the
false resource conflict will prevent APEI ERST/EINJ to work.

To fix this, this patch record ACPI NVS regions, so that we can avoid
request resources for memory region inside it.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/e820.c |  4 ++--
 drivers/acpi/Makefile  |  3 ++-
 drivers/acpi/nvs.c     | 53 +++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/acpi.h   | 20 +++++++++++++------
 4 files changed, 70 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 303a0e48f076..51c3b186e5b9 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -714,7 +714,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
 }
 #endif
 
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_ACPI
 /**
  * Mark ACPI NVS memory region, so that we can save/restore it during
  * hibernation and the subsequent resume.
@@ -727,7 +727,7 @@ static int __init e820_mark_nvs_memory(void)
 		struct e820entry *ei = &e820.map[i];
 
 		if (ei->type == E820_NVS)
-			suspend_nvs_register(ei->addr, ei->size);
+			acpi_nvs_register(ei->addr, ei->size);
 	}
 
 	return 0;
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index ecb26b4f29a0..c07f44f05f9d 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -20,11 +20,12 @@ obj-y				+= acpi.o \
 # All the builtin files are in the "acpi." module_param namespace.
 acpi-y				+= osl.o utils.o reboot.o
 acpi-y				+= atomicio.o
+acpi-y				+= nvs.o
 
 # sleep related files
 acpi-y				+= wakeup.o
 acpi-y				+= sleep.o
-acpi-$(CONFIG_ACPI_SLEEP)	+= proc.o nvs.o
+acpi-$(CONFIG_ACPI_SLEEP)	+= proc.o
 
 
 #
diff --git a/drivers/acpi/nvs.c b/drivers/acpi/nvs.c
index 096787b43c96..7a2035fa8c71 100644
--- a/drivers/acpi/nvs.c
+++ b/drivers/acpi/nvs.c
@@ -15,6 +15,56 @@
 #include <linux/acpi_io.h>
 #include <acpi/acpiosxf.h>
 
+/* ACPI NVS regions, APEI may use it */
+
+struct nvs_region {
+	__u64 phys_start;
+	__u64 size;
+	struct list_head node;
+};
+
+static LIST_HEAD(nvs_region_list);
+
+#ifdef CONFIG_ACPI_SLEEP
+static int suspend_nvs_register(unsigned long start, unsigned long size);
+#else
+static inline int suspend_nvs_register(unsigned long a, unsigned long b)
+{
+	return 0;
+}
+#endif
+
+int acpi_nvs_register(__u64 start, __u64 size)
+{
+	struct nvs_region *region;
+
+	region = kmalloc(sizeof(*region), GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+	region->phys_start = start;
+	region->size = size;
+	list_add_tail(&region->node, &nvs_region_list);
+
+	return suspend_nvs_register(start, size);
+}
+
+int acpi_nvs_for_each_region(int (*func)(__u64 start, __u64 size, void *data),
+			     void *data)
+{
+	int rc;
+	struct nvs_region *region;
+
+	list_for_each_entry(region, &nvs_region_list, node) {
+		rc = func(region->phys_start, region->size, data);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+
+#ifdef CONFIG_ACPI_SLEEP
 /*
  * Platforms, like ACPI, may want us to save some memory used by them during
  * suspend and to restore the contents of this memory during the subsequent
@@ -41,7 +91,7 @@ static LIST_HEAD(nvs_list);
  *	things so that the data from page-aligned addresses in this region will
  *	be copied into separate RAM pages.
  */
-int suspend_nvs_register(unsigned long start, unsigned long size)
+static int suspend_nvs_register(unsigned long start, unsigned long size)
 {
 	struct nvs_page *entry, *next;
 
@@ -159,3 +209,4 @@ void suspend_nvs_restore(void)
 		if (entry->data)
 			memcpy(entry->kaddr, entry->data, entry->size);
 }
+#endif
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6001b4da39dd..26b75442ff7a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -306,6 +306,11 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
 					     u32 *mask, u32 req);
 extern void acpi_early_init(void);
 
+extern int acpi_nvs_register(__u64 start, __u64 size);
+
+extern int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *),
+				    void *data);
+
 #else	/* !CONFIG_ACPI */
 
 #define acpi_disabled 1
@@ -348,15 +353,18 @@ static inline int acpi_table_parse(char *id,
 {
 	return -1;
 }
-#endif	/* !CONFIG_ACPI */
 
-#ifdef CONFIG_ACPI_SLEEP
-int suspend_nvs_register(unsigned long start, unsigned long size);
-#else
-static inline int suspend_nvs_register(unsigned long a, unsigned long b)
+static inline int acpi_nvs_register(__u64 start, __u64 size)
 {
 	return 0;
 }
-#endif
+
+static inline int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *),
+					   void *data)
+{
+	return 0;
+}
+
+#endif	/* !CONFIG_ACPI */
 
 #endif	/*_LINUX_ACPI_H*/
-- 
cgit v1.2.3


From 6f68c91c55ea3576d366797fa8d45e31c4aa79f8 Mon Sep 17 00:00:00 2001
From: Myron Stowe <mstowe@redhat.com>
Date: Mon, 7 Nov 2011 16:23:34 -0700
Subject: ACPI: Export interfaces for ioremapping/iounmapping ACPI registers

Export remapping and unmapping interfaces - acpi_os_map_generic_address()
and acpi_os_unmap_generic_address() - for ACPI generic registers that are
backed by memory mapped I/O (MMIO).

The acpi_os_map_generic_address() and acpi_os_unmap_generic_address()
declarations may more properly belong in include/acpi/acpiosxf.h next to
acpi_os_read_memory() but I believe that would require the ACPI CA making
them an official part of the ACPI CA - OS interface.

ACPI Generic Address Structure (GAS) reference (ACPI's fixed/generic
hardware registers use the GAS format):
  ACPI Specification, Revision 4.0, Section 5.2.3.1, "Generic Address
  Structure"

Signed-off-by: Myron Stowe <myron.stowe@redhat.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/osl.c      | 6 ++++--
 include/linux/acpi_io.h | 3 +++
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 2e285cdbefb1..b11f2676f7c9 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -431,7 +431,7 @@ void __init early_acpi_os_unmap_memory(void __iomem *virt, acpi_size size)
 		__acpi_unmap_table(virt, size);
 }
 
-static int acpi_os_map_generic_address(struct acpi_generic_address *gas)
+int acpi_os_map_generic_address(struct acpi_generic_address *gas)
 {
 	u64 addr;
 	void __iomem *virt;
@@ -450,8 +450,9 @@ static int acpi_os_map_generic_address(struct acpi_generic_address *gas)
 
 	return 0;
 }
+EXPORT_SYMBOL(acpi_os_map_generic_address);
 
-static void acpi_os_unmap_generic_address(struct acpi_generic_address *gas)
+void acpi_os_unmap_generic_address(struct acpi_generic_address *gas)
 {
 	u64 addr;
 	struct acpi_ioremap *map;
@@ -475,6 +476,7 @@ static void acpi_os_unmap_generic_address(struct acpi_generic_address *gas)
 
 	acpi_os_map_cleanup(map);
 }
+EXPORT_SYMBOL(acpi_os_unmap_generic_address);
 
 #ifdef ACPI_FUTURE_USAGE
 acpi_status
diff --git a/include/linux/acpi_io.h b/include/linux/acpi_io.h
index 4afd7102459d..b0ffa219993e 100644
--- a/include/linux/acpi_io.h
+++ b/include/linux/acpi_io.h
@@ -12,4 +12,7 @@ static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
 
 void __iomem *acpi_os_get_iomem(acpi_physical_address phys, unsigned int size);
 
+int acpi_os_map_generic_address(struct acpi_generic_address *addr);
+void acpi_os_unmap_generic_address(struct acpi_generic_address *addr);
+
 #endif
-- 
cgit v1.2.3


From 7d5869e78f4c9d32f834dadefbb7dcb3c9d4d85f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 13 Jan 2012 23:58:41 +0100
Subject: bcma: connect the bcma bus suspend/resume to the bcma driver
 suspend/resume

Now the low-level driver actually gets informed that it is getting suspended and resumed.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/main.c                                   | 19 +++++++++++++++++++
 drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c |  2 +-
 include/linux/bcma/bcma.h                             |  2 +-
 3 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index b711d9d634a7..febbc0a1222a 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -243,6 +243,16 @@ int __init bcma_bus_early_register(struct bcma_bus *bus,
 #ifdef CONFIG_PM
 int bcma_bus_suspend(struct bcma_bus *bus)
 {
+	struct bcma_device *core;
+
+	list_for_each_entry(core, &bus->cores, list) {
+		struct device_driver *drv = core->dev.driver;
+		if (drv) {
+			struct bcma_driver *adrv = container_of(drv, struct bcma_driver, drv);
+			if (adrv->suspend)
+				adrv->suspend(core);
+		}
+	}
 	return 0;
 }
 
@@ -257,6 +267,15 @@ int bcma_bus_resume(struct bcma_bus *bus)
 		bcma_core_chipcommon_init(&bus->drv_cc);
 	}
 
+	list_for_each_entry(core, &bus->cores, list) {
+		struct device_driver *drv = core->dev.driver;
+		if (drv) {
+			struct bcma_driver *adrv = container_of(drv, struct bcma_driver, drv);
+			if (adrv->resume)
+				adrv->resume(core);
+		}
+	}
+
 	return 0;
 }
 #endif
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
index d106576ce338..213130afdaf7 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
@@ -1135,7 +1135,7 @@ static int brcms_pci_suspend(struct pci_dev *pdev)
 	return pci_set_power_state(pdev, PCI_D3hot);
 }
 
-static int brcms_suspend(struct bcma_device *pdev, pm_message_t state)
+static int brcms_suspend(struct bcma_device *pdev)
 {
 	struct brcms_info *wl;
 	struct ieee80211_hw *hw;
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index f4b8346b1a33..83c209f39493 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -162,7 +162,7 @@ struct bcma_driver {
 
 	int (*probe)(struct bcma_device *dev);
 	void (*remove)(struct bcma_device *dev);
-	int (*suspend)(struct bcma_device *dev, pm_message_t state);
+	int (*suspend)(struct bcma_device *dev);
 	int (*resume)(struct bcma_device *dev);
 	void (*shutdown)(struct bcma_device *dev);
 
-- 
cgit v1.2.3


From 20c300b10c358daa507be335aec6aa3987ef425a Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 17 Jan 2012 12:54:01 +0400
Subject: tty: remove unused tty_driver->termios_locked

This field is unused since 2.6.28 (commit fe6e29fdb1a7: "tty: simplify
ktermios allocation", to be exact)

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tty_driver.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index ecdaeb98b293..5cf685086dd3 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -312,7 +312,6 @@ struct tty_driver {
 	 */
 	struct tty_struct **ttys;
 	struct ktermios **termios;
-	struct ktermios **termios_locked;
 	void *driver_state;
 
 	/*
-- 
cgit v1.2.3


From 85e7bac33b8d5edafc4e219c7dfdb3d48e0b4e31 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:05 -0500
Subject: seccomp: audit abnormal end to a process due to seccomp

The audit system likes to collect information about processes that end
abnormally (SIGSEGV) as this may me useful intrusion detection information.
This patch adds audit support to collect information when seccomp forces a
task to exit because of misbehavior in a similar way.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  8 ++++++++
 kernel/auditsc.c      | 50 +++++++++++++++++++++++++++++---------------------
 kernel/seccomp.c      |  2 ++
 3 files changed, 39 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 426ab9f4dd85..6e1c533f9b46 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -430,6 +430,7 @@ extern void audit_putname(const char *name);
 extern void __audit_inode(const char *name, const struct dentry *dentry);
 extern void __audit_inode_child(const struct dentry *dentry,
 				const struct inode *parent);
+extern void __audit_seccomp(unsigned long syscall);
 extern void __audit_ptrace(struct task_struct *t);
 
 static inline int audit_dummy_context(void)
@@ -453,6 +454,12 @@ static inline void audit_inode_child(const struct dentry *dentry,
 }
 void audit_core_dumps(long signr);
 
+static inline void audit_seccomp(unsigned long syscall)
+{
+	if (unlikely(!audit_dummy_context()))
+		__audit_seccomp(syscall);
+}
+
 static inline void audit_ptrace(struct task_struct *t)
 {
 	if (unlikely(!audit_dummy_context()))
@@ -558,6 +565,7 @@ extern int audit_signals;
 #define audit_inode(n,d) do { (void)(d); } while (0)
 #define audit_inode_child(i,p) do { ; } while (0)
 #define audit_core_dumps(i) do { ; } while (0)
+#define audit_seccomp(i) do { ; } while (0)
 #define auditsc_get_stamp(c,t,s) (0)
 #define audit_get_loginuid(t) (-1)
 #define audit_get_sessionid(t) (-1)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7c495147c3d9..e9bcb93800d8 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2529,6 +2529,25 @@ void __audit_mmap_fd(int fd, int flags)
 	context->type = AUDIT_MMAP;
 }
 
+static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
+{
+	uid_t auid, uid;
+	gid_t gid;
+	unsigned int sessionid;
+
+	auid = audit_get_loginuid(current);
+	sessionid = audit_get_sessionid(current);
+	current_uid_gid(&uid, &gid);
+
+	audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u",
+			 auid, uid, gid, sessionid);
+	audit_log_task_context(ab);
+	audit_log_format(ab, " pid=%d comm=", current->pid);
+	audit_log_untrustedstring(ab, current->comm);
+	audit_log_format(ab, " reason=");
+	audit_log_string(ab, reason);
+	audit_log_format(ab, " sig=%ld", signr);
+}
 /**
  * audit_core_dumps - record information about processes that end abnormally
  * @signr: signal value
@@ -2539,10 +2558,6 @@ void __audit_mmap_fd(int fd, int flags)
 void audit_core_dumps(long signr)
 {
 	struct audit_buffer *ab;
-	u32 sid;
-	uid_t auid = audit_get_loginuid(current), uid;
-	gid_t gid;
-	unsigned int sessionid = audit_get_sessionid(current);
 
 	if (!audit_enabled)
 		return;
@@ -2551,24 +2566,17 @@ void audit_core_dumps(long signr)
 		return;
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
-	current_uid_gid(&uid, &gid);
-	audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u",
-			 auid, uid, gid, sessionid);
-	security_task_getsecid(current, &sid);
-	if (sid) {
-		char *ctx = NULL;
-		u32 len;
+	audit_log_abend(ab, "memory violation", signr);
+	audit_log_end(ab);
+}
 
-		if (security_secid_to_secctx(sid, &ctx, &len))
-			audit_log_format(ab, " ssid=%u", sid);
-		else {
-			audit_log_format(ab, " subj=%s", ctx);
-			security_release_secctx(ctx, len);
-		}
-	}
-	audit_log_format(ab, " pid=%d comm=", current->pid);
-	audit_log_untrustedstring(ab, current->comm);
-	audit_log_format(ab, " sig=%ld", signr);
+void __audit_seccomp(unsigned long syscall)
+{
+	struct audit_buffer *ab;
+
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
+	audit_log_abend(ab, "seccomp", SIGKILL);
+	audit_log_format(ab, " syscall=%ld", syscall);
 	audit_log_end(ab);
 }
 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 57d4b13b631d..e8d76c5895ea 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -6,6 +6,7 @@
  * This defines a simple but solid secure-computing mode.
  */
 
+#include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/sched.h>
 #include <linux/compat.h>
@@ -54,6 +55,7 @@ void __secure_computing(int this_syscall)
 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
+	audit_seccomp(this_syscall);
 	do_exit(SIGKILL);
 }
 
-- 
cgit v1.2.3


From d7e7528bcd456f5c36ad4a202ccfb43c5aa98bc4 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:06 -0500
Subject: Audit: push audit success and retcode into arch ptrace.h

The audit system previously expected arches calling to audit_syscall_exit to
supply as arguments if the syscall was a success and what the return code was.
Audit also provides a helper AUDITSC_RESULT which was supposed to simplify things
by converting from negative retcodes to an audit internal magic value stating
success or failure.  This helper was wrong and could indicate that a valid
pointer returned to userspace was a failed syscall.  The fix is to fix the
layering foolishness.  We now pass audit_syscall_exit a struct pt_reg and it
in turns calls back into arch code to collect the return value and to
determine if the syscall was a success or failure.  We also define a generic
is_syscall_success() macro which determines success/failure based on if the
value is < -MAX_ERRNO.  This works for arches like x86 which do not use a
separate mechanism to indicate syscall failure.

We make both the is_syscall_success() and regs_return_value() static inlines
instead of macros.  The reason is because the audit function must take a void*
for the regs.  (uml calls theirs struct uml_pt_regs instead of just struct
pt_regs so audit_syscall_exit can't take a struct pt_regs).  Since the audit
function takes a void* we need to use static inlines to cast it back to the
arch correct structure to dereference it.

The other major change is that on some arches, like ia64, MIPS and ppc, we
change regs_return_value() to give us the negative value on syscall failure.
THE only other user of this macro, kretprobe_example.c, won't notice and it
makes the value signed consistently for the audit functions across all archs.

In arch/sh/kernel/ptrace_64.c I see that we were using regs[9] in the old
audit code as the return value.  But the ptrace_64.h code defined the macro
regs_return_value() as regs[3].  I have no idea which one is correct, but this
patch now uses the regs_return_value() function, so it now uses regs[3].

For powerpc we previously used regs->result but now use the
regs_return_value() function which uses regs->gprs[3].  regs->gprs[3] is
always positive so the regs_return_value(), much like ia64 makes it negative
before calling the audit code when appropriate.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: H. Peter Anvin <hpa@zytor.com> [for x86 portion]
Acked-by: Tony Luck <tony.luck@intel.com> [for ia64]
Acked-by: Richard Weinberger <richard@nod.at> [for uml]
Acked-by: David S. Miller <davem@davemloft.net> [for sparc]
Acked-by: Ralf Baechle <ralf@linux-mips.org> [for mips]
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> [for ppc]
---
 arch/ia64/include/asm/ptrace.h       | 13 ++++++++++++-
 arch/ia64/kernel/ptrace.c            |  9 +--------
 arch/microblaze/include/asm/ptrace.h |  5 +++++
 arch/microblaze/kernel/ptrace.c      |  3 +--
 arch/mips/include/asm/ptrace.h       | 14 +++++++++++++-
 arch/mips/kernel/ptrace.c            |  4 +---
 arch/powerpc/include/asm/ptrace.h    | 13 ++++++++++++-
 arch/powerpc/kernel/ptrace.c         |  4 +---
 arch/s390/include/asm/ptrace.h       |  6 +++++-
 arch/s390/kernel/ptrace.c            |  4 +---
 arch/sh/include/asm/ptrace_32.h      |  5 ++++-
 arch/sh/include/asm/ptrace_64.h      |  5 ++++-
 arch/sh/kernel/ptrace_32.c           |  4 +---
 arch/sh/kernel/ptrace_64.c           |  4 +---
 arch/sparc/include/asm/ptrace.h      | 10 +++++++++-
 arch/sparc/kernel/ptrace_64.c        | 11 +----------
 arch/um/kernel/ptrace.c              |  4 ++--
 arch/x86/ia32/ia32entry.S            | 10 +++++-----
 arch/x86/kernel/entry_32.S           |  8 ++++----
 arch/x86/kernel/entry_64.S           | 10 +++++-----
 arch/x86/kernel/ptrace.c             |  3 +--
 arch/x86/kernel/vm86_32.c            |  4 ++--
 arch/x86/um/shared/sysdep/ptrace.h   |  5 +++++
 include/linux/audit.h                | 22 ++++++++++++++--------
 include/linux/ptrace.h               | 10 ++++++++++
 kernel/auditsc.c                     | 16 ++++++++++++----
 26 files changed, 132 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h
index f5cb27614e35..68c98f5b3ca6 100644
--- a/arch/ia64/include/asm/ptrace.h
+++ b/arch/ia64/include/asm/ptrace.h
@@ -246,7 +246,18 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
 	return regs->ar_bspstore;
 }
 
-#define regs_return_value(regs) ((regs)->r8)
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	return regs->r10 != -1;
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	if (is_syscall_success(regs))
+		return regs->r8;
+	else
+		return -regs->r8;
+}
 
 /* Conserve space in histogram by encoding slot bits in address
  * bits 2 and 3 rather than bits 0 and 1.
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 8848f43d819e..2c154088cce7 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1268,14 +1268,7 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
 {
 	int step;
 
-	if (unlikely(current->audit_context)) {
-		int success = AUDITSC_RESULT(regs.r10);
-		long result = regs.r8;
-
-		if (success != AUDITSC_SUCCESS)
-			result = -result;
-		audit_syscall_exit(success, result);
-	}
+	audit_syscall_exit(&regs);
 
 	step = test_thread_flag(TIF_SINGLESTEP);
 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
diff --git a/arch/microblaze/include/asm/ptrace.h b/arch/microblaze/include/asm/ptrace.h
index 816bee64b196..94e92c805859 100644
--- a/arch/microblaze/include/asm/ptrace.h
+++ b/arch/microblaze/include/asm/ptrace.h
@@ -61,6 +61,11 @@ struct pt_regs {
 #define instruction_pointer(regs)	((regs)->pc)
 #define profile_pc(regs)		instruction_pointer(regs)
 
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->r3;
+}
+
 #else /* __KERNEL__ */
 
 /* pt_regs offsets used by gdbserver etc in ptrace syscalls */
diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c
index 043cb58f9c44..f564b1bfd386 100644
--- a/arch/microblaze/kernel/ptrace.c
+++ b/arch/microblaze/kernel/ptrace.c
@@ -159,8 +159,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
 {
 	int step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->r3), regs->r3);
+	audit_syscall_exit(regs);
 
 	step = test_thread_flag(TIF_SINGLESTEP);
 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index de39b1f343ea..7d409505df2d 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -137,7 +137,19 @@ extern int ptrace_set_watch_regs(struct task_struct *child,
  */
 #define user_mode(regs) (((regs)->cp0_status & KU_MASK) == KU_USER)
 
-#define regs_return_value(_regs) ((_regs)->regs[2])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	return !regs->regs[7];
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	if (is_syscall_success(regs))
+		return regs->regs[2];
+	else
+		return -regs->regs[2];
+}
+
 #define instruction_pointer(regs) ((regs)->cp0_epc)
 #define profile_pc(regs) instruction_pointer(regs)
 
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 4e6ea1ffad46..ab0f1963a7bd 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -572,9 +572,7 @@ out:
  */
 asmlinkage void syscall_trace_leave(struct pt_regs *regs)
 {
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]),
-		                   -regs->regs[2]);
+	audit_syscall_exit(regs);
 
 	if (!(current->ptrace & PT_PTRACED))
 		return;
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 48223f9b8728..78a205162fd7 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -86,7 +86,18 @@ struct pt_regs {
 #define instruction_pointer(regs) ((regs)->nip)
 #define user_stack_pointer(regs) ((regs)->gpr[1])
 #define kernel_stack_pointer(regs) ((regs)->gpr[1])
-#define regs_return_value(regs) ((regs)->gpr[3])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	return !(regs->ccr & 0x10000000);
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	if (is_syscall_success(regs))
+		return regs->gpr[3];
+	else
+		return -regs->gpr[3];
+}
 
 #ifdef CONFIG_SMP
 extern unsigned long profile_pc(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 5de73dbd15c7..09d31c12a5e3 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1748,9 +1748,7 @@ void do_syscall_trace_leave(struct pt_regs *regs)
 {
 	int step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit((regs->ccr&0x10000000)?AUDITSC_FAILURE:AUDITSC_SUCCESS,
-				   regs->result);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->result);
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 56da355678f4..aeb77f017985 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -541,9 +541,13 @@ struct user_regs_struct
 #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
 #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
 #define user_stack_pointer(regs)((regs)->gprs[15])
-#define regs_return_value(regs)((regs)->gprs[2])
 #define profile_pc(regs) instruction_pointer(regs)
 
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->gprs[2];
+}
+
 int regs_query_register_offset(const char *name);
 const char *regs_query_register_name(unsigned int offset);
 unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 573bc29551ef..f52758600980 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -751,9 +751,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 
 asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
 {
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]),
-				   regs->gprs[2]);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->gprs[2]);
diff --git a/arch/sh/include/asm/ptrace_32.h b/arch/sh/include/asm/ptrace_32.h
index 6c2239cca1a2..2d3e906aa722 100644
--- a/arch/sh/include/asm/ptrace_32.h
+++ b/arch/sh/include/asm/ptrace_32.h
@@ -76,7 +76,10 @@ struct pt_dspregs {
 #ifdef __KERNEL__
 
 #define MAX_REG_OFFSET		offsetof(struct pt_regs, tra)
-#define regs_return_value(_regs)	((_regs)->regs[0])
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->regs[0];
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sh/include/asm/ptrace_64.h b/arch/sh/include/asm/ptrace_64.h
index bf9be7764d69..eb3fcceaf64b 100644
--- a/arch/sh/include/asm/ptrace_64.h
+++ b/arch/sh/include/asm/ptrace_64.h
@@ -13,7 +13,10 @@ struct pt_regs {
 #ifdef __KERNEL__
 
 #define MAX_REG_OFFSET		offsetof(struct pt_regs, tregs[7])
-#define regs_return_value(_regs)	((_regs)->regs[3])
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->regs[3];
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 92b3c276339a..c0b5c179d27b 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -530,9 +530,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
 {
 	int step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->regs[0]),
-				   regs->regs[0]);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->regs[0]);
diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index c8f97649f354..ba720d686435 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -548,9 +548,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
 {
 	int step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->regs[9]),
-				   regs->regs[9]);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->regs[9]);
diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h
index a0e1bcf843a1..c00c3b5c2806 100644
--- a/arch/sparc/include/asm/ptrace.h
+++ b/arch/sparc/include/asm/ptrace.h
@@ -207,7 +207,15 @@ do {	current_thread_info()->syscall_noerror = 1; \
 #define instruction_pointer(regs) ((regs)->tpc)
 #define instruction_pointer_set(regs, val) ((regs)->tpc = (val))
 #define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
-#define regs_return_value(regs) ((regs)->u_regs[UREG_I0])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	return !(regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY));
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->u_regs[UREG_I0];
+}
 #ifdef CONFIG_SMP
 extern unsigned long profile_pc(struct pt_regs *);
 #else
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index 96ee50a80661..c73c8c50f117 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -1086,17 +1086,8 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 
 asmlinkage void syscall_trace_leave(struct pt_regs *regs)
 {
-#ifdef CONFIG_AUDITSYSCALL
-	if (unlikely(current->audit_context)) {
-		unsigned long tstate = regs->tstate;
-		int result = AUDITSC_SUCCESS;
+	audit_syscall_exit(regs);
 
-		if (unlikely(tstate & (TSTATE_XCARRY | TSTATE_ICARRY)))
-			result = AUDITSC_FAILURE;
-
-		audit_syscall_exit(result, regs->u_regs[UREG_I0]);
-	}
-#endif
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->u_regs[UREG_G1]);
 
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index c9da32b0c707..2ccf25c42feb 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -175,8 +175,8 @@ void syscall_trace(struct uml_pt_regs *regs, int entryexit)
 					    UPT_SYSCALL_ARG2(regs),
 					    UPT_SYSCALL_ARG3(regs),
 					    UPT_SYSCALL_ARG4(regs));
-		else audit_syscall_exit(AUDITSC_RESULT(UPT_SYSCALL_RET(regs)),
-					UPT_SYSCALL_RET(regs));
+		else
+			audit_syscall_exit(regs);
 	}
 
 	/* Fake a debug trap */
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 3e274564f6bf..64ced0b8f8fd 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -14,6 +14,7 @@
 #include <asm/segment.h>
 #include <asm/irqflags.h>
 #include <linux/linkage.h>
+#include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -208,12 +209,11 @@ sysexit_from_sys_call:
 	TRACE_IRQS_ON
 	sti
 	movl %eax,%esi		/* second arg, syscall return value */
-	cmpl $0,%eax		/* is it < 0? */
-	setl %al		/* 1 if so, 0 if not */
+	cmpl $-MAX_ERRNO,%eax	/* is it an error ? */
+	setbe %al		/* 1 if so, 0 if not */
 	movzbl %al,%edi		/* zero-extend that into %edi */
-	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-	call audit_syscall_exit
-	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall return value */
+	call __audit_syscall_exit
+	movq RAX-ARGOFFSET(%rsp),%rax	/* reload syscall return value */
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	cli
 	TRACE_IRQS_OFF
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 22d0e21b4dd7..a22facf06f0e 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -42,6 +42,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/err.h>
 #include <asm/thread_info.h>
 #include <asm/irqflags.h>
 #include <asm/errno.h>
@@ -466,11 +467,10 @@ sysexit_audit:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_ANY)
 	movl %eax,%edx		/* second arg, syscall return value */
-	cmpl $0,%eax		/* is it < 0? */
-	setl %al		/* 1 if so, 0 if not */
+	cmpl $-MAX_ERRNO,%eax	/* is it an error ? */
+	setbe %al		/* 1 if so, 0 if not */
 	movzbl %al,%eax		/* zero-extend that */
-	inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-	call audit_syscall_exit
+	call __audit_syscall_exit
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a20e1cb9dc87..e51393dd93a3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -55,6 +55,7 @@
 #include <asm/paravirt.h>
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
+#include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -563,17 +564,16 @@ auditsys:
 	jmp system_call_fastpath
 
 	/*
-	 * Return fast path for syscall audit.  Call audit_syscall_exit()
+	 * Return fast path for syscall audit.  Call __audit_syscall_exit()
 	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
 	 * masked off.
 	 */
 sysret_audit:
 	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
-	cmpq $0,%rsi		/* is it < 0? */
-	setl %al		/* 1 if so, 0 if not */
+	cmpq $-MAX_ERRNO,%rsi	/* is it < -MAX_ERRNO? */
+	setbe %al		/* 1 if so, 0 if not */
 	movzbl %al,%edi		/* zero-extend that into %edi */
-	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-	call audit_syscall_exit
+	call __audit_syscall_exit
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	jmp sysret_check
 #endif	/* CONFIG_AUDITSYSCALL */
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 89a04c7b5bb6..8b0218758775 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1414,8 +1414,7 @@ void syscall_trace_leave(struct pt_regs *regs)
 {
 	bool step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->ax);
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 863f8753ab0a..af17e1c966dc 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -335,9 +335,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 	if (info->flags & VM86_SCREEN_BITMAP)
 		mark_screen_rdonly(tsk->mm);
 
-	/*call audit_syscall_exit since we do not exit via the normal paths */
+	/*call __audit_syscall_exit since we do not exit via the normal paths */
 	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(0), 0);
+		__audit_syscall_exit(1, 0);
 
 	__asm__ __volatile__(
 		"movl %0,%%esp\n\t"
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
index 711b1621747f..5ef9344a8b24 100644
--- a/arch/x86/um/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -3,3 +3,8 @@
 #else
 #include "ptrace_64.h"
 #endif
+
+static inline long regs_return_value(struct uml_pt_regs *regs)
+{
+	return UPT_SYSCALL_RET(regs);
+}
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 6e1c533f9b46..3d65e4b3ba06 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -26,6 +26,7 @@
 
 #include <linux/types.h>
 #include <linux/elf-em.h>
+#include <linux/ptrace.h>
 
 /* The netlink messages for the audit system is divided into blocks:
  * 1000 - 1099 are for commanding the audit system
@@ -408,10 +409,6 @@ struct audit_field {
 	void				*lsm_rule;
 };
 
-#define AUDITSC_INVALID 0
-#define AUDITSC_SUCCESS 1
-#define AUDITSC_FAILURE 2
-#define AUDITSC_RESULT(x) ( ((long)(x))<0?AUDITSC_FAILURE:AUDITSC_SUCCESS )
 extern int __init audit_register_class(int class, unsigned *list);
 extern int audit_classify_syscall(int abi, unsigned syscall);
 extern int audit_classify_arch(int arch);
@@ -424,7 +421,7 @@ extern void audit_free(struct task_struct *task);
 extern void audit_syscall_entry(int arch,
 				int major, unsigned long a0, unsigned long a1,
 				unsigned long a2, unsigned long a3);
-extern void audit_syscall_exit(int failed, long return_code);
+extern void __audit_syscall_exit(int ret_success, long ret_value);
 extern void __audit_getname(const char *name);
 extern void audit_putname(const char *name);
 extern void __audit_inode(const char *name, const struct dentry *dentry);
@@ -438,6 +435,15 @@ static inline int audit_dummy_context(void)
 	void *p = current->audit_context;
 	return !p || *(int *)p;
 }
+static inline void audit_syscall_exit(void *pt_regs)
+{
+	if (unlikely(current->audit_context)) {
+		int success = is_syscall_success(pt_regs);
+		int return_code = regs_return_value(pt_regs);
+
+		__audit_syscall_exit(success, return_code);
+	}
+}
 static inline void audit_getname(const char *name)
 {
 	if (unlikely(!audit_dummy_context()))
@@ -551,12 +557,12 @@ static inline void audit_mmap_fd(int fd, int flags)
 
 extern int audit_n_rules;
 extern int audit_signals;
-#else
+#else /* CONFIG_AUDITSYSCALL */
 #define audit_finish_fork(t)
 #define audit_alloc(t) ({ 0; })
 #define audit_free(t) do { ; } while (0)
 #define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0)
-#define audit_syscall_exit(f,r) do { ; } while (0)
+#define audit_syscall_exit(r) do { ; } while (0)
 #define audit_dummy_context() 1
 #define audit_getname(n) do { ; } while (0)
 #define audit_putname(n) do { ; } while (0)
@@ -587,7 +593,7 @@ extern int audit_signals;
 #define audit_ptrace(t) ((void)0)
 #define audit_n_rules 0
 #define audit_signals 0
-#endif
+#endif /* CONFIG_AUDITSYSCALL */
 
 #ifdef CONFIG_AUDIT
 /* These are defined in audit.c */
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 800f113bea66..dd4cefa6519d 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -112,6 +112,7 @@
 
 #include <linux/compiler.h>		/* For unlikely.  */
 #include <linux/sched.h>		/* For struct task_struct.  */
+#include <linux/err.h>			/* for IS_ERR_VALUE */
 
 
 extern long arch_ptrace(struct task_struct *child, long request,
@@ -265,6 +266,15 @@ static inline void ptrace_release_task(struct task_struct *task)
 #define force_successful_syscall_return() do { } while (0)
 #endif
 
+#ifndef is_syscall_success
+/*
+ * On most systems we can tell if a syscall is a success based on if the retval
+ * is an error value.  On some systems like ia64 and powerpc they have different
+ * indicators of success/failure and must define their own.
+ */
+#define is_syscall_success(regs) (!IS_ERR_VALUE((unsigned long)(regs_return_value(regs))))
+#endif
+
 /*
  * <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__.
  *
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e9bcb93800d8..3d2853808185 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -70,6 +70,11 @@
 
 #include "audit.h"
 
+/* flags stating the success for a syscall */
+#define AUDITSC_INVALID 0
+#define AUDITSC_SUCCESS 1
+#define AUDITSC_FAILURE 2
+
 /* AUDIT_NAMES is the number of slots we reserve in the audit_context
  * for saving names from getname().  If we get more names we will allocate
  * a name dynamically and also add those to the list anchored by names_list. */
@@ -1724,8 +1729,7 @@ void audit_finish_fork(struct task_struct *child)
 
 /**
  * audit_syscall_exit - deallocate audit context after a system call
- * @valid: success/failure flag
- * @return_code: syscall return value
+ * @pt_regs: syscall registers
  *
  * Tear down after system call.  If the audit context has been marked as
  * auditable (either because of the AUDIT_RECORD_CONTEXT state from
@@ -1733,13 +1737,17 @@ void audit_finish_fork(struct task_struct *child)
  * message), then write out the syscall information.  In call cases,
  * free the names stored from getname().
  */
-void audit_syscall_exit(int valid, long return_code)
+void __audit_syscall_exit(int success, long return_code)
 {
 	struct task_struct *tsk = current;
 	struct audit_context *context;
 
-	context = audit_get_context(tsk, valid, return_code);
+	if (success)
+		success = AUDITSC_SUCCESS;
+	else
+		success = AUDITSC_FAILURE;
 
+	context = audit_get_context(tsk, success, return_code);
 	if (likely(!context))
 		return;
 
-- 
cgit v1.2.3


From b05d8447e7821695bc2fa3359431f7a664232743 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:06 -0500
Subject: audit: inline audit_syscall_entry to reduce burden on archs

Every arch calls:

if (unlikely(current->audit_context))
	audit_syscall_entry()

which requires knowledge about audit (the existance of audit_context) in
the arch code.  Just do it all in static inline in audit.h so that arch's
can remain blissfully ignorant.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 arch/ia64/kernel/ptrace.c       |  9 +--------
 arch/microblaze/kernel/ptrace.c |  6 ++----
 arch/mips/kernel/ptrace.c       |  7 +++----
 arch/powerpc/kernel/ptrace.c    | 26 ++++++++++++--------------
 arch/s390/kernel/ptrace.c       | 11 +++++------
 arch/sh/kernel/ptrace_32.c      |  7 +++----
 arch/sh/kernel/ptrace_64.c      |  7 +++----
 arch/sparc/kernel/ptrace_64.c   | 17 ++++++++---------
 arch/um/kernel/ptrace.c         | 20 +++++++++-----------
 arch/x86/ia32/ia32entry.S       |  2 +-
 arch/x86/kernel/entry_32.S      |  2 +-
 arch/x86/kernel/entry_64.S      |  4 ++--
 arch/x86/kernel/ptrace.c        | 22 ++++++++++------------
 arch/xtensa/kernel/ptrace.c     |  3 +--
 include/linux/audit.h           | 13 ++++++++++---
 kernel/auditsc.c                |  2 +-
 16 files changed, 72 insertions(+), 86 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 2c154088cce7..dad91661ddf9 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1246,15 +1246,8 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
 	if (test_thread_flag(TIF_RESTORE_RSE))
 		ia64_sync_krbs();
 
-	if (unlikely(current->audit_context)) {
-		long syscall;
-		int arch;
 
-		syscall = regs.r15;
-		arch = AUDIT_ARCH_IA64;
-
-		audit_syscall_entry(arch, syscall, arg0, arg1, arg2, arg3);
-	}
+	audit_syscall_entry(AUDIT_ARCH_IA64, regs.r15, arg0, arg1, arg2, arg3);
 
 	return 0;
 }
diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c
index f564b1bfd386..6eb2aa927d89 100644
--- a/arch/microblaze/kernel/ptrace.c
+++ b/arch/microblaze/kernel/ptrace.c
@@ -147,10 +147,8 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 		 */
 		ret = -1L;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(EM_MICROBLAZE, regs->r12,
-				    regs->r5, regs->r6,
-				    regs->r7, regs->r8);
+	audit_syscall_entry(EM_MICROBLAZE, regs->r12, regs->r5, regs->r6,
+			    regs->r7, regs->r8);
 
 	return ret ?: regs->r12;
 }
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index ab0f1963a7bd..7786b608d932 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -560,10 +560,9 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs)
 	}
 
 out:
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(audit_arch(), regs->regs[2],
-				    regs->regs[4], regs->regs[5],
-				    regs->regs[6], regs->regs[7]);
+	audit_syscall_entry(audit_arch(), regs->regs[2],
+			    regs->regs[4], regs->regs[5],
+			    regs->regs[6], regs->regs[7]);
 }
 
 /*
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 09d31c12a5e3..5b43325402bc 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1724,22 +1724,20 @@ long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->gpr[0]);
 
-	if (unlikely(current->audit_context)) {
 #ifdef CONFIG_PPC64
-		if (!is_32bit_task())
-			audit_syscall_entry(AUDIT_ARCH_PPC64,
-					    regs->gpr[0],
-					    regs->gpr[3], regs->gpr[4],
-					    regs->gpr[5], regs->gpr[6]);
-		else
+	if (!is_32bit_task())
+		audit_syscall_entry(AUDIT_ARCH_PPC64,
+				    regs->gpr[0],
+				    regs->gpr[3], regs->gpr[4],
+				    regs->gpr[5], regs->gpr[6]);
+	else
 #endif
-			audit_syscall_entry(AUDIT_ARCH_PPC,
-					    regs->gpr[0],
-					    regs->gpr[3] & 0xffffffff,
-					    regs->gpr[4] & 0xffffffff,
-					    regs->gpr[5] & 0xffffffff,
-					    regs->gpr[6] & 0xffffffff);
-	}
+		audit_syscall_entry(AUDIT_ARCH_PPC,
+				    regs->gpr[0],
+				    regs->gpr[3] & 0xffffffff,
+				    regs->gpr[4] & 0xffffffff,
+				    regs->gpr[5] & 0xffffffff,
+				    regs->gpr[6] & 0xffffffff);
 
 	return ret ?: regs->gpr[0];
 }
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index f52758600980..9d82ed4bcb27 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -740,12 +740,11 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->gprs[2]);
 
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(is_compat_task() ?
-					AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
-				    regs->gprs[2], regs->orig_gpr2,
-				    regs->gprs[3], regs->gprs[4],
-				    regs->gprs[5]);
+	audit_syscall_entry(is_compat_task() ?
+				AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
+			    regs->gprs[2], regs->orig_gpr2,
+			    regs->gprs[3], regs->gprs[4],
+			    regs->gprs[5]);
 	return ret ?: regs->gprs[2];
 }
 
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index c0b5c179d27b..a3e651563763 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -518,10 +518,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->regs[0]);
 
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(audit_arch(), regs->regs[3],
-				    regs->regs[4], regs->regs[5],
-				    regs->regs[6], regs->regs[7]);
+	audit_syscall_entry(audit_arch(), regs->regs[3],
+			    regs->regs[4], regs->regs[5],
+			    regs->regs[6], regs->regs[7]);
 
 	return ret ?: regs->regs[0];
 }
diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index ba720d686435..3d0080b5c976 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -536,10 +536,9 @@ asmlinkage long long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->regs[9]);
 
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(audit_arch(), regs->regs[1],
-				    regs->regs[2], regs->regs[3],
-				    regs->regs[4], regs->regs[5]);
+	audit_syscall_entry(audit_arch(), regs->regs[1],
+			    regs->regs[2], regs->regs[3],
+			    regs->regs[4], regs->regs[5]);
 
 	return ret ?: regs->regs[9];
 }
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index c73c8c50f117..9388844cd88c 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -1071,15 +1071,14 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->u_regs[UREG_G1]);
 
-	if (unlikely(current->audit_context) && !ret)
-		audit_syscall_entry((test_thread_flag(TIF_32BIT) ?
-				     AUDIT_ARCH_SPARC :
-				     AUDIT_ARCH_SPARC64),
-				    regs->u_regs[UREG_G1],
-				    regs->u_regs[UREG_I0],
-				    regs->u_regs[UREG_I1],
-				    regs->u_regs[UREG_I2],
-				    regs->u_regs[UREG_I3]);
+	audit_syscall_entry((test_thread_flag(TIF_32BIT) ?
+			     AUDIT_ARCH_SPARC :
+			     AUDIT_ARCH_SPARC64),
+			    regs->u_regs[UREG_G1],
+			    regs->u_regs[UREG_I0],
+			    regs->u_regs[UREG_I1],
+			    regs->u_regs[UREG_I2],
+			    regs->u_regs[UREG_I3]);
 
 	return ret;
 }
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 2ccf25c42feb..06b190390505 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -167,17 +167,15 @@ void syscall_trace(struct uml_pt_regs *regs, int entryexit)
 	int is_singlestep = (current->ptrace & PT_DTRACE) && entryexit;
 	int tracesysgood;
 
-	if (unlikely(current->audit_context)) {
-		if (!entryexit)
-			audit_syscall_entry(HOST_AUDIT_ARCH,
-					    UPT_SYSCALL_NR(regs),
-					    UPT_SYSCALL_ARG1(regs),
-					    UPT_SYSCALL_ARG2(regs),
-					    UPT_SYSCALL_ARG3(regs),
-					    UPT_SYSCALL_ARG4(regs));
-		else
-			audit_syscall_exit(regs);
-	}
+	if (!entryexit)
+		audit_syscall_entry(HOST_AUDIT_ARCH,
+				    UPT_SYSCALL_NR(regs),
+				    UPT_SYSCALL_ARG1(regs),
+				    UPT_SYSCALL_ARG2(regs),
+				    UPT_SYSCALL_ARG3(regs),
+				    UPT_SYSCALL_ARG4(regs));
+	else
+		audit_syscall_exit(regs);
 
 	/* Fake a debug trap */
 	if (is_singlestep)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 025f0f01d254..cecfd9a8f734 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -192,7 +192,7 @@ sysexit_from_sys_call:
 	movl %ebx,%edx			/* 3rd arg: 1st syscall arg */
 	movl %eax,%esi			/* 2nd arg: syscall number */
 	movl $AUDIT_ARCH_I386,%edi	/* 1st arg: audit arch */
-	call audit_syscall_entry
+	call __audit_syscall_entry
 	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall number */
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index a22facf06f0e..1ccd742eba1b 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -456,7 +456,7 @@ sysenter_audit:
 	movl %ebx,%ecx			/* 3rd arg: 1st syscall arg */
 	movl %eax,%edx			/* 2nd arg: syscall number */
 	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
-	call audit_syscall_entry
+	call __audit_syscall_entry
 	pushl_cfi %ebx
 	movl PT_EAX(%esp),%eax		/* reload syscall number */
 	jmp sysenter_do_call
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e51393dd93a3..1ca66b650123 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -549,7 +549,7 @@ badsys:
 #ifdef CONFIG_AUDITSYSCALL
 	/*
 	 * Fast path for syscall audit without full syscall trace.
-	 * We just call audit_syscall_entry() directly, and then
+	 * We just call __audit_syscall_entry() directly, and then
 	 * jump back to the normal fast path.
 	 */
 auditsys:
@@ -559,7 +559,7 @@ auditsys:
 	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
 	movq %rax,%rsi			/* 2nd arg: syscall number */
 	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
-	call audit_syscall_entry
+	call __audit_syscall_entry
 	LOAD_ARGS 0		/* reload call-clobbered registers */
 	jmp system_call_fastpath
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 8b0218758775..50267386b766 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1392,20 +1392,18 @@ long syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->orig_ax);
 
-	if (unlikely(current->audit_context)) {
-		if (IS_IA32)
-			audit_syscall_entry(AUDIT_ARCH_I386,
-					    regs->orig_ax,
-					    regs->bx, regs->cx,
-					    regs->dx, regs->si);
+	if (IS_IA32)
+		audit_syscall_entry(AUDIT_ARCH_I386,
+				    regs->orig_ax,
+				    regs->bx, regs->cx,
+				    regs->dx, regs->si);
 #ifdef CONFIG_X86_64
-		else
-			audit_syscall_entry(AUDIT_ARCH_X86_64,
-					    regs->orig_ax,
-					    regs->di, regs->si,
-					    regs->dx, regs->r10);
+	else
+		audit_syscall_entry(AUDIT_ARCH_X86_64,
+				    regs->orig_ax,
+				    regs->di, regs->si,
+				    regs->dx, regs->r10);
 #endif
-	}
 
 	return ret ?: regs->orig_ax;
 }
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index a0d042aa2967..2dff698ab02e 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -334,8 +334,7 @@ void do_syscall_trace_enter(struct pt_regs *regs)
 		do_syscall_trace();
 
 #if 0
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(current, AUDIT_ARCH_XTENSA..);
+	audit_syscall_entry(current, AUDIT_ARCH_XTENSA..);
 #endif
 }
 
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 3d65e4b3ba06..f56ce2669b83 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -418,9 +418,9 @@ extern int audit_classify_arch(int arch);
 extern void audit_finish_fork(struct task_struct *child);
 extern int  audit_alloc(struct task_struct *task);
 extern void audit_free(struct task_struct *task);
-extern void audit_syscall_entry(int arch,
-				int major, unsigned long a0, unsigned long a1,
-				unsigned long a2, unsigned long a3);
+extern void __audit_syscall_entry(int arch,
+				  int major, unsigned long a0, unsigned long a1,
+				  unsigned long a2, unsigned long a3);
 extern void __audit_syscall_exit(int ret_success, long ret_value);
 extern void __audit_getname(const char *name);
 extern void audit_putname(const char *name);
@@ -435,6 +435,13 @@ static inline int audit_dummy_context(void)
 	void *p = current->audit_context;
 	return !p || *(int *)p;
 }
+static inline void audit_syscall_entry(int arch, int major, unsigned long a0,
+				       unsigned long a1, unsigned long a2,
+				       unsigned long a3)
+{
+	if (unlikely(!audit_dummy_context()))
+		__audit_syscall_entry(arch, major, a0, a1, a2, a3);
+}
 static inline void audit_syscall_exit(void *pt_regs)
 {
 	if (unlikely(current->audit_context)) {
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3d2853808185..b408100dd6ef 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1632,7 +1632,7 @@ void audit_free(struct task_struct *tsk)
  * will only be written if another part of the kernel requests that it
  * be written).
  */
-void audit_syscall_entry(int arch, int major,
+void __audit_syscall_entry(int arch, int major,
 			 unsigned long a1, unsigned long a2,
 			 unsigned long a3, unsigned long a4)
 {
-- 
cgit v1.2.3


From 07c49417877f8658a6aa0ad9b4e21e4fd4df11b6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: inline checks for not needing to collect aux records

A number of audit hooks make function calls before they determine that
auxilary records do not need to be collected.  Do those checks as static
inlines since the most common case is going to be that records are not
needed and we can skip the function call overhead.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 23 ++++++++++++++++++++---
 kernel/auditsc.c      | 15 +++------------
 2 files changed, 23 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index f56ce2669b83..cf16faff6b8a 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -489,9 +489,9 @@ extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 extern void audit_log_task_context(struct audit_buffer *ab);
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
-extern int audit_bprm(struct linux_binprm *bprm);
-extern void audit_socketcall(int nargs, unsigned long *args);
-extern int audit_sockaddr(int len, void *addr);
+extern int __audit_bprm(struct linux_binprm *bprm);
+extern void __audit_socketcall(int nargs, unsigned long *args);
+extern int __audit_sockaddr(int len, void *addr);
 extern void __audit_fd_pair(int fd1, int fd2);
 extern int audit_set_macxattr(const char *name);
 extern void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr);
@@ -519,6 +519,23 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid
 	if (unlikely(!audit_dummy_context()))
 		__audit_ipc_set_perm(qbytes, uid, gid, mode);
 }
+static inline int audit_bprm(struct linux_binprm *bprm)
+{
+	if (unlikely(!audit_dummy_context()))
+		return __audit_bprm(bprm);
+	return 0;
+}
+static inline void audit_socketcall(int nargs, unsigned long *args)
+{
+	if (unlikely(!audit_dummy_context()))
+		__audit_socketcall(nargs, args);
+}
+static inline int audit_sockaddr(int len, void *addr)
+{
+	if (unlikely(!audit_dummy_context()))
+		return __audit_sockaddr(len, addr);
+	return 0;
+}
 static inline void audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr)
 {
 	if (unlikely(!audit_dummy_context()))
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index d7382c2aaa9e..e1062f66b01b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2309,14 +2309,11 @@ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mo
 	context->ipc.has_perm = 1;
 }
 
-int audit_bprm(struct linux_binprm *bprm)
+int __audit_bprm(struct linux_binprm *bprm)
 {
 	struct audit_aux_data_execve *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!audit_enabled || !context || context->dummy))
-		return 0;
-
 	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
 	if (!ax)
 		return -ENOMEM;
@@ -2337,13 +2334,10 @@ int audit_bprm(struct linux_binprm *bprm)
  * @args: args array
  *
  */
-void audit_socketcall(int nargs, unsigned long *args)
+void __audit_socketcall(int nargs, unsigned long *args)
 {
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context || context->dummy))
-		return;
-
 	context->type = AUDIT_SOCKETCALL;
 	context->socketcall.nargs = nargs;
 	memcpy(context->socketcall.args, args, nargs * sizeof(unsigned long));
@@ -2369,13 +2363,10 @@ void __audit_fd_pair(int fd1, int fd2)
  *
  * Returns 0 for success or NULL context or < 0 on error.
  */
-int audit_sockaddr(int len, void *a)
+int __audit_sockaddr(int len, void *a)
 {
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context || context->dummy))
-		return 0;
-
 	if (!context->sockaddr) {
 		void *p = kmalloc(sizeof(struct sockaddr_storage), GFP_KERNEL);
 		if (!p)
-- 
cgit v1.2.3


From 38cdce53daa0408a61fe6d86fe48f31515c9b840 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: drop audit_set_macxattr as it doesn't do anything

unused.  deleted.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index cf16faff6b8a..4f1efe3e8616 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -493,7 +493,6 @@ extern int __audit_bprm(struct linux_binprm *bprm);
 extern void __audit_socketcall(int nargs, unsigned long *args);
 extern int __audit_sockaddr(int len, void *addr);
 extern void __audit_fd_pair(int fd1, int fd2);
-extern int audit_set_macxattr(const char *name);
 extern void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr);
 extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout);
 extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification);
@@ -606,7 +605,6 @@ extern int audit_signals;
 #define audit_socketcall(n,a) ((void)0)
 #define audit_fd_pair(n,a) ((void)0)
 #define audit_sockaddr(len, addr) ({ 0; })
-#define audit_set_macxattr(n) do { ; } while (0)
 #define audit_mq_open(o,m,a) ((void)0)
 #define audit_mq_sendrecv(d,l,p,t) ((void)0)
 #define audit_mq_notify(d,n) ((void)0)
-- 
cgit v1.2.3


From a4ff8dba7d8ce5ceb43fb27df66292251cc73bdc Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: inline audit_free to simplify the look of generic code

make the conditional a static inline instead of doing it in generic code.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 7 ++++++-
 kernel/auditsc.c      | 2 +-
 kernel/exit.c         | 3 +--
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 4f1efe3e8616..8eb8bda749b3 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -417,7 +417,7 @@ extern int audit_classify_arch(int arch);
 				/* Public API */
 extern void audit_finish_fork(struct task_struct *child);
 extern int  audit_alloc(struct task_struct *task);
-extern void audit_free(struct task_struct *task);
+extern void __audit_free(struct task_struct *task);
 extern void __audit_syscall_entry(int arch,
 				  int major, unsigned long a0, unsigned long a1,
 				  unsigned long a2, unsigned long a3);
@@ -435,6 +435,11 @@ static inline int audit_dummy_context(void)
 	void *p = current->audit_context;
 	return !p || *(int *)p;
 }
+static inline void audit_free(struct task_struct *task)
+{
+	if (unlikely(task->audit_context))
+		__audit_free(task);
+}
 static inline void audit_syscall_entry(int arch, int major, unsigned long a0,
 				       unsigned long a1, unsigned long a2,
 				       unsigned long a3)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e1062f66b01b..7aaeb38b262a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1594,7 +1594,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
  *
  * Called from copy_process and do_exit
  */
-void audit_free(struct task_struct *tsk)
+void __audit_free(struct task_struct *tsk)
 {
 	struct audit_context *context;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 94ed6e20bb53..88dcbbc446f7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -964,8 +964,7 @@ NORET_TYPE void do_exit(long code)
 	acct_collect(code, group_dead);
 	if (group_dead)
 		tty_audit_exit();
-	if (unlikely(tsk->audit_context))
-		audit_free(tsk);
+	audit_free(tsk);
 
 	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
-- 
cgit v1.2.3


From 6422e78de6880c66a82af512d9bd0c85eb62e661 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: remove audit_finish_fork as it can't be called

Audit entry,always rules are not allowed and are automatically changed in
exit,always rules in userspace.  The kernel refuses to load such rules.

Thus a task in the middle of a syscall (and thus in audit_finish_fork())
can only be in one of two states: AUDIT_BUILD_CONTEXT or AUDIT_DISABLED.
Since the current task cannot be in AUDIT_RECORD_CONTEXT we aren't every
going to actually use the code in audit_finish_fork() since it will
return without doing anything.  Thus drop the code.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  2 --
 kernel/auditsc.c      | 20 --------------------
 kernel/fork.c         |  2 --
 3 files changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 8eb8bda749b3..67b66c37a254 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -415,7 +415,6 @@ extern int audit_classify_arch(int arch);
 #ifdef CONFIG_AUDITSYSCALL
 /* These are defined in auditsc.c */
 				/* Public API */
-extern void audit_finish_fork(struct task_struct *child);
 extern int  audit_alloc(struct task_struct *task);
 extern void __audit_free(struct task_struct *task);
 extern void __audit_syscall_entry(int arch,
@@ -586,7 +585,6 @@ static inline void audit_mmap_fd(int fd, int flags)
 extern int audit_n_rules;
 extern int audit_signals;
 #else /* CONFIG_AUDITSYSCALL */
-#define audit_finish_fork(t)
 #define audit_alloc(t) ({ 0; })
 #define audit_free(t) do { ; } while (0)
 #define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7aaeb38b262a..4d8920f5ab88 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1707,26 +1707,6 @@ void __audit_syscall_entry(int arch, int major,
 	context->ppid       = 0;
 }
 
-void audit_finish_fork(struct task_struct *child)
-{
-	struct audit_context *ctx = current->audit_context;
-	struct audit_context *p = child->audit_context;
-	if (!p || !ctx)
-		return;
-	if (!ctx->in_syscall || ctx->current_state != AUDIT_RECORD_CONTEXT)
-		return;
-	p->arch = ctx->arch;
-	p->major = ctx->major;
-	memcpy(p->argv, ctx->argv, sizeof(ctx->argv));
-	p->ctime = ctx->ctime;
-	p->dummy = ctx->dummy;
-	p->in_syscall = ctx->in_syscall;
-	p->filterkey = kstrdup(ctx->filterkey, GFP_KERNEL);
-	p->ppid = current->pid;
-	p->prio = ctx->prio;
-	p->current_state = ctx->current_state;
-}
-
 /**
  * audit_syscall_exit - deallocate audit context after a system call
  * @pt_regs: syscall registers
diff --git a/kernel/fork.c b/kernel/fork.c
index 443f5125f11e..c1e5c21f48c1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1525,8 +1525,6 @@ long do_fork(unsigned long clone_flags,
 			init_completion(&vfork);
 		}
 
-		audit_finish_fork(p);
-
 		/*
 		 * We set PF_STARTING at creation in case tracing wants to
 		 * use this to distinguish a fully live task from one that
-- 
cgit v1.2.3


From efaffd6e4417860c67576ac760dd6e8bbd15f006 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: allow matching on obj_uid

Allow syscall exit filter matching based on the uid of the owner of an
inode used in a syscall.  aka:

auditctl -a always,exit -S open -F obj_uid=0 -F perm=wa

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  1 +
 kernel/auditfilter.c  |  1 +
 kernel/auditsc.c      | 12 ++++++++++++
 3 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 67b66c37a254..55cb3daaf474 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -223,6 +223,7 @@
 #define AUDIT_PERM	106
 #define AUDIT_DIR	107
 #define AUDIT_FILETYPE	108
+#define AUDIT_OBJ_UID	109
 
 #define AUDIT_ARG0      200
 #define AUDIT_ARG1      (AUDIT_ARG0+1)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 903caa269b5c..13e997423dcd 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -461,6 +461,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_ARG1:
 		case AUDIT_ARG2:
 		case AUDIT_ARG3:
+		case AUDIT_OBJ_UID:
 			break;
 		case AUDIT_ARCH:
 			entry->rule.arch_f = f;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4d8920f5ab88..5cf3ecc01517 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -586,6 +586,18 @@ static int audit_filter_rules(struct task_struct *tsk,
 				}
 			}
 			break;
+		case AUDIT_OBJ_UID:
+			if (name) {
+				result = audit_comparator(name->uid, f->op, f->val);
+			} else if (ctx) {
+				list_for_each_entry(n, &ctx->names_list, list) {
+					if (audit_comparator(n->uid, f->op, f->val)) {
+						++result;
+						break;
+					}
+				}
+			}
+			break;
 		case AUDIT_WATCH:
 			if (name)
 				result = audit_watch_compare(rule->watch, name->ino, name->dev);
-- 
cgit v1.2.3


From 54d3218b31aee5bc9c859ae60fbde933d922448b Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: allow audit matching on inode gid

Much like the ability to filter audit on the uid of an inode collected, we
should be able to filter on the gid of the inode.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  1 +
 kernel/auditfilter.c  |  1 +
 kernel/auditsc.c      | 12 ++++++++++++
 3 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 55cb3daaf474..e36aa37c88af 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -224,6 +224,7 @@
 #define AUDIT_DIR	107
 #define AUDIT_FILETYPE	108
 #define AUDIT_OBJ_UID	109
+#define AUDIT_OBJ_GID	110
 
 #define AUDIT_ARG0      200
 #define AUDIT_ARG1      (AUDIT_ARG0+1)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 13e997423dcd..f10605c787e6 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -462,6 +462,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_ARG2:
 		case AUDIT_ARG3:
 		case AUDIT_OBJ_UID:
+		case AUDIT_OBJ_GID:
 			break;
 		case AUDIT_ARCH:
 			entry->rule.arch_f = f;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 5cf3ecc01517..87b375fb12ff 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -598,6 +598,18 @@ static int audit_filter_rules(struct task_struct *tsk,
 				}
 			}
 			break;
+		case AUDIT_OBJ_GID:
+			if (name) {
+				result = audit_comparator(name->gid, f->op, f->val);
+			} else if (ctx) {
+				list_for_each_entry(n, &ctx->names_list, list) {
+					if (audit_comparator(n->gid, f->op, f->val)) {
+						++result;
+						break;
+					}
+				}
+			}
+			break;
 		case AUDIT_WATCH:
 			if (name)
 				result = audit_watch_compare(rule->watch, name->ino, name->dev);
-- 
cgit v1.2.3


From 0a300be6d5be8f66cd96609334710c268d0bfdce Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:08 -0500
Subject: audit: remove task argument to audit_set_loginuid

The function always deals with current.  Don't expose an option
pretending one can use it for something.  You can't.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/proc/base.c        | 2 +-
 include/linux/audit.h | 2 +-
 kernel/auditsc.c      | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8173dfd89cb2..e3cbebbabebd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1228,7 +1228,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
 		goto out_free_page;
 
 	}
-	length = audit_set_loginuid(current, loginuid);
+	length = audit_set_loginuid(loginuid);
 	if (likely(length == 0))
 		length = count;
 
diff --git a/include/linux/audit.h b/include/linux/audit.h
index e36aa37c88af..7cbd6fe41573 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -489,7 +489,7 @@ static inline void audit_ptrace(struct task_struct *t)
 extern unsigned int audit_serial(void);
 extern int auditsc_get_stamp(struct audit_context *ctx,
 			      struct timespec *t, unsigned int *serial);
-extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
+extern int  audit_set_loginuid(uid_t loginuid);
 #define audit_get_loginuid(t) ((t)->loginuid)
 #define audit_get_sessionid(t) ((t)->sessionid)
 extern void audit_log_task_context(struct audit_buffer *ab);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 87b375fb12ff..9d6dd7d869c0 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2163,16 +2163,16 @@ int auditsc_get_stamp(struct audit_context *ctx,
 static atomic_t session_id = ATOMIC_INIT(0);
 
 /**
- * audit_set_loginuid - set a task's audit_context loginuid
- * @task: task whose audit context is being modified
+ * audit_set_loginuid - set current task's audit_context loginuid
  * @loginuid: loginuid value
  *
  * Returns 0.
  *
  * Called (set) from fs/proc/base.c::proc_loginuid_write().
  */
-int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
+int audit_set_loginuid(uid_t loginuid)
 {
+	struct task_struct *task = current;
 	unsigned int sessionid = atomic_inc_return(&session_id);
 	struct audit_context *context = task->audit_context;
 
-- 
cgit v1.2.3


From 02d86a568c6d2d335256864451ac8ce781bc5652 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:08 -0500
Subject: audit: allow interfield comparison in audit rules

We wish to be able to audit when a uid=500 task accesses a file which is
uid=0.  Or vice versa.  This patch introduces a new audit filter type
AUDIT_FIELD_COMPARE which takes as an 'enum' which indicates which fields
should be compared.  At this point we only define the task->uid vs
inode->uid, but other comparisons can be added.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  4 ++++
 kernel/auditfilter.c  |  5 ++++-
 kernel/auditsc.c      | 30 +++++++++++++++++++++++++++++-
 3 files changed, 37 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 7cbd6fe41573..838e05fc0582 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -182,7 +182,10 @@
  * AUDIT_UNUSED_BITS is updated if need be. */
 #define AUDIT_UNUSED_BITS	0x07FFFC00
 
+/* AUDIT_FIELD_COMPARE rule list */
+#define AUDIT_COMPARE_UID_TO_OBJ_UID	1
 
+#define AUDIT_MAX_FIELD_COMPARE	AUDIT_COMPARE_UID_TO_OBJ_UID
 /* Rule fields */
 				/* These are useful when checking the
 				 * task structure at task creation time
@@ -225,6 +228,7 @@
 #define AUDIT_FILETYPE	108
 #define AUDIT_OBJ_UID	109
 #define AUDIT_OBJ_GID	110
+#define AUDIT_FIELD_COMPARE	111
 
 #define AUDIT_ARG0      200
 #define AUDIT_ARG1      (AUDIT_ARG0+1)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index f10605c787e6..a6c3f1abd206 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -526,7 +526,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 				goto exit_free;
 			break;
 		case AUDIT_FILTERKEY:
-			err = -EINVAL;
 			if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN)
 				goto exit_free;
 			str = audit_unpack_string(&bufp, &remain, f->val);
@@ -543,6 +542,10 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 			if (f->val & ~S_IFMT)
 				goto exit_free;
 			break;
+		case AUDIT_FIELD_COMPARE:
+			if (f->val > AUDIT_MAX_FIELD_COMPARE)
+				goto exit_free;
+			break;
 		default:
 			goto exit_free;
 		}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 9161e70a4379..8fb2c8e6d624 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -463,6 +463,32 @@ static int match_tree_refs(struct audit_context *ctx, struct audit_tree *tree)
 	return 0;
 }
 
+static int audit_field_compare(struct task_struct *tsk,
+			       const struct cred *cred,
+			       struct audit_field *f,
+			       struct audit_context *ctx,
+			       struct audit_names *name)
+{
+	struct audit_names *n;
+
+	switch (f->val) {
+	case AUDIT_COMPARE_UID_TO_OBJ_UID:
+		if (name) {
+			return audit_comparator(cred->uid, f->op, name->uid);
+		} else if (ctx) {
+			list_for_each_entry(n, &ctx->names_list, list) {
+				if (audit_comparator(cred->uid, f->op, n->uid))
+					return 1;
+			}
+		}
+		break;
+	default:
+		WARN(1, "Missing AUDIT_COMPARE define.  Report as a bug\n");
+		return 0;
+	}
+	return 0;
+}
+
 /* Determine if any context name data matches a rule's watch data */
 /* Compare a task_struct with an audit_rule.  Return 1 on match, 0
  * otherwise.
@@ -693,8 +719,10 @@ static int audit_filter_rules(struct task_struct *tsk,
 		case AUDIT_FILETYPE:
 			result = audit_match_filetype(ctx, f->val);
 			break;
+		case AUDIT_FIELD_COMPARE:
+			result = audit_field_compare(tsk, cred, f, ctx, name);
+			break;
 		}
-
 		if (!result)
 			return 0;
 	}
-- 
cgit v1.2.3


From c9fe685f7a17a0ee8bf3fbe51e40b1c8b8e65896 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:08 -0500
Subject: audit: allow interfield comparison between gid and ogid

Allow audit rules to compare the gid of the running task to the gid of the
inode in question.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 3 ++-
 kernel/auditsc.c      | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 838e05fc0582..fffbc2176ee1 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -184,8 +184,9 @@
 
 /* AUDIT_FIELD_COMPARE rule list */
 #define AUDIT_COMPARE_UID_TO_OBJ_UID	1
+#define AUDIT_COMPARE_GID_TO_OBJ_GID	2
 
-#define AUDIT_MAX_FIELD_COMPARE	AUDIT_COMPARE_UID_TO_OBJ_UID
+#define AUDIT_MAX_FIELD_COMPARE	AUDIT_COMPARE_GID_TO_OBJ_GID
 /* Rule fields */
 				/* These are useful when checking the
 				 * task structure at task creation time
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b12cc32fe377..861c7b9c565a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -474,6 +474,8 @@ static int audit_compare_id(uid_t uid1,
 	uid_t uid2;
 	int rc;
 
+	BUILD_BUG_ON(sizeof(uid_t) != sizeof(gid_t));
+
 	if (name) {
 		addr = (unsigned long)name;
 		addr += name_offset;
@@ -510,6 +512,10 @@ static int audit_field_compare(struct task_struct *tsk,
 		return audit_compare_id(cred->uid,
 					name, offsetof(struct audit_names, uid),
 					f, ctx);
+	case AUDIT_COMPARE_GID_TO_OBJ_GID:
+		return audit_compare_id(cred->gid,
+					name, offsetof(struct audit_names, gid),
+					f, ctx);
 	default:
 		WARN(1, "Missing AUDIT_COMPARE define.  Report as a bug\n");
 		return 0;
-- 
cgit v1.2.3


From 4a6633ed08af5ba67790b4d1adcdeb8ceb55677e Mon Sep 17 00:00:00 2001
From: Peter Moody <pmoody@google.com>
Date: Tue, 13 Dec 2011 16:17:51 -0800
Subject: audit: implement all object interfield comparisons

This completes the matrix of interfield comparisons between uid/gid
information for the current task and the uid/gid information for inodes.
aka I can audit based on differences between the euid of the process and
the uid of fs objects.

Signed-off-by: Peter Moody <pmoody@google.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 10 +++++++++-
 kernel/auditsc.c      | 29 +++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index fffbc2176ee1..67113cb4bc15 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -185,8 +185,16 @@
 /* AUDIT_FIELD_COMPARE rule list */
 #define AUDIT_COMPARE_UID_TO_OBJ_UID	1
 #define AUDIT_COMPARE_GID_TO_OBJ_GID	2
+#define AUDIT_COMPARE_EUID_TO_OBJ_UID	3
+#define AUDIT_COMPARE_EGID_TO_OBJ_GID	4
+#define AUDIT_COMPARE_AUID_TO_OBJ_UID	5
+#define AUDIT_COMPARE_SUID_TO_OBJ_UID	6
+#define AUDIT_COMPARE_SGID_TO_OBJ_GID	7
+#define AUDIT_COMPARE_FSUID_TO_OBJ_UID	8
+#define AUDIT_COMPARE_FSGID_TO_OBJ_GID	9
+
+#define AUDIT_MAX_FIELD_COMPARE		AUDIT_COMPARE_FSGID_TO_OBJ_GID
 
-#define AUDIT_MAX_FIELD_COMPARE	AUDIT_COMPARE_GID_TO_OBJ_GID
 /* Rule fields */
 				/* These are useful when checking the
 				 * task structure at task creation time
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 861c7b9c565a..b8cee462b99e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -508,6 +508,7 @@ static int audit_field_compare(struct task_struct *tsk,
 			       struct audit_names *name)
 {
 	switch (f->val) {
+	/* process to file object comparisons */
 	case AUDIT_COMPARE_UID_TO_OBJ_UID:
 		return audit_compare_id(cred->uid,
 					name, offsetof(struct audit_names, uid),
@@ -516,6 +517,34 @@ static int audit_field_compare(struct task_struct *tsk,
 		return audit_compare_id(cred->gid,
 					name, offsetof(struct audit_names, gid),
 					f, ctx);
+	case AUDIT_COMPARE_EUID_TO_OBJ_UID:
+		return audit_compare_id(cred->euid,
+					name, offsetof(struct audit_names, uid),
+					f, ctx);
+	case AUDIT_COMPARE_EGID_TO_OBJ_GID:
+		return audit_compare_id(cred->egid,
+					name, offsetof(struct audit_names, gid),
+					f, ctx);
+	case AUDIT_COMPARE_AUID_TO_OBJ_UID:
+		return audit_compare_id(tsk->loginuid,
+					name, offsetof(struct audit_names, uid),
+					f, ctx);
+	case AUDIT_COMPARE_SUID_TO_OBJ_UID:
+		return audit_compare_id(cred->suid,
+					name, offsetof(struct audit_names, uid),
+					f, ctx);
+	case AUDIT_COMPARE_SGID_TO_OBJ_GID:
+		return audit_compare_id(cred->sgid,
+					name, offsetof(struct audit_names, gid),
+					f, ctx);
+	case AUDIT_COMPARE_FSUID_TO_OBJ_UID:
+		return audit_compare_id(cred->fsuid,
+					name, offsetof(struct audit_names, uid),
+					f, ctx);
+	case AUDIT_COMPARE_FSGID_TO_OBJ_GID:
+		return audit_compare_id(cred->fsgid,
+					name, offsetof(struct audit_names, gid),
+					f, ctx);
 	default:
 		WARN(1, "Missing AUDIT_COMPARE define.  Report as a bug\n");
 		return 0;
-- 
cgit v1.2.3


From 10d68360871657204885371cdf2594412675d2f9 Mon Sep 17 00:00:00 2001
From: Peter Moody <pmoody@google.com>
Date: Wed, 4 Jan 2012 15:24:31 -0500
Subject: audit: comparison on interprocess fields

This allows audit to specify rules in which we compare two fields of a
process.  Such as is the running process uid != to the running process
euid?

Signed-off-by: Peter Moody <pmoody@google.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 24 +++++++++++++++++++++++-
 kernel/auditsc.c      | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 67113cb4bc15..9ff7a2c48b50 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -193,7 +193,29 @@
 #define AUDIT_COMPARE_FSUID_TO_OBJ_UID	8
 #define AUDIT_COMPARE_FSGID_TO_OBJ_GID	9
 
-#define AUDIT_MAX_FIELD_COMPARE		AUDIT_COMPARE_FSGID_TO_OBJ_GID
+#define AUDIT_COMPARE_UID_TO_AUID	10
+#define AUDIT_COMPARE_UID_TO_EUID	11
+#define AUDIT_COMPARE_UID_TO_FSUID	12
+#define AUDIT_COMPARE_UID_TO_SUID	13
+
+#define AUDIT_COMPARE_AUID_TO_FSUID	14
+#define AUDIT_COMPARE_AUID_TO_SUID	15
+#define AUDIT_COMPARE_AUID_TO_EUID	16
+
+#define AUDIT_COMPARE_EUID_TO_SUID	17
+#define AUDIT_COMPARE_EUID_TO_FSUID	18
+
+#define AUDIT_COMPARE_SUID_TO_FSUID	19
+
+#define AUDIT_COMPARE_GID_TO_EGID	20
+#define AUDIT_COMPARE_GID_TO_FSGID	21
+#define AUDIT_COMPARE_GID_TO_SGID	22
+
+#define AUDIT_COMPARE_EGID_TO_FSGID	23
+#define AUDIT_COMPARE_EGID_TO_SGID	24
+#define AUDIT_COMPARE_SGID_TO_FSGID	25
+
+#define AUDIT_MAX_FIELD_COMPARE		AUDIT_COMPARE_SGID_TO_FSGID
 
 /* Rule fields */
 				/* These are useful when checking the
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b8cee462b99e..593237e3654d 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -545,6 +545,45 @@ static int audit_field_compare(struct task_struct *tsk,
 		return audit_compare_id(cred->fsgid,
 					name, offsetof(struct audit_names, gid),
 					f, ctx);
+	/* uid comparisons */
+	case AUDIT_COMPARE_UID_TO_AUID:
+		return audit_comparator(cred->uid, f->op, tsk->loginuid);
+	case AUDIT_COMPARE_UID_TO_EUID:
+		return audit_comparator(cred->uid, f->op, cred->euid);
+	case AUDIT_COMPARE_UID_TO_SUID:
+		return audit_comparator(cred->uid, f->op, cred->suid);
+	case AUDIT_COMPARE_UID_TO_FSUID:
+		return audit_comparator(cred->uid, f->op, cred->fsuid);
+	/* auid comparisons */
+	case AUDIT_COMPARE_AUID_TO_EUID:
+		return audit_comparator(tsk->loginuid, f->op, cred->euid);
+	case AUDIT_COMPARE_AUID_TO_SUID:
+		return audit_comparator(tsk->loginuid, f->op, cred->suid);
+	case AUDIT_COMPARE_AUID_TO_FSUID:
+		return audit_comparator(tsk->loginuid, f->op, cred->fsuid);
+	/* euid comparisons */
+	case AUDIT_COMPARE_EUID_TO_SUID:
+		return audit_comparator(cred->euid, f->op, cred->suid);
+	case AUDIT_COMPARE_EUID_TO_FSUID:
+		return audit_comparator(cred->euid, f->op, cred->fsuid);
+	/* suid comparisons */
+	case AUDIT_COMPARE_SUID_TO_FSUID:
+		return audit_comparator(cred->suid, f->op, cred->fsuid);
+	/* gid comparisons */
+	case AUDIT_COMPARE_GID_TO_EGID:
+		return audit_comparator(cred->gid, f->op, cred->egid);
+	case AUDIT_COMPARE_GID_TO_SGID:
+		return audit_comparator(cred->gid, f->op, cred->sgid);
+	case AUDIT_COMPARE_GID_TO_FSGID:
+		return audit_comparator(cred->gid, f->op, cred->fsgid);
+	/* egid comparisons */
+	case AUDIT_COMPARE_EGID_TO_SGID:
+		return audit_comparator(cred->egid, f->op, cred->sgid);
+	case AUDIT_COMPARE_EGID_TO_FSGID:
+		return audit_comparator(cred->egid, f->op, cred->fsgid);
+	/* sgid comparison */
+	case AUDIT_COMPARE_SGID_TO_FSGID:
+		return audit_comparator(cred->sgid, f->op, cred->fsgid);
 	default:
 		WARN(1, "Missing AUDIT_COMPARE define.  Report as a bug\n");
 		return 0;
-- 
cgit v1.2.3


From 67175b855bfd6ed95ffeff95532173c07de6432d Mon Sep 17 00:00:00 2001
From: James Bottomley <jbottomley@parallels.com>
Date: Tue, 17 Jan 2012 21:14:05 +0000
Subject: Fix compile breakage with kref.h

This set of build failures just started appearing on parisc:

  In file included from drivers/input/serio/serio_raw.c:12:
  include/linux/kref.h: In function 'kref_get':
  include/linux/kref.h:40: error: 'TAINT_WARN' undeclared (first use in this function)
  include/linux/kref.h:40: error: (Each undeclared identifier is reported only once
  include/linux/kref.h:40: error: for each function it appears in.)
  include/linux/kref.h: In function 'kref_sub':
  include/linux/kref.h:65: error: 'TAINT_WARN' undeclared (first use in this function)

It happens because TAINT_WARN is defined in kernel.h and this particular
compile doesn't seem to include it (no idea why it's just manifesting ..
probably some #include file untangling exposed it).

Fix by adding

  #include <linux/kernel.h>

to linux/kref.h

Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kref.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index abc0120b09b7..9c07dcebded7 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -17,6 +17,7 @@
 
 #include <linux/bug.h>
 #include <linux/atomic.h>
+#include <linux/kernel.h>
 
 struct kref {
 	atomic_t refcount;
-- 
cgit v1.2.3


From ee0b31a25a010116f44fca6c96f4516d417793dd Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 17 Jan 2012 20:39:51 +0000
Subject: keys: fix trusted/encrypted keys sparse rcu_assign_pointer messages

Define rcu_assign_keypointer(), which uses the key payload.rcudata instead
of payload.data, to resolve the CONFIG_SPARSE_RCU_POINTER message:
"incompatible types in comparison expression (different address spaces)"

Replace the rcu_assign_pointer() calls in encrypted/trusted keys with
rcu_assign_keypointer().

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/key.h                              | 3 +++
 security/keys/encrypted-keys/encrypted.c         | 4 ++--
 security/keys/encrypted-keys/masterkey_trusted.c | 2 ++
 security/keys/trusted.c                          | 4 ++--
 4 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index 183a6af7715d..bfc014c57351 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -293,6 +293,9 @@ static inline bool key_is_instantiated(const struct key *key)
 	(rcu_dereference_protected((KEY)->payload.rcudata,		\
 				   rwsem_is_locked(&((struct key *)(KEY))->sem)))
 
+#define rcu_assign_keypointer(KEY, PAYLOAD)				\
+	(rcu_assign_pointer((KEY)->payload.rcudata, PAYLOAD))
+
 #ifdef CONFIG_SYSCTL
 extern ctl_table key_sysctls[];
 #endif
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 41144f71d615..d91efb6901e9 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -810,7 +810,7 @@ static int encrypted_instantiate(struct key *key, const void *data,
 		goto out;
 	}
 
-	rcu_assign_pointer(key->payload.data, epayload);
+	rcu_assign_keypointer(key, epayload);
 out:
 	kfree(datablob);
 	return ret;
@@ -874,7 +874,7 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen)
 	memcpy(new_epayload->payload_data, epayload->payload_data,
 	       epayload->payload_datalen);
 
-	rcu_assign_pointer(key->payload.data, new_epayload);
+	rcu_assign_keypointer(key, new_epayload);
 	call_rcu(&epayload->rcu, encrypted_rcu_free);
 out:
 	kfree(buf);
diff --git a/security/keys/encrypted-keys/masterkey_trusted.c b/security/keys/encrypted-keys/masterkey_trusted.c
index df87272e3f51..8c16c3e472e7 100644
--- a/security/keys/encrypted-keys/masterkey_trusted.c
+++ b/security/keys/encrypted-keys/masterkey_trusted.c
@@ -18,6 +18,8 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <keys/trusted-type.h>
+#include <keys/encrypted-type.h>
+#include "encrypted.h"
 
 /*
  * request_trusted_key - request the trusted key
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 0ed5fdf238a2..2d5d041f2049 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -993,7 +993,7 @@ out:
 	kfree(datablob);
 	kfree(options);
 	if (!ret)
-		rcu_assign_pointer(key->payload.data, payload);
+		rcu_assign_keypointer(key, payload);
 	else
 		kfree(payload);
 	return ret;
@@ -1067,7 +1067,7 @@ static int trusted_update(struct key *key, const void *data, size_t datalen)
 			goto out;
 		}
 	}
-	rcu_assign_pointer(key->payload.data, new_p);
+	rcu_assign_keypointer(key, new_p);
 	call_rcu(&p->rcu, trusted_rcu_free);
 out:
 	kfree(datablob);
-- 
cgit v1.2.3


From 5e8898e97a5db4125d944070922164d1d09a2689 Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Date: Tue, 17 Jan 2012 17:12:03 +0200
Subject: lib: digital signature config option name change

It was reported that DIGSIG is confusing name for digital signature
module. It was suggested to rename DIGSIG to SIGNATURE.

Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
Suggested-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/digsig.h     | 4 ++--
 lib/Kconfig                | 2 +-
 lib/Makefile               | 2 +-
 security/integrity/Kconfig | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/digsig.h b/include/linux/digsig.h
index efae755017d7..b01558b15814 100644
--- a/include/linux/digsig.h
+++ b/include/linux/digsig.h
@@ -46,7 +46,7 @@ struct signature_hdr {
 	char		mpi[0];
 } __packed;
 
-#if defined(CONFIG_DIGSIG) || defined(CONFIG_DIGSIG_MODULE)
+#if defined(CONFIG_SIGNATURE) || defined(CONFIG_SIGNATURE_MODULE)
 
 int digsig_verify(struct key *keyring, const char *sig, int siglen,
 					const char *digest, int digestlen);
@@ -59,6 +59,6 @@ static inline int digsig_verify(struct key *keyring, const char *sig,
 	return -EOPNOTSUPP;
 }
 
-#endif /* CONFIG_DIGSIG */
+#endif /* CONFIG_SIGNATURE */
 
 #endif /* _DIGSIG_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 201e1b33d721..854735d96dc3 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -302,7 +302,7 @@ config MPILIB_EXTRA
 	  This code in unnecessary for RSA digital signature verification,
 	  and can be compiled if needed.
 
-config DIGSIG
+config SIGNATURE
 	tristate "In-kernel signature checker"
 	depends on KEYS
 	select MPILIB
diff --git a/lib/Makefile b/lib/Makefile
index dace162c7e1c..d71aae1b01b3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -119,7 +119,7 @@ obj-$(CONFIG_CORDIC) += cordic.o
 obj-$(CONFIG_DQL) += dynamic_queue_limits.o
 
 obj-$(CONFIG_MPILIB) += mpi/
-obj-$(CONFIG_DIGSIG) += digsig.o
+obj-$(CONFIG_SIGNATURE) += digsig.o
 
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig
index d384ea921482..ff60bf72881f 100644
--- a/security/integrity/Kconfig
+++ b/security/integrity/Kconfig
@@ -7,7 +7,7 @@ config INTEGRITY_DIGSIG
 	boolean "Digital signature verification using multiple keyrings"
 	depends on INTEGRITY && KEYS
 	default n
-	select DIGSIG
+	select SIGNATURE
 	help
 	  This option enables digital signature verification support
 	  using multiple keyrings. It defines separate keyrings for each
-- 
cgit v1.2.3


From 65b7f839ceecc0a36c7969c0c9151d5748cd4242 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Tue, 17 Jan 2012 22:40:08 +0100
Subject: intel_idle: Split up and provide per CPU initialization func

Function split up, should have no functional change.

Provides entry point for physically hotplugged CPUs
to initialize and activate cpuidle.

Signed-off-by: Thomas Renninger <trenn@suse.de>
CC: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
CC: Shaohua Li <shaohua.li@intel.com>
CC: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/idle/intel_idle.c | 82 ++++++++++++++++++++++++-----------------------
 include/linux/cpuidle.h   |  7 ++++
 2 files changed, 49 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 5d2f8e13cf0e..ef0c04d8dc22 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -478,64 +478,60 @@ static int intel_idle_cpuidle_driver_init(void)
 
 
 /*
- * intel_idle_cpuidle_devices_init()
+ * intel_idle_cpu_init()
  * allocate, initialize, register cpuidle_devices
+ * @cpu: cpu/core to initialize
  */
-static int intel_idle_cpuidle_devices_init(void)
+int intel_idle_cpu_init(int cpu)
 {
-	int i, cstate;
+	int cstate;
 	struct cpuidle_device *dev;
 
-	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
-	if (intel_idle_cpuidle_devices == NULL)
-		return -ENOMEM;
-
-	for_each_online_cpu(i) {
-		dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
+	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
 
-		dev->state_count = 1;
+	dev->state_count = 1;
 
-		for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
-			int num_substates;
+	for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
+		int num_substates;
 
-			if (cstate > max_cstate) {
-				printk(PREFIX "max_cstate %d reached\n",
-					max_cstate);
-				break;
-			}
+		if (cstate > max_cstate) {
+			printk(PREFIX "max_cstate %d reached\n",
+			       max_cstate);
+			break;
+		}
 
-			/* does the state exist in CPUID.MWAIT? */
-			num_substates = (mwait_substates >> ((cstate) * 4))
-						& MWAIT_SUBSTATE_MASK;
-			if (num_substates == 0)
-				continue;
-			/* is the state not enabled? */
-			if (cpuidle_state_table[cstate].enter == NULL) {
-				continue;
-			}
+		/* does the state exist in CPUID.MWAIT? */
+		num_substates = (mwait_substates >> ((cstate) * 4))
+			& MWAIT_SUBSTATE_MASK;
+		if (num_substates == 0)
+			continue;
+		/* is the state not enabled? */
+		if (cpuidle_state_table[cstate].enter == NULL)
+			continue;
 
-			dev->states_usage[dev->state_count].driver_data =
-				(void *)get_driver_data(cstate);
+		dev->states_usage[dev->state_count].driver_data =
+			(void *)get_driver_data(cstate);
 
 			dev->state_count += 1;
 		}
+	dev->cpu = cpu;
 
-		dev->cpu = i;
-		if (cpuidle_register_device(dev)) {
-			pr_debug(PREFIX "cpuidle_register_device %d failed!\n",
-				 i);
-			intel_idle_cpuidle_devices_uninit();
-			return -EIO;
-		}
+	if (cpuidle_register_device(dev)) {
+		pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu);
+		intel_idle_cpuidle_devices_uninit();
+		return -EIO;
 	}
 
+	if (auto_demotion_disable_flags)
+		smp_call_function_single(cpu, auto_demotion_disable, NULL, 1);
+
 	return 0;
 }
 
 
 static int __init intel_idle_init(void)
 {
-	int retval;
+	int retval, i;
 
 	/* Do not load intel_idle at all for now if idle= is passed */
 	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
@@ -553,10 +549,16 @@ static int __init intel_idle_init(void)
 		return retval;
 	}
 
-	retval = intel_idle_cpuidle_devices_init();
-	if (retval) {
-		cpuidle_unregister_driver(&intel_idle_driver);
-		return retval;
+	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
+	if (intel_idle_cpuidle_devices == NULL)
+		return -ENOMEM;
+
+	for_each_online_cpu(i) {
+		retval = intel_idle_cpu_init(i);
+		if (retval) {
+			cpuidle_unregister_driver(&intel_idle_driver);
+			return retval;
+		}
 	}
 
 	return 0;
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 7408af843b8a..93df66ea794a 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -188,7 +188,14 @@ struct cpuidle_governor {
 extern int cpuidle_register_governor(struct cpuidle_governor *gov);
 extern void cpuidle_unregister_governor(struct cpuidle_governor *gov);
 
+#ifdef CONFIG_INTEL_IDLE
+extern int intel_idle_cpu_init(int cpu);
 #else
+static inline int intel_idle_cpu_init(int cpu) { return -1; }
+#endif
+
+#else
+static inline int intel_idle_cpu_init(int cpu) { return -1; }
 
 static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
 {return 0;}
-- 
cgit v1.2.3


From 456a8167e94b66f406c27400a46a707b870452b0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 18 Jan 2012 10:04:29 +0000
Subject: KEYS: Permit key_serial() to be called with a const key pointer

Permit key_serial() to be called with a const key pointer.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/key.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index bfc014c57351..5253471cd2ea 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -271,7 +271,7 @@ extern int keyring_add_key(struct key *keyring,
 
 extern struct key *key_lookup(key_serial_t id);
 
-static inline key_serial_t key_serial(struct key *key)
+static inline key_serial_t key_serial(const struct key *key)
 {
 	return key ? key->serial : 0;
 }
-- 
cgit v1.2.3


From 72081624d5ad3cf56deb6e727b78c4e7a55e4eec Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 19 Jan 2012 23:25:33 +0100
Subject: PM / Hibernate: Rewrite unlock_system_sleep() to fix s2disk
 regression

Commit 33e638b, "PM / Sleep: Use the freezer_count() functions in
[un]lock_system_sleep() APIs" introduced an undesirable change in the
behaviour of unlock_system_sleep() since freezer_count() internally calls
try_to_freeze() - which we don't need in unlock_system_sleep().

And commit bcda53f, "PM / Sleep: Replace mutex_[un]lock(&pm_mutex) with
[un]lock_system_sleep()" made these APIs wide-spread. This caused a
regression in suspend-to-disk where snapshot_read() and snapshot_write()
were getting frozen due to the try_to_freeze embedded in
unlock_system_sleep(), since these functions were invoked when the freezing
condition was still in effect.

Fix this by rewriting unlock_system_sleep() by open-coding freezer_count()
and dropping the try_to_freeze() part. Not only will this fix the
regression but this will also ensure that the API only does what it is
intended to do, and nothing more, under the hood.

While at it, make the code more correct and robust by ensuring that the
PF_FREEZER_SKIP flag gets cleared with pm_mutex held, to avoid a race with
the freezer.

Also, to be on the safer side, open-code freezer_do_not_count() as well
(inside lock_system_sleep()), to ensure that any unrelated modification to
freezer[_do_not]_count() does not break things again!

Reported-and-tested-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/suspend.h | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 95040cc33107..91784a4f8608 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -357,14 +357,29 @@ extern bool pm_save_wakeup_count(unsigned int count);
 
 static inline void lock_system_sleep(void)
 {
-	freezer_do_not_count();
+	current->flags |= PF_FREEZER_SKIP;
 	mutex_lock(&pm_mutex);
 }
 
 static inline void unlock_system_sleep(void)
 {
+	/*
+	 * Don't use freezer_count() because we don't want the call to
+	 * try_to_freeze() here.
+	 *
+	 * Reason:
+	 * Fundamentally, we just don't need it, because freezing condition
+	 * doesn't come into effect until we release the pm_mutex lock,
+	 * since the freezer always works with pm_mutex held.
+	 *
+	 * More importantly, in the case of hibernation,
+	 * unlock_system_sleep() gets called in snapshot_read() and
+	 * snapshot_write() when the freezing condition is still in effect.
+	 * Which means, if we use try_to_freeze() here, it would make them
+	 * enter the refrigerator, thus causing hibernation to lockup.
+	 */
+	current->flags &= ~PF_FREEZER_SKIP;
 	mutex_unlock(&pm_mutex);
-	freezer_count();
 }
 
 #else /* !CONFIG_PM_SLEEP */
-- 
cgit v1.2.3


From 65f2e753f1eb09d3a7e2a0d16408a5433b4097b2 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 20 Jan 2012 17:38:58 +0000
Subject: Revert "ARM: sa11x0: Implement autoloading of codec and codec pdata
 for mcp bus."

This reverts commit 5dd7bf59e0e8563265b3e5b33276099ef628fcc7.

Conflicts:

	scripts/mod/file2alias.c

This change is wrong on many levels.  First and foremost, it causes a
regression.  On boot on Assabet, which this patch gives a codec id of
'ucb1x00', it gives:

	ucb1x00 ID not found: 1005

0x1005 is a valid ID for the UCB1300 device.

Secondly, this patch is way over the top in terms of complexity.  The
only device which has been seen to be connected with this MCP code is
the UCB1x00 (UCB1200, UCB1300 etc) devices, and they all use the same
driver.  Adding a match table, requiring the codec string to match the
hardware ID read out of the ID register, etc is completely over the top
when we can just read the hardware ID register.
---
 arch/arm/mach-sa1100/assabet.c          |  1 -
 arch/arm/mach-sa1100/cerf.c             |  1 -
 arch/arm/mach-sa1100/collie.c           |  8 +-----
 arch/arm/mach-sa1100/include/mach/mcp.h |  2 --
 arch/arm/mach-sa1100/lart.c             |  1 -
 arch/arm/mach-sa1100/shannon.c          |  1 -
 arch/arm/mach-sa1100/simpad.c           |  8 +-----
 drivers/mfd/mcp-core.c                  | 44 ++----------------------------
 drivers/mfd/mcp-sa11x0.c                |  7 ++---
 drivers/mfd/ucb1x00-core.c              | 48 ++++++++-------------------------
 drivers/mfd/ucb1x00-ts.c                |  2 +-
 include/linux/mfd/mcp.h                 |  7 ++---
 include/linux/mfd/ucb1x00.h             |  5 +---
 include/linux/mod_devicetable.h         | 11 --------
 scripts/mod/file2alias.c                | 10 -------
 15 files changed, 21 insertions(+), 135 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index d8aa1c28353b..0c4b76ab4d8e 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -202,7 +202,6 @@ static struct irda_platform_data assabet_irda_data = {
 static struct mcp_plat_data assabet_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1x00",
 };
 
 static void __init assabet_init(void)
diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c
index fcadc4cafe9a..11bb6d0b9be3 100644
--- a/arch/arm/mach-sa1100/cerf.c
+++ b/arch/arm/mach-sa1100/cerf.c
@@ -124,7 +124,6 @@ static void __init cerf_map_io(void)
 static struct mcp_plat_data cerf_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1x00",
 };
 
 static void __init cerf_init(void)
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 6b7c74b304cf..b9060e236def 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -27,7 +27,6 @@
 #include <linux/timer.h>
 #include <linux/gpio.h>
 #include <linux/pda_power.h>
-#include <linux/mfd/ucb1x00.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -86,15 +85,10 @@ static struct scoop_pcmcia_config collie_pcmcia_config = {
 	.num_devs	= 1,
 };
 
-static struct ucb1x00_plat_data collie_ucb1x00_data = {
-	.gpio_base	= COLLIE_TC35143_GPIO_BASE,
-};
-
 static struct mcp_plat_data collie_mcp_data = {
 	.mccr0		= MCCR0_ADM | MCCR0_ExtClk,
 	.sclk_rate	= 9216000,
-	.codec		= "ucb1x00",
-	.codec_pdata	= &collie_ucb1x00_data,
+	.gpio_base	= COLLIE_TC35143_GPIO_BASE,
 };
 
 /*
diff --git a/arch/arm/mach-sa1100/include/mach/mcp.h b/arch/arm/mach-sa1100/include/mach/mcp.h
index 586cec898b35..ed1a331508a7 100644
--- a/arch/arm/mach-sa1100/include/mach/mcp.h
+++ b/arch/arm/mach-sa1100/include/mach/mcp.h
@@ -17,8 +17,6 @@ struct mcp_plat_data {
 	u32 mccr1;
 	unsigned int sclk_rate;
 	int gpio_base;
-	const char *codec;
-	void *codec_pdata;
 };
 
 #endif
diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c
index 48a8f4ef0fcd..af4e2761f3db 100644
--- a/arch/arm/mach-sa1100/lart.c
+++ b/arch/arm/mach-sa1100/lart.c
@@ -24,7 +24,6 @@
 static struct mcp_plat_data lart_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1x00",
 };
 
 static void __init lart_init(void)
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index 3807c9135272..318b2b766a0b 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -55,7 +55,6 @@ static struct resource shannon_flash_resource = {
 static struct mcp_plat_data shannon_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1x00",
 };
 
 static void __init shannon_init(void)
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index d9b765c441f6..e17c04d6e324 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -14,7 +14,6 @@
 #include <linux/mtd/partitions.h>
 #include <linux/io.h>
 #include <linux/gpio.h>
-#include <linux/mfd/ucb1x00.h>
 
 #include <asm/irq.h>
 #include <mach/hardware.h>
@@ -188,15 +187,10 @@ static struct resource simpad_flash_resources [] = {
 	}
 };
 
-static struct ucb1x00_plat_data simpad_ucb1x00_data = {
-	.gpio_base	= SIMPAD_UCB1X00_GPIO_BASE,
-};
-
 static struct mcp_plat_data simpad_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1300",
-	.codec_pdata	= &simpad_ucb1x00_data,
+	.gpio_base	= SIMPAD_UCB1X00_GPIO_BASE,
 };
 
 
diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c
index 63be60bc3455..84815f9ef636 100644
--- a/drivers/mfd/mcp-core.c
+++ b/drivers/mfd/mcp-core.c
@@ -26,35 +26,9 @@
 #define to_mcp(d)		container_of(d, struct mcp, attached_device)
 #define to_mcp_driver(d)	container_of(d, struct mcp_driver, drv)
 
-static const struct mcp_device_id *mcp_match_id(const struct mcp_device_id *id,
-						const char *codec)
-{
-	while (id->name[0]) {
-		if (strcmp(codec, id->name) == 0)
-			return id;
-		id++;
-	}
-	return NULL;
-}
-
-const struct mcp_device_id *mcp_get_device_id(const struct mcp *mcp)
-{
-	const struct mcp_driver *driver =
-		to_mcp_driver(mcp->attached_device.driver);
-
-	return mcp_match_id(driver->id_table, mcp->codec);
-}
-EXPORT_SYMBOL(mcp_get_device_id);
-
 static int mcp_bus_match(struct device *dev, struct device_driver *drv)
 {
-	const struct mcp *mcp = to_mcp(dev);
-	const struct mcp_driver *driver = to_mcp_driver(drv);
-
-	if (driver->id_table)
-		return !!mcp_match_id(driver->id_table, mcp->codec);
-
-	return 0;
+	return 1;
 }
 
 static int mcp_bus_probe(struct device *dev)
@@ -100,18 +74,9 @@ static int mcp_bus_resume(struct device *dev)
 	return ret;
 }
 
-static int mcp_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-	struct mcp *mcp = to_mcp(dev);
-
-	add_uevent_var(env, "MODALIAS=%s%s", MCP_MODULE_PREFIX, mcp->codec);
-	return 0;
-}
-
 static struct bus_type mcp_bus_type = {
 	.name		= "mcp",
 	.match		= mcp_bus_match,
-	.uevent		= mcp_bus_uevent,
 	.probe		= mcp_bus_probe,
 	.remove		= mcp_bus_remove,
 	.suspend	= mcp_bus_suspend,
@@ -247,14 +212,9 @@ struct mcp *mcp_host_alloc(struct device *parent, size_t size)
 }
 EXPORT_SYMBOL(mcp_host_alloc);
 
-int mcp_host_register(struct mcp *mcp, void *pdata)
+int mcp_host_register(struct mcp *mcp)
 {
-	if (!mcp->codec)
-		return -EINVAL;
-
-	mcp->attached_device.platform_data = pdata;
 	dev_set_name(&mcp->attached_device, "mcp0");
-	request_module("%s%s", MCP_MODULE_PREFIX, mcp->codec);
 	return device_register(&mcp->attached_device);
 }
 EXPORT_SYMBOL(mcp_host_register);
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index da4e077a1bee..02c53a0766c4 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -146,9 +146,6 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	if (!data)
 		return -ENODEV;
 
-	if (!data->codec)
-		return -ENODEV;
-
 	if (!request_mem_region(0x80060000, 0x60, "sa11x0-mcp"))
 		return -EBUSY;
 
@@ -165,7 +162,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	mcp->dma_audio_wr	= DMA_Ser4MCP0Wr;
 	mcp->dma_telco_rd	= DMA_Ser4MCP1Rd;
 	mcp->dma_telco_wr	= DMA_Ser4MCP1Wr;
-	mcp->codec		= data->codec;
+	mcp->gpio_base		= data->gpio_base;
 
 	platform_set_drvdata(pdev, mcp);
 
@@ -198,7 +195,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	mcp->rw_timeout = (64 * 3 * 1000000 + mcp->sclk_rate - 1) /
 			  mcp->sclk_rate;
 
-	ret = mcp_host_register(mcp, data->codec_pdata);
+	ret = mcp_host_register(mcp);
 	if (ret == 0)
 		goto out;
 
diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c
index 91c4f25e0e55..b281217334eb 100644
--- a/drivers/mfd/ucb1x00-core.c
+++ b/drivers/mfd/ucb1x00-core.c
@@ -36,15 +36,6 @@ static DEFINE_MUTEX(ucb1x00_mutex);
 static LIST_HEAD(ucb1x00_drivers);
 static LIST_HEAD(ucb1x00_devices);
 
-static struct mcp_device_id ucb1x00_id[] = {
-	{ "ucb1x00", 0 },  /* auto-detection */
-	{ "ucb1200", UCB_ID_1200 },
-	{ "ucb1300", UCB_ID_1300 },
-	{ "tc35143", UCB_ID_TC35143 },
-	{ }
-};
-MODULE_DEVICE_TABLE(mcp, ucb1x00_id);
-
 /**
  *	ucb1x00_io_set_dir - set IO direction
  *	@ucb: UCB1x00 structure describing chip
@@ -536,33 +527,17 @@ static struct class ucb1x00_class = {
 
 static int ucb1x00_probe(struct mcp *mcp)
 {
-	const struct mcp_device_id *mid;
 	struct ucb1x00 *ucb;
 	struct ucb1x00_driver *drv;
-	struct ucb1x00_plat_data *pdata;
 	unsigned int id;
 	int ret = -ENODEV;
 	int temp;
 
 	mcp_enable(mcp);
 	id = mcp_reg_read(mcp, UCB_ID);
-	mid = mcp_get_device_id(mcp);
 
-	if (mid && mid->driver_data) {
-		if (id != mid->driver_data) {
-			printk(KERN_WARNING "%s wrong ID %04x found: %04x\n",
-				mid->name, (unsigned int) mid->driver_data, id);
-			goto err_disable;
-		}
-	} else {
-		mid = &ucb1x00_id[1];
-		while (mid->driver_data) {
-			if (id == mid->driver_data)
-				break;
-			mid++;
-		}
-		printk(KERN_WARNING "%s ID not found: %04x\n",
-			ucb1x00_id[0].name, id);
+	if (id != UCB_ID_1200 && id != UCB_ID_1300 && id != UCB_ID_TC35143) {
+		printk(KERN_WARNING "UCB1x00 ID not found: %04x\n", id);
 		goto err_disable;
 	}
 
@@ -571,28 +546,28 @@ static int ucb1x00_probe(struct mcp *mcp)
 	if (!ucb)
 		goto err_disable;
 
-	pdata = mcp->attached_device.platform_data;
+
 	ucb->dev.class = &ucb1x00_class;
 	ucb->dev.parent = &mcp->attached_device;
-	dev_set_name(&ucb->dev, mid->name);
+	dev_set_name(&ucb->dev, "ucb1x00");
 
 	spin_lock_init(&ucb->lock);
 	spin_lock_init(&ucb->io_lock);
 	sema_init(&ucb->adc_sem, 1);
 
-	ucb->id  = mid;
+	ucb->id  = id;
 	ucb->mcp = mcp;
 	ucb->irq = ucb1x00_detect_irq(ucb);
 	if (ucb->irq == NO_IRQ) {
-		printk(KERN_ERR "%s: IRQ probe failed\n", mid->name);
+		printk(KERN_ERR "UCB1x00: IRQ probe failed\n");
 		ret = -ENODEV;
 		goto err_free;
 	}
 
 	ucb->gpio.base = -1;
-	if (pdata && (pdata->gpio_base >= 0)) {
+	if (mcp->gpio_base != 0) {
 		ucb->gpio.label = dev_name(&ucb->dev);
-		ucb->gpio.base = pdata->gpio_base;
+		ucb->gpio.base = mcp->gpio_base;
 		ucb->gpio.ngpio = 10;
 		ucb->gpio.set = ucb1x00_gpio_set;
 		ucb->gpio.get = ucb1x00_gpio_get;
@@ -605,10 +580,10 @@ static int ucb1x00_probe(struct mcp *mcp)
 		dev_info(&ucb->dev, "gpio_base not set so no gpiolib support");
 
 	ret = request_irq(ucb->irq, ucb1x00_irq, IRQF_TRIGGER_RISING,
-			  mid->name, ucb);
+			  "UCB1x00", ucb);
 	if (ret) {
-		printk(KERN_ERR "%s: unable to grab irq%d: %d\n",
-			mid->name, ucb->irq, ret);
+		printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n",
+			ucb->irq, ret);
 		goto err_gpio;
 	}
 
@@ -730,7 +705,6 @@ static struct mcp_driver ucb1x00_driver = {
 	.remove		= ucb1x00_remove,
 	.suspend	= ucb1x00_suspend,
 	.resume		= ucb1x00_resume,
-	.id_table	= ucb1x00_id,
 };
 
 static int __init ucb1x00_init(void)
diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index 40ec3c118868..38ffbd50a0d2 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c
@@ -382,7 +382,7 @@ static int ucb1x00_ts_add(struct ucb1x00_dev *dev)
 	ts->adcsync = adcsync ? UCB_SYNC : UCB_NOSYNC;
 
 	idev->name       = "Touchscreen panel";
-	idev->id.product = ts->ucb->id->driver_data;
+	idev->id.product = ts->ucb->id;
 	idev->open       = ucb1x00_ts_open;
 	idev->close      = ucb1x00_ts_close;
 
diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h
index 1515e64e3663..ee496708e38b 100644
--- a/include/linux/mfd/mcp.h
+++ b/include/linux/mfd/mcp.h
@@ -10,7 +10,6 @@
 #ifndef MCP_H
 #define MCP_H
 
-#include <linux/mod_devicetable.h>
 #include <mach/dma.h>
 
 struct mcp_ops;
@@ -27,7 +26,7 @@ struct mcp {
 	dma_device_t	dma_telco_rd;
 	dma_device_t	dma_telco_wr;
 	struct device	attached_device;
-	const char	*codec;
+	int		gpio_base;
 };
 
 struct mcp_ops {
@@ -45,11 +44,10 @@ void mcp_reg_write(struct mcp *, unsigned int, unsigned int);
 unsigned int mcp_reg_read(struct mcp *, unsigned int);
 void mcp_enable(struct mcp *);
 void mcp_disable(struct mcp *);
-const struct mcp_device_id *mcp_get_device_id(const struct mcp *mcp);
 #define mcp_get_sclk_rate(mcp)	((mcp)->sclk_rate)
 
 struct mcp *mcp_host_alloc(struct device *, size_t);
-int mcp_host_register(struct mcp *, void *);
+int mcp_host_register(struct mcp *);
 void mcp_host_unregister(struct mcp *);
 
 struct mcp_driver {
@@ -58,7 +56,6 @@ struct mcp_driver {
 	void (*remove)(struct mcp *);
 	int (*suspend)(struct mcp *, pm_message_t);
 	int (*resume)(struct mcp *);
-	const struct mcp_device_id *id_table;
 };
 
 int mcp_driver_register(struct mcp_driver *);
diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h
index bc19e5fb7ea8..4321f044d1e4 100644
--- a/include/linux/mfd/ucb1x00.h
+++ b/include/linux/mfd/ucb1x00.h
@@ -104,9 +104,6 @@
 #define UCB_MODE_DYN_VFLAG_ENA	(1 << 12)
 #define UCB_MODE_AUD_OFF_CAN	(1 << 13)
 
-struct ucb1x00_plat_data {
-	int		gpio_base;
-};
 
 struct ucb1x00_irq {
 	void *devid;
@@ -119,7 +116,7 @@ struct ucb1x00 {
 	unsigned int		irq;
 	struct semaphore	adc_sem;
 	spinlock_t		io_lock;
-	const struct mcp_device_id *id;
+	u16			id;
 	u16			io_dir;
 	u16			io_out;
 	u16			adc_cr;
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index b29e7f6f8fa5..83ac0713ed0a 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -436,17 +436,6 @@ struct spi_device_id {
 			__attribute__((aligned(sizeof(kernel_ulong_t))));
 };
 
-/* mcp */
-
-#define MCP_NAME_SIZE	20
-#define MCP_MODULE_PREFIX "mcp:"
-
-struct mcp_device_id {
-	char name[MCP_NAME_SIZE];
-	kernel_ulong_t driver_data	/* Data private to the driver */
-			__attribute__((aligned(sizeof(kernel_ulong_t))));
-};
-
 /* dmi */
 enum dmi_field {
 	DMI_NONE,
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index c0e14b3f2306..e8c969577768 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -823,16 +823,6 @@ static int do_spi_entry(const char *filename, struct spi_device_id *id,
 }
 ADD_TO_DEVTABLE("spi", struct spi_device_id, do_spi_entry);
 
-/* Looks like: mcp:S */
-static int do_mcp_entry(const char *filename, struct mcp_device_id *id,
-			char *alias)
-{
-	sprintf(alias, MCP_MODULE_PREFIX "%s", id->name);
-
-	return 1;
-}
-ADD_TO_DEVTABLE("mcp", struct mcp_device_id, do_mcp_entry); 
-
 static const struct dmifield {
 	const char *prefix;
 	int field;
-- 
cgit v1.2.3


From 2a7f51a3e08cdaeea78d9e101a0079422a55bbc3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 21 Jan 2012 09:28:53 +0000
Subject: MFD: mcp-core: fix mcp_priv() to be more type safe

mcp_priv() does unexpected things when passed a void pointer.  Make it
a typed inline function, which ensures that it works correctly in
these cases.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/mfd/mcp.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h
index ee496708e38b..f88c1cc0cb0f 100644
--- a/include/linux/mfd/mcp.h
+++ b/include/linux/mfd/mcp.h
@@ -64,6 +64,9 @@ void mcp_driver_unregister(struct mcp_driver *);
 #define mcp_get_drvdata(mcp)	dev_get_drvdata(&(mcp)->attached_device)
 #define mcp_set_drvdata(mcp,d)	dev_set_drvdata(&(mcp)->attached_device, d)
 
-#define mcp_priv(mcp)		((void *)((mcp)+1))
+static inline void *mcp_priv(struct mcp *mcp)
+{
+	return mcp + 1;
+}
 
 #endif
-- 
cgit v1.2.3


From 93ece0c1a7ace88f10411dbb5643d2aa2fe00ebf Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.co.il>
Date: Thu, 19 Jan 2012 09:45:05 +0000
Subject: mlx4_en: eth statistics modification

In native mode display all available staticstics.
In SRIOV mode on VF display only SW counters statistics,
in SRIOV mode on hypervisor display SW counters and errors (got from FW)
statistics.

Signed-off-by: Eugenia Emantayev <eugenia@mellanox.co.il>
Reviewed-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 66 +++++++++++++++++++------
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c  |  2 +
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h    |  1 +
 drivers/net/ethernet/mellanox/mlx4/port.c       | 21 ++++++++
 include/linux/mlx4/device.h                     |  1 +
 5 files changed, 75 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 53c66869aecd..70346fd7f9c4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -183,10 +183,11 @@ static int mlx4_en_set_wol(struct net_device *netdev,
 static int mlx4_en_get_sset_count(struct net_device *dev, int sset)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
+	int bit_count = hweight64(priv->stats_bitmap);
 
 	switch (sset) {
 	case ETH_SS_STATS:
-		return NUM_ALL_STATS +
+		return (priv->stats_bitmap ? bit_count : NUM_ALL_STATS) +
 			(priv->tx_ring_num + priv->rx_ring_num) * 2;
 	case ETH_SS_TEST:
 		return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags
@@ -201,14 +202,34 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	int index = 0;
-	int i;
+	int i, j = 0;
 
 	spin_lock_bh(&priv->stats_lock);
 
-	for (i = 0; i < NUM_MAIN_STATS; i++)
-		data[index++] = ((unsigned long *) &priv->stats)[i];
-	for (i = 0; i < NUM_PORT_STATS; i++)
-		data[index++] = ((unsigned long *) &priv->port_stats)[i];
+	if (!(priv->stats_bitmap)) {
+		for (i = 0; i < NUM_MAIN_STATS; i++)
+			data[index++] =
+				((unsigned long *) &priv->stats)[i];
+		for (i = 0; i < NUM_PORT_STATS; i++)
+			data[index++] =
+				((unsigned long *) &priv->port_stats)[i];
+		for (i = 0; i < NUM_PKT_STATS; i++)
+			data[index++] =
+				((unsigned long *) &priv->pkstats)[i];
+	} else {
+		for (i = 0; i < NUM_MAIN_STATS; i++) {
+			if ((priv->stats_bitmap >> j) & 1)
+				data[index++] =
+				((unsigned long *) &priv->stats)[i];
+			j++;
+		}
+		for (i = 0; i < NUM_PORT_STATS; i++) {
+			if ((priv->stats_bitmap >> j) & 1)
+				data[index++] =
+				((unsigned long *) &priv->port_stats)[i];
+			j++;
+		}
+	}
 	for (i = 0; i < priv->tx_ring_num; i++) {
 		data[index++] = priv->tx_ring[i].packets;
 		data[index++] = priv->tx_ring[i].bytes;
@@ -217,8 +238,6 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
 		data[index++] = priv->rx_ring[i].packets;
 		data[index++] = priv->rx_ring[i].bytes;
 	}
-	for (i = 0; i < NUM_PKT_STATS; i++)
-		data[index++] = ((unsigned long *) &priv->pkstats)[i];
 	spin_unlock_bh(&priv->stats_lock);
 
 }
@@ -247,11 +266,29 @@ static void mlx4_en_get_strings(struct net_device *dev,
 
 	case ETH_SS_STATS:
 		/* Add main counters */
-		for (i = 0; i < NUM_MAIN_STATS; i++)
-			strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i]);
-		for (i = 0; i< NUM_PORT_STATS; i++)
-			strcpy(data + (index++) * ETH_GSTRING_LEN,
-			main_strings[i + NUM_MAIN_STATS]);
+		if (!priv->stats_bitmap) {
+			for (i = 0; i < NUM_MAIN_STATS; i++)
+				strcpy(data + (index++) * ETH_GSTRING_LEN,
+					main_strings[i]);
+			for (i = 0; i < NUM_PORT_STATS; i++)
+				strcpy(data + (index++) * ETH_GSTRING_LEN,
+					main_strings[i +
+					NUM_MAIN_STATS]);
+			for (i = 0; i < NUM_PKT_STATS; i++)
+				strcpy(data + (index++) * ETH_GSTRING_LEN,
+					main_strings[i +
+					NUM_MAIN_STATS +
+					NUM_PORT_STATS]);
+		} else
+			for (i = 0; i < NUM_MAIN_STATS + NUM_PORT_STATS; i++) {
+				if ((priv->stats_bitmap >> i) & 1) {
+					strcpy(data +
+					       (index++) * ETH_GSTRING_LEN,
+					       main_strings[i]);
+				}
+				if (!(priv->stats_bitmap >> i))
+					break;
+			}
 		for (i = 0; i < priv->tx_ring_num; i++) {
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
 				"tx%d_packets", i);
@@ -264,9 +301,6 @@ static void mlx4_en_get_strings(struct net_device *dev,
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
 				"rx%d_bytes", i);
 		}
-		for (i = 0; i< NUM_PKT_STATS; i++)
-			strcpy(data + (index++) * ETH_GSTRING_LEN,
-			main_strings[i + NUM_MAIN_STATS + NUM_PORT_STATS]);
 		break;
 	}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index be3f4156aaab..467ae5824875 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -702,6 +702,8 @@ int mlx4_en_start_port(struct net_device *dev)
 	/* Schedule multicast task to populate multicast list */
 	queue_work(mdev->workqueue, &priv->mcast_task);
 
+	mlx4_set_stats_bitmap(mdev->dev, &priv->stats_bitmap);
+
 	priv->port_up = true;
 	netif_tx_start_all_queues(dev);
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index f4d189a1290e..35f08840813c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -476,6 +476,7 @@ struct mlx4_en_priv {
 	struct mlx4_en_perf_stats pstats;
 	struct mlx4_en_pkt_stats pkstats;
 	struct mlx4_en_port_stats port_stats;
+	u64 stats_bitmap;
 	char *mc_addrs;
 	int mc_addrs_cnt;
 	struct mlx4_en_stat_out_mbox hw_stats;
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 1a551d69ddcb..f44ae555bf43 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -44,6 +44,11 @@
 #define MLX4_VLAN_VALID		(1u << 31)
 #define MLX4_VLAN_MASK		0xfff
 
+#define MLX4_STATS_TRAFFIC_COUNTERS_MASK	0xfULL
+#define MLX4_STATS_TRAFFIC_DROPS_MASK		0xc0ULL
+#define MLX4_STATS_ERROR_COUNTERS_MASK		0x1ffc30ULL
+#define MLX4_STATS_PORT_COUNTERS_MASK		0x1fe00000ULL
+
 void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table)
 {
 	int i;
@@ -903,3 +908,19 @@ int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
 	return mlx4_common_dump_eth_stats(dev, slave,
 					  vhcr->in_modifier, outbox);
 }
+
+void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap)
+{
+	if (!mlx4_is_mfunc(dev)) {
+		*stats_bitmap = 0;
+		return;
+	}
+
+	*stats_bitmap = (MLX4_STATS_TRAFFIC_COUNTERS_MASK |
+			 MLX4_STATS_TRAFFIC_DROPS_MASK |
+			 MLX4_STATS_PORT_COUNTERS_MASK);
+
+	if (mlx4_is_master(dev))
+		*stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK;
+}
+EXPORT_SYMBOL(mlx4_set_stats_bitmap);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 5c4fe8e5bfe5..aea61905499b 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -621,6 +621,7 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
 int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
 int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn);
 void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn);
+void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap);
 
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
-- 
cgit v1.2.3


From 974c12360dfe6ab01201fe9e708e7755c413f8b6 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 19 Jan 2012 14:42:21 +0000
Subject: tcp: detect loss above high_seq in recovery

Correctly implement a loss detection heuristic: New sequences (above
high_seq) sent during the fast recovery are deemed lost when higher
sequences are SACKed.

Current code does not catch these losses, because tcp_mark_head_lost()
does not check packets beyond high_seq. The fix is straight-forward by
checking packets until the highest sacked packet. In addition, all the
FLAG_DATA_LOST logic are in-effective and redundant and can be removed.

Update the loss heuristic comments. The algorithm above is documented
as heuristic B, but it is redundant too because heuristic A already
covers B.

Note that this change only marks some forward-retransmitted packets LOST.
It does NOT forbid TCP performing further CWR on new losses. A potential
follow-up patch under preparation is to perform another CWR on "new"
losses such as
1) sequence above high_seq is lost (by resetting high_seq to snd_nxt)
2) retransmission is lost.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h |  1 -
 net/ipv4/proc.c      |  1 -
 net/ipv4/tcp_input.c | 41 +++++++++++++++--------------------------
 3 files changed, 15 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index e16557a357e5..c1241c428179 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -192,7 +192,6 @@ enum
 	LINUX_MIB_TCPPARTIALUNDO,		/* TCPPartialUndo */
 	LINUX_MIB_TCPDSACKUNDO,			/* TCPDSACKUndo */
 	LINUX_MIB_TCPLOSSUNDO,			/* TCPLossUndo */
-	LINUX_MIB_TCPLOSS,			/* TCPLoss */
 	LINUX_MIB_TCPLOSTRETRANSMIT,		/* TCPLostRetransmit */
 	LINUX_MIB_TCPRENOFAILURES,		/* TCPRenoFailures */
 	LINUX_MIB_TCPSACKFAILURES,		/* TCPSackFailures */
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3569d8ecaeac..6afc807ee2ad 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -216,7 +216,6 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO),
 	SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO),
 	SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO),
-	SNMP_MIB_ITEM("TCPLoss", LINUX_MIB_TCPLOSS),
 	SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT),
 	SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES),
 	SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2877c3e09587..976034f82320 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
 #define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
 #define FLAG_ECE		0x40 /* ECE in this ACK				*/
-#define FLAG_DATA_LOST		0x80 /* SACK detected data lossage.		*/
 #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
 #define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
@@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
  * These 6 states form finite state machine, controlled by the following events:
  * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
  * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
- * 3. Loss detection event of one of three flavors:
+ * 3. Loss detection event of two flavors:
  *	A. Scoreboard estimator decided the packet is lost.
  *	   A'. Reno "three dupacks" marks head of queue lost.
- *	   A''. Its FACK modfication, head until snd.fack is lost.
- *	B. SACK arrives sacking data transmitted after never retransmitted
- *	   hole was sent out.
- *	C. SACK arrives sacking SND.NXT at the moment, when the
+ *	   A''. Its FACK modification, head until snd.fack is lost.
+ *	B. SACK arrives sacking SND.NXT at the moment, when the
  *	   segment was retransmitted.
  * 4. D-SACK added new rule: D-SACK changes any tag to S.
  *
@@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
 }
 
 /* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
- * Event "C". Later note: FACK people cheated me again 8), we have to account
+ * Event "B". Later note: FACK people cheated me again 8), we have to account
  * for reordering! Ugly, but should help.
  *
  * Search retransmitted skbs from write_queue that were sent when snd_nxt was
@@ -1844,10 +1841,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 		if (found_dup_sack && ((i + 1) == first_sack_index))
 			next_dup = &sp[i + 1];
 
-		/* Event "B" in the comment above. */
-		if (after(end_seq, tp->high_seq))
-			state.flag |= FLAG_DATA_LOST;
-
 		/* Skip too early cached blocks */
 		while (tcp_sack_cache_ok(tp, cache) &&
 		       !before(start_seq, cache->end_seq))
@@ -2515,8 +2508,11 @@ static void tcp_timeout_skbs(struct sock *sk)
 	tcp_verify_left_out(tp);
 }
 
-/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
- * is against sacked "cnt", otherwise it's against facked "cnt"
+/* Detect loss in event "A" above by marking head of queue up as lost.
+ * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+ * are considered lost. For RFC3517 SACK, a segment is considered lost if it
+ * has at least tp->reordering SACKed seqments above it; "packets" refers to
+ * the maximum SACKed segments to pass before reaching this limit.
  */
 static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
@@ -2525,6 +2521,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 	int cnt, oldcnt;
 	int err;
 	unsigned int mss;
+	/* Use SACK to deduce losses of new sequences sent during recovery */
+	const u32 loss_high = tcp_is_sack(tp) ?  tp->snd_nxt : tp->high_seq;
 
 	WARN_ON(packets > tp->packets_out);
 	if (tp->lost_skb_hint) {
@@ -2546,7 +2544,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 		tp->lost_skb_hint = skb;
 		tp->lost_cnt_hint = cnt;
 
-		if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
 			break;
 
 		oldcnt = cnt;
@@ -3033,19 +3031,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 	if (tcp_check_sack_reneging(sk, flag))
 		return;
 
-	/* C. Process data loss notification, provided it is valid. */
-	if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
-	    before(tp->snd_una, tp->high_seq) &&
-	    icsk->icsk_ca_state != TCP_CA_Open &&
-	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
-	}
-
-	/* D. Check consistency of the current state. */
+	/* C. Check consistency of the current state. */
 	tcp_verify_left_out(tp);
 
-	/* E. Check state exit conditions. State can be terminated
+	/* D. Check state exit conditions. State can be terminated
 	 *    when high_seq is ACKed. */
 	if (icsk->icsk_ca_state == TCP_CA_Open) {
 		WARN_ON(tp->retrans_out != 0);
@@ -3077,7 +3066,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		}
 	}
 
-	/* F. Process state. */
+	/* E. Process state. */
 	switch (icsk->icsk_ca_state) {
 	case TCP_CA_Recovery:
 		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
-- 
cgit v1.2.3


From 8cfd14ad1eb52e44cb1fe7b47a68126e45e04026 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Fri, 20 Jan 2012 04:57:15 +0000
Subject: cgroup: make sure memcg margin is 0 when over limit

For the memcg sock code, we'll need to register allocations
that are temporarily over limit. Let's make sure that margin
is 0 in this case.

I am keeping this as a separate patch, so that if any weirdness
interaction appears in the future, we can now exactly what caused
it.

Suggested by Johannes Weiner

Signed-off-by: Glauber Costa <glommer@parallels.com>
CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
CC: Johannes Weiner <hannes@cmpxchg.org>
CC: Michal Hocko <mhocko@suse.cz>
CC: Tejun Heo <tj@kernel.org>
CC: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/res_counter.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index c9d625ca659e..d06d014afda6 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -142,7 +142,10 @@ static inline unsigned long long res_counter_margin(struct res_counter *cnt)
 	unsigned long flags;
 
 	spin_lock_irqsave(&cnt->lock, flags);
-	margin = cnt->limit - cnt->usage;
+	if (cnt->limit > cnt->usage)
+		margin = cnt->limit - cnt->usage;
+	else
+		margin = 0;
 	spin_unlock_irqrestore(&cnt->lock, flags);
 	return margin;
 }
-- 
cgit v1.2.3


From 0e90b31f4ba77027a7c21cbfc66404df0851ca21 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Fri, 20 Jan 2012 04:57:16 +0000
Subject: net: introduce res_counter_charge_nofail() for socket allocations

There is a case in __sk_mem_schedule(), where an allocation
is beyond the maximum, but yet we are allowed to proceed.
It happens under the following condition:

	sk->sk_wmem_queued + size >= sk->sk_sndbuf

The network code won't revert the allocation in this case,
meaning that at some point later it'll try to do it. Since
this is never communicated to the underlying res_counter
code, there is an inbalance in res_counter uncharge operation.

I see two ways of fixing this:

1) storing the information about those allocations somewhere
   in memcg, and then deducting from that first, before
   we start draining the res_counter,
2) providing a slightly different allocation function for
   the res_counter, that matches the original behavior of
   the network code more closely.

I decided to go for #2 here, believing it to be more elegant,
since #1 would require us to do basically that, but in a more
obscure way.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
CC: Tejun Heo <tj@kernel.org>
CC: Li Zefan <lizf@cn.fujitsu.com>
CC: Laurent Chavey <chavey@google.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/res_counter.h |  6 ++++++
 include/net/sock.h          | 10 ++++------
 kernel/res_counter.c        | 25 +++++++++++++++++++++++++
 net/core/sock.c             |  4 ++--
 4 files changed, 37 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index d06d014afda6..da81af086eaf 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -109,12 +109,18 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent);
  *
  * returns 0 on success and <0 if the counter->usage will exceed the
  * counter->limit _locked call expects the counter->lock to be taken
+ *
+ * charge_nofail works the same, except that it charges the resource
+ * counter unconditionally, and returns < 0 if the after the current
+ * charge we are over limit.
  */
 
 int __must_check res_counter_charge_locked(struct res_counter *counter,
 		unsigned long val);
 int __must_check res_counter_charge(struct res_counter *counter,
 		unsigned long val, struct res_counter **limit_fail_at);
+int __must_check res_counter_charge_nofail(struct res_counter *counter,
+		unsigned long val, struct res_counter **limit_fail_at);
 
 /*
  * uncharge - tell that some portion of the resource is released
diff --git a/include/net/sock.h b/include/net/sock.h
index 0e7a9b05f92b..4c69ac165e6b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1008,9 +1008,8 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot,
 	struct res_counter *fail;
 	int ret;
 
-	ret = res_counter_charge(prot->memory_allocated,
-				 amt << PAGE_SHIFT, &fail);
-
+	ret = res_counter_charge_nofail(prot->memory_allocated,
+					amt << PAGE_SHIFT, &fail);
 	if (ret < 0)
 		*parent_status = OVER_LIMIT;
 }
@@ -1054,12 +1053,11 @@ sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
 }
 
 static inline void
-sk_memory_allocated_sub(struct sock *sk, int amt, int parent_status)
+sk_memory_allocated_sub(struct sock *sk, int amt)
 {
 	struct proto *prot = sk->sk_prot;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
-	    parent_status != OVER_LIMIT) /* Otherwise was uncharged already */
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
 		memcg_memory_allocated_sub(sk->sk_cgrp, amt);
 
 	atomic_long_sub(amt, prot->memory_allocated);
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 6d269cce7aa1..d508363858b3 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -66,6 +66,31 @@ done:
 	return ret;
 }
 
+int res_counter_charge_nofail(struct res_counter *counter, unsigned long val,
+			      struct res_counter **limit_fail_at)
+{
+	int ret, r;
+	unsigned long flags;
+	struct res_counter *c;
+
+	r = ret = 0;
+	*limit_fail_at = NULL;
+	local_irq_save(flags);
+	for (c = counter; c != NULL; c = c->parent) {
+		spin_lock(&c->lock);
+		r = res_counter_charge_locked(c, val);
+		if (r)
+			c->usage += val;
+		spin_unlock(&c->lock);
+		if (r < 0 && ret == 0) {
+			*limit_fail_at = c;
+			ret = r;
+		}
+	}
+	local_irq_restore(flags);
+
+	return ret;
+}
 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 {
 	if (WARN_ON(counter->usage < val))
diff --git a/net/core/sock.c b/net/core/sock.c
index 5c5af9988f94..3e81fd2e3c75 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1827,7 +1827,7 @@ suppress_allocation:
 	/* Alas. Undo changes. */
 	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
 
-	sk_memory_allocated_sub(sk, amt, parent_status);
+	sk_memory_allocated_sub(sk, amt);
 
 	return 0;
 }
@@ -1840,7 +1840,7 @@ EXPORT_SYMBOL(__sk_mem_schedule);
 void __sk_mem_reclaim(struct sock *sk)
 {
 	sk_memory_allocated_sub(sk,
-				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 0);
+				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
 	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
 
 	if (sk_under_memory_pressure(sk) &&
-- 
cgit v1.2.3


From e9c688a3272fd4b659228f3880de8109a94540e2 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Sun, 22 Jan 2012 14:31:15 -0700
Subject: driver core: remove drivers/base/sys.c and include/linux/sysdev.h

Now that all users of 'struct sysdev' are removed from the kernel, we
can safely remove the .h and .c files for this code, to ensure that no
one accidentally starts to use it again.

Many thanks for Kay who did all the hard work here on making this
happen.

Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/Makefile  |   2 +-
 drivers/base/sys.c     | 383 -------------------------------------------------
 include/linux/sysdev.h | 164 ---------------------
 3 files changed, 1 insertion(+), 548 deletions(-)
 delete mode 100644 drivers/base/sys.c
 delete mode 100644 include/linux/sysdev.h

(limited to 'include/linux')

diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 2c8272dd93c4..610f9997a403 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -1,6 +1,6 @@
 # Makefile for the Linux device tree
 
-obj-y			:= core.o sys.o bus.o dd.o syscore.o \
+obj-y			:= core.o bus.o dd.o syscore.o \
 			   driver.o class.o platform.o \
 			   cpu.o firmware.o init.o map.o devres.o \
 			   attribute_container.o transport_class.o \
diff --git a/drivers/base/sys.c b/drivers/base/sys.c
deleted file mode 100644
index 409f5ce78829..000000000000
--- a/drivers/base/sys.c
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * sys.c - pseudo-bus for system 'devices' (cpus, PICs, timers, etc)
- *
- * Copyright (c) 2002-3 Patrick Mochel
- *               2002-3 Open Source Development Lab
- *
- * This file is released under the GPLv2
- *
- * This exports a 'system' bus type.
- * By default, a 'sys' bus gets added to the root of the system. There will
- * always be core system devices. Devices can use sysdev_register() to
- * add themselves as children of the system bus.
- */
-
-#include <linux/sysdev.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/pm.h>
-#include <linux/device.h>
-#include <linux/mutex.h>
-#include <linux/interrupt.h>
-
-#include "base.h"
-
-#define to_sysdev(k) container_of(k, struct sys_device, kobj)
-#define to_sysdev_attr(a) container_of(a, struct sysdev_attribute, attr)
-
-
-static ssize_t
-sysdev_show(struct kobject *kobj, struct attribute *attr, char *buffer)
-{
-	struct sys_device *sysdev = to_sysdev(kobj);
-	struct sysdev_attribute *sysdev_attr = to_sysdev_attr(attr);
-
-	if (sysdev_attr->show)
-		return sysdev_attr->show(sysdev, sysdev_attr, buffer);
-	return -EIO;
-}
-
-
-static ssize_t
-sysdev_store(struct kobject *kobj, struct attribute *attr,
-	     const char *buffer, size_t count)
-{
-	struct sys_device *sysdev = to_sysdev(kobj);
-	struct sysdev_attribute *sysdev_attr = to_sysdev_attr(attr);
-
-	if (sysdev_attr->store)
-		return sysdev_attr->store(sysdev, sysdev_attr, buffer, count);
-	return -EIO;
-}
-
-static const struct sysfs_ops sysfs_ops = {
-	.show	= sysdev_show,
-	.store	= sysdev_store,
-};
-
-static struct kobj_type ktype_sysdev = {
-	.sysfs_ops	= &sysfs_ops,
-};
-
-
-int sysdev_create_file(struct sys_device *s, struct sysdev_attribute *a)
-{
-	return sysfs_create_file(&s->kobj, &a->attr);
-}
-
-
-void sysdev_remove_file(struct sys_device *s, struct sysdev_attribute *a)
-{
-	sysfs_remove_file(&s->kobj, &a->attr);
-}
-
-EXPORT_SYMBOL_GPL(sysdev_create_file);
-EXPORT_SYMBOL_GPL(sysdev_remove_file);
-
-#define to_sysdev_class(k) container_of(k, struct sysdev_class, kset.kobj)
-#define to_sysdev_class_attr(a) container_of(a, \
-	struct sysdev_class_attribute, attr)
-
-static ssize_t sysdev_class_show(struct kobject *kobj, struct attribute *attr,
-				 char *buffer)
-{
-	struct sysdev_class *class = to_sysdev_class(kobj);
-	struct sysdev_class_attribute *class_attr = to_sysdev_class_attr(attr);
-
-	if (class_attr->show)
-		return class_attr->show(class, class_attr, buffer);
-	return -EIO;
-}
-
-static ssize_t sysdev_class_store(struct kobject *kobj, struct attribute *attr,
-				  const char *buffer, size_t count)
-{
-	struct sysdev_class *class = to_sysdev_class(kobj);
-	struct sysdev_class_attribute *class_attr = to_sysdev_class_attr(attr);
-
-	if (class_attr->store)
-		return class_attr->store(class, class_attr, buffer, count);
-	return -EIO;
-}
-
-static const struct sysfs_ops sysfs_class_ops = {
-	.show	= sysdev_class_show,
-	.store	= sysdev_class_store,
-};
-
-static struct kobj_type ktype_sysdev_class = {
-	.sysfs_ops	= &sysfs_class_ops,
-};
-
-int sysdev_class_create_file(struct sysdev_class *c,
-			     struct sysdev_class_attribute *a)
-{
-	return sysfs_create_file(&c->kset.kobj, &a->attr);
-}
-EXPORT_SYMBOL_GPL(sysdev_class_create_file);
-
-void sysdev_class_remove_file(struct sysdev_class *c,
-			      struct sysdev_class_attribute *a)
-{
-	sysfs_remove_file(&c->kset.kobj, &a->attr);
-}
-EXPORT_SYMBOL_GPL(sysdev_class_remove_file);
-
-extern struct kset *system_kset;
-
-int sysdev_class_register(struct sysdev_class *cls)
-{
-	int retval;
-
-	pr_debug("Registering sysdev class '%s'\n", cls->name);
-
-	INIT_LIST_HEAD(&cls->drivers);
-	memset(&cls->kset.kobj, 0x00, sizeof(struct kobject));
-	cls->kset.kobj.parent = &system_kset->kobj;
-	cls->kset.kobj.ktype = &ktype_sysdev_class;
-	cls->kset.kobj.kset = system_kset;
-
-	retval = kobject_set_name(&cls->kset.kobj, "%s", cls->name);
-	if (retval)
-		return retval;
-
-	retval = kset_register(&cls->kset);
-	if (!retval && cls->attrs)
-		retval = sysfs_create_files(&cls->kset.kobj,
-					    (const struct attribute **)cls->attrs);
-	return retval;
-}
-
-void sysdev_class_unregister(struct sysdev_class *cls)
-{
-	pr_debug("Unregistering sysdev class '%s'\n",
-		 kobject_name(&cls->kset.kobj));
-	if (cls->attrs)
-		sysfs_remove_files(&cls->kset.kobj,
-				   (const struct attribute **)cls->attrs);
-	kset_unregister(&cls->kset);
-}
-
-EXPORT_SYMBOL_GPL(sysdev_class_register);
-EXPORT_SYMBOL_GPL(sysdev_class_unregister);
-
-static DEFINE_MUTEX(sysdev_drivers_lock);
-
-/*
- * @dev != NULL means that we're unwinding because some drv->add()
- * failed for some reason. You need to grab sysdev_drivers_lock before
- * calling this.
- */
-static void __sysdev_driver_remove(struct sysdev_class *cls,
-				   struct sysdev_driver *drv,
-				   struct sys_device *from_dev)
-{
-	struct sys_device *dev = from_dev;
-
-	list_del_init(&drv->entry);
-	if (!cls)
-		return;
-
-	if (!drv->remove)
-		goto kset_put;
-
-	if (dev)
-		list_for_each_entry_continue_reverse(dev, &cls->kset.list,
-						     kobj.entry)
-			drv->remove(dev);
-	else
-		list_for_each_entry(dev, &cls->kset.list, kobj.entry)
-			drv->remove(dev);
-
-kset_put:
-	kset_put(&cls->kset);
-}
-
-/**
- *	sysdev_driver_register - Register auxiliary driver
- *	@cls:	Device class driver belongs to.
- *	@drv:	Driver.
- *
- *	@drv is inserted into @cls->drivers to be
- *	called on each operation on devices of that class. The refcount
- *	of @cls is incremented.
- */
-int sysdev_driver_register(struct sysdev_class *cls, struct sysdev_driver *drv)
-{
-	struct sys_device *dev = NULL;
-	int err = 0;
-
-	if (!cls) {
-		WARN(1, KERN_WARNING "sysdev: invalid class passed to %s!\n",
-			__func__);
-		return -EINVAL;
-	}
-
-	/* Check whether this driver has already been added to a class. */
-	if (drv->entry.next && !list_empty(&drv->entry))
-		WARN(1, KERN_WARNING "sysdev: class %s: driver (%p) has already"
-			" been registered to a class, something is wrong, but "
-			"will forge on!\n", cls->name, drv);
-
-	mutex_lock(&sysdev_drivers_lock);
-	if (cls && kset_get(&cls->kset)) {
-		list_add_tail(&drv->entry, &cls->drivers);
-
-		/* If devices of this class already exist, tell the driver */
-		if (drv->add) {
-			list_for_each_entry(dev, &cls->kset.list, kobj.entry) {
-				err = drv->add(dev);
-				if (err)
-					goto unwind;
-			}
-		}
-	} else {
-		err = -EINVAL;
-		WARN(1, KERN_ERR "%s: invalid device class\n", __func__);
-	}
-
-	goto unlock;
-
-unwind:
-	__sysdev_driver_remove(cls, drv, dev);
-
-unlock:
-	mutex_unlock(&sysdev_drivers_lock);
-	return err;
-}
-
-/**
- *	sysdev_driver_unregister - Remove an auxiliary driver.
- *	@cls:	Class driver belongs to.
- *	@drv:	Driver.
- */
-void sysdev_driver_unregister(struct sysdev_class *cls,
-			      struct sysdev_driver *drv)
-{
-	mutex_lock(&sysdev_drivers_lock);
-	__sysdev_driver_remove(cls, drv, NULL);
-	mutex_unlock(&sysdev_drivers_lock);
-}
-EXPORT_SYMBOL_GPL(sysdev_driver_register);
-EXPORT_SYMBOL_GPL(sysdev_driver_unregister);
-
-/**
- *	sysdev_register - add a system device to the tree
- *	@sysdev:	device in question
- *
- */
-int sysdev_register(struct sys_device *sysdev)
-{
-	int error;
-	struct sysdev_class *cls = sysdev->cls;
-
-	if (!cls)
-		return -EINVAL;
-
-	pr_debug("Registering sys device of class '%s'\n",
-		 kobject_name(&cls->kset.kobj));
-
-	/* initialize the kobject to 0, in case it had previously been used */
-	memset(&sysdev->kobj, 0x00, sizeof(struct kobject));
-
-	/* Make sure the kset is set */
-	sysdev->kobj.kset = &cls->kset;
-
-	/* Register the object */
-	error = kobject_init_and_add(&sysdev->kobj, &ktype_sysdev, NULL,
-				     "%s%d", kobject_name(&cls->kset.kobj),
-				     sysdev->id);
-
-	if (!error) {
-		struct sysdev_driver *drv;
-
-		pr_debug("Registering sys device '%s'\n",
-			 kobject_name(&sysdev->kobj));
-
-		mutex_lock(&sysdev_drivers_lock);
-		/* Generic notification is implicit, because it's that
-		 * code that should have called us.
-		 */
-
-		/* Notify class auxiliary drivers */
-		list_for_each_entry(drv, &cls->drivers, entry) {
-			if (drv->add)
-				drv->add(sysdev);
-		}
-		mutex_unlock(&sysdev_drivers_lock);
-		kobject_uevent(&sysdev->kobj, KOBJ_ADD);
-	}
-
-	return error;
-}
-
-void sysdev_unregister(struct sys_device *sysdev)
-{
-	struct sysdev_driver *drv;
-
-	mutex_lock(&sysdev_drivers_lock);
-	list_for_each_entry(drv, &sysdev->cls->drivers, entry) {
-		if (drv->remove)
-			drv->remove(sysdev);
-	}
-	mutex_unlock(&sysdev_drivers_lock);
-
-	kobject_put(&sysdev->kobj);
-}
-
-EXPORT_SYMBOL_GPL(sysdev_register);
-EXPORT_SYMBOL_GPL(sysdev_unregister);
-
-#define to_ext_attr(x) container_of(x, struct sysdev_ext_attribute, attr)
-
-ssize_t sysdev_store_ulong(struct sys_device *sysdev,
-			   struct sysdev_attribute *attr,
-			   const char *buf, size_t size)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	char *end;
-	unsigned long new = simple_strtoul(buf, &end, 0);
-	if (end == buf)
-		return -EINVAL;
-	*(unsigned long *)(ea->var) = new;
-	/* Always return full write size even if we didn't consume all */
-	return size;
-}
-EXPORT_SYMBOL_GPL(sysdev_store_ulong);
-
-ssize_t sysdev_show_ulong(struct sys_device *sysdev,
-			  struct sysdev_attribute *attr,
-			  char *buf)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	return snprintf(buf, PAGE_SIZE, "%lx\n", *(unsigned long *)(ea->var));
-}
-EXPORT_SYMBOL_GPL(sysdev_show_ulong);
-
-ssize_t sysdev_store_int(struct sys_device *sysdev,
-			   struct sysdev_attribute *attr,
-			   const char *buf, size_t size)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	char *end;
-	long new = simple_strtol(buf, &end, 0);
-	if (end == buf || new > INT_MAX || new < INT_MIN)
-		return -EINVAL;
-	*(int *)(ea->var) = new;
-	/* Always return full write size even if we didn't consume all */
-	return size;
-}
-EXPORT_SYMBOL_GPL(sysdev_store_int);
-
-ssize_t sysdev_show_int(struct sys_device *sysdev,
-			  struct sysdev_attribute *attr,
-			  char *buf)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	return snprintf(buf, PAGE_SIZE, "%d\n", *(int *)(ea->var));
-}
-EXPORT_SYMBOL_GPL(sysdev_show_int);
-
diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h
deleted file mode 100644
index 20f63d3e6144..000000000000
--- a/include/linux/sysdev.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/**
- * System devices follow a slightly different driver model. 
- * They don't need to do dynammic driver binding, can't be probed, 
- * and don't reside on any type of peripheral bus. 
- * So, we represent and treat them a little differently.
- * 
- * We still have a notion of a driver for a system device, because we still
- * want to perform basic operations on these devices. 
- *
- * We also support auxiliary drivers binding to devices of a certain class.
- * 
- * This allows configurable drivers to register themselves for devices of
- * a certain type. And, it allows class definitions to reside in generic
- * code while arch-specific code can register specific drivers.
- *
- * Auxiliary drivers registered with a NULL cls are registered as drivers
- * for all system devices, and get notification calls for each device. 
- */
-
-
-#ifndef _SYSDEV_H_
-#define _SYSDEV_H_
-
-#include <linux/kobject.h>
-#include <linux/pm.h>
-
-
-struct sys_device;
-struct sysdev_class_attribute;
-
-struct sysdev_class {
-	const char *name;
-	struct list_head	drivers;
-	struct sysdev_class_attribute **attrs;
-	struct kset		kset;
-};
-
-struct sysdev_class_attribute {
-	struct attribute attr;
-	ssize_t (*show)(struct sysdev_class *, struct sysdev_class_attribute *,
-			char *);
-	ssize_t (*store)(struct sysdev_class *, struct sysdev_class_attribute *,
-			 const char *, size_t);
-};
-
-#define _SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) 		\
-{					 			\
-	.attr = {.name = __stringify(_name), .mode = _mode },	\
-	.show	= _show,					\
-	.store	= _store,					\
-}
-
-#define SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) 		\
-	struct sysdev_class_attribute attr_##_name = 		\
-		_SYSDEV_CLASS_ATTR(_name,_mode,_show,_store)
-
-
-extern int sysdev_class_register(struct sysdev_class *);
-extern void sysdev_class_unregister(struct sysdev_class *);
-
-extern int sysdev_class_create_file(struct sysdev_class *,
-	struct sysdev_class_attribute *);
-extern void sysdev_class_remove_file(struct sysdev_class *,
-	struct sysdev_class_attribute *);
-/**
- * Auxiliary system device drivers.
- */
-
-struct sysdev_driver {
-	struct list_head	entry;
-	int	(*add)(struct sys_device *);
-	int	(*remove)(struct sys_device *);
-};
-
-
-extern int sysdev_driver_register(struct sysdev_class *, struct sysdev_driver *);
-extern void sysdev_driver_unregister(struct sysdev_class *, struct sysdev_driver *);
-
-
-/**
- * sys_devices can be simplified a lot from regular devices, because they're
- * simply not as versatile. 
- */
-
-struct sys_device {
-	u32		id;
-	struct sysdev_class	* cls;
-	struct kobject		kobj;
-};
-
-extern int sysdev_register(struct sys_device *);
-extern void sysdev_unregister(struct sys_device *);
-
-
-struct sysdev_attribute { 
-	struct attribute	attr;
-	ssize_t (*show)(struct sys_device *, struct sysdev_attribute *, char *);
-	ssize_t (*store)(struct sys_device *, struct sysdev_attribute *,
-			 const char *, size_t);
-};
-
-
-#define _SYSDEV_ATTR(_name, _mode, _show, _store)		\
-{								\
-	.attr = { .name = __stringify(_name), .mode = _mode },	\
-	.show	= _show,					\
-	.store	= _store,					\
-}
-
-#define SYSDEV_ATTR(_name, _mode, _show, _store)		\
-	struct sysdev_attribute attr_##_name =			\
-		_SYSDEV_ATTR(_name, _mode, _show, _store);
-
-extern int sysdev_create_file(struct sys_device *, struct sysdev_attribute *);
-extern void sysdev_remove_file(struct sys_device *, struct sysdev_attribute *);
-
-/* Create/remove NULL terminated attribute list */
-static inline int
-sysdev_create_files(struct sys_device *d, struct sysdev_attribute **a)
-{
-	return sysfs_create_files(&d->kobj, (const struct attribute **)a);
-}
-
-static inline void
-sysdev_remove_files(struct sys_device *d, struct sysdev_attribute **a)
-{
-	return sysfs_remove_files(&d->kobj, (const struct attribute **)a);
-}
-
-struct sysdev_ext_attribute {
-	struct sysdev_attribute attr;
-	void *var;
-};
-
-/*
- * Support for simple variable sysdev attributes.
- * The pointer to the variable is stored in a sysdev_ext_attribute
- */
-
-/* Add more types as needed */
-
-extern ssize_t sysdev_show_ulong(struct sys_device *, struct sysdev_attribute *,
-				char *);
-extern ssize_t sysdev_store_ulong(struct sys_device *,
-			struct sysdev_attribute *, const char *, size_t);
-extern ssize_t sysdev_show_int(struct sys_device *, struct sysdev_attribute *,
-				char *);
-extern ssize_t sysdev_store_int(struct sys_device *,
-			struct sysdev_attribute *, const char *, size_t);
-
-#define _SYSDEV_ULONG_ATTR(_name, _mode, _var)				\
-	{ _SYSDEV_ATTR(_name, _mode, sysdev_show_ulong, sysdev_store_ulong), \
-	  &(_var) }
-#define SYSDEV_ULONG_ATTR(_name, _mode, _var)			\
-	struct sysdev_ext_attribute attr_##_name = 		\
-		_SYSDEV_ULONG_ATTR(_name, _mode, _var);
-#define _SYSDEV_INT_ATTR(_name, _mode, _var)				\
-	{ _SYSDEV_ATTR(_name, _mode, sysdev_show_int, sysdev_store_int), \
-	  &(_var) }
-#define SYSDEV_INT_ATTR(_name, _mode, _var)			\
-	struct sysdev_ext_attribute attr_##_name = 		\
-		_SYSDEV_INT_ATTR(_name, _mode, _var);
-
-#endif /* _SYSDEV_H_ */
-- 
cgit v1.2.3


From 2d58d7ea9164da59d0ea82fdf80e3ababe52d58c Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 4 Nov 2011 10:31:04 +0100
Subject: thermal: Rename generate_netlink_event

It doesn't seem right for the thermal subsystem to export a symbol
named generate_netlink_event. This function is thermal-specific and
its name should reflect that fact. Rename it to
thermal_generate_netlink_event.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: R.Durgadoss <durgadoss.r@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 Documentation/thermal/sysfs-api.txt | 2 +-
 drivers/thermal/thermal_sys.c       | 4 ++--
 include/linux/thermal.h             | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index b61e46f449aa..1733ab947a95 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -284,7 +284,7 @@ method, the sys I/F structure will be built like this:
 The framework includes a simple notification mechanism, in the form of a
 netlink event. Netlink socket initialization is done during the _init_
 of the framework. Drivers which intend to use the notification mechanism
-just need to call generate_netlink_event() with two arguments viz
+just need to call thermal_generate_netlink_event() with two arguments viz
 (originator, event). Typically the originator will be an integer assigned
 to a thermal_zone_device when it registers itself with the framework. The
 event will be one of:{THERMAL_AUX0, THERMAL_AUX1, THERMAL_CRITICAL,
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index dd9a5743fa99..220ce7e31cf5 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -1304,7 +1304,7 @@ static struct genl_multicast_group thermal_event_mcgrp = {
 	.name = THERMAL_GENL_MCAST_GROUP_NAME,
 };
 
-int generate_netlink_event(u32 orig, enum events event)
+int thermal_generate_netlink_event(u32 orig, enum events event)
 {
 	struct sk_buff *skb;
 	struct nlattr *attr;
@@ -1363,7 +1363,7 @@ int generate_netlink_event(u32 orig, enum events event)
 
 	return result;
 }
-EXPORT_SYMBOL(generate_netlink_event);
+EXPORT_SYMBOL(thermal_generate_netlink_event);
 
 static int genetlink_init(void)
 {
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 47b4a27e6e97..796f1ff0388c 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -152,9 +152,9 @@ struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
 void thermal_cooling_device_unregister(struct thermal_cooling_device *);
 
 #ifdef CONFIG_NET
-extern int generate_netlink_event(u32 orig, enum events event);
+extern int thermal_generate_netlink_event(u32 orig, enum events event);
 #else
-static inline int generate_netlink_event(u32 orig, enum events event)
+static inline int thermal_generate_netlink_event(u32 orig, enum events event)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 6536e3123e5d3371a6f52e32a3d0694bcc987702 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 20 Jan 2012 14:33:53 -0800
Subject: mm: fix warnings regarding enum migrate_mode

sparc64 allmodconfig:

In file included from include/linux/compat.h:15,
                 from /usr/src/25/arch/sparc/include/asm/siginfo.h:19,
                 from include/linux/signal.h:5,
                 from include/linux/sched.h:73,
                 from arch/sparc/kernel/asm-offsets.c:13:
include/linux/fs.h:618: warning: parameter has incomplete type

It seems that my sparc64 compiler (gcc-3.4.5) doesn't like the forward
declaration of enums.

Fix this by moving the "enum migrate_mode" definition into its own header
file.

Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andy Isaacson <adi@hexapodia.org>
Cc: Nai Xia <nai.xia@gmail.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h           |  2 +-
 include/linux/migrate.h      | 14 +-------------
 include/linux/migrate_mode.h | 16 ++++++++++++++++
 3 files changed, 18 insertions(+), 14 deletions(-)
 create mode 100644 include/linux/migrate_mode.h

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0244082d42c5..4b3a41fe22bf 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -10,6 +10,7 @@
 #include <linux/ioctl.h>
 #include <linux/blk_types.h>
 #include <linux/types.h>
+#include <linux/migrate_mode.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -526,7 +527,6 @@ enum positive_aop_returns {
 struct page;
 struct address_space;
 struct writeback_control;
-enum migrate_mode;
 
 struct iov_iter {
 	const struct iovec *iov;
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index eaf867412f7a..05ed2828a553 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -3,22 +3,10 @@
 
 #include <linux/mm.h>
 #include <linux/mempolicy.h>
+#include <linux/migrate_mode.h>
 
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
-/*
- * MIGRATE_ASYNC means never block
- * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking
- *	on most operations but not ->writepage as the potential stall time
- *	is too significant
- * MIGRATE_SYNC will block when migrating pages
- */
-enum migrate_mode {
-	MIGRATE_ASYNC,
-	MIGRATE_SYNC_LIGHT,
-	MIGRATE_SYNC,
-};
-
 #ifdef CONFIG_MIGRATION
 #define PAGE_MIGRATION 1
 
diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
new file mode 100644
index 000000000000..ebf3d89a3919
--- /dev/null
+++ b/include/linux/migrate_mode.h
@@ -0,0 +1,16 @@
+#ifndef MIGRATE_MODE_H_INCLUDED
+#define MIGRATE_MODE_H_INCLUDED
+/*
+ * MIGRATE_ASYNC means never block
+ * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking
+ *	on most operations but not ->writepage as the potential stall time
+ *	is too significant
+ * MIGRATE_SYNC will block when migrating pages
+ */
+enum migrate_mode {
+	MIGRATE_ASYNC,
+	MIGRATE_SYNC_LIGHT,
+	MIGRATE_SYNC,
+};
+
+#endif		/* MIGRATE_MODE_H_INCLUDED */
-- 
cgit v1.2.3


From cb78edfdcef5259ac9e9088bd63810d21299928d Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Fri, 20 Jan 2012 14:34:16 -0800
Subject: kdump: define KEXEC_NOTE_BYTES arch specific for s390x

kdump only allocates memory for the prstatus ELF note.  For s390x,
besides of prstatus multiple ELF notes for various different register
types are stored.  Therefore the currently allocated memory is not
sufficient.  With this patch the KEXEC_NOTE_BYTES macro can be defined
by architecture code and for s390x it is set to the correct size now.

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/include/asm/kexec.h | 18 ++++++++++++++++++
 include/linux/kexec.h         |  2 ++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index cf4e47b0948c..3f30dac804ea 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -42,6 +42,24 @@
 /* The native architecture */
 #define KEXEC_ARCH KEXEC_ARCH_S390
 
+/*
+ * Size for s390x ELF notes per CPU
+ *
+ * Seven notes plus zero note at the end: prstatus, fpregset, timer,
+ * tod_cmp, tod_reg, control regs, and prefix
+ */
+#define KEXEC_NOTE_BYTES \
+	(ALIGN(sizeof(struct elf_note), 4) * 8 + \
+	 ALIGN(sizeof("CORE"), 4) * 7 + \
+	 ALIGN(sizeof(struct elf_prstatus), 4) + \
+	 ALIGN(sizeof(elf_fpregset_t), 4) + \
+	 ALIGN(sizeof(u64), 4) + \
+	 ALIGN(sizeof(u64), 4) + \
+	 ALIGN(sizeof(u32), 4) + \
+	 ALIGN(sizeof(u64) * 16, 4) + \
+	 ALIGN(sizeof(u32), 4) \
+	)
+
 /* Provide a dummy definition to avoid build failures. */
 static inline void crash_setup_regs(struct pt_regs *newregs,
 					struct pt_regs *oldregs) { }
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 2fa0901219d4..0d7d6a1b172f 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -50,9 +50,11 @@
  * note header.  For kdump, the code in vmcore.c runs in the context
  * of the second kernel to combine them into one note.
  */
+#ifndef KEXEC_NOTE_BYTES
 #define KEXEC_NOTE_BYTES ( (KEXEC_NOTE_HEAD_BYTES * 2) +		\
 			    KEXEC_CORE_NOTE_NAME_BYTES +		\
 			    KEXEC_CORE_NOTE_DESC_BYTES )
+#endif
 
 /*
  * This structure is used to hold the arguments that are used when loading
-- 
cgit v1.2.3


From 245132643e1cfcd145bbc86a716c1818371fcb93 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 20 Jan 2012 14:34:21 -0800
Subject: SHM_UNLOCK: fix Unevictable pages stranded after swap

Commit cc39c6a9bbde ("mm: account skipped entries to avoid looping in
find_get_pages") correctly fixed an infinite loop; but left a problem
that find_get_pages() on shmem would return 0 (appearing to callers to
mean end of tree) when it meets a run of nr_pages swap entries.

The only uses of find_get_pages() on shmem are via pagevec_lookup(),
called from invalidate_mapping_pages(), and from shmctl SHM_UNLOCK's
scan_mapping_unevictable_pages().  The first is already commented, and
not worth worrying about; but the second can leave pages on the
Unevictable list after an unusual sequence of swapping and locking.

Fix that by using shmem_find_get_pages_and_swap() (then ignoring the
swap) instead of pagevec_lookup().

But I don't want to contaminate vmscan.c with shmem internals, nor
shmem.c with LRU locking.  So move scan_mapping_unevictable_pages() into
shmem.c, renaming it shmem_unlock_mapping(); and rename
check_move_unevictable_page() to check_move_unevictable_pages(), looping
down an array of pages, oftentimes under the same lock.

Leave out the "rotate unevictable list" block: that's a leftover from
when this was used for /proc/sys/vm/scan_unevictable_pages, whose flawed
handling involved looking at pages at tail of LRU.

Was there significance to the sequence first ClearPageUnevictable, then
test page_evictable, then SetPageUnevictable here? I think not, we're
under LRU lock, and have no barriers between those.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michel Lespinasse <walken@google.com>
Cc: <stable@vger.kernel.org> [back to 3.1 but will need respins]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shmem_fs.h |   1 +
 include/linux/swap.h     |   2 +-
 ipc/shm.c                |   2 +-
 mm/shmem.c               |  46 +++++++++++++++--
 mm/vmscan.c              | 128 +++++++++++++++--------------------------------
 5 files changed, 83 insertions(+), 96 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index e4c711c6f321..79ab2555b3b0 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -48,6 +48,7 @@ extern struct file *shmem_file_setup(const char *name,
 					loff_t size, unsigned long flags);
 extern int shmem_zero_setup(struct vm_area_struct *);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern void shmem_unlock_mapping(struct address_space *mapping);
 extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 					pgoff_t index, gfp_t gfp_mask);
 extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 06061a7f8e69..3e60228e7299 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -273,7 +273,7 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 #endif
 
 extern int page_evictable(struct page *page, struct vm_area_struct *vma);
-extern void scan_mapping_unevictable_pages(struct address_space *);
+extern void check_move_unevictable_pages(struct page **, int nr_pages);
 
 extern unsigned long scan_unevictable_pages;
 extern int scan_unevictable_handler(struct ctl_table *, int,
diff --git a/ipc/shm.c b/ipc/shm.c
index 854ab58e5f6e..b76be5bda6c2 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -916,7 +916,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 		shp->mlock_user = NULL;
 		get_file(shm_file);
 		shm_unlock(shp);
-		scan_mapping_unevictable_pages(shm_file->f_mapping);
+		shmem_unlock_mapping(shm_file->f_mapping);
 		fput(shm_file);
 		goto out;
 	}
diff --git a/mm/shmem.c b/mm/shmem.c
index 4aaa53abe302..269d049294ab 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -379,7 +379,7 @@ static int shmem_free_swap(struct address_space *mapping,
 /*
  * Pagevec may contain swap entries, so shuffle up pages before releasing.
  */
-static void shmem_pagevec_release(struct pagevec *pvec)
+static void shmem_deswap_pagevec(struct pagevec *pvec)
 {
 	int i, j;
 
@@ -389,7 +389,36 @@ static void shmem_pagevec_release(struct pagevec *pvec)
 			pvec->pages[j++] = page;
 	}
 	pvec->nr = j;
-	pagevec_release(pvec);
+}
+
+/*
+ * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
+ */
+void shmem_unlock_mapping(struct address_space *mapping)
+{
+	struct pagevec pvec;
+	pgoff_t indices[PAGEVEC_SIZE];
+	pgoff_t index = 0;
+
+	pagevec_init(&pvec, 0);
+	/*
+	 * Minor point, but we might as well stop if someone else SHM_LOCKs it.
+	 */
+	while (!mapping_unevictable(mapping)) {
+		/*
+		 * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
+		 * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
+		 */
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+					PAGEVEC_SIZE, pvec.pages, indices);
+		if (!pvec.nr)
+			break;
+		index = indices[pvec.nr - 1] + 1;
+		shmem_deswap_pagevec(&pvec);
+		check_move_unevictable_pages(pvec.pages, pvec.nr);
+		pagevec_release(&pvec);
+		cond_resched();
+	}
 }
 
 /*
@@ -440,7 +469,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			}
 			unlock_page(page);
 		}
-		shmem_pagevec_release(&pvec);
+		shmem_deswap_pagevec(&pvec);
+		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
@@ -470,7 +500,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			continue;
 		}
 		if (index == start && indices[0] > end) {
-			shmem_pagevec_release(&pvec);
+			shmem_deswap_pagevec(&pvec);
+			pagevec_release(&pvec);
 			break;
 		}
 		mem_cgroup_uncharge_start();
@@ -494,7 +525,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			}
 			unlock_page(page);
 		}
-		shmem_pagevec_release(&pvec);
+		shmem_deswap_pagevec(&pvec);
+		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		index++;
 	}
@@ -2438,6 +2470,10 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
 	return 0;
 }
 
+void shmem_unlock_mapping(struct address_space *mapping)
+{
+}
+
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
 	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e097c1026b58..c52b23552659 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -26,7 +26,6 @@
 #include <linux/buffer_head.h>	/* for try_to_release_page(),
 					buffer_heads_over_limit */
 #include <linux/mm_inline.h>
-#include <linux/pagevec.h>
 #include <linux/backing-dev.h>
 #include <linux/rmap.h>
 #include <linux/topology.h>
@@ -661,7 +660,7 @@ redo:
 		 * When racing with an mlock or AS_UNEVICTABLE clearing
 		 * (page is unlocked) make sure that if the other thread
 		 * does not observe our setting of PG_lru and fails
-		 * isolation/check_move_unevictable_page,
+		 * isolation/check_move_unevictable_pages,
 		 * we see PG_mlocked/AS_UNEVICTABLE cleared below and move
 		 * the page back to the evictable list.
 		 *
@@ -3501,107 +3500,58 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
 
 #ifdef CONFIG_SHMEM
 /**
- * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
- * @page: page to check evictability and move to appropriate lru list
- * @zone: zone page is in
+ * check_move_unevictable_pages - check pages for evictability and move to appropriate zone lru list
+ * @pages:	array of pages to check
+ * @nr_pages:	number of pages to check
  *
- * Checks a page for evictability and moves the page to the appropriate
- * zone lru list.
- *
- * Restrictions: zone->lru_lock must be held, page must be on LRU and must
- * have PageUnevictable set.
+ * Checks pages for evictability and moves them to the appropriate lru list.
  *
  * This function is only used for SysV IPC SHM_UNLOCK.
  */
-static void check_move_unevictable_page(struct page *page, struct zone *zone)
+void check_move_unevictable_pages(struct page **pages, int nr_pages)
 {
 	struct lruvec *lruvec;
+	struct zone *zone = NULL;
+	int pgscanned = 0;
+	int pgrescued = 0;
+	int i;
 
-	VM_BUG_ON(PageActive(page));
-retry:
-	ClearPageUnevictable(page);
-	if (page_evictable(page, NULL)) {
-		enum lru_list l = page_lru_base_type(page);
-
-		__dec_zone_state(zone, NR_UNEVICTABLE);
-		lruvec = mem_cgroup_lru_move_lists(zone, page,
-						   LRU_UNEVICTABLE, l);
-		list_move(&page->lru, &lruvec->lists[l]);
-		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
-		__count_vm_event(UNEVICTABLE_PGRESCUED);
-	} else {
-		/*
-		 * rotate unevictable list
-		 */
-		SetPageUnevictable(page);
-		lruvec = mem_cgroup_lru_move_lists(zone, page, LRU_UNEVICTABLE,
-						   LRU_UNEVICTABLE);
-		list_move(&page->lru, &lruvec->lists[LRU_UNEVICTABLE]);
-		if (page_evictable(page, NULL))
-			goto retry;
-	}
-}
-
-/**
- * scan_mapping_unevictable_pages - scan an address space for evictable pages
- * @mapping: struct address_space to scan for evictable pages
- *
- * Scan all pages in mapping.  Check unevictable pages for
- * evictability and move them to the appropriate zone lru list.
- *
- * This function is only used for SysV IPC SHM_UNLOCK.
- */
-void scan_mapping_unevictable_pages(struct address_space *mapping)
-{
-	pgoff_t next = 0;
-	pgoff_t end   = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
-			 PAGE_CACHE_SHIFT;
-	struct zone *zone;
-	struct pagevec pvec;
-
-	if (mapping->nrpages == 0)
-		return;
-
-	pagevec_init(&pvec, 0);
-	while (next < end &&
-		pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
-		int i;
-		int pg_scanned = 0;
-
-		zone = NULL;
-
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
-			pgoff_t page_index = page->index;
-			struct zone *pagezone = page_zone(page);
+	for (i = 0; i < nr_pages; i++) {
+		struct page *page = pages[i];
+		struct zone *pagezone;
 
-			pg_scanned++;
-			if (page_index > next)
-				next = page_index;
-			next++;
+		pgscanned++;
+		pagezone = page_zone(page);
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock_irq(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock_irq(&zone->lru_lock);
+		}
 
-			if (pagezone != zone) {
-				if (zone)
-					spin_unlock_irq(&zone->lru_lock);
-				zone = pagezone;
-				spin_lock_irq(&zone->lru_lock);
-			}
+		if (!PageLRU(page) || !PageUnevictable(page))
+			continue;
 
-			if (PageLRU(page) && PageUnevictable(page))
-				check_move_unevictable_page(page, zone);
+		if (page_evictable(page, NULL)) {
+			enum lru_list lru = page_lru_base_type(page);
+
+			VM_BUG_ON(PageActive(page));
+			ClearPageUnevictable(page);
+			__dec_zone_state(zone, NR_UNEVICTABLE);
+			lruvec = mem_cgroup_lru_move_lists(zone, page,
+						LRU_UNEVICTABLE, lru);
+			list_move(&page->lru, &lruvec->lists[lru]);
+			__inc_zone_state(zone, NR_INACTIVE_ANON + lru);
+			pgrescued++;
 		}
-		if (zone)
-			spin_unlock_irq(&zone->lru_lock);
-		pagevec_release(&pvec);
+	}
 
-		count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
-		cond_resched();
+	if (zone) {
+		__count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
+		__count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
+		spin_unlock_irq(&zone->lru_lock);
 	}
 }
-#else
-void scan_mapping_unevictable_pages(struct address_space *mapping)
-{
-}
 #endif /* CONFIG_SHMEM */
 
 static void warn_scan_unevictable_pages(void)
-- 
cgit v1.2.3


From 2eda013f4894bc200124f791a56c4defb613a0cc Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:02:51 -0800
Subject: kernel-doc: fix new warnings in device.h

Fix new kernel-doc warnings:

Warning(include/linux/device.h:299): No description found for parameter 'name'
Warning(include/linux/device.h:299): No description found for parameter 'subsys'
Warning(include/linux/device.h:299): No description found for parameter 'node'
Warning(include/linux/device.h:299): No description found for parameter 'add_dev'
Warning(include/linux/device.h:299): No description found for parameter 'remove_dev'
Warning(include/linux/device.h:685): No description found for parameter 'id'
Warning(include/linux/device.h:1009): No description found for parameter '__driver'
Warning(include/linux/device.h:1009): No description found for parameter '__register'
Warning(include/linux/device.h:1009): No description found for parameter '__unregister'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/device.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 5b3adb8f9588..b63fb393aa58 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -279,11 +279,11 @@ struct device *driver_find_device(struct device_driver *drv,
 
 /**
  * struct subsys_interface - interfaces to device functions
- * @name        name of the device function
- * @subsystem   subsytem of the devices to attach to
- * @node        the list of functions registered at the subsystem
- * @add         device hookup to device function handler
- * @remove      device hookup to device function handler
+ * @name:       name of the device function
+ * @subsys:     subsytem of the devices to attach to
+ * @node:       the list of functions registered at the subsystem
+ * @add_dev:    device hookup to device function handler
+ * @remove_dev: device hookup to device function handler
  *
  * Simple interfaces attached to a subsystem. Multiple interfaces can
  * attach to a subsystem and its devices. Unlike drivers, they do not
@@ -612,6 +612,7 @@ struct device_dma_parameters {
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
  * @devt:	For creating the sysfs "dev".
+ * @id:		device instance
  * @devres_lock: Spinlock to protect the resource of the device.
  * @devres_head: The resources list of the device.
  * @knode_class: The node used to add the device to the class list.
@@ -1003,6 +1004,10 @@ extern long sysfs_deprecated;
  * Each module may only use this macro once, and calling it replaces
  * module_init() and module_exit().
  *
+ * @__driver: driver name
+ * @__register: register function for this driver type
+ * @__unregister: unregister function for this driver type
+ *
  * Use this macro to construct bus specific macros for registering
  * drivers, and do not use it on its own.
  */
-- 
cgit v1.2.3


From 4d922612df8bd1202a1f51d95b78aca3d67302cd Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:02:56 -0800
Subject: kernel-doc: fix new warning in usb.h

Fix new kernel-doc warning:

Warning(include/linux/usb.h:1251): No description found for parameter 'num_mapped_sgs'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/usb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 27a4e16d2bf1..69d845739bc2 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1073,6 +1073,7 @@ typedef void (*usb_complete_t)(struct urb *);
  *	which the host controller driver should use in preference to the
  *	transfer_buffer.
  * @sg: scatter gather buffer list
+ * @num_mapped_sgs: (internal) number of mapped sg entries
  * @num_sgs: number of entries in the sg list
  * @transfer_buffer_length: How big is transfer_buffer.  The transfer may
  *	be broken up into chunks according to the current maximum packet
-- 
cgit v1.2.3


From fa757281a08799fd6c0f7ec6f111d1cd66afc97b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:03:13 -0800
Subject: kernel-doc: fix kernel-doc warnings in sched

Fix new kernel-doc notation warnings:

Warning(include/linux/sched.h:2094): No description found for parameter 'p'
Warning(include/linux/sched.h:2094): Excess function parameter 'tsk' description in 'is_idle_task'
Warning(kernel/sched/cpupri.c:139): No description found for parameter 'newpri'
Warning(kernel/sched/cpupri.c:139): Excess function parameter 'pri' description in 'cpupri_set'
Warning(kernel/sched/cpupri.c:208): Excess function parameter 'bootmem' description in 'cpupri_init'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc:	Ingo Molnar <mingo@elte.hu>
Cc:	Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 2 +-
 kernel/sched/cpupri.c | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4032ec1cf836..513f52459872 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2088,7 +2088,7 @@ extern int sched_setscheduler_nocheck(struct task_struct *, int,
 extern struct task_struct *idle_task(int cpu);
 /**
  * is_idle_task - is the specified task an idle task?
- * @tsk: the task in question.
+ * @p: the task in question.
  */
 static inline bool is_idle_task(struct task_struct *p)
 {
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index b0d798eaf130..d72586fdf660 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -129,7 +129,7 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
  * cpupri_set - update the cpu priority setting
  * @cp: The cpupri context
  * @cpu: The target cpu
- * @pri: The priority (INVALID-RT99) to assign to this CPU
+ * @newpri: The priority (INVALID-RT99) to assign to this CPU
  *
  * Note: Assumes cpu_rq(cpu)->lock is locked
  *
@@ -200,7 +200,6 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 /**
  * cpupri_init - initialize the cpupri structure
  * @cp: The cpupri context
- * @bootmem: true if allocations need to use bootmem
  *
  * Returns: -ENOMEM if memory fails.
  */
-- 
cgit v1.2.3


From c1aab02dac690af7ff634d8e1cb3be6a04387eef Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 24 Jan 2012 11:41:32 +1100
Subject: migrate_mode.h is not exported to user mode

so move its include into fs.h inside the __KERNEL__ protection.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4b3a41fe22bf..386da09f229d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -10,7 +10,6 @@
 #include <linux/ioctl.h>
 #include <linux/blk_types.h>
 #include <linux/types.h>
-#include <linux/migrate_mode.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -397,6 +396,7 @@ struct inodes_stat_t {
 #include <linux/rculist_bl.h>
 #include <linux/atomic.h>
 #include <linux/shrinker.h>
+#include <linux/migrate_mode.h>
 
 #include <asm/byteorder.h>
 
-- 
cgit v1.2.3


From c1084a56da255ef5385c0f587e16fdc225a5460f Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Wed, 21 Dec 2011 10:19:38 +0200
Subject: usb: otg: kill langwell_otg driver

The way this driver was added by f0ae849 (usb: Add Intel Langwell USB
OTG Transceiver Driver) never even compiled together with langwell_udc,
and that's the only way for it to be useful.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: stable@vger.kernel.org # v2.6.31+
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Alan Cox <alan@linux.intel.com>
Cc: linux-usb@vger.kernel.org
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/otg/Kconfig          |   14 -
 drivers/usb/otg/Makefile         |    1 -
 drivers/usb/otg/langwell_otg.c   | 2347 --------------------------------------
 include/linux/usb/langwell_otg.h |  139 ---
 4 files changed, 2501 deletions(-)
 delete mode 100644 drivers/usb/otg/langwell_otg.c
 delete mode 100644 include/linux/usb/langwell_otg.h

(limited to 'include/linux')

diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig
index 2a25955881fc..9105c285f594 100644
--- a/drivers/usb/otg/Kconfig
+++ b/drivers/usb/otg/Kconfig
@@ -86,20 +86,6 @@ config NOP_USB_XCEIV
 	  built-in with usb ip or which are autonomous and doesn't require any
 	  phy programming such as ISP1x04 etc.
 
-config USB_LANGWELL_OTG
-	tristate "Intel Langwell USB OTG dual-role support"
-	depends on USB && PCI && INTEL_SCU_IPC
-	select USB_OTG
-	select USB_OTG_UTILS
-	help
-	  Say Y here if you want to build Intel Langwell USB OTG
-	  transciever driver in kernel. This driver implements role
-	  switch between EHCI host driver and Langwell USB OTG
-	  client driver.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called langwell_otg.
-
 config USB_MSM_OTG
 	tristate "OTG support for Qualcomm on-chip USB controller"
 	depends on (USB || USB_GADGET) && ARCH_MSM
diff --git a/drivers/usb/otg/Makefile b/drivers/usb/otg/Makefile
index b2c5a9598637..41aa5098b139 100644
--- a/drivers/usb/otg/Makefile
+++ b/drivers/usb/otg/Makefile
@@ -13,7 +13,6 @@ obj-$(CONFIG_USB_GPIO_VBUS)	+= gpio_vbus.o
 obj-$(CONFIG_ISP1301_OMAP)	+= isp1301_omap.o
 obj-$(CONFIG_TWL4030_USB)	+= twl4030-usb.o
 obj-$(CONFIG_TWL6030_USB)	+= twl6030-usb.o
-obj-$(CONFIG_USB_LANGWELL_OTG)	+= langwell_otg.o
 obj-$(CONFIG_NOP_USB_XCEIV)	+= nop-usb-xceiv.o
 obj-$(CONFIG_USB_ULPI)		+= ulpi.o
 obj-$(CONFIG_USB_ULPI_VIEWPORT)	+= ulpi_viewport.o
diff --git a/drivers/usb/otg/langwell_otg.c b/drivers/usb/otg/langwell_otg.c
deleted file mode 100644
index f08f784086f7..000000000000
--- a/drivers/usb/otg/langwell_otg.c
+++ /dev/null
@@ -1,2347 +0,0 @@
-/*
- * Intel Langwell USB OTG transceiver driver
- * Copyright (C) 2008 - 2010, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- */
-/* This driver helps to switch Langwell OTG controller function between host
- * and peripheral. It works with EHCI driver and Langwell client controller
- * driver together.
- */
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/moduleparam.h>
-#include <linux/usb/ch9.h>
-#include <linux/usb/gadget.h>
-#include <linux/usb.h>
-#include <linux/usb/otg.h>
-#include <linux/usb/hcd.h>
-#include <linux/notifier.h>
-#include <linux/delay.h>
-#include <asm/intel_scu_ipc.h>
-
-#include <linux/usb/langwell_otg.h>
-
-#define	DRIVER_DESC		"Intel Langwell USB OTG transceiver driver"
-#define	DRIVER_VERSION		"July 10, 2010"
-
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_AUTHOR("Henry Yuan <hang.yuan@intel.com>, Hao Wu <hao.wu@intel.com>");
-MODULE_VERSION(DRIVER_VERSION);
-MODULE_LICENSE("GPL");
-
-static const char driver_name[] = "langwell_otg";
-
-static int langwell_otg_probe(struct pci_dev *pdev,
-			const struct pci_device_id *id);
-static void langwell_otg_remove(struct pci_dev *pdev);
-static int langwell_otg_suspend(struct pci_dev *pdev, pm_message_t message);
-static int langwell_otg_resume(struct pci_dev *pdev);
-
-static int langwell_otg_set_host(struct otg_transceiver *otg,
-				struct usb_bus *host);
-static int langwell_otg_set_peripheral(struct otg_transceiver *otg,
-				struct usb_gadget *gadget);
-static int langwell_otg_start_srp(struct otg_transceiver *otg);
-
-static const struct pci_device_id pci_ids[] = {{
-	.class =        ((PCI_CLASS_SERIAL_USB << 8) | 0xfe),
-	.class_mask =   ~0,
-	.vendor =	0x8086,
-	.device =	0x0811,
-	.subvendor =	PCI_ANY_ID,
-	.subdevice =	PCI_ANY_ID,
-}, { /* end: all zeroes */ }
-};
-
-static struct pci_driver otg_pci_driver = {
-	.name =		(char *) driver_name,
-	.id_table =	pci_ids,
-
-	.probe =	langwell_otg_probe,
-	.remove =	langwell_otg_remove,
-
-	.suspend =	langwell_otg_suspend,
-	.resume =	langwell_otg_resume,
-};
-
-/* HSM timers */
-static inline struct langwell_otg_timer *otg_timer_initializer
-(void (*function)(unsigned long), unsigned long expires, unsigned long data)
-{
-	struct langwell_otg_timer *timer;
-	timer = kmalloc(sizeof(struct langwell_otg_timer), GFP_KERNEL);
-	if (timer == NULL)
-		return timer;
-
-	timer->function = function;
-	timer->expires = expires;
-	timer->data = data;
-	return timer;
-}
-
-static struct langwell_otg_timer *a_wait_vrise_tmr, *a_aidl_bdis_tmr,
-	*b_se0_srp_tmr, *b_srp_init_tmr;
-
-static struct list_head active_timers;
-
-static struct langwell_otg *the_transceiver;
-
-/* host/client notify transceiver when event affects HNP state */
-void langwell_update_transceiver(void)
-{
-	struct langwell_otg *lnw = the_transceiver;
-
-	dev_dbg(lnw->dev, "transceiver is updated\n");
-
-	if (!lnw->qwork)
-		return ;
-
-	queue_work(lnw->qwork, &lnw->work);
-}
-EXPORT_SYMBOL(langwell_update_transceiver);
-
-static int langwell_otg_set_host(struct otg_transceiver *otg,
-					struct usb_bus *host)
-{
-	otg->host = host;
-
-	return 0;
-}
-
-static int langwell_otg_set_peripheral(struct otg_transceiver *otg,
-					struct usb_gadget *gadget)
-{
-	otg->gadget = gadget;
-
-	return 0;
-}
-
-static int langwell_otg_set_power(struct otg_transceiver *otg,
-				unsigned mA)
-{
-	return 0;
-}
-
-/* A-device drives vbus, controlled through IPC commands */
-static int langwell_otg_set_vbus(struct otg_transceiver *otg, bool enabled)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	u8				sub_id;
-
-	dev_dbg(lnw->dev, "%s <--- %s\n", __func__, enabled ? "on" : "off");
-
-	if (enabled)
-		sub_id = 0x8; /* Turn on the VBus */
-	else
-		sub_id = 0x9; /* Turn off the VBus */
-
-	if (intel_scu_ipc_simple_command(0xef, sub_id)) {
-		dev_dbg(lnw->dev, "Failed to set Vbus via IPC commands\n");
-		return -EBUSY;
-	}
-
-	dev_dbg(lnw->dev, "%s --->\n", __func__);
-
-	return 0;
-}
-
-/* charge vbus or discharge vbus through a resistor to ground */
-static void langwell_otg_chrg_vbus(int on)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	u32	val;
-
-	val = readl(lnw->iotg.base + CI_OTGSC);
-
-	if (on)
-		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_VC,
-				lnw->iotg.base + CI_OTGSC);
-	else
-		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_VD,
-				lnw->iotg.base + CI_OTGSC);
-}
-
-/* Start SRP */
-static int langwell_otg_start_srp(struct otg_transceiver *otg)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val;
-
-	dev_dbg(lnw->dev, "%s --->\n", __func__);
-
-	val = readl(iotg->base + CI_OTGSC);
-
-	writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HADP,
-				iotg->base + CI_OTGSC);
-
-	/* Check if the data plus is finished or not */
-	msleep(8);
-	val = readl(iotg->base + CI_OTGSC);
-	if (val & (OTGSC_HADP | OTGSC_DP))
-		dev_dbg(lnw->dev, "DataLine SRP Error\n");
-
-	/* Disable interrupt - b_sess_vld */
-	val = readl(iotg->base + CI_OTGSC);
-	val &= (~(OTGSC_BSVIE | OTGSC_BSEIE));
-	writel(val, iotg->base + CI_OTGSC);
-
-	/* Start VBus SRP, drive vbus to generate VBus pulse */
-	iotg->otg.set_vbus(&iotg->otg, true);
-	msleep(15);
-	iotg->otg.set_vbus(&iotg->otg, false);
-
-	/* Enable interrupt - b_sess_vld*/
-	val = readl(iotg->base + CI_OTGSC);
-	dev_dbg(lnw->dev, "after VBUS pulse otgsc = %x\n", val);
-
-	val |= (OTGSC_BSVIE | OTGSC_BSEIE);
-	writel(val, iotg->base + CI_OTGSC);
-
-	/* If Vbus is valid, then update the hsm */
-	if (val & OTGSC_BSV) {
-		dev_dbg(lnw->dev, "no b_sess_vld interrupt\n");
-
-		lnw->iotg.hsm.b_sess_vld = 1;
-		langwell_update_transceiver();
-	}
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-	return 0;
-}
-
-/* stop SOF via bus_suspend */
-static void langwell_otg_loc_sof(int on)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	struct usb_hcd		*hcd;
-	int			err;
-
-	dev_dbg(lnw->dev, "%s ---> %s\n", __func__, on ? "suspend" : "resume");
-
-	hcd = bus_to_hcd(lnw->iotg.otg.host);
-	if (on)
-		err = hcd->driver->bus_resume(hcd);
-	else
-		err = hcd->driver->bus_suspend(hcd);
-
-	if (err)
-		dev_dbg(lnw->dev, "Fail to resume/suspend USB bus - %d\n", err);
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-}
-
-static int langwell_otg_check_otgsc(void)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	u32				otgsc, usbcfg;
-
-	dev_dbg(lnw->dev, "check sync OTGSC and USBCFG registers\n");
-
-	otgsc = readl(lnw->iotg.base + CI_OTGSC);
-	usbcfg = readl(lnw->usbcfg);
-
-	dev_dbg(lnw->dev, "OTGSC = %08x, USBCFG = %08x\n",
-					otgsc, usbcfg);
-	dev_dbg(lnw->dev, "OTGSC_AVV = %d\n", !!(otgsc & OTGSC_AVV));
-	dev_dbg(lnw->dev, "USBCFG.VBUSVAL = %d\n",
-					!!(usbcfg & USBCFG_VBUSVAL));
-	dev_dbg(lnw->dev, "OTGSC_ASV = %d\n", !!(otgsc & OTGSC_ASV));
-	dev_dbg(lnw->dev, "USBCFG.AVALID = %d\n",
-					!!(usbcfg & USBCFG_AVALID));
-	dev_dbg(lnw->dev, "OTGSC_BSV = %d\n", !!(otgsc & OTGSC_BSV));
-	dev_dbg(lnw->dev, "USBCFG.BVALID = %d\n",
-					!!(usbcfg & USBCFG_BVALID));
-	dev_dbg(lnw->dev, "OTGSC_BSE = %d\n", !!(otgsc & OTGSC_BSE));
-	dev_dbg(lnw->dev, "USBCFG.SESEND = %d\n",
-					!!(usbcfg & USBCFG_SESEND));
-
-	/* Check USBCFG VBusValid/AValid/BValid/SessEnd */
-	if (!!(otgsc & OTGSC_AVV) ^ !!(usbcfg & USBCFG_VBUSVAL)) {
-		dev_dbg(lnw->dev, "OTGSC.AVV != USBCFG.VBUSVAL\n");
-		goto err;
-	}
-	if (!!(otgsc & OTGSC_ASV) ^ !!(usbcfg & USBCFG_AVALID)) {
-		dev_dbg(lnw->dev, "OTGSC.ASV != USBCFG.AVALID\n");
-		goto err;
-	}
-	if (!!(otgsc & OTGSC_BSV) ^ !!(usbcfg & USBCFG_BVALID)) {
-		dev_dbg(lnw->dev, "OTGSC.BSV != USBCFG.BVALID\n");
-		goto err;
-	}
-	if (!!(otgsc & OTGSC_BSE) ^ !!(usbcfg & USBCFG_SESEND)) {
-		dev_dbg(lnw->dev, "OTGSC.BSE != USBCFG.SESSEN\n");
-		goto err;
-	}
-
-	dev_dbg(lnw->dev, "OTGSC and USBCFG are synced\n");
-
-	return 0;
-
-err:
-	dev_warn(lnw->dev, "OTGSC isn't equal to USBCFG\n");
-	return -EPIPE;
-}
-
-
-static void langwell_otg_phy_low_power(int on)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u8				val, phcd;
-	int				retval;
-
-	dev_dbg(lnw->dev, "%s ---> %s mode\n",
-			__func__, on ? "Low power" : "Normal");
-
-	phcd = 0x40;
-
-	val = readb(iotg->base + CI_HOSTPC1 + 2);
-
-	if (on) {
-		/* Due to hardware issue, after set PHCD, sync will failed
-		 * between USBCFG and OTGSC, so before set PHCD, check if
-		 * sync is in process now. If the answer is "yes", then do
-		 * not touch PHCD bit */
-		retval = langwell_otg_check_otgsc();
-		if (retval) {
-			dev_dbg(lnw->dev, "Skip PHCD programming..\n");
-			return ;
-		}
-
-		writeb(val | phcd, iotg->base + CI_HOSTPC1 + 2);
-	} else
-		writeb(val & ~phcd, iotg->base + CI_HOSTPC1 + 2);
-
-	dev_dbg(lnw->dev, "%s <--- done\n", __func__);
-}
-
-/* After drv vbus, add 5 ms delay to set PHCD */
-static void langwell_otg_phy_low_power_wait(int on)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-
-	dev_dbg(lnw->dev, "add 5ms delay before programing PHCD\n");
-
-	mdelay(5);
-	langwell_otg_phy_low_power(on);
-}
-
-/* Enable/Disable OTG interrupt */
-static void langwell_otg_intr(int on)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val;
-
-	dev_dbg(lnw->dev, "%s ---> %s\n", __func__, on ? "on" : "off");
-
-	val = readl(iotg->base + CI_OTGSC);
-
-	/* OTGSC_INT_MASK doesn't contains 1msInt */
-	if (on) {
-		val = val | (OTGSC_INT_MASK);
-		writel(val, iotg->base + CI_OTGSC);
-	} else {
-		val = val & ~(OTGSC_INT_MASK);
-		writel(val, iotg->base + CI_OTGSC);
-	}
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-}
-
-/* set HAAR: Hardware Assist Auto-Reset */
-static void langwell_otg_HAAR(int on)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val;
-
-	dev_dbg(lnw->dev, "%s ---> %s\n", __func__, on ? "on" : "off");
-
-	val = readl(iotg->base + CI_OTGSC);
-	if (on)
-		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HAAR,
-					iotg->base + CI_OTGSC);
-	else
-		writel((val & ~OTGSC_INTSTS_MASK) & ~OTGSC_HAAR,
-					iotg->base + CI_OTGSC);
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-}
-
-/* set HABA: Hardware Assist B-Disconnect to A-Connect */
-static void langwell_otg_HABA(int on)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val;
-
-	dev_dbg(lnw->dev, "%s ---> %s\n", __func__, on ? "on" : "off");
-
-	val = readl(iotg->base + CI_OTGSC);
-	if (on)
-		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HABA,
-					iotg->base + CI_OTGSC);
-	else
-		writel((val & ~OTGSC_INTSTS_MASK) & ~OTGSC_HABA,
-					iotg->base + CI_OTGSC);
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-}
-
-static int langwell_otg_check_se0_srp(int on)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	int			delay_time = TB_SE0_SRP * 10;
-	u32			val;
-
-	dev_dbg(lnw->dev, "%s --->\n", __func__);
-
-	do {
-		udelay(100);
-		if (!delay_time--)
-			break;
-		val = readl(lnw->iotg.base + CI_PORTSC1);
-		val &= PORTSC_LS;
-	} while (!val);
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-	return val;
-}
-
-/* The timeout callback function to set time out bit */
-static void set_tmout(unsigned long indicator)
-{
-	*(int *)indicator = 1;
-}
-
-void langwell_otg_nsf_msg(unsigned long indicator)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-
-	switch (indicator) {
-	case 2:
-	case 4:
-	case 6:
-	case 7:
-		dev_warn(lnw->dev,
-			"OTG:NSF-%lu - deivce not responding\n", indicator);
-		break;
-	case 3:
-		dev_warn(lnw->dev,
-			"OTG:NSF-%lu - deivce not supported\n", indicator);
-		break;
-	default:
-		dev_warn(lnw->dev, "Do not have this kind of NSF\n");
-		break;
-	}
-}
-
-/* Initialize timers */
-static int langwell_otg_init_timers(struct otg_hsm *hsm)
-{
-	/* HSM used timers */
-	a_wait_vrise_tmr = otg_timer_initializer(&set_tmout, TA_WAIT_VRISE,
-				(unsigned long)&hsm->a_wait_vrise_tmout);
-	if (a_wait_vrise_tmr == NULL)
-		return -ENOMEM;
-	a_aidl_bdis_tmr = otg_timer_initializer(&set_tmout, TA_AIDL_BDIS,
-				(unsigned long)&hsm->a_aidl_bdis_tmout);
-	if (a_aidl_bdis_tmr == NULL)
-		return -ENOMEM;
-	b_se0_srp_tmr = otg_timer_initializer(&set_tmout, TB_SE0_SRP,
-				(unsigned long)&hsm->b_se0_srp);
-	if (b_se0_srp_tmr == NULL)
-		return -ENOMEM;
-	b_srp_init_tmr = otg_timer_initializer(&set_tmout, TB_SRP_INIT,
-				(unsigned long)&hsm->b_srp_init_tmout);
-	if (b_srp_init_tmr == NULL)
-		return -ENOMEM;
-
-	return 0;
-}
-
-/* Free timers */
-static void langwell_otg_free_timers(void)
-{
-	kfree(a_wait_vrise_tmr);
-	kfree(a_aidl_bdis_tmr);
-	kfree(b_se0_srp_tmr);
-	kfree(b_srp_init_tmr);
-}
-
-/* The timeout callback function to set time out bit */
-static void langwell_otg_timer_fn(unsigned long indicator)
-{
-	struct langwell_otg *lnw = the_transceiver;
-
-	*(int *)indicator = 1;
-
-	dev_dbg(lnw->dev, "kernel timer - timeout\n");
-
-	langwell_update_transceiver();
-}
-
-/* kernel timer used instead of HW based interrupt */
-static void langwell_otg_add_ktimer(enum langwell_otg_timer_type timers)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	unsigned long		j = jiffies;
-	unsigned long		data, time;
-
-	switch (timers) {
-	case TA_WAIT_VRISE_TMR:
-		iotg->hsm.a_wait_vrise_tmout = 0;
-		data = (unsigned long)&iotg->hsm.a_wait_vrise_tmout;
-		time = TA_WAIT_VRISE;
-		break;
-	case TA_WAIT_BCON_TMR:
-		iotg->hsm.a_wait_bcon_tmout = 0;
-		data = (unsigned long)&iotg->hsm.a_wait_bcon_tmout;
-		time = TA_WAIT_BCON;
-		break;
-	case TA_AIDL_BDIS_TMR:
-		iotg->hsm.a_aidl_bdis_tmout = 0;
-		data = (unsigned long)&iotg->hsm.a_aidl_bdis_tmout;
-		time = TA_AIDL_BDIS;
-		break;
-	case TB_ASE0_BRST_TMR:
-		iotg->hsm.b_ase0_brst_tmout = 0;
-		data = (unsigned long)&iotg->hsm.b_ase0_brst_tmout;
-		time = TB_ASE0_BRST;
-		break;
-	case TB_SRP_INIT_TMR:
-		iotg->hsm.b_srp_init_tmout = 0;
-		data = (unsigned long)&iotg->hsm.b_srp_init_tmout;
-		time = TB_SRP_INIT;
-		break;
-	case TB_SRP_FAIL_TMR:
-		iotg->hsm.b_srp_fail_tmout = 0;
-		data = (unsigned long)&iotg->hsm.b_srp_fail_tmout;
-		time = TB_SRP_FAIL;
-		break;
-	case TB_BUS_SUSPEND_TMR:
-		iotg->hsm.b_bus_suspend_tmout = 0;
-		data = (unsigned long)&iotg->hsm.b_bus_suspend_tmout;
-		time = TB_BUS_SUSPEND;
-		break;
-	default:
-		dev_dbg(lnw->dev, "unknown timer, cannot enable it\n");
-		return;
-	}
-
-	lnw->hsm_timer.data = data;
-	lnw->hsm_timer.function = langwell_otg_timer_fn;
-	lnw->hsm_timer.expires = j + time * HZ / 1000; /* milliseconds */
-
-	add_timer(&lnw->hsm_timer);
-
-	dev_dbg(lnw->dev, "add timer successfully\n");
-}
-
-/* Add timer to timer list */
-static void langwell_otg_add_timer(void *gtimer)
-{
-	struct langwell_otg_timer *timer = (struct langwell_otg_timer *)gtimer;
-	struct langwell_otg_timer *tmp_timer;
-	struct intel_mid_otg_xceiv *iotg = &the_transceiver->iotg;
-	u32	val32;
-
-	/* Check if the timer is already in the active list,
-	 * if so update timer count
-	 */
-	list_for_each_entry(tmp_timer, &active_timers, list)
-		if (tmp_timer == timer) {
-			timer->count = timer->expires;
-			return;
-		}
-	timer->count = timer->expires;
-
-	if (list_empty(&active_timers)) {
-		val32 = readl(iotg->base + CI_OTGSC);
-		writel(val32 | OTGSC_1MSE, iotg->base + CI_OTGSC);
-	}
-
-	list_add_tail(&timer->list, &active_timers);
-}
-
-/* Remove timer from the timer list; clear timeout status */
-static void langwell_otg_del_timer(void *gtimer)
-{
-	struct langwell_otg *lnw = the_transceiver;
-	struct langwell_otg_timer *timer = (struct langwell_otg_timer *)gtimer;
-	struct langwell_otg_timer *tmp_timer, *del_tmp;
-	u32 val32;
-
-	list_for_each_entry_safe(tmp_timer, del_tmp, &active_timers, list)
-		if (tmp_timer == timer)
-			list_del(&timer->list);
-
-	if (list_empty(&active_timers)) {
-		val32 = readl(lnw->iotg.base + CI_OTGSC);
-		writel(val32 & ~OTGSC_1MSE, lnw->iotg.base + CI_OTGSC);
-	}
-}
-
-/* Reduce timer count by 1, and find timeout conditions.*/
-static int langwell_otg_tick_timer(u32 *int_sts)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	struct langwell_otg_timer *tmp_timer, *del_tmp;
-	int expired = 0;
-
-	list_for_each_entry_safe(tmp_timer, del_tmp, &active_timers, list) {
-		tmp_timer->count--;
-		/* check if timer expires */
-		if (!tmp_timer->count) {
-			list_del(&tmp_timer->list);
-			tmp_timer->function(tmp_timer->data);
-			expired = 1;
-		}
-	}
-
-	if (list_empty(&active_timers)) {
-		dev_dbg(lnw->dev, "tick timer: disable 1ms int\n");
-		*int_sts = *int_sts & ~OTGSC_1MSE;
-	}
-	return expired;
-}
-
-static void reset_otg(void)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	int			delay_time = 1000;
-	u32			val;
-
-	dev_dbg(lnw->dev, "reseting OTG controller ...\n");
-	val = readl(lnw->iotg.base + CI_USBCMD);
-	writel(val | USBCMD_RST, lnw->iotg.base + CI_USBCMD);
-	do {
-		udelay(100);
-		if (!delay_time--)
-			dev_dbg(lnw->dev, "reset timeout\n");
-		val = readl(lnw->iotg.base + CI_USBCMD);
-		val &= USBCMD_RST;
-	} while (val != 0);
-	dev_dbg(lnw->dev, "reset done.\n");
-}
-
-static void set_host_mode(void)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	u32			val;
-
-	reset_otg();
-	val = readl(lnw->iotg.base + CI_USBMODE);
-	val = (val & (~USBMODE_CM)) | USBMODE_HOST;
-	writel(val, lnw->iotg.base + CI_USBMODE);
-}
-
-static void set_client_mode(void)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	u32			val;
-
-	reset_otg();
-	val = readl(lnw->iotg.base + CI_USBMODE);
-	val = (val & (~USBMODE_CM)) | USBMODE_DEVICE;
-	writel(val, lnw->iotg.base + CI_USBMODE);
-}
-
-static void init_hsm(void)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val32;
-
-	/* read OTGSC after reset */
-	val32 = readl(lnw->iotg.base + CI_OTGSC);
-	dev_dbg(lnw->dev, "%s: OTGSC init value = 0x%x\n", __func__, val32);
-
-	/* set init state */
-	if (val32 & OTGSC_ID) {
-		iotg->hsm.id = 1;
-		iotg->otg.default_a = 0;
-		set_client_mode();
-		iotg->otg.state = OTG_STATE_B_IDLE;
-	} else {
-		iotg->hsm.id = 0;
-		iotg->otg.default_a = 1;
-		set_host_mode();
-		iotg->otg.state = OTG_STATE_A_IDLE;
-	}
-
-	/* set session indicator */
-	if (val32 & OTGSC_BSE)
-		iotg->hsm.b_sess_end = 1;
-	if (val32 & OTGSC_BSV)
-		iotg->hsm.b_sess_vld = 1;
-	if (val32 & OTGSC_ASV)
-		iotg->hsm.a_sess_vld = 1;
-	if (val32 & OTGSC_AVV)
-		iotg->hsm.a_vbus_vld = 1;
-
-	/* defautly power the bus */
-	iotg->hsm.a_bus_req = 1;
-	iotg->hsm.a_bus_drop = 0;
-	/* defautly don't request bus as B device */
-	iotg->hsm.b_bus_req = 0;
-	/* no system error */
-	iotg->hsm.a_clr_err = 0;
-
-	langwell_otg_phy_low_power_wait(1);
-}
-
-static void update_hsm(void)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val32;
-
-	/* read OTGSC */
-	val32 = readl(lnw->iotg.base + CI_OTGSC);
-	dev_dbg(lnw->dev, "%s: OTGSC value = 0x%x\n", __func__, val32);
-
-	iotg->hsm.id = !!(val32 & OTGSC_ID);
-	iotg->hsm.b_sess_end = !!(val32 & OTGSC_BSE);
-	iotg->hsm.b_sess_vld = !!(val32 & OTGSC_BSV);
-	iotg->hsm.a_sess_vld = !!(val32 & OTGSC_ASV);
-	iotg->hsm.a_vbus_vld = !!(val32 & OTGSC_AVV);
-}
-
-static irqreturn_t otg_dummy_irq(int irq, void *_dev)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	void __iomem		*reg_base = _dev;
-	u32			val;
-	u32			int_mask = 0;
-
-	val = readl(reg_base + CI_USBMODE);
-	if ((val & USBMODE_CM) != USBMODE_DEVICE)
-		return IRQ_NONE;
-
-	val = readl(reg_base + CI_USBSTS);
-	int_mask = val & INTR_DUMMY_MASK;
-
-	if (int_mask == 0)
-		return IRQ_NONE;
-
-	/* clear hsm.b_conn here since host driver can't detect it
-	*  otg_dummy_irq called means B-disconnect happened.
-	*/
-	if (lnw->iotg.hsm.b_conn) {
-		lnw->iotg.hsm.b_conn = 0;
-		if (spin_trylock(&lnw->wq_lock)) {
-			langwell_update_transceiver();
-			spin_unlock(&lnw->wq_lock);
-		}
-	}
-
-	/* Clear interrupts */
-	writel(int_mask, reg_base + CI_USBSTS);
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t otg_irq(int irq, void *_dev)
-{
-	struct langwell_otg		*lnw = _dev;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				int_sts, int_en;
-	u32				int_mask = 0;
-	int				flag = 0;
-
-	int_sts = readl(lnw->iotg.base + CI_OTGSC);
-	int_en = (int_sts & OTGSC_INTEN_MASK) >> 8;
-	int_mask = int_sts & int_en;
-	if (int_mask == 0)
-		return IRQ_NONE;
-
-	if (int_mask & OTGSC_IDIS) {
-		dev_dbg(lnw->dev, "%s: id change int\n", __func__);
-		iotg->hsm.id = (int_sts & OTGSC_ID) ? 1 : 0;
-		dev_dbg(lnw->dev, "id = %d\n", iotg->hsm.id);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_DPIS) {
-		dev_dbg(lnw->dev, "%s: data pulse int\n", __func__);
-		iotg->hsm.a_srp_det = (int_sts & OTGSC_DPS) ? 1 : 0;
-		dev_dbg(lnw->dev, "data pulse = %d\n", iotg->hsm.a_srp_det);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_BSEIS) {
-		dev_dbg(lnw->dev, "%s: b session end int\n", __func__);
-		iotg->hsm.b_sess_end = (int_sts & OTGSC_BSE) ? 1 : 0;
-		dev_dbg(lnw->dev, "b_sess_end = %d\n", iotg->hsm.b_sess_end);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_BSVIS) {
-		dev_dbg(lnw->dev, "%s: b session valid int\n", __func__);
-		iotg->hsm.b_sess_vld = (int_sts & OTGSC_BSV) ? 1 : 0;
-		dev_dbg(lnw->dev, "b_sess_vld = %d\n", iotg->hsm.b_sess_end);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_ASVIS) {
-		dev_dbg(lnw->dev, "%s: a session valid int\n", __func__);
-		iotg->hsm.a_sess_vld = (int_sts & OTGSC_ASV) ? 1 : 0;
-		dev_dbg(lnw->dev, "a_sess_vld = %d\n", iotg->hsm.a_sess_vld);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_AVVIS) {
-		dev_dbg(lnw->dev, "%s: a vbus valid int\n", __func__);
-		iotg->hsm.a_vbus_vld = (int_sts & OTGSC_AVV) ? 1 : 0;
-		dev_dbg(lnw->dev, "a_vbus_vld = %d\n", iotg->hsm.a_vbus_vld);
-		flag = 1;
-	}
-
-	if (int_mask & OTGSC_1MSS) {
-		/* need to schedule otg_work if any timer is expired */
-		if (langwell_otg_tick_timer(&int_sts))
-			flag = 1;
-	}
-
-	writel((int_sts & ~OTGSC_INTSTS_MASK) | int_mask,
-					lnw->iotg.base + CI_OTGSC);
-	if (flag)
-		langwell_update_transceiver();
-
-	return IRQ_HANDLED;
-}
-
-static int langwell_otg_iotg_notify(struct notifier_block *nb,
-				unsigned long action, void *data)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = data;
-	int				flag = 0;
-
-	if (iotg == NULL)
-		return NOTIFY_BAD;
-
-	if (lnw == NULL)
-		return NOTIFY_BAD;
-
-	switch (action) {
-	case MID_OTG_NOTIFY_CONNECT:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Connect Event\n");
-		if (iotg->otg.default_a == 1)
-			iotg->hsm.b_conn = 1;
-		else
-			iotg->hsm.a_conn = 1;
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_DISCONN:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Disconnect Event\n");
-		if (iotg->otg.default_a == 1)
-			iotg->hsm.b_conn = 0;
-		else
-			iotg->hsm.a_conn = 0;
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_HSUSPEND:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Host Bus suspend Event\n");
-		if (iotg->otg.default_a == 1)
-			iotg->hsm.a_suspend_req = 1;
-		else
-			iotg->hsm.b_bus_req = 0;
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_HRESUME:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Host Bus resume Event\n");
-		if (iotg->otg.default_a == 1)
-			iotg->hsm.b_bus_resume = 1;
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_CSUSPEND:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Client Bus suspend Event\n");
-		if (iotg->otg.default_a == 1) {
-			if (iotg->hsm.b_bus_suspend_vld == 2) {
-				iotg->hsm.b_bus_suspend = 1;
-				iotg->hsm.b_bus_suspend_vld = 0;
-				flag = 1;
-			} else {
-				iotg->hsm.b_bus_suspend_vld++;
-				flag = 0;
-			}
-		} else {
-			if (iotg->hsm.a_bus_suspend == 0) {
-				iotg->hsm.a_bus_suspend = 1;
-				flag = 1;
-			}
-		}
-		break;
-	case MID_OTG_NOTIFY_CRESUME:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Client Bus resume Event\n");
-		if (iotg->otg.default_a == 0)
-			iotg->hsm.a_bus_suspend = 0;
-		flag = 0;
-		break;
-	case MID_OTG_NOTIFY_HOSTADD:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity Host Driver Add\n");
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_HOSTREMOVE:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity Host Driver remove\n");
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_CLIENTADD:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity Client Driver Add\n");
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_CLIENTREMOVE:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity Client Driver remove\n");
-		flag = 1;
-		break;
-	default:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity unknown notify message\n");
-		return NOTIFY_DONE;
-	}
-
-	if (flag)
-		langwell_update_transceiver();
-
-	return NOTIFY_OK;
-}
-
-static void langwell_otg_work(struct work_struct *work)
-{
-	struct langwell_otg		*lnw;
-	struct intel_mid_otg_xceiv	*iotg;
-	int				retval;
-	struct pci_dev			*pdev;
-
-	lnw = container_of(work, struct langwell_otg, work);
-	iotg = &lnw->iotg;
-	pdev = to_pci_dev(lnw->dev);
-
-	dev_dbg(lnw->dev, "%s: old state = %s\n", __func__,
-			otg_state_string(iotg->otg.state));
-
-	switch (iotg->otg.state) {
-	case OTG_STATE_UNDEFINED:
-	case OTG_STATE_B_IDLE:
-		if (!iotg->hsm.id) {
-			langwell_otg_del_timer(b_srp_init_tmr);
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			langwell_otg_chrg_vbus(0);
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.b_sess_vld) {
-			langwell_otg_del_timer(b_srp_init_tmr);
-			del_timer_sync(&lnw->hsm_timer);
-			iotg->hsm.b_sess_end = 0;
-			iotg->hsm.a_bus_suspend = 0;
-			langwell_otg_chrg_vbus(0);
-
-			if (lnw->iotg.start_peripheral) {
-				lnw->iotg.start_peripheral(&lnw->iotg);
-				iotg->otg.state = OTG_STATE_B_PERIPHERAL;
-			} else
-				dev_dbg(lnw->dev, "client driver not loaded\n");
-
-		} else if (iotg->hsm.b_srp_init_tmout) {
-			iotg->hsm.b_srp_init_tmout = 0;
-			dev_warn(lnw->dev, "SRP init timeout\n");
-		} else if (iotg->hsm.b_srp_fail_tmout) {
-			iotg->hsm.b_srp_fail_tmout = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			/* No silence failure */
-			langwell_otg_nsf_msg(6);
-		} else if (iotg->hsm.b_bus_req && iotg->hsm.b_sess_end) {
-			del_timer_sync(&lnw->hsm_timer);
-			/* workaround for b_se0_srp detection */
-			retval = langwell_otg_check_se0_srp(0);
-			if (retval) {
-				iotg->hsm.b_bus_req = 0;
-				dev_dbg(lnw->dev, "LS isn't SE0, try later\n");
-			} else {
-				/* clear the PHCD before start srp */
-				langwell_otg_phy_low_power(0);
-
-				/* Start SRP */
-				langwell_otg_add_timer(b_srp_init_tmr);
-				iotg->otg.start_srp(&iotg->otg);
-				langwell_otg_del_timer(b_srp_init_tmr);
-				langwell_otg_add_ktimer(TB_SRP_FAIL_TMR);
-
-				/* reset PHY low power mode here */
-				langwell_otg_phy_low_power_wait(1);
-			}
-		}
-		break;
-	case OTG_STATE_B_SRP_INIT:
-		if (!iotg->hsm.id) {
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_chrg_vbus(0);
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.b_sess_vld) {
-			langwell_otg_chrg_vbus(0);
-			if (lnw->iotg.start_peripheral) {
-				lnw->iotg.start_peripheral(&lnw->iotg);
-				iotg->otg.state = OTG_STATE_B_PERIPHERAL;
-			} else
-				dev_dbg(lnw->dev, "client driver not loaded\n");
-		}
-		break;
-	case OTG_STATE_B_PERIPHERAL:
-		if (!iotg->hsm.id) {
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			langwell_otg_chrg_vbus(0);
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.b_sess_vld) {
-			iotg->hsm.b_hnp_enable = 0;
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			iotg->otg.state = OTG_STATE_B_IDLE;
-		} else if (iotg->hsm.b_bus_req && iotg->otg.gadget &&
-					iotg->otg.gadget->b_hnp_enable &&
-					iotg->hsm.a_bus_suspend) {
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			langwell_otg_HAAR(1);
-			iotg->hsm.a_conn = 0;
-
-			if (lnw->iotg.start_host) {
-				lnw->iotg.start_host(&lnw->iotg);
-				iotg->otg.state = OTG_STATE_B_WAIT_ACON;
-			} else
-				dev_dbg(lnw->dev,
-						"host driver not loaded.\n");
-
-			iotg->hsm.a_bus_resume = 0;
-			langwell_otg_add_ktimer(TB_ASE0_BRST_TMR);
-		}
-		break;
-
-	case OTG_STATE_B_WAIT_ACON:
-		if (!iotg->hsm.id) {
-			/* delete hsm timer for b_ase0_brst_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			langwell_otg_chrg_vbus(0);
-
-			langwell_otg_HAAR(0);
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.b_sess_vld) {
-			/* delete hsm timer for b_ase0_brst_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->hsm.b_hnp_enable = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			langwell_otg_chrg_vbus(0);
-			langwell_otg_HAAR(0);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-		} else if (iotg->hsm.a_conn) {
-			/* delete hsm timer for b_ase0_brst_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			langwell_otg_HAAR(0);
-			iotg->otg.state = OTG_STATE_B_HOST;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_bus_resume ||
-				iotg->hsm.b_ase0_brst_tmout) {
-			/* delete hsm timer for b_ase0_brst_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			langwell_otg_HAAR(0);
-			langwell_otg_nsf_msg(7);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			iotg->hsm.a_bus_suspend = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			if (lnw->iotg.start_peripheral)
-				lnw->iotg.start_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver not loaded.\n");
-
-			iotg->otg.state = OTG_STATE_B_PERIPHERAL;
-		}
-		break;
-
-	case OTG_STATE_B_HOST:
-		if (!iotg->hsm.id) {
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			langwell_otg_chrg_vbus(0);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.b_sess_vld) {
-			iotg->hsm.b_hnp_enable = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			langwell_otg_chrg_vbus(0);
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-		} else if ((!iotg->hsm.b_bus_req) ||
-				(!iotg->hsm.a_conn)) {
-			iotg->hsm.b_bus_req = 0;
-			langwell_otg_loc_sof(0);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			iotg->hsm.a_bus_suspend = 0;
-
-			if (lnw->iotg.start_peripheral)
-				lnw->iotg.start_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-						"client driver not loaded.\n");
-
-			iotg->otg.state = OTG_STATE_B_PERIPHERAL;
-		}
-		break;
-
-	case OTG_STATE_A_IDLE:
-		iotg->otg.default_a = 1;
-		if (iotg->hsm.id) {
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-			iotg->hsm.vbus_srp_up = 0;
-
-			langwell_otg_chrg_vbus(0);
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_bus_drop &&
-			(iotg->hsm.a_srp_det || iotg->hsm.a_bus_req)) {
-			langwell_otg_phy_low_power(0);
-
-			/* Turn on VBus */
-			iotg->otg.set_vbus(&iotg->otg, true);
-
-			iotg->hsm.vbus_srp_up = 0;
-			iotg->hsm.a_wait_vrise_tmout = 0;
-			langwell_otg_add_timer(a_wait_vrise_tmr);
-			iotg->otg.state = OTG_STATE_A_WAIT_VRISE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_bus_drop && iotg->hsm.a_sess_vld) {
-			iotg->hsm.vbus_srp_up = 1;
-		} else if (!iotg->hsm.a_sess_vld && iotg->hsm.vbus_srp_up) {
-			msleep(10);
-			langwell_otg_phy_low_power(0);
-
-			/* Turn on VBus */
-			iotg->otg.set_vbus(&iotg->otg, true);
-			iotg->hsm.a_srp_det = 1;
-			iotg->hsm.vbus_srp_up = 0;
-			iotg->hsm.a_wait_vrise_tmout = 0;
-			langwell_otg_add_timer(a_wait_vrise_tmr);
-			iotg->otg.state = OTG_STATE_A_WAIT_VRISE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_sess_vld &&
-				!iotg->hsm.vbus_srp_up) {
-			langwell_otg_phy_low_power(1);
-		}
-		break;
-	case OTG_STATE_A_WAIT_VRISE:
-		if (iotg->hsm.id) {
-			langwell_otg_del_timer(a_wait_vrise_tmr);
-			iotg->hsm.b_bus_req = 0;
-			iotg->otg.default_a = 0;
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-		} else if (iotg->hsm.a_vbus_vld) {
-			langwell_otg_del_timer(a_wait_vrise_tmr);
-			iotg->hsm.b_conn = 0;
-			if (lnw->iotg.start_host)
-				lnw->iotg.start_host(&lnw->iotg);
-			else {
-				dev_dbg(lnw->dev, "host driver not loaded.\n");
-				break;
-			}
-
-			langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-			iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-		} else if (iotg->hsm.a_wait_vrise_tmout) {
-			iotg->hsm.b_conn = 0;
-			if (iotg->hsm.a_vbus_vld) {
-				if (lnw->iotg.start_host)
-					lnw->iotg.start_host(&lnw->iotg);
-				else {
-					dev_dbg(lnw->dev,
-						"host driver not loaded.\n");
-					break;
-				}
-				langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-				iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-			} else {
-
-				/* Turn off VBus */
-				iotg->otg.set_vbus(&iotg->otg, false);
-				langwell_otg_phy_low_power_wait(1);
-				iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-			}
-		}
-		break;
-	case OTG_STATE_A_WAIT_BCON:
-		if (iotg->hsm.id) {
-			/* delete hsm timer for a_wait_bcon_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_vbus_vld) {
-			/* delete hsm timer for a_wait_bcon_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-		} else if (iotg->hsm.a_bus_drop ||
-				(iotg->hsm.a_wait_bcon_tmout &&
-				!iotg->hsm.a_bus_req)) {
-			/* delete hsm timer for a_wait_bcon_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-		} else if (iotg->hsm.b_conn) {
-			/* delete hsm timer for a_wait_bcon_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->hsm.a_suspend_req = 0;
-			iotg->otg.state = OTG_STATE_A_HOST;
-			if (iotg->hsm.a_srp_det && iotg->otg.host &&
-					!iotg->otg.host->b_hnp_enable) {
-				/* SRP capable peripheral-only device */
-				iotg->hsm.a_bus_req = 1;
-				iotg->hsm.a_srp_det = 0;
-			} else if (!iotg->hsm.a_bus_req && iotg->otg.host &&
-					iotg->otg.host->b_hnp_enable) {
-				/* It is not safe enough to do a fast
-				 * transition from A_WAIT_BCON to
-				 * A_SUSPEND */
-				msleep(10000);
-				if (iotg->hsm.a_bus_req)
-					break;
-
-				if (request_irq(pdev->irq,
-					otg_dummy_irq, IRQF_SHARED,
-					driver_name, iotg->base) != 0) {
-					dev_dbg(lnw->dev,
-						"request interrupt %d fail\n",
-						pdev->irq);
-				}
-
-				langwell_otg_HABA(1);
-				iotg->hsm.b_bus_resume = 0;
-				iotg->hsm.a_aidl_bdis_tmout = 0;
-
-				langwell_otg_loc_sof(0);
-				/* clear PHCD to enable HW timer */
-				langwell_otg_phy_low_power(0);
-				langwell_otg_add_timer(a_aidl_bdis_tmr);
-				iotg->otg.state = OTG_STATE_A_SUSPEND;
-			} else if (!iotg->hsm.a_bus_req && iotg->otg.host &&
-				!iotg->otg.host->b_hnp_enable) {
-				if (lnw->iotg.stop_host)
-					lnw->iotg.stop_host(&lnw->iotg);
-				else
-					dev_dbg(lnw->dev,
-						"host driver removed.\n");
-
-				/* Turn off VBus */
-				iotg->otg.set_vbus(&iotg->otg, false);
-				iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-			}
-		}
-		break;
-	case OTG_STATE_A_HOST:
-		if (iotg->hsm.id) {
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_bus_drop ||
-				(iotg->otg.host &&
-				!iotg->otg.host->b_hnp_enable &&
-					!iotg->hsm.a_bus_req)) {
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-		} else if (!iotg->hsm.a_vbus_vld) {
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-		} else if (iotg->otg.host &&
-				iotg->otg.host->b_hnp_enable &&
-				!iotg->hsm.a_bus_req) {
-			/* Set HABA to enable hardware assistance to signal
-			 *  A-connect after receiver B-disconnect. Hardware
-			 *  will then set client mode and enable URE, SLE and
-			 *  PCE after the assistance. otg_dummy_irq is used to
-			 *  clean these ints when client driver is not resumed.
-			 */
-			if (request_irq(pdev->irq, otg_dummy_irq, IRQF_SHARED,
-					driver_name, iotg->base) != 0) {
-				dev_dbg(lnw->dev,
-					"request interrupt %d failed\n",
-						pdev->irq);
-			}
-
-			/* set HABA */
-			langwell_otg_HABA(1);
-			iotg->hsm.b_bus_resume = 0;
-			iotg->hsm.a_aidl_bdis_tmout = 0;
-			langwell_otg_loc_sof(0);
-			/* clear PHCD to enable HW timer */
-			langwell_otg_phy_low_power(0);
-			langwell_otg_add_timer(a_aidl_bdis_tmr);
-			iotg->otg.state = OTG_STATE_A_SUSPEND;
-		} else if (!iotg->hsm.b_conn || !iotg->hsm.a_bus_req) {
-			langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-			iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-		}
-		break;
-	case OTG_STATE_A_SUSPEND:
-		if (iotg->hsm.id) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_bus_req ||
-				iotg->hsm.b_bus_resume) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-			iotg->hsm.a_suspend_req = 0;
-			langwell_otg_loc_sof(1);
-			iotg->otg.state = OTG_STATE_A_HOST;
-		} else if (iotg->hsm.a_aidl_bdis_tmout ||
-				iotg->hsm.a_bus_drop) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-		} else if (!iotg->hsm.b_conn && iotg->otg.host &&
-				iotg->otg.host->b_hnp_enable) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			iotg->hsm.b_bus_suspend = 0;
-			iotg->hsm.b_bus_suspend_vld = 0;
-
-			/* msleep(200); */
-			if (lnw->iotg.start_peripheral)
-				lnw->iotg.start_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver not loaded.\n");
-
-			langwell_otg_add_ktimer(TB_BUS_SUSPEND_TMR);
-			iotg->otg.state = OTG_STATE_A_PERIPHERAL;
-			break;
-		} else if (!iotg->hsm.a_vbus_vld) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-		}
-		break;
-	case OTG_STATE_A_PERIPHERAL:
-		if (iotg->hsm.id) {
-			/* delete hsm timer for b_bus_suspend_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_vbus_vld) {
-			/* delete hsm timer for b_bus_suspend_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-		} else if (iotg->hsm.a_bus_drop) {
-			/* delete hsm timer for b_bus_suspend_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-		} else if (iotg->hsm.b_bus_suspend) {
-			/* delete hsm timer for b_bus_suspend_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			if (lnw->iotg.start_host)
-				lnw->iotg.start_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-						"host driver not loaded.\n");
-			langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-			iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-		} else if (iotg->hsm.b_bus_suspend_tmout) {
-			u32	val;
-			val = readl(lnw->iotg.base + CI_PORTSC1);
-			if (!(val & PORTSC_SUSP))
-				break;
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			if (lnw->iotg.start_host)
-				lnw->iotg.start_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-						"host driver not loaded.\n");
-			langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-			iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-		}
-		break;
-	case OTG_STATE_A_VBUS_ERR:
-		if (iotg->hsm.id) {
-			iotg->otg.default_a = 0;
-			iotg->hsm.a_clr_err = 0;
-			iotg->hsm.a_srp_det = 0;
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_clr_err) {
-			iotg->hsm.a_clr_err = 0;
-			iotg->hsm.a_srp_det = 0;
-			reset_otg();
-			init_hsm();
-			if (iotg->otg.state == OTG_STATE_A_IDLE)
-				langwell_update_transceiver();
-		} else {
-			/* FW will clear PHCD bit when any VBus
-			 * event detected. Reset PHCD to 1 again */
-			langwell_otg_phy_low_power(1);
-		}
-		break;
-	case OTG_STATE_A_WAIT_VFALL:
-		if (iotg->hsm.id) {
-			iotg->otg.default_a = 0;
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_bus_req) {
-
-			/* Turn on VBus */
-			iotg->otg.set_vbus(&iotg->otg, true);
-			iotg->hsm.a_wait_vrise_tmout = 0;
-			langwell_otg_add_timer(a_wait_vrise_tmr);
-			iotg->otg.state = OTG_STATE_A_WAIT_VRISE;
-		} else if (!iotg->hsm.a_sess_vld) {
-			iotg->hsm.a_srp_det = 0;
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-		}
-		break;
-	default:
-		;
-	}
-
-	dev_dbg(lnw->dev, "%s: new state = %s\n", __func__,
-			otg_state_string(iotg->otg.state));
-}
-
-static ssize_t
-show_registers(struct device *_dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	char			*next;
-	unsigned		size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	t = scnprintf(next, size,
-		"\n"
-		"USBCMD = 0x%08x\n"
-		"USBSTS = 0x%08x\n"
-		"USBINTR = 0x%08x\n"
-		"ASYNCLISTADDR = 0x%08x\n"
-		"PORTSC1 = 0x%08x\n"
-		"HOSTPC1 = 0x%08x\n"
-		"OTGSC = 0x%08x\n"
-		"USBMODE = 0x%08x\n",
-		readl(lnw->iotg.base + 0x30),
-		readl(lnw->iotg.base + 0x34),
-		readl(lnw->iotg.base + 0x38),
-		readl(lnw->iotg.base + 0x48),
-		readl(lnw->iotg.base + 0x74),
-		readl(lnw->iotg.base + 0xb4),
-		readl(lnw->iotg.base + 0xf4),
-		readl(lnw->iotg.base + 0xf8)
-	     );
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-static DEVICE_ATTR(registers, S_IRUGO, show_registers, NULL);
-
-static ssize_t
-show_hsm(struct device *_dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	char				*next;
-	unsigned			size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	if (iotg->otg.host)
-		iotg->hsm.a_set_b_hnp_en = iotg->otg.host->b_hnp_enable;
-
-	if (iotg->otg.gadget)
-		iotg->hsm.b_hnp_enable = iotg->otg.gadget->b_hnp_enable;
-
-	t = scnprintf(next, size,
-		"\n"
-		"current state = %s\n"
-		"a_bus_resume = \t%d\n"
-		"a_bus_suspend = \t%d\n"
-		"a_conn = \t%d\n"
-		"a_sess_vld = \t%d\n"
-		"a_srp_det = \t%d\n"
-		"a_vbus_vld = \t%d\n"
-		"b_bus_resume = \t%d\n"
-		"b_bus_suspend = \t%d\n"
-		"b_conn = \t%d\n"
-		"b_se0_srp = \t%d\n"
-		"b_sess_end = \t%d\n"
-		"b_sess_vld = \t%d\n"
-		"id = \t%d\n"
-		"a_set_b_hnp_en = \t%d\n"
-		"b_srp_done = \t%d\n"
-		"b_hnp_enable = \t%d\n"
-		"a_wait_vrise_tmout = \t%d\n"
-		"a_wait_bcon_tmout = \t%d\n"
-		"a_aidl_bdis_tmout = \t%d\n"
-		"b_ase0_brst_tmout = \t%d\n"
-		"a_bus_drop = \t%d\n"
-		"a_bus_req = \t%d\n"
-		"a_clr_err = \t%d\n"
-		"a_suspend_req = \t%d\n"
-		"b_bus_req = \t%d\n"
-		"b_bus_suspend_tmout = \t%d\n"
-		"b_bus_suspend_vld = \t%d\n",
-		otg_state_string(iotg->otg.state),
-		iotg->hsm.a_bus_resume,
-		iotg->hsm.a_bus_suspend,
-		iotg->hsm.a_conn,
-		iotg->hsm.a_sess_vld,
-		iotg->hsm.a_srp_det,
-		iotg->hsm.a_vbus_vld,
-		iotg->hsm.b_bus_resume,
-		iotg->hsm.b_bus_suspend,
-		iotg->hsm.b_conn,
-		iotg->hsm.b_se0_srp,
-		iotg->hsm.b_sess_end,
-		iotg->hsm.b_sess_vld,
-		iotg->hsm.id,
-		iotg->hsm.a_set_b_hnp_en,
-		iotg->hsm.b_srp_done,
-		iotg->hsm.b_hnp_enable,
-		iotg->hsm.a_wait_vrise_tmout,
-		iotg->hsm.a_wait_bcon_tmout,
-		iotg->hsm.a_aidl_bdis_tmout,
-		iotg->hsm.b_ase0_brst_tmout,
-		iotg->hsm.a_bus_drop,
-		iotg->hsm.a_bus_req,
-		iotg->hsm.a_clr_err,
-		iotg->hsm.a_suspend_req,
-		iotg->hsm.b_bus_req,
-		iotg->hsm.b_bus_suspend_tmout,
-		iotg->hsm.b_bus_suspend_vld
-		);
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-static DEVICE_ATTR(hsm, S_IRUGO, show_hsm, NULL);
-
-static ssize_t
-get_a_bus_req(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	char			*next;
-	unsigned		size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	t = scnprintf(next, size, "%d", lnw->iotg.hsm.a_bus_req);
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-
-static ssize_t
-set_a_bus_req(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-
-	if (!iotg->otg.default_a)
-		return -1;
-	if (count > 2)
-		return -1;
-
-	if (buf[0] == '0') {
-		iotg->hsm.a_bus_req = 0;
-		dev_dbg(lnw->dev, "User request: a_bus_req = 0\n");
-	} else if (buf[0] == '1') {
-		/* If a_bus_drop is TRUE, a_bus_req can't be set */
-		if (iotg->hsm.a_bus_drop)
-			return -1;
-		iotg->hsm.a_bus_req = 1;
-		dev_dbg(lnw->dev, "User request: a_bus_req = 1\n");
-	}
-	if (spin_trylock(&lnw->wq_lock)) {
-		langwell_update_transceiver();
-		spin_unlock(&lnw->wq_lock);
-	}
-	return count;
-}
-static DEVICE_ATTR(a_bus_req, S_IRUGO | S_IWUSR, get_a_bus_req, set_a_bus_req);
-
-static ssize_t
-get_a_bus_drop(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	char			*next;
-	unsigned		size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	t = scnprintf(next, size, "%d", lnw->iotg.hsm.a_bus_drop);
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-
-static ssize_t
-set_a_bus_drop(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-
-	if (!iotg->otg.default_a)
-		return -1;
-	if (count > 2)
-		return -1;
-
-	if (buf[0] == '0') {
-		iotg->hsm.a_bus_drop = 0;
-		dev_dbg(lnw->dev, "User request: a_bus_drop = 0\n");
-	} else if (buf[0] == '1') {
-		iotg->hsm.a_bus_drop = 1;
-		iotg->hsm.a_bus_req = 0;
-		dev_dbg(lnw->dev, "User request: a_bus_drop = 1\n");
-		dev_dbg(lnw->dev, "User request: and a_bus_req = 0\n");
-	}
-	if (spin_trylock(&lnw->wq_lock)) {
-		langwell_update_transceiver();
-		spin_unlock(&lnw->wq_lock);
-	}
-	return count;
-}
-static DEVICE_ATTR(a_bus_drop, S_IRUGO | S_IWUSR, get_a_bus_drop, set_a_bus_drop);
-
-static ssize_t
-get_b_bus_req(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	char			*next;
-	unsigned		size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	t = scnprintf(next, size, "%d", lnw->iotg.hsm.b_bus_req);
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-
-static ssize_t
-set_b_bus_req(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-
-	if (iotg->otg.default_a)
-		return -1;
-
-	if (count > 2)
-		return -1;
-
-	if (buf[0] == '0') {
-		iotg->hsm.b_bus_req = 0;
-		dev_dbg(lnw->dev, "User request: b_bus_req = 0\n");
-	} else if (buf[0] == '1') {
-		iotg->hsm.b_bus_req = 1;
-		dev_dbg(lnw->dev, "User request: b_bus_req = 1\n");
-	}
-	if (spin_trylock(&lnw->wq_lock)) {
-		langwell_update_transceiver();
-		spin_unlock(&lnw->wq_lock);
-	}
-	return count;
-}
-static DEVICE_ATTR(b_bus_req, S_IRUGO | S_IWUSR, get_b_bus_req, set_b_bus_req);
-
-static ssize_t
-set_a_clr_err(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-
-	if (!iotg->otg.default_a)
-		return -1;
-	if (count > 2)
-		return -1;
-
-	if (buf[0] == '1') {
-		iotg->hsm.a_clr_err = 1;
-		dev_dbg(lnw->dev, "User request: a_clr_err = 1\n");
-	}
-	if (spin_trylock(&lnw->wq_lock)) {
-		langwell_update_transceiver();
-		spin_unlock(&lnw->wq_lock);
-	}
-	return count;
-}
-static DEVICE_ATTR(a_clr_err, S_IWUSR, NULL, set_a_clr_err);
-
-static struct attribute *inputs_attrs[] = {
-	&dev_attr_a_bus_req.attr,
-	&dev_attr_a_bus_drop.attr,
-	&dev_attr_b_bus_req.attr,
-	&dev_attr_a_clr_err.attr,
-	NULL,
-};
-
-static struct attribute_group debug_dev_attr_group = {
-	.name = "inputs",
-	.attrs = inputs_attrs,
-};
-
-static int langwell_otg_probe(struct pci_dev *pdev,
-		const struct pci_device_id *id)
-{
-	unsigned long		resource, len;
-	void __iomem		*base = NULL;
-	int			retval;
-	u32			val32;
-	struct langwell_otg	*lnw;
-	char			qname[] = "langwell_otg_queue";
-
-	retval = 0;
-	dev_dbg(&pdev->dev, "\notg controller is detected.\n");
-	if (pci_enable_device(pdev) < 0) {
-		retval = -ENODEV;
-		goto done;
-	}
-
-	lnw = kzalloc(sizeof *lnw, GFP_KERNEL);
-	if (lnw == NULL) {
-		retval = -ENOMEM;
-		goto done;
-	}
-	the_transceiver = lnw;
-
-	/* control register: BAR 0 */
-	resource = pci_resource_start(pdev, 0);
-	len = pci_resource_len(pdev, 0);
-	if (!request_mem_region(resource, len, driver_name)) {
-		retval = -EBUSY;
-		goto err;
-	}
-	lnw->region = 1;
-
-	base = ioremap_nocache(resource, len);
-	if (base == NULL) {
-		retval = -EFAULT;
-		goto err;
-	}
-	lnw->iotg.base = base;
-
-	if (!request_mem_region(USBCFG_ADDR, USBCFG_LEN, driver_name)) {
-		retval = -EBUSY;
-		goto err;
-	}
-	lnw->cfg_region = 1;
-
-	/* For the SCCB.USBCFG register */
-	base = ioremap_nocache(USBCFG_ADDR, USBCFG_LEN);
-	if (base == NULL) {
-		retval = -EFAULT;
-		goto err;
-	}
-	lnw->usbcfg = base;
-
-	if (!pdev->irq) {
-		dev_dbg(&pdev->dev, "No IRQ.\n");
-		retval = -ENODEV;
-		goto err;
-	}
-
-	lnw->qwork = create_singlethread_workqueue(qname);
-	if (!lnw->qwork) {
-		dev_dbg(&pdev->dev, "cannot create workqueue %s\n", qname);
-		retval = -ENOMEM;
-		goto err;
-	}
-	INIT_WORK(&lnw->work, langwell_otg_work);
-
-	/* OTG common part */
-	lnw->dev = &pdev->dev;
-	lnw->iotg.otg.dev = lnw->dev;
-	lnw->iotg.otg.label = driver_name;
-	lnw->iotg.otg.set_host = langwell_otg_set_host;
-	lnw->iotg.otg.set_peripheral = langwell_otg_set_peripheral;
-	lnw->iotg.otg.set_power = langwell_otg_set_power;
-	lnw->iotg.otg.set_vbus = langwell_otg_set_vbus;
-	lnw->iotg.otg.start_srp = langwell_otg_start_srp;
-	lnw->iotg.otg.state = OTG_STATE_UNDEFINED;
-
-	if (otg_set_transceiver(&lnw->iotg.otg)) {
-		dev_dbg(lnw->dev, "can't set transceiver\n");
-		retval = -EBUSY;
-		goto err;
-	}
-
-	reset_otg();
-	init_hsm();
-
-	spin_lock_init(&lnw->lock);
-	spin_lock_init(&lnw->wq_lock);
-	INIT_LIST_HEAD(&active_timers);
-	retval = langwell_otg_init_timers(&lnw->iotg.hsm);
-	if (retval) {
-		dev_dbg(&pdev->dev, "Failed to init timers\n");
-		goto err;
-	}
-
-	init_timer(&lnw->hsm_timer);
-	ATOMIC_INIT_NOTIFIER_HEAD(&lnw->iotg.iotg_notifier);
-
-	lnw->iotg_notifier.notifier_call = langwell_otg_iotg_notify;
-
-	retval = intel_mid_otg_register_notifier(&lnw->iotg,
-						&lnw->iotg_notifier);
-	if (retval) {
-		dev_dbg(lnw->dev, "Failed to register notifier\n");
-		goto err;
-	}
-
-	if (request_irq(pdev->irq, otg_irq, IRQF_SHARED,
-				driver_name, lnw) != 0) {
-		dev_dbg(lnw->dev, "request interrupt %d failed\n", pdev->irq);
-		retval = -EBUSY;
-		goto err;
-	}
-
-	/* enable OTGSC int */
-	val32 = OTGSC_DPIE | OTGSC_BSEIE | OTGSC_BSVIE |
-		OTGSC_ASVIE | OTGSC_AVVIE | OTGSC_IDIE | OTGSC_IDPU;
-	writel(val32, lnw->iotg.base + CI_OTGSC);
-
-	retval = device_create_file(&pdev->dev, &dev_attr_registers);
-	if (retval < 0) {
-		dev_dbg(lnw->dev,
-			"Can't register sysfs attribute: %d\n", retval);
-		goto err;
-	}
-
-	retval = device_create_file(&pdev->dev, &dev_attr_hsm);
-	if (retval < 0) {
-		dev_dbg(lnw->dev, "Can't hsm sysfs attribute: %d\n", retval);
-		goto err;
-	}
-
-	retval = sysfs_create_group(&pdev->dev.kobj, &debug_dev_attr_group);
-	if (retval < 0) {
-		dev_dbg(lnw->dev,
-			"Can't register sysfs attr group: %d\n", retval);
-		goto err;
-	}
-
-	if (lnw->iotg.otg.state == OTG_STATE_A_IDLE)
-		langwell_update_transceiver();
-
-	return 0;
-
-err:
-	if (the_transceiver)
-		langwell_otg_remove(pdev);
-done:
-	return retval;
-}
-
-static void langwell_otg_remove(struct pci_dev *pdev)
-{
-	struct langwell_otg *lnw = the_transceiver;
-
-	if (lnw->qwork) {
-		flush_workqueue(lnw->qwork);
-		destroy_workqueue(lnw->qwork);
-	}
-	intel_mid_otg_unregister_notifier(&lnw->iotg, &lnw->iotg_notifier);
-	langwell_otg_free_timers();
-
-	/* disable OTGSC interrupt as OTGSC doesn't change in reset */
-	writel(0, lnw->iotg.base + CI_OTGSC);
-
-	if (pdev->irq)
-		free_irq(pdev->irq, lnw);
-	if (lnw->usbcfg)
-		iounmap(lnw->usbcfg);
-	if (lnw->cfg_region)
-		release_mem_region(USBCFG_ADDR, USBCFG_LEN);
-	if (lnw->iotg.base)
-		iounmap(lnw->iotg.base);
-	if (lnw->region)
-		release_mem_region(pci_resource_start(pdev, 0),
-				pci_resource_len(pdev, 0));
-
-	otg_set_transceiver(NULL);
-	pci_disable_device(pdev);
-	sysfs_remove_group(&pdev->dev.kobj, &debug_dev_attr_group);
-	device_remove_file(&pdev->dev, &dev_attr_hsm);
-	device_remove_file(&pdev->dev, &dev_attr_registers);
-	kfree(lnw);
-	lnw = NULL;
-}
-
-static void transceiver_suspend(struct pci_dev *pdev)
-{
-	pci_save_state(pdev);
-	pci_set_power_state(pdev, PCI_D3hot);
-	langwell_otg_phy_low_power(1);
-}
-
-static int langwell_otg_suspend(struct pci_dev *pdev, pm_message_t message)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	int				ret = 0;
-
-	/* Disbale OTG interrupts */
-	langwell_otg_intr(0);
-
-	if (pdev->irq)
-		free_irq(pdev->irq, lnw);
-
-	/* Prevent more otg_work */
-	flush_workqueue(lnw->qwork);
-	destroy_workqueue(lnw->qwork);
-	lnw->qwork = NULL;
-
-	/* start actions */
-	switch (iotg->otg.state) {
-	case OTG_STATE_A_WAIT_VFALL:
-		iotg->otg.state = OTG_STATE_A_IDLE;
-	case OTG_STATE_A_IDLE:
-	case OTG_STATE_B_IDLE:
-	case OTG_STATE_A_VBUS_ERR:
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_WAIT_VRISE:
-		langwell_otg_del_timer(a_wait_vrise_tmr);
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_WAIT_BCON:
-		del_timer_sync(&lnw->hsm_timer);
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev, "host driver has been removed.\n");
-
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_HOST:
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev, "host driver has been removed.\n");
-
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_SUSPEND:
-		langwell_otg_del_timer(a_aidl_bdis_tmr);
-		langwell_otg_HABA(0);
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(lnw->dev, "host driver has been removed.\n");
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_PERIPHERAL:
-		del_timer_sync(&lnw->hsm_timer);
-
-		if (lnw->iotg.stop_peripheral)
-			lnw->iotg.stop_peripheral(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev,
-				"client driver has been removed.\n");
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_B_HOST:
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev, "host driver has been removed.\n");
-		iotg->hsm.b_bus_req = 0;
-		iotg->otg.state = OTG_STATE_B_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_B_PERIPHERAL:
-		if (lnw->iotg.stop_peripheral)
-			lnw->iotg.stop_peripheral(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev,
-				"client driver has been removed.\n");
-		iotg->otg.state = OTG_STATE_B_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_B_WAIT_ACON:
-		/* delete hsm timer for b_ase0_brst_tmr */
-		del_timer_sync(&lnw->hsm_timer);
-
-		langwell_otg_HAAR(0);
-
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev, "host driver has been removed.\n");
-		iotg->hsm.b_bus_req = 0;
-		iotg->otg.state = OTG_STATE_B_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	default:
-		dev_dbg(lnw->dev, "error state before suspend\n");
-		break;
-	}
-
-	return ret;
-}
-
-static void transceiver_resume(struct pci_dev *pdev)
-{
-	pci_restore_state(pdev);
-	pci_set_power_state(pdev, PCI_D0);
-}
-
-static int langwell_otg_resume(struct pci_dev *pdev)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	int			ret = 0;
-
-	transceiver_resume(pdev);
-
-	lnw->qwork = create_singlethread_workqueue("langwell_otg_queue");
-	if (!lnw->qwork) {
-		dev_dbg(&pdev->dev, "cannot create langwell otg workqueuen");
-		ret = -ENOMEM;
-		goto error;
-	}
-
-	if (request_irq(pdev->irq, otg_irq, IRQF_SHARED,
-				driver_name, lnw) != 0) {
-		dev_dbg(&pdev->dev, "request interrupt %d failed\n", pdev->irq);
-		ret = -EBUSY;
-		goto error;
-	}
-
-	/* enable OTG interrupts */
-	langwell_otg_intr(1);
-
-	update_hsm();
-
-	langwell_update_transceiver();
-
-	return ret;
-error:
-	langwell_otg_intr(0);
-	transceiver_suspend(pdev);
-	return ret;
-}
-
-static int __init langwell_otg_init(void)
-{
-	return pci_register_driver(&otg_pci_driver);
-}
-module_init(langwell_otg_init);
-
-static void __exit langwell_otg_cleanup(void)
-{
-	pci_unregister_driver(&otg_pci_driver);
-}
-module_exit(langwell_otg_cleanup);
diff --git a/include/linux/usb/langwell_otg.h b/include/linux/usb/langwell_otg.h
deleted file mode 100644
index 51f17b16d312..000000000000
--- a/include/linux/usb/langwell_otg.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Intel Langwell USB OTG transceiver driver
- * Copyright (C) 2008 - 2010, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- */
-
-#ifndef __LANGWELL_OTG_H
-#define __LANGWELL_OTG_H
-
-#include <linux/usb/intel_mid_otg.h>
-
-#define CI_USBCMD		0x30
-#	define USBCMD_RST		BIT(1)
-#	define USBCMD_RS		BIT(0)
-#define CI_USBSTS		0x34
-#	define USBSTS_SLI		BIT(8)
-#	define USBSTS_URI		BIT(6)
-#	define USBSTS_PCI		BIT(2)
-#define CI_PORTSC1		0x74
-#	define PORTSC_PP		BIT(12)
-#	define PORTSC_LS		(BIT(11) | BIT(10))
-#	define PORTSC_SUSP		BIT(7)
-#	define PORTSC_CCS		BIT(0)
-#define CI_HOSTPC1		0xb4
-#	define HOSTPC1_PHCD		BIT(22)
-#define CI_OTGSC		0xf4
-#	define OTGSC_DPIE		BIT(30)
-#	define OTGSC_1MSE		BIT(29)
-#	define OTGSC_BSEIE		BIT(28)
-#	define OTGSC_BSVIE		BIT(27)
-#	define OTGSC_ASVIE		BIT(26)
-#	define OTGSC_AVVIE		BIT(25)
-#	define OTGSC_IDIE		BIT(24)
-#	define OTGSC_DPIS		BIT(22)
-#	define OTGSC_1MSS		BIT(21)
-#	define OTGSC_BSEIS		BIT(20)
-#	define OTGSC_BSVIS		BIT(19)
-#	define OTGSC_ASVIS		BIT(18)
-#	define OTGSC_AVVIS		BIT(17)
-#	define OTGSC_IDIS		BIT(16)
-#	define OTGSC_DPS		BIT(14)
-#	define OTGSC_1MST		BIT(13)
-#	define OTGSC_BSE		BIT(12)
-#	define OTGSC_BSV		BIT(11)
-#	define OTGSC_ASV		BIT(10)
-#	define OTGSC_AVV		BIT(9)
-#	define OTGSC_ID			BIT(8)
-#	define OTGSC_HABA		BIT(7)
-#	define OTGSC_HADP		BIT(6)
-#	define OTGSC_IDPU		BIT(5)
-#	define OTGSC_DP			BIT(4)
-#	define OTGSC_OT			BIT(3)
-#	define OTGSC_HAAR		BIT(2)
-#	define OTGSC_VC			BIT(1)
-#	define OTGSC_VD			BIT(0)
-#	define OTGSC_INTEN_MASK		(0x7f << 24)
-#	define OTGSC_INT_MASK		(0x5f << 24)
-#	define OTGSC_INTSTS_MASK	(0x7f << 16)
-#define CI_USBMODE		0xf8
-#	define USBMODE_CM		(BIT(1) | BIT(0))
-#	define USBMODE_IDLE		0
-#	define USBMODE_DEVICE		0x2
-#	define USBMODE_HOST		0x3
-#define USBCFG_ADDR			0xff10801c
-#define USBCFG_LEN			4
-#	define USBCFG_VBUSVAL		BIT(14)
-#	define USBCFG_AVALID		BIT(13)
-#	define USBCFG_BVALID		BIT(12)
-#	define USBCFG_SESEND		BIT(11)
-
-#define INTR_DUMMY_MASK (USBSTS_SLI | USBSTS_URI | USBSTS_PCI)
-
-enum langwell_otg_timer_type {
-	TA_WAIT_VRISE_TMR,
-	TA_WAIT_BCON_TMR,
-	TA_AIDL_BDIS_TMR,
-	TB_ASE0_BRST_TMR,
-	TB_SE0_SRP_TMR,
-	TB_SRP_INIT_TMR,
-	TB_SRP_FAIL_TMR,
-	TB_BUS_SUSPEND_TMR
-};
-
-#define TA_WAIT_VRISE	100
-#define TA_WAIT_BCON	30000
-#define TA_AIDL_BDIS	15000
-#define TB_ASE0_BRST	5000
-#define TB_SE0_SRP	2
-#define TB_SRP_INIT	100
-#define TB_SRP_FAIL	5500
-#define TB_BUS_SUSPEND	500
-
-struct langwell_otg_timer {
-	unsigned long expires;	/* Number of count increase to timeout */
-	unsigned long count;	/* Tick counter */
-	void (*function)(unsigned long);	/* Timeout function */
-	unsigned long data;	/* Data passed to function */
-	struct list_head list;
-};
-
-struct langwell_otg {
-	struct intel_mid_otg_xceiv	iotg;
-	struct device			*dev;
-
-	void __iomem			*usbcfg;	/* SCCBUSB config Reg */
-
-	unsigned			region;
-	unsigned			cfg_region;
-
-	struct work_struct		work;
-	struct workqueue_struct		*qwork;
-	struct timer_list		hsm_timer;
-
-	spinlock_t			lock;
-	spinlock_t			wq_lock;
-
-	struct notifier_block		iotg_notifier;
-};
-
-static inline
-struct langwell_otg *mid_xceiv_to_lnw(struct intel_mid_otg_xceiv *iotg)
-{
-	return container_of(iotg, struct langwell_otg, iotg);
-}
-
-#endif /* __LANGWELL_OTG_H__ */
-- 
cgit v1.2.3


From 1a5e29fc2b90daf71a60329c29a1886fd126169a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:02:51 -0800
Subject: kernel-doc: fix new warnings in device.h

Fix new kernel-doc warnings:

Warning(include/linux/device.h:299): No description found for parameter 'name'
Warning(include/linux/device.h:299): No description found for parameter 'subsys'
Warning(include/linux/device.h:299): No description found for parameter 'node'
Warning(include/linux/device.h:299): No description found for parameter 'add_dev'
Warning(include/linux/device.h:299): No description found for parameter 'remove_dev'
Warning(include/linux/device.h:685): No description found for parameter 'id'
Warning(include/linux/device.h:1009): No description found for parameter '__driver'
Warning(include/linux/device.h:1009): No description found for parameter '__register'
Warning(include/linux/device.h:1009): No description found for parameter '__unregister'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 5b3adb8f9588..b63fb393aa58 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -279,11 +279,11 @@ struct device *driver_find_device(struct device_driver *drv,
 
 /**
  * struct subsys_interface - interfaces to device functions
- * @name        name of the device function
- * @subsystem   subsytem of the devices to attach to
- * @node        the list of functions registered at the subsystem
- * @add         device hookup to device function handler
- * @remove      device hookup to device function handler
+ * @name:       name of the device function
+ * @subsys:     subsytem of the devices to attach to
+ * @node:       the list of functions registered at the subsystem
+ * @add_dev:    device hookup to device function handler
+ * @remove_dev: device hookup to device function handler
  *
  * Simple interfaces attached to a subsystem. Multiple interfaces can
  * attach to a subsystem and its devices. Unlike drivers, they do not
@@ -612,6 +612,7 @@ struct device_dma_parameters {
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
  * @devt:	For creating the sysfs "dev".
+ * @id:		device instance
  * @devres_lock: Spinlock to protect the resource of the device.
  * @devres_head: The resources list of the device.
  * @knode_class: The node used to add the device to the class list.
@@ -1003,6 +1004,10 @@ extern long sysfs_deprecated;
  * Each module may only use this macro once, and calling it replaces
  * module_init() and module_exit().
  *
+ * @__driver: driver name
+ * @__register: register function for this driver type
+ * @__unregister: unregister function for this driver type
+ *
  * Use this macro to construct bus specific macros for registering
  * drivers, and do not use it on its own.
  */
-- 
cgit v1.2.3


From 0fcd97789028e8ec286a4248c20a71eae239ba61 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:02:56 -0800
Subject: kernel-doc: fix new warning in usb.h

Fix new kernel-doc warning:

Warning(include/linux/usb.h:1251): No description found for parameter 'num_mapped_sgs'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 27a4e16d2bf1..69d845739bc2 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1073,6 +1073,7 @@ typedef void (*usb_complete_t)(struct urb *);
  *	which the host controller driver should use in preference to the
  *	transfer_buffer.
  * @sg: scatter gather buffer list
+ * @num_mapped_sgs: (internal) number of mapped sg entries
  * @num_sgs: number of entries in the sg list
  * @transfer_buffer_length: How big is transfer_buffer.  The transfer may
  *	be broken up into chunks according to the current maximum packet
-- 
cgit v1.2.3


From b82b9183d4f18f9b8c4bb31f223eb6c79b734eb0 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 24 Jan 2012 05:16:00 +0000
Subject: team: send only changed options/ports via netlink

This patch changes event message behaviour to send only updated records
instead of whole list. This fixes bug on which userspace receives non-actual
data in case multiple events occur in row.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 136 ++++++++++++++++++++++++++++++++----------------
 include/linux/if_team.h |  10 ++++
 2 files changed, 100 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index ed2a862b835d..6b678f38e5ce 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -92,9 +92,9 @@ struct team_option *__team_find_option(struct team *team, const char *opt_name)
 	return NULL;
 }
 
-int team_options_register(struct team *team,
-			  const struct team_option *option,
-			  size_t option_count)
+int __team_options_register(struct team *team,
+			    const struct team_option *option,
+			    size_t option_count)
 {
 	int i;
 	struct team_option **dst_opts;
@@ -116,8 +116,11 @@ int team_options_register(struct team *team,
 		}
 	}
 
-	for (i = 0; i < option_count; i++)
+	for (i = 0; i < option_count; i++) {
+		dst_opts[i]->changed = true;
+		dst_opts[i]->removed = false;
 		list_add_tail(&dst_opts[i]->list, &team->option_list);
+	}
 
 	kfree(dst_opts);
 	return 0;
@@ -130,10 +133,22 @@ rollback:
 	return err;
 }
 
-EXPORT_SYMBOL(team_options_register);
+static void __team_options_mark_removed(struct team *team,
+					const struct team_option *option,
+					size_t option_count)
+{
+	int i;
+
+	for (i = 0; i < option_count; i++, option++) {
+		struct team_option *del_opt;
 
-static void __team_options_change_check(struct team *team,
-					struct team_option *changed_option);
+		del_opt = __team_find_option(team, option->name);
+		if (del_opt) {
+			del_opt->changed = true;
+			del_opt->removed = true;
+		}
+	}
+}
 
 static void __team_options_unregister(struct team *team,
 				      const struct team_option *option,
@@ -152,12 +167,29 @@ static void __team_options_unregister(struct team *team,
 	}
 }
 
+static void __team_options_change_check(struct team *team);
+
+int team_options_register(struct team *team,
+			  const struct team_option *option,
+			  size_t option_count)
+{
+	int err;
+
+	err = __team_options_register(team, option, option_count);
+	if (err)
+		return err;
+	__team_options_change_check(team);
+	return 0;
+}
+EXPORT_SYMBOL(team_options_register);
+
 void team_options_unregister(struct team *team,
 			     const struct team_option *option,
 			     size_t option_count)
 {
+	__team_options_mark_removed(team, option, option_count);
+	__team_options_change_check(team);
 	__team_options_unregister(team, option, option_count);
-	__team_options_change_check(team, NULL);
 }
 EXPORT_SYMBOL(team_options_unregister);
 
@@ -176,7 +208,8 @@ static int team_option_set(struct team *team, struct team_option *option,
 	if (err)
 		return err;
 
-	__team_options_change_check(team, option);
+	option->changed = true;
+	__team_options_change_check(team);
 	return err;
 }
 
@@ -653,6 +686,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev)
 		return -ENOENT;
 	}
 
+	port->removed = true;
 	__team_port_change_check(port, false);
 	team_port_list_del_port(team, port);
 	team_adjust_ops(team);
@@ -1200,10 +1234,9 @@ err_fill:
 	return err;
 }
 
-static int team_nl_fill_options_get_changed(struct sk_buff *skb,
-					    u32 pid, u32 seq, int flags,
-					    struct team *team,
-					    struct team_option *changed_option)
+static int team_nl_fill_options_get(struct sk_buff *skb,
+				    u32 pid, u32 seq, int flags,
+				    struct team *team, bool fillall)
 {
 	struct nlattr *option_list;
 	void *hdr;
@@ -1223,12 +1256,19 @@ static int team_nl_fill_options_get_changed(struct sk_buff *skb,
 		struct nlattr *option_item;
 		long arg;
 
+		/* Include only changed options if fill all mode is not on */
+		if (!fillall && !option->changed)
+			continue;
 		option_item = nla_nest_start(skb, TEAM_ATTR_ITEM_OPTION);
 		if (!option_item)
 			goto nla_put_failure;
 		NLA_PUT_STRING(skb, TEAM_ATTR_OPTION_NAME, option->name);
-		if (option == changed_option)
+		if (option->changed) {
 			NLA_PUT_FLAG(skb, TEAM_ATTR_OPTION_CHANGED);
+			option->changed = false;
+		}
+		if (option->removed)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_OPTION_REMOVED);
 		switch (option->type) {
 		case TEAM_OPTION_TYPE_U32:
 			NLA_PUT_U8(skb, TEAM_ATTR_OPTION_TYPE, NLA_U32);
@@ -1255,13 +1295,13 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int team_nl_fill_options_get(struct sk_buff *skb,
-				    struct genl_info *info, int flags,
-				    struct team *team)
+static int team_nl_fill_options_get_all(struct sk_buff *skb,
+					struct genl_info *info, int flags,
+					struct team *team)
 {
-	return team_nl_fill_options_get_changed(skb, info->snd_pid,
-						info->snd_seq, NLM_F_ACK,
-						team, NULL);
+	return team_nl_fill_options_get(skb, info->snd_pid,
+					info->snd_seq, NLM_F_ACK,
+					team, true);
 }
 
 static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info)
@@ -1273,7 +1313,7 @@ static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info)
 	if (!team)
 		return -EINVAL;
 
-	err = team_nl_send_generic(info, team, team_nl_fill_options_get);
+	err = team_nl_send_generic(info, team, team_nl_fill_options_get_all);
 
 	team_nl_team_put(team);
 
@@ -1365,10 +1405,10 @@ team_put:
 	return err;
 }
 
-static int team_nl_fill_port_list_get_changed(struct sk_buff *skb,
-					      u32 pid, u32 seq, int flags,
-					      struct team *team,
-					      struct team_port *changed_port)
+static int team_nl_fill_port_list_get(struct sk_buff *skb,
+				      u32 pid, u32 seq, int flags,
+				      struct team *team,
+				      bool fillall)
 {
 	struct nlattr *port_list;
 	void *hdr;
@@ -1387,12 +1427,19 @@ static int team_nl_fill_port_list_get_changed(struct sk_buff *skb,
 	list_for_each_entry(port, &team->port_list, list) {
 		struct nlattr *port_item;
 
+		/* Include only changed ports if fill all mode is not on */
+		if (!fillall && !port->changed)
+			continue;
 		port_item = nla_nest_start(skb, TEAM_ATTR_ITEM_PORT);
 		if (!port_item)
 			goto nla_put_failure;
 		NLA_PUT_U32(skb, TEAM_ATTR_PORT_IFINDEX, port->dev->ifindex);
-		if (port == changed_port)
+		if (port->changed) {
 			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_CHANGED);
+			port->changed = false;
+		}
+		if (port->removed)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_REMOVED);
 		if (port->linkup)
 			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_LINKUP);
 		NLA_PUT_U32(skb, TEAM_ATTR_PORT_SPEED, port->speed);
@@ -1408,13 +1455,13 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int team_nl_fill_port_list_get(struct sk_buff *skb,
-				      struct genl_info *info, int flags,
-				      struct team *team)
+static int team_nl_fill_port_list_get_all(struct sk_buff *skb,
+					  struct genl_info *info, int flags,
+					  struct team *team)
 {
-	return team_nl_fill_port_list_get_changed(skb, info->snd_pid,
-						  info->snd_seq, NLM_F_ACK,
-						  team, NULL);
+	return team_nl_fill_port_list_get(skb, info->snd_pid,
+					  info->snd_seq, NLM_F_ACK,
+					  team, true);
 }
 
 static int team_nl_cmd_port_list_get(struct sk_buff *skb,
@@ -1427,7 +1474,7 @@ static int team_nl_cmd_port_list_get(struct sk_buff *skb,
 	if (!team)
 		return -EINVAL;
 
-	err = team_nl_send_generic(info, team, team_nl_fill_port_list_get);
+	err = team_nl_send_generic(info, team, team_nl_fill_port_list_get_all);
 
 	team_nl_team_put(team);
 
@@ -1464,8 +1511,7 @@ static struct genl_multicast_group team_change_event_mcgrp = {
 	.name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME,
 };
 
-static int team_nl_send_event_options_get(struct team *team,
-					  struct team_option *changed_option)
+static int team_nl_send_event_options_get(struct team *team)
 {
 	struct sk_buff *skb;
 	int err;
@@ -1475,8 +1521,7 @@ static int team_nl_send_event_options_get(struct team *team,
 	if (!skb)
 		return -ENOMEM;
 
-	err = team_nl_fill_options_get_changed(skb, 0, 0, 0, team,
-					       changed_option);
+	err = team_nl_fill_options_get(skb, 0, 0, 0, team, false);
 	if (err < 0)
 		goto err_fill;
 
@@ -1489,18 +1534,17 @@ err_fill:
 	return err;
 }
 
-static int team_nl_send_event_port_list_get(struct team_port *port)
+static int team_nl_send_event_port_list_get(struct team *team)
 {
 	struct sk_buff *skb;
 	int err;
-	struct net *net = dev_net(port->team->dev);
+	struct net *net = dev_net(team->dev);
 
 	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
-	err = team_nl_fill_port_list_get_changed(skb, 0, 0, 0,
-						 port->team, port);
+	err = team_nl_fill_port_list_get(skb, 0, 0, 0, team, false);
 	if (err < 0)
 		goto err_fill;
 
@@ -1544,12 +1588,11 @@ static void team_nl_fini(void)
  * Change checkers
  ******************/
 
-static void __team_options_change_check(struct team *team,
-					struct team_option *changed_option)
+static void __team_options_change_check(struct team *team)
 {
 	int err;
 
-	err = team_nl_send_event_options_get(team, changed_option);
+	err = team_nl_send_event_options_get(team);
 	if (err)
 		netdev_warn(team->dev, "Failed to send options change via netlink\n");
 }
@@ -1559,9 +1602,10 @@ static void __team_port_change_check(struct team_port *port, bool linkup)
 {
 	int err;
 
-	if (port->linkup == linkup)
+	if (!port->removed && port->linkup == linkup)
 		return;
 
+	port->changed = true;
 	port->linkup = linkup;
 	if (linkup) {
 		struct ethtool_cmd ecmd;
@@ -1577,7 +1621,7 @@ static void __team_port_change_check(struct team_port *port, bool linkup)
 	port->duplex = 0;
 
 send_event:
-	err = team_nl_send_event_port_list_get(port);
+	err = team_nl_send_event_port_list_get(port->team);
 	if (err)
 		netdev_warn(port->team->dev, "Failed to send port change of device %s via netlink\n",
 			    port->dev->name);
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 828181fbad5d..58404b0c5010 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -46,6 +46,10 @@ struct team_port {
 	u32 speed;
 	u8 duplex;
 
+	/* Custom gennetlink interface related flags */
+	bool changed;
+	bool removed;
+
 	struct rcu_head rcu;
 };
 
@@ -72,6 +76,10 @@ struct team_option {
 	enum team_option_type type;
 	int (*getter)(struct team *team, void *arg);
 	int (*setter)(struct team *team, void *arg);
+
+	/* Custom gennetlink interface related flags */
+	bool changed;
+	bool removed;
 };
 
 struct team_mode {
@@ -207,6 +215,7 @@ enum {
 	TEAM_ATTR_OPTION_CHANGED,	/* flag */
 	TEAM_ATTR_OPTION_TYPE,		/* u8 */
 	TEAM_ATTR_OPTION_DATA,		/* dynamic */
+	TEAM_ATTR_OPTION_REMOVED,	/* flag */
 
 	__TEAM_ATTR_OPTION_MAX,
 	TEAM_ATTR_OPTION_MAX = __TEAM_ATTR_OPTION_MAX - 1,
@@ -227,6 +236,7 @@ enum {
 	TEAM_ATTR_PORT_LINKUP,		/* flag */
 	TEAM_ATTR_PORT_SPEED,		/* u32 */
 	TEAM_ATTR_PORT_DUPLEX,		/* u8 */
+	TEAM_ATTR_PORT_REMOVED,		/* flag */
 
 	__TEAM_ATTR_PORT_MAX,
 	TEAM_ATTR_PORT_MAX = __TEAM_ATTR_PORT_MAX - 1,
-- 
cgit v1.2.3


From e050e3f0a71bf7dc2c148b35caff0234decc8198 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 26 Jan 2012 17:03:19 +0100
Subject: perf: Fix broken interrupt rate throttling

This patch fixes the sampling interrupt throttling mechanism.

It was broken in v3.2. Events were not being unthrottled. The
unthrottling mechanism required that events be checked at each
timer tick.

This patch solves this problem and also separates:

  - unthrottling
  - multiplexing
  - frequency-mode period adjustments

Not all of them need to be executed at each timer tick.

This third version of the patch is based on my original patch +
PeterZ proposal (https://lkml.org/lkml/2012/1/7/87).

At each timer tick, for each context:

  - if the current CPU has throttled events, we unthrottle events

  - if context has frequency-based events, we adjust sampling periods

  - if we have reached the jiffies interval, we multiplex (rotate)

We decoupled rotation (multiplexing) from frequency-mode sampling
period adjustments.  They should not necessarily happen at the same
rate. Multiplexing is subject to jiffies_interval (currently at 1
but could be higher once the tunable is exposed via sysfs).

We have grouped frequency-mode adjustment and unthrottling into the
same routine to minimize code duplication. When throttled while in
frequency mode, we scan the events only once.

We have fixed the threshold enforcement code in __perf_event_overflow().
There was a bug whereby it would allow more than the authorized rate
because an increment of hwc->interrupts was not executed at the right
place.

The patch was tested with low sampling limit (2000) and fixed periods,
frequency mode, overcommitted PMU.

On a 2.1GHz AMD CPU:

 $ cat /proc/sys/kernel/perf_event_max_sample_rate
 2000

We set a rate of 3000 samples/sec (2.1GHz/3000 = 700000):

 $ perf record -e cycles,cycles -c 700000  noploop 10
 $ perf report -D | tail -21

 Aggregated stats:
           TOTAL events:      80086
            MMAP events:         88
            COMM events:          2
            EXIT events:          4
        THROTTLE events:      19996
      UNTHROTTLE events:      19996
          SAMPLE events:      40000

 cycles stats:
           TOTAL events:      40006
            MMAP events:          5
            COMM events:          1
            EXIT events:          4
        THROTTLE events:       9998
      UNTHROTTLE events:       9998
          SAMPLE events:      20000

 cycles stats:
           TOTAL events:      39996
        THROTTLE events:       9998
      UNTHROTTLE events:       9998
          SAMPLE events:      20000

For 10s, the cap is 2x2000x10 = 40000 samples.
We get exactly that: 20000 samples/event.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: <stable@kernel.org> # v3.2+
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120126160319.GA5655@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |   1 +
 kernel/events/core.c       | 104 ++++++++++++++++++++++++++++-----------------
 2 files changed, 67 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 08855613ceb3..abb2776be1ba 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -587,6 +587,7 @@ struct hw_perf_event {
 	u64				sample_period;
 	u64				last_period;
 	local64_t			period_left;
+	u64                             interrupts_seq;
 	u64				interrupts;
 
 	u64				freq_time_stamp;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 32b48c889711..ba36013cfb21 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2300,6 +2300,9 @@ do {					\
 	return div64_u64(dividend, divisor);
 }
 
+static DEFINE_PER_CPU(int, perf_throttled_count);
+static DEFINE_PER_CPU(u64, perf_throttled_seq);
+
 static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -2325,16 +2328,29 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 	}
 }
 
-static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
+/*
+ * combine freq adjustment with unthrottling to avoid two passes over the
+ * events. At the same time, make sure, having freq events does not change
+ * the rate of unthrottling as that would introduce bias.
+ */
+static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
+					   int needs_unthr)
 {
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	u64 interrupts, now;
+	u64 now, period = TICK_NSEC;
 	s64 delta;
 
-	if (!ctx->nr_freq)
+	/*
+	 * only need to iterate over all events iff:
+	 * - context have events in frequency mode (needs freq adjust)
+	 * - there are events to unthrottle on this cpu
+	 */
+	if (!(ctx->nr_freq || needs_unthr))
 		return;
 
+	raw_spin_lock(&ctx->lock);
+
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (event->state != PERF_EVENT_STATE_ACTIVE)
 			continue;
@@ -2344,13 +2360,8 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 
 		hwc = &event->hw;
 
-		interrupts = hwc->interrupts;
-		hwc->interrupts = 0;
-
-		/*
-		 * unthrottle events on the tick
-		 */
-		if (interrupts == MAX_INTERRUPTS) {
+		if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) {
+			hwc->interrupts = 0;
 			perf_log_throttle(event, 1);
 			event->pmu->start(event, 0);
 		}
@@ -2358,14 +2369,26 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 		if (!event->attr.freq || !event->attr.sample_freq)
 			continue;
 
-		event->pmu->read(event);
+		/*
+		 * stop the event and update event->count
+		 */
+		event->pmu->stop(event, PERF_EF_UPDATE);
+
 		now = local64_read(&event->count);
 		delta = now - hwc->freq_count_stamp;
 		hwc->freq_count_stamp = now;
 
+		/*
+		 * restart the event
+		 * reload only if value has changed
+		 */
 		if (delta > 0)
 			perf_adjust_period(event, period, delta);
+
+		event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
 	}
+
+	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -2388,16 +2411,13 @@ static void rotate_ctx(struct perf_event_context *ctx)
  */
 static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
-	u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
 	struct perf_event_context *ctx = NULL;
-	int rotate = 0, remove = 1, freq = 0;
+	int rotate = 0, remove = 1;
 
 	if (cpuctx->ctx.nr_events) {
 		remove = 0;
 		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
 			rotate = 1;
-		if (cpuctx->ctx.nr_freq)
-			freq = 1;
 	}
 
 	ctx = cpuctx->task_ctx;
@@ -2405,37 +2425,26 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 		remove = 0;
 		if (ctx->nr_events != ctx->nr_active)
 			rotate = 1;
-		if (ctx->nr_freq)
-			freq = 1;
 	}
 
-	if (!rotate && !freq)
+	if (!rotate)
 		goto done;
 
 	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 	perf_pmu_disable(cpuctx->ctx.pmu);
 
-	if (freq) {
-		perf_ctx_adjust_freq(&cpuctx->ctx, interval);
-		if (ctx)
-			perf_ctx_adjust_freq(ctx, interval);
-	}
-
-	if (rotate) {
-		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-		if (ctx)
-			ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+	if (ctx)
+		ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
 
-		rotate_ctx(&cpuctx->ctx);
-		if (ctx)
-			rotate_ctx(ctx);
+	rotate_ctx(&cpuctx->ctx);
+	if (ctx)
+		rotate_ctx(ctx);
 
-		perf_event_sched_in(cpuctx, ctx, current);
-	}
+	perf_event_sched_in(cpuctx, ctx, current);
 
 	perf_pmu_enable(cpuctx->ctx.pmu);
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-
 done:
 	if (remove)
 		list_del_init(&cpuctx->rotation_list);
@@ -2445,10 +2454,22 @@ void perf_event_task_tick(void)
 {
 	struct list_head *head = &__get_cpu_var(rotation_list);
 	struct perf_cpu_context *cpuctx, *tmp;
+	struct perf_event_context *ctx;
+	int throttled;
 
 	WARN_ON(!irqs_disabled());
 
+	__this_cpu_inc(perf_throttled_seq);
+	throttled = __this_cpu_xchg(perf_throttled_count, 0);
+
 	list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
+		ctx = &cpuctx->ctx;
+		perf_adjust_freq_unthr_context(ctx, throttled);
+
+		ctx = cpuctx->task_ctx;
+		if (ctx)
+			perf_adjust_freq_unthr_context(ctx, throttled);
+
 		if (cpuctx->jiffies_interval == 1 ||
 				!(jiffies % cpuctx->jiffies_interval))
 			perf_rotate_context(cpuctx);
@@ -4509,6 +4530,7 @@ static int __perf_event_overflow(struct perf_event *event,
 {
 	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
+	u64 seq;
 	int ret = 0;
 
 	/*
@@ -4518,14 +4540,20 @@ static int __perf_event_overflow(struct perf_event *event,
 	if (unlikely(!is_sampling_event(event)))
 		return 0;
 
-	if (unlikely(hwc->interrupts >= max_samples_per_tick)) {
-		if (throttle) {
+	seq = __this_cpu_read(perf_throttled_seq);
+	if (seq != hwc->interrupts_seq) {
+		hwc->interrupts_seq = seq;
+		hwc->interrupts = 1;
+	} else {
+		hwc->interrupts++;
+		if (unlikely(throttle
+			     && hwc->interrupts >= max_samples_per_tick)) {
+			__this_cpu_inc(perf_throttled_count);
 			hwc->interrupts = MAX_INTERRUPTS;
 			perf_log_throttle(event, 0);
 			ret = 1;
 		}
-	} else
-		hwc->interrupts++;
+	}
 
 	if (event->attr.freq) {
 		u64 now = perf_clock();
-- 
cgit v1.2.3


From 181e9bdef37bfcaa41f3ab6c948a2a0d60a268b5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 29 Jan 2012 20:35:52 +0100
Subject: PM / Hibernate: Fix s2disk regression related to freezing workqueues

Commit 2aede851ddf08666f68ffc17be446420e9d2a056

  PM / Hibernate: Freeze kernel threads after preallocating memory

introduced a mechanism by which kernel threads were frozen after
the preallocation of hibernate image memory to avoid problems with
frozen kernel threads not responding to memory freeing requests.
However, it overlooked the s2disk code path in which the
SNAPSHOT_CREATE_IMAGE ioctl was run directly after SNAPSHOT_FREE,
which caused freeze_workqueues_begin() to BUG(), because it saw
that worqueues had been already frozen.

Although in principle this issue might be addressed by removing
the relevant BUG_ON() from freeze_workqueues_begin(), that would
reintroduce the very problem that commit 2aede851ddf08666f68ffc17be4
attempted to avoid into that particular code path.  For this reason,
to fix the issue at hand, introduce thaw_kernel_threads() and make
the SNAPSHOT_FREE ioctl execute it.

Special thanks to Srivatsa S. Bhat for detailed analysis of the
problem.

Reported-and-tested-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: stable@kernel.org
---
 include/linux/freezer.h |  2 ++
 kernel/power/process.c  | 19 +++++++++++++++++++
 kernel/power/user.c     |  9 +++++++++
 3 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 0ab54e16a91f..d09af4b67cf1 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -39,6 +39,7 @@ extern bool __refrigerator(bool check_kthr_stop);
 extern int freeze_processes(void);
 extern int freeze_kernel_threads(void);
 extern void thaw_processes(void);
+extern void thaw_kernel_threads(void);
 
 static inline bool try_to_freeze(void)
 {
@@ -174,6 +175,7 @@ static inline bool __refrigerator(bool check_kthr_stop) { return false; }
 static inline int freeze_processes(void) { return -ENOSYS; }
 static inline int freeze_kernel_threads(void) { return -ENOSYS; }
 static inline void thaw_processes(void) {}
+static inline void thaw_kernel_threads(void) {}
 
 static inline bool try_to_freeze(void) { return false; }
 
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 77274c9ba2f1..eeca00311f39 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -188,3 +188,22 @@ void thaw_processes(void)
 	printk("done.\n");
 }
 
+void thaw_kernel_threads(void)
+{
+	struct task_struct *g, *p;
+
+	pm_nosig_freezing = false;
+	printk("Restarting kernel threads ... ");
+
+	thaw_workqueues();
+
+	read_lock(&tasklist_lock);
+	do_each_thread(g, p) {
+		if (p->flags & (PF_KTHREAD | PF_WQ_WORKER))
+			__thaw_task(p);
+	} while_each_thread(g, p);
+	read_unlock(&tasklist_lock);
+
+	schedule();
+	printk("done.\n");
+}
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 6b1ab7a88522..e5a21a857302 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -274,6 +274,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		swsusp_free();
 		memset(&data->handle, 0, sizeof(struct snapshot_handle));
 		data->ready = 0;
+		/*
+		 * It is necessary to thaw kernel threads here, because
+		 * SNAPSHOT_CREATE_IMAGE may be invoked directly after
+		 * SNAPSHOT_FREE.  In that case, if kernel threads were not
+		 * thawed, the preallocation of memory carried out by
+		 * hibernation_snapshot() might run into problems (i.e. it
+		 * might fail or even deadlock).
+		 */
+		thaw_kernel_threads();
 		break;
 
 	case SNAPSHOT_PREF_IMAGE_SIZE:
-- 
cgit v1.2.3


From 1a30871fe635d3e92972e6b93e39ff65bb57e52d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Mon, 16 Jan 2012 11:07:16 +0200
Subject: mtd: fix MTD suspend

Commits 3fe4bae88460869a8e553397cd9057a4ee7ca341 and
079c985e7a6f4ce60f931cebfdd5ee3c3 broke MTD suspend in 2 ways:

1. When the '->suspend' method is not present, we return -EOPNOTSUPP, but
   the callers of 'mtd_suspend()' expects 0 instead.
2. Checking of the 'mtd' parameter against NULL has been incorrectly removed
   in 'mtd_cls_suspend()'.

This patch fixes the breakages. This has been found, analyzed, reported
and tested by Rafael J. Wysocki <rjw@sisk.pl>.

Note, this patch is not needed in the stable tree because it causes a
regression introduced during the v3.3 merge window.

Reported-by: Rafael J. Wysocki <rjw@sisk.pl>
Tested-by: Rafael J. Wysocki <rjw@sisk.pl>
Tested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdcore.c   | 2 +-
 include/linux/mtd/mtd.h | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 6ae9ca01388b..9a9ce71a71fc 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -119,7 +119,7 @@ static int mtd_cls_suspend(struct device *dev, pm_message_t state)
 {
 	struct mtd_info *mtd = dev_get_drvdata(dev);
 
-	return mtd_suspend(mtd);
+	return mtd ? mtd_suspend(mtd) : 0;
 }
 
 static int mtd_cls_resume(struct device *dev)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 1a81fde8f333..d8c7aad7331c 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -427,9 +427,7 @@ static inline int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 
 static inline int mtd_suspend(struct mtd_info *mtd)
 {
-	if (!mtd->suspend)
-		return -EOPNOTSUPP;
-	return mtd->suspend(mtd);
+	return mtd->suspend ? mtd->suspend(mtd) : 0;
 }
 
 static inline void mtd_resume(struct mtd_info *mtd)
-- 
cgit v1.2.3


From 3310225dfc71a35a2cc9340c15c0e08b14b3c754 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Mon, 9 Jan 2012 11:53:50 -0600
Subject: lib: proportion: lower PROP_MAX_SHIFT to 32 on 64-bit kernel

PROP_MAX_SHIFT should be set to <=32 on 64-bit box. This fixes two bugs
in the below lines of bdi_dirty_limit():

	bdi_dirty *= numerator;
	do_div(bdi_dirty, denominator);

1) divide error: do_div() only uses the lower 32 bit of the denominator,
   which may trimmed to be 0 when PROP_MAX_SHIFT > 32.

2) overflow: (bdi_dirty * numerator) could easily overflow if numerator
   used up to 48 bits, leaving only 16 bits to bdi_dirty

Cc: <stable@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reported-by: Ilya Tumaykin <librarian_rus@yahoo.com>
Tested-by: Ilya Tumaykin <librarian_rus@yahoo.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/proportions.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/proportions.h b/include/linux/proportions.h
index ef35bb73f69b..26a8a4ed9b07 100644
--- a/include/linux/proportions.h
+++ b/include/linux/proportions.h
@@ -81,7 +81,11 @@ void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
  * Limit the time part in order to ensure there are some bits left for the
  * cycle counter and fraction multiply.
  */
+#if BITS_PER_LONG == 32
 #define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
+#else
+#define PROP_MAX_SHIFT (BITS_PER_LONG/2)
+#endif
 
 #define PROP_FRAC_SHIFT		(BITS_PER_LONG - PROP_MAX_SHIFT - 1)
 #define PROP_FRAC_BASE		(1UL << PROP_FRAC_SHIFT)
-- 
cgit v1.2.3


From 3cccd1543ab623a5065335bf08350e06ffc788ab Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Date: Thu, 26 Jan 2012 19:13:16 +0200
Subject: lib/mpi: replaced MPI_NULL with normal NULL

MPI_NULL is replaced with normal NULL.

Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Reviewed-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/mpi.h | 2 --
 lib/mpi/mpicoder.c  | 8 ++++----
 lib/mpi/mpiutil.c   | 2 +-
 3 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mpi.h b/include/linux/mpi.h
index 06f88994ccaa..d02cca6cc8ce 100644
--- a/include/linux/mpi.h
+++ b/include/linux/mpi.h
@@ -57,8 +57,6 @@ struct gcry_mpi {
 
 typedef struct gcry_mpi *MPI;
 
-#define MPI_NULL NULL
-
 #define mpi_get_nlimbs(a)     ((a)->nlimbs)
 #define mpi_is_neg(a)	      ((a)->sign)
 
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 6116fc4990da..d7684aa7f65c 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -34,7 +34,7 @@ MPI do_encode_md(const void *sha_buffer, unsigned nbits)
 	uint8_t *frame, *fr_pt;
 	int i = 0, n;
 	size_t asnlen = DIM(asn);
-	MPI a = MPI_NULL;
+	MPI a = NULL;
 
 	if (SHA1_DIGEST_LENGTH + asnlen + 4 > nframe)
 		pr_info("MPI: can't encode a %d bit MD into a %d bits frame\n",
@@ -48,7 +48,7 @@ MPI do_encode_md(const void *sha_buffer, unsigned nbits)
 	 */
 	frame = kmalloc(nframe, GFP_KERNEL);
 	if (!frame)
-		return MPI_NULL;
+		return NULL;
 	n = 0;
 	frame[n++] = 0;
 	frame[n++] = 1;		/* block type */
@@ -92,7 +92,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
 	int i, j;
 	unsigned nbits, nbytes, nlimbs, nread = 0;
 	mpi_limb_t a;
-	MPI val = MPI_NULL;
+	MPI val = NULL;
 
 	if (*ret_nread < 2)
 		goto leave;
@@ -109,7 +109,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
 	nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
 	val = mpi_alloc(nlimbs);
 	if (!val)
-		return MPI_NULL;
+		return NULL;
 	i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
 	i %= BYTES_PER_MPI_LIMB;
 	val->nbits = nbits;
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
index eefc55d6b7f5..6bfc41f62b8f 100644
--- a/lib/mpi/mpiutil.c
+++ b/lib/mpi/mpiutil.c
@@ -135,7 +135,7 @@ int mpi_copy(MPI *copied, const MPI a)
 	size_t i;
 	MPI b;
 
-	*copied = MPI_NULL;
+	*copied = NULL;
 
 	if (a) {
 		b = mpi_alloc(a->nlimbs);
-- 
cgit v1.2.3


From e9c8d7a03e69093e4c33c5056a45c1233a42e8a4 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 18 Jan 2012 10:14:25 +0100
Subject: dma: sh_dma: not all SH DMAC implementations support MEMCPY

Add a flag to allow platforms to specify, whether a DMAC instance supports
the MEMCPY operation. To avoid regressions, preserve the current default.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/shdma.c    | 3 ++-
 include/linux/sh_dma.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 54043cd831c8..812fd76e9c18 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -1262,7 +1262,8 @@ static int __init sh_dmae_probe(struct platform_device *pdev)
 
 	INIT_LIST_HEAD(&shdev->common.channels);
 
-	dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
+	if (!pdata->slave_only)
+		dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
 	if (pdata->slave && pdata->slave_num)
 		dma_cap_set(DMA_SLAVE, shdev->common.cap_mask);
 
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
index 8cd7fe59cf1a..425450b980b8 100644
--- a/include/linux/sh_dma.h
+++ b/include/linux/sh_dma.h
@@ -70,6 +70,7 @@ struct sh_dmae_pdata {
 	unsigned int needs_tend_set:1;
 	unsigned int no_dmars:1;
 	unsigned int chclr_present:1;
+	unsigned int slave_only:1;
 };
 
 /* DMA register */
-- 
cgit v1.2.3


From b18db3d91234c03ad080d317878c7c77672ba326 Mon Sep 17 00:00:00 2001
From: Heiko Stübner <heiko@sntech.de>
Date: Wed, 1 Feb 2012 09:12:24 -0800
Subject: Input: gpio_keys - fix struct device declared inside parameter list

A struct device parameter is used in the enable and disable callbacks to
distinguish between different gpio_keys devices.

Platforms that don't use these callbacks may not include struct device
at all, as seen on arch/arm/mach-s3c2410/mach-n30.c

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/gpio_keys.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h
index b5ca4b2c08ec..004ff33ab38e 100644
--- a/include/linux/gpio_keys.h
+++ b/include/linux/gpio_keys.h
@@ -1,6 +1,8 @@
 #ifndef _GPIO_KEYS_H
 #define _GPIO_KEYS_H
 
+struct device;
+
 struct gpio_keys_button {
 	/* Configuration parameters */
 	unsigned int code;	/* input event code (KEY_*, SW_*) */
-- 
cgit v1.2.3


From 7d731019218e49a9811f6d0adec4b1cfcb752bed Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 1 Feb 2012 11:10:24 -0800
Subject: mtd: fix merge conflict resolution breakage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes merge conflict resolution breakage introduced by merge
d3712b9dfcf4 ("Merge tag 'for-linus' of git://github.com/prasad-joshi/logfs_upstream").

The commit changed 'mtd_can_have_bb()' function and made it always
return zero, which is incorrect.  Instead, we need it to return whether
the underlying flash device can have bad eraseblocks or not.  UBI needs
this information because it affects how it handles the underlying flash.
E.g., if the underlying flash is NOR, it cannot have bad blocks and any
write or erase error is fatal, and all we can do is to switch to R/O
mode.  We do not need to reserve a pool of good eraseblocks for bad
eraseblocks handling, and so on.

This patch also removes 'mtd_can_have_bb()' invocations from Logfs to
ensure correct Logfs behavior.

I've tested that with this patch UBI works on top of NOR and NAND
flashes emulated by mtdram and nandsim correspondingly.

This patch is based on patch from Linus Torvalds.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Acked-by: Jörn Engel <joern@logfs.org>
Acked-by: Prasad Joshi <prasadjoshi.linux@gmail.com>
Acked-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/logfs/dev_mtd.c      | 6 ------
 include/linux/mtd/mtd.h | 2 +-
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index e97404d611e0..9c501449450d 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -152,9 +152,6 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs)
 	filler_t *filler = logfs_mtd_readpage;
 	struct mtd_info *mtd = super->s_mtd;
 
-	if (!mtd_can_have_bb(mtd))
-		return NULL;
-
 	*ofs = 0;
 	while (mtd_block_isbad(mtd, *ofs)) {
 		*ofs += mtd->erasesize;
@@ -172,9 +169,6 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs)
 	filler_t *filler = logfs_mtd_readpage;
 	struct mtd_info *mtd = super->s_mtd;
 
-	if (!mtd_can_have_bb(mtd))
-		return NULL;
-
 	*ofs = mtd->size - mtd->erasesize;
 	while (mtd_block_isbad(mtd, *ofs)) {
 		*ofs -= mtd->erasesize;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 221295208fd0..887ebe318c75 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -489,7 +489,7 @@ static inline int mtd_has_oob(const struct mtd_info *mtd)
 
 static inline int mtd_can_have_bb(const struct mtd_info *mtd)
 {
-	return 0;
+	return !!mtd->block_isbad;
 }
 
 	/* Kernel-side ioctl definitions */
-- 
cgit v1.2.3


From 504b61630ab65296b6c9113cce834574e8cc01de Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 31 Jan 2012 16:43:50 -0800
Subject: usb: ch9.h: usb_endpoint_maxp() uses __le16_to_cpu()

The usb/ch9.h will be installed to /usr/include/linux,
and be used from user space.
But le16_to_cpu() is only defined for kernel code.
Without this patch, user space compile will be broken.
Special thanks to Stefan Becker

Reported-by: Stefan Becker <chemobejk@gmail.com>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/ch9.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index 61b29057b054..3b6f628880f8 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -589,7 +589,7 @@ static inline int usb_endpoint_is_isoc_out(
  */
 static inline int usb_endpoint_maxp(const struct usb_endpoint_descriptor *epd)
 {
-	return le16_to_cpu(epd->wMaxPacketSize);
+	return __le16_to_cpu(epd->wMaxPacketSize);
 }
 
 /*-------------------------------------------------------------------------*/
-- 
cgit v1.2.3


From c31c151b1c4a29da4dc92212aa8648fb4f8557b9 Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Thu, 2 Feb 2012 07:18:00 +0000
Subject: net/hyperv: Fix the page buffer when an RNDIS message goes beyond
 page boundary

There is a possible data corruption if an RNDIS message goes beyond page
boundary in the sending code path. This patch fixes the problem.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c   |  8 ++++----
 drivers/net/hyperv/rndis_filter.c | 13 +++++++++++++
 include/linux/hyperv.h            |  2 +-
 3 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 69193fcb7648..466c58a7353d 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -151,10 +151,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 	int ret;
 	unsigned int i, num_pages, npg_data;
 
-	/* Add multipage for skb->data and additional one for RNDIS */
+	/* Add multipages for skb->data and additional 2 for RNDIS */
 	npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1)
 		>> PAGE_SHIFT) - ((unsigned long)skb->data >> PAGE_SHIFT) + 1;
-	num_pages = skb_shinfo(skb)->nr_frags + npg_data + 1;
+	num_pages = skb_shinfo(skb)->nr_frags + npg_data + 2;
 
 	/* Allocate a netvsc packet based on # of frags. */
 	packet = kzalloc(sizeof(struct hv_netvsc_packet) +
@@ -173,8 +173,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 				sizeof(struct hv_netvsc_packet) +
 				    (num_pages * sizeof(struct hv_page_buffer));
 
-	/* Setup the rndis header */
-	packet->page_buf_cnt = num_pages;
+	/* If the rndis msg goes beyond 1 page, we will add 1 later */
+	packet->page_buf_cnt = num_pages - 1;
 
 	/* Initialize it from the skb */
 	packet->total_data_buflen = skb->len;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index dc2e3849573b..133b7fbf8595 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -797,6 +797,19 @@ int rndis_filter_send(struct hv_device *dev,
 			(unsigned long)rndisMessage & (PAGE_SIZE-1);
 	pkt->page_buf[0].len = rndisMessageSize;
 
+	/* Add one page_buf if the rndis msg goes beyond page boundary */
+	if (pkt->page_buf[0].offset + rndisMessageSize > PAGE_SIZE) {
+		int i;
+		for (i = pkt->page_buf_cnt; i > 1; i--)
+			pkt->page_buf[i] = pkt->page_buf[i-1];
+		pkt->page_buf_cnt++;
+		pkt->page_buf[0].len = PAGE_SIZE - pkt->page_buf[0].offset;
+		pkt->page_buf[1].pfn = virt_to_phys((void *)((ulong)
+			rndisMessage + pkt->page_buf[0].len)) >> PAGE_SHIFT;
+		pkt->page_buf[1].offset = 0;
+		pkt->page_buf[1].len = rndisMessageSize - pkt->page_buf[0].len;
+	}
+
 	/* Save the packet send completion and context */
 	filterPacket->completion = pkt->completion.send.send_completion;
 	filterPacket->completion_ctx =
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 62b908e0e591..0ae065a5fcb2 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -35,7 +35,7 @@
 #include <linux/mod_devicetable.h>
 
 
-#define MAX_PAGE_BUFFER_COUNT				18
+#define MAX_PAGE_BUFFER_COUNT				19
 #define MAX_MULTIPAGE_BUFFER_COUNT			32 /* 128K */
 
 #pragma pack(push, 1)
-- 
cgit v1.2.3


From 8cdb878dcb359fd1137e9abdee9322f5e9bcfdf8 Mon Sep 17 00:00:00 2001
From: Christopher Yeoh <cyeoh@au1.ibm.com>
Date: Thu, 2 Feb 2012 11:34:09 +1030
Subject: Fix race in process_vm_rw_core

This fixes the race in process_vm_core found by Oleg (see

  http://article.gmane.org/gmane.linux.kernel/1235667/

for details).

This has been updated since I last sent it as the creation of the new
mm_access() function did almost exactly the same thing as parts of the
previous version of this patch did.

In order to use mm_access() even when /proc isn't enabled, we move it to
kernel/fork.c where other related process mm access functions already
are.

Signed-off-by: Chris Yeoh <yeohc@au1.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c         | 20 --------------------
 include/linux/sched.h  |  6 ++++++
 kernel/fork.c          | 20 ++++++++++++++++++++
 mm/process_vm_access.c | 23 +++++++++--------------
 4 files changed, 35 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index d9512bd03e6c..d4548dd49b02 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -198,26 +198,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
 	return result;
 }
 
-static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
-{
-	struct mm_struct *mm;
-	int err;
-
-	err =  mutex_lock_killable(&task->signal->cred_guard_mutex);
-	if (err)
-		return ERR_PTR(err);
-
-	mm = get_task_mm(task);
-	if (mm && mm != current->mm &&
-			!ptrace_may_access(task, mode)) {
-		mmput(mm);
-		mm = ERR_PTR(-EACCES);
-	}
-	mutex_unlock(&task->signal->cred_guard_mutex);
-
-	return mm;
-}
-
 struct mm_struct *mm_for_maps(struct task_struct *task)
 {
 	return mm_access(task, PTRACE_MODE_READ);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2234985a5e65..7d379a6bfd88 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2259,6 +2259,12 @@ static inline void mmdrop(struct mm_struct * mm)
 extern void mmput(struct mm_struct *);
 /* Grab a reference to a task's mm, if it is not already going away */
 extern struct mm_struct *get_task_mm(struct task_struct *task);
+/*
+ * Grab a reference to a task's mm, if it is not already going away
+ * and ptrace_may_access with the mode parameter passed to it
+ * succeeds.
+ */
+extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
 /* Remove the current tasks stale references to the old mm_struct */
 extern void mm_release(struct task_struct *, struct mm_struct *);
 /* Allocate a new mm structure and copy contents from tsk->mm */
diff --git a/kernel/fork.c b/kernel/fork.c
index 051f090d40c1..1b2ef3c23ae4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -647,6 +647,26 @@ struct mm_struct *get_task_mm(struct task_struct *task)
 }
 EXPORT_SYMBOL_GPL(get_task_mm);
 
+struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
+{
+	struct mm_struct *mm;
+	int err;
+
+	err =  mutex_lock_killable(&task->signal->cred_guard_mutex);
+	if (err)
+		return ERR_PTR(err);
+
+	mm = get_task_mm(task);
+	if (mm && mm != current->mm &&
+			!ptrace_may_access(task, mode)) {
+		mmput(mm);
+		mm = ERR_PTR(-EACCES);
+	}
+	mutex_unlock(&task->signal->cred_guard_mutex);
+
+	return mm;
+}
+
 /* Please note the differences between mmput and mm_release.
  * mmput is called whenever we stop holding onto a mm_struct,
  * error success whatever.
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index e920aa3ce104..c20ff48994c2 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -298,23 +298,18 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
 		goto free_proc_pages;
 	}
 
-	task_lock(task);
-	if (__ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
-		task_unlock(task);
-		rc = -EPERM;
-		goto put_task_struct;
-	}
-	mm = task->mm;
-
-	if (!mm || (task->flags & PF_KTHREAD)) {
-		task_unlock(task);
-		rc = -EINVAL;
+	mm = mm_access(task, PTRACE_MODE_ATTACH);
+	if (!mm || IS_ERR(mm)) {
+		rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+		/*
+		 * Explicitly map EACCES to EPERM as EPERM is a more a
+		 * appropriate error code for process_vw_readv/writev
+		 */
+		if (rc == -EACCES)
+			rc = -EPERM;
 		goto put_task_struct;
 	}
 
-	atomic_inc(&mm->mm_users);
-	task_unlock(task);
-
 	for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) {
 		rc = process_vm_rw_single_vec(
 			(unsigned long)rvec[i].iov_base, rvec[i].iov_len,
-- 
cgit v1.2.3


From ff05f603c3238010769787f3ba54c48c290ed3e5 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Thu, 2 Feb 2012 15:29:08 -0800
Subject: include/linux/lp8727.h: Remove executable bit

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lp8727.h | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 include/linux/lp8727.h

(limited to 'include/linux')

diff --git a/include/linux/lp8727.h b/include/linux/lp8727.h
old mode 100755
new mode 100644
-- 
cgit v1.2.3


From f8447d6c213273b444c81eaa2449f55510229d4f Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Sat, 14 Jan 2012 20:58:43 +0100
Subject: mfd: Store twl6040-codec mclk configuration

Store the last used mclk configuration for the PLL.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl6040-core.c  | 3 +++
 include/linux/mfd/twl6040.h | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl6040-core.c b/drivers/mfd/twl6040-core.c
index dda86293dc9f..c2088f4c4547 100644
--- a/drivers/mfd/twl6040-core.c
+++ b/drivers/mfd/twl6040-core.c
@@ -282,6 +282,7 @@ int twl6040_power(struct twl6040 *twl6040, int on)
 		/* Default PLL configuration after power up */
 		twl6040->pll = TWL6040_SYSCLK_SEL_LPPLL;
 		twl6040->sysclk = 19200000;
+		twl6040->mclk = 32768;
 	} else {
 		/* already powered-down */
 		if (!twl6040->power_count) {
@@ -305,6 +306,7 @@ int twl6040_power(struct twl6040 *twl6040, int on)
 			twl6040_power_down(twl6040);
 		}
 		twl6040->sysclk = 0;
+		twl6040->mclk = 0;
 	}
 
 out:
@@ -421,6 +423,7 @@ int twl6040_set_pll(struct twl6040 *twl6040, int pll_id,
 	}
 
 	twl6040->sysclk = freq_out;
+	twl6040->mclk = freq_in;
 	twl6040->pll = pll_id;
 
 pll_out:
diff --git a/include/linux/mfd/twl6040.h b/include/linux/mfd/twl6040.h
index 2463c2619596..9bc9ac651dad 100644
--- a/include/linux/mfd/twl6040.h
+++ b/include/linux/mfd/twl6040.h
@@ -187,8 +187,10 @@ struct twl6040 {
 	int rev;
 	u8 vibra_ctrl_cache[2];
 
+	/* PLL configuration */
 	int pll;
 	unsigned int sysclk;
+	unsigned int mclk;
 
 	unsigned int irq;
 	unsigned int irq_base;
-- 
cgit v1.2.3


From 331818f1c468a24e581aedcbe52af799366a9dfe Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 3 Feb 2012 18:30:53 -0500
Subject: NFSv4: Fix an Oops in the NFSv4 getacl code

Commit bf118a342f10dafe44b14451a1392c3254629a1f (NFSv4: include bitmap
in nfsv4 get acl data) introduces the 'acl_scratch' page for the case
where we may need to decode multi-page data. However it fails to take
into account the fact that the variable may be NULL (for the case where
we're not doing multi-page decode), and it also attaches it to the
encoding xdr_stream rather than the decoding one.

The immediate result is an Oops in nfs4_xdr_enc_getacl due to the
call to page_address() with a NULL page pointer.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Andy Adamson <andros@netapp.com>
Cc: stable@vger.kernel.org
---
 fs/nfs/nfs4proc.c       | 8 ++++----
 fs/nfs/nfs4xdr.c        | 5 ++++-
 include/linux/nfs_xdr.h | 2 +-
 3 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f0c849c98fe4..d202e04aca94 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3575,8 +3575,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 	}
 	if (npages > 1) {
 		/* for decoding across pages */
-		args.acl_scratch = alloc_page(GFP_KERNEL);
-		if (!args.acl_scratch)
+		res.acl_scratch = alloc_page(GFP_KERNEL);
+		if (!res.acl_scratch)
 			goto out_free;
 	}
 	args.acl_len = npages * PAGE_SIZE;
@@ -3612,8 +3612,8 @@ out_free:
 	for (i = 0; i < npages; i++)
 		if (pages[i])
 			__free_page(pages[i]);
-	if (args.acl_scratch)
-		__free_page(args.acl_scratch);
+	if (res.acl_scratch)
+		__free_page(res.acl_scratch);
 	return ret;
 }
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 95e92e438407..33bd8d0f745d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2522,7 +2522,6 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
 
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
 		args->acl_pages, args->acl_pgbase, args->acl_len);
-	xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE);
 
 	encode_nops(&hdr);
 }
@@ -6032,6 +6031,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	struct compound_hdr hdr;
 	int status;
 
+	if (res->acl_scratch != NULL) {
+		void *p = page_address(res->acl_scratch);
+		xdr_set_scratch_buffer(xdr, p, PAGE_SIZE);
+	}
 	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a764cef06b73..d6ba9a12591e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -614,7 +614,6 @@ struct nfs_getaclargs {
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
-	struct page *			acl_scratch;
 	struct nfs4_sequence_args 	seq_args;
 };
 
@@ -624,6 +623,7 @@ struct nfs_getaclres {
 	size_t				acl_len;
 	size_t				acl_data_offset;
 	int				acl_flags;
+	struct page *			acl_scratch;
 	struct nfs4_sequence_res	seq_res;
 };
 
-- 
cgit v1.2.3


From d020283dc694c9ec31b410f522252f7a8397e67d Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venki@google.com>
Date: Fri, 3 Feb 2012 22:22:25 +0100
Subject: PM / QoS: CPU C-state breakage with PM Qos change

Looks like change "PM QoS: Move and rename the implementation files"
merged during the 3.2 development cycle made PM QoS depend on
CONFIG_PM which depends on (PM_SLEEP || PM_RUNTIME).

That breaks CPU C-states with kernels not having these CONFIGs, causing CPUs
to spend time in Polling loop idle instead of going into deep C-states,
consuming way way more power. This is with either acpi idle or intel idle
enabled.

Either CONFIG_PM should be enabled with any pm_qos users or
the !CONFIG_PM pm_qos_request() should return sane defaults not to break
the existing users. Here's is the patch for the latter option.

[rjw: Modified the changelog slightly.]

Signed-off-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: stable@vger.kernel.org
---
 include/linux/pm_qos.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index e5bbcbaa6f57..4d99e4e6ef83 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -110,7 +110,19 @@ static inline void pm_qos_remove_request(struct pm_qos_request *req)
 			{ return; }
 
 static inline int pm_qos_request(int pm_qos_class)
-			{ return 0; }
+{
+	switch (pm_qos_class) {
+	case PM_QOS_CPU_DMA_LATENCY:
+		return PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE;
+	case PM_QOS_NETWORK_LATENCY:
+		return PM_QOS_NETWORK_LAT_DEFAULT_VALUE;
+	case PM_QOS_NETWORK_THROUGHPUT:
+		return PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE;
+	default:
+		return PM_QOS_DEFAULT_VALUE;
+	}
+}
+
 static inline int pm_qos_add_notifier(int pm_qos_class,
 				      struct notifier_block *notifier)
 			{ return 0; }
-- 
cgit v1.2.3


From 96e02d1586782eadf051fa3d6bc4132d2447ac2c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Sat, 4 Feb 2012 10:47:10 +0100
Subject: exec: fix use-after-free bug in setup_new_exec()

Setting the task name is done within setup_new_exec() by accessing
bprm->filename. However this happens after flush_old_exec().
This may result in a use after free bug, flush_old_exec() may
"complete" vfork_done, which will wake up the parent which in turn
may free the passed in filename.
To fix this add a new tcomm field in struct linux_binprm which
contains the now early generated task name until it is used.

Fixes this bug on s390:

  Unable to handle kernel pointer dereference at virtual kernel address 0000000039768000
  Process kworker/u:3 (pid: 245, task: 000000003a3dc840, ksp: 0000000039453818)
  Krnl PSW : 0704000180000000 0000000000282e94 (setup_new_exec+0xa0/0x374)
  Call Trace:
  ([<0000000000282e2c>] setup_new_exec+0x38/0x374)
   [<00000000002dd12e>] load_elf_binary+0x402/0x1bf4
   [<0000000000280a42>] search_binary_handler+0x38e/0x5bc
   [<0000000000282b6c>] do_execve_common+0x410/0x514
   [<0000000000282cb6>] do_execve+0x46/0x58
   [<00000000005bce58>] kernel_execve+0x28/0x70
   [<000000000014ba2e>] ____call_usermodehelper+0x102/0x140
   [<00000000005bc8da>] kernel_thread_starter+0x6/0xc
   [<00000000005bc8d4>] kernel_thread_starter+0x0/0xc
  Last Breaking-Event-Address:
   [<00000000002830f0>] setup_new_exec+0x2fc/0x374

  Kernel panic - not syncing: Fatal exception: panic_on_oops

Reported-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c               | 33 +++++++++++++++++----------------
 include/linux/binfmts.h |  3 ++-
 2 files changed, 19 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index aeb135c7ff5c..92ce83a11e90 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1071,6 +1071,21 @@ void set_task_comm(struct task_struct *tsk, char *buf)
 	perf_event_comm(tsk);
 }
 
+static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len)
+{
+	int i, ch;
+
+	/* Copies the binary name from after last slash */
+	for (i = 0; (ch = *(fn++)) != '\0';) {
+		if (ch == '/')
+			i = 0; /* overwrite what we wrote */
+		else
+			if (i < len - 1)
+				tcomm[i++] = ch;
+	}
+	tcomm[i] = '\0';
+}
+
 int flush_old_exec(struct linux_binprm * bprm)
 {
 	int retval;
@@ -1085,6 +1100,7 @@ int flush_old_exec(struct linux_binprm * bprm)
 
 	set_mm_exe_file(bprm->mm, bprm->file);
 
+	filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm));
 	/*
 	 * Release all of the old mmap stuff
 	 */
@@ -1116,10 +1132,6 @@ EXPORT_SYMBOL(would_dump);
 
 void setup_new_exec(struct linux_binprm * bprm)
 {
-	int i, ch;
-	const char *name;
-	char tcomm[sizeof(current->comm)];
-
 	arch_pick_mmap_layout(current->mm);
 
 	/* This is the point of no return */
@@ -1130,18 +1142,7 @@ void setup_new_exec(struct linux_binprm * bprm)
 	else
 		set_dumpable(current->mm, suid_dumpable);
 
-	name = bprm->filename;
-
-	/* Copies the binary name from after last slash */
-	for (i=0; (ch = *(name++)) != '\0';) {
-		if (ch == '/')
-			i = 0; /* overwrite what we wrote */
-		else
-			if (i < (sizeof(tcomm) - 1))
-				tcomm[i++] = ch;
-	}
-	tcomm[i] = '\0';
-	set_task_comm(current, tcomm);
+	set_task_comm(current, bprm->tcomm);
 
 	/* Set the new mm task size. We have to do that late because it may
 	 * depend on TIF_32BIT which is only updated in flush_thread() on
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index fd88a3945aa1..0092102db2de 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -18,7 +18,7 @@ struct pt_regs;
 #define BINPRM_BUF_SIZE 128
 
 #ifdef __KERNEL__
-#include <linux/list.h>
+#include <linux/sched.h>
 
 #define CORENAME_MAX_SIZE 128
 
@@ -58,6 +58,7 @@ struct linux_binprm {
 	unsigned interp_flags;
 	unsigned interp_data;
 	unsigned long loader, exec;
+	char tcomm[TASK_COMM_LEN];
 };
 
 #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
-- 
cgit v1.2.3


From 11a3122f6cf2d988a77eb8883d0fc49cd013a6d5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 7 Feb 2012 07:51:30 +0100
Subject: block: strip out locking optimization in put_io_context()

put_io_context() performed a complex trylock dancing to avoid
deferring ioc release to workqueue.  It was also broken on UP because
trylock was always assumed to succeed which resulted in unbalanced
preemption count.

While there are ways to fix the UP breakage, even the most
pathological microbench (forced ioc allocation and tight fork/exit
loop) fails to show any appreciable performance benefit of the
optimization.  Strip it out.  If there turns out to be workloads which
are affected by this change, simpler optimization from the discussion
thread can be applied later.

Signed-off-by: Tejun Heo <tj@kernel.org>
LKML-Reference: <1328514611.21268.66.camel@sli10-conroe>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c        |  2 +-
 block/blk-core.c          |  2 +-
 block/blk-ioc.c           | 92 ++++++-----------------------------------------
 block/cfq-iosched.c       |  2 +-
 fs/ioprio.c               |  2 +-
 include/linux/blkdev.h    |  3 --
 include/linux/iocontext.h |  5 ++-
 kernel/fork.c             |  2 +-
 8 files changed, 18 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index fa8f26309444..75642a352a8f 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1659,7 +1659,7 @@ static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 		ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
 		if (ioc) {
 			ioc_cgroup_changed(ioc);
-			put_io_context(ioc, NULL);
+			put_io_context(ioc);
 		}
 	}
 }
diff --git a/block/blk-core.c b/block/blk-core.c
index 636702575118..532b3a21b383 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -642,7 +642,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
 	if (rq->cmd_flags & REQ_ELVPRIV) {
 		elv_put_request(q, rq);
 		if (rq->elv.icq)
-			put_io_context(rq->elv.icq->ioc, q);
+			put_io_context(rq->elv.icq->ioc);
 	}
 
 	mempool_free(rq, q->rq.rq_pool);
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 7490b6da2453..9884fd7427fe 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -29,21 +29,6 @@ void get_io_context(struct io_context *ioc)
 }
 EXPORT_SYMBOL(get_io_context);
 
-/*
- * Releasing ioc may nest into another put_io_context() leading to nested
- * fast path release.  As the ioc's can't be the same, this is okay but
- * makes lockdep whine.  Keep track of nesting and use it as subclass.
- */
-#ifdef CONFIG_LOCKDEP
-#define ioc_release_depth(q)		((q) ? (q)->ioc_release_depth : 0)
-#define ioc_release_depth_inc(q)	(q)->ioc_release_depth++
-#define ioc_release_depth_dec(q)	(q)->ioc_release_depth--
-#else
-#define ioc_release_depth(q)		0
-#define ioc_release_depth_inc(q)	do { } while (0)
-#define ioc_release_depth_dec(q)	do { } while (0)
-#endif
-
 static void icq_free_icq_rcu(struct rcu_head *head)
 {
 	struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);
@@ -75,11 +60,8 @@ static void ioc_exit_icq(struct io_cq *icq)
 	if (rcu_dereference_raw(ioc->icq_hint) == icq)
 		rcu_assign_pointer(ioc->icq_hint, NULL);
 
-	if (et->ops.elevator_exit_icq_fn) {
-		ioc_release_depth_inc(q);
+	if (et->ops.elevator_exit_icq_fn)
 		et->ops.elevator_exit_icq_fn(icq);
-		ioc_release_depth_dec(q);
-	}
 
 	/*
 	 * @icq->q might have gone away by the time RCU callback runs
@@ -149,81 +131,29 @@ static void ioc_release_fn(struct work_struct *work)
 /**
  * put_io_context - put a reference of io_context
  * @ioc: io_context to put
- * @locked_q: request_queue the caller is holding queue_lock of (hint)
  *
  * Decrement reference count of @ioc and release it if the count reaches
- * zero.  If the caller is holding queue_lock of a queue, it can indicate
- * that with @locked_q.  This is an optimization hint and the caller is
- * allowed to pass in %NULL even when it's holding a queue_lock.
+ * zero.
  */
-void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
+void put_io_context(struct io_context *ioc)
 {
-	struct request_queue *last_q = locked_q;
 	unsigned long flags;
 
 	if (ioc == NULL)
 		return;
 
 	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
-	if (locked_q)
-		lockdep_assert_held(locked_q->queue_lock);
-
-	if (!atomic_long_dec_and_test(&ioc->refcount))
-		return;
 
 	/*
-	 * Destroy @ioc.  This is a bit messy because icq's are chained
-	 * from both ioc and queue, and ioc->lock nests inside queue_lock.
-	 * The inner ioc->lock should be held to walk our icq_list and then
-	 * for each icq the outer matching queue_lock should be grabbed.
-	 * ie. We need to do reverse-order double lock dancing.
-	 *
-	 * Another twist is that we are often called with one of the
-	 * matching queue_locks held as indicated by @locked_q, which
-	 * prevents performing double-lock dance for other queues.
-	 *
-	 * So, we do it in two stages.  The fast path uses the queue_lock
-	 * the caller is holding and, if other queues need to be accessed,
-	 * uses trylock to avoid introducing locking dependency.  This can
-	 * handle most cases, especially if @ioc was performing IO on only
-	 * single device.
-	 *
-	 * If trylock doesn't cut it, we defer to @ioc->release_work which
-	 * can do all the double-locking dancing.
+	 * Releasing ioc requires reverse order double locking and we may
+	 * already be holding a queue_lock.  Do it asynchronously from wq.
 	 */
-	spin_lock_irqsave_nested(&ioc->lock, flags,
-				 ioc_release_depth(locked_q));
-
-	while (!hlist_empty(&ioc->icq_list)) {
-		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
-						struct io_cq, ioc_node);
-		struct request_queue *this_q = icq->q;
-
-		if (this_q != last_q) {
-			if (last_q && last_q != locked_q)
-				spin_unlock(last_q->queue_lock);
-			last_q = NULL;
-
-			/* spin_trylock() always successes in UP case */
-			if (this_q != locked_q &&
-			    !spin_trylock(this_q->queue_lock))
-				break;
-			last_q = this_q;
-			continue;
-		}
-		ioc_exit_icq(icq);
+	if (atomic_long_dec_and_test(&ioc->refcount)) {
+		spin_lock_irqsave(&ioc->lock, flags);
+		if (!hlist_empty(&ioc->icq_list))
+			schedule_work(&ioc->release_work);
+		spin_unlock_irqrestore(&ioc->lock, flags);
 	}
-
-	if (last_q && last_q != locked_q)
-		spin_unlock(last_q->queue_lock);
-
-	spin_unlock_irqrestore(&ioc->lock, flags);
-
-	/* if no icq is left, we're done; otherwise, kick release_work */
-	if (hlist_empty(&ioc->icq_list))
-		kmem_cache_free(iocontext_cachep, ioc);
-	else
-		schedule_work(&ioc->release_work);
 }
 EXPORT_SYMBOL(put_io_context);
 
@@ -238,7 +168,7 @@ void exit_io_context(struct task_struct *task)
 	task_unlock(task);
 
 	atomic_dec(&ioc->nr_tasks);
-	put_io_context(ioc, NULL);
+	put_io_context(ioc);
 }
 
 /**
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index da21c24dbed3..5684df6848bc 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1794,7 +1794,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		cfqd->active_queue = NULL;
 
 	if (cfqd->active_cic) {
-		put_io_context(cfqd->active_cic->icq.ioc, cfqd->queue);
+		put_io_context(cfqd->active_cic->icq.ioc);
 		cfqd->active_cic = NULL;
 	}
 }
diff --git a/fs/ioprio.c b/fs/ioprio.c
index f84b380d65e5..0f1b9515213b 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
 	ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
 	if (ioc) {
 		ioc_ioprio_changed(ioc, ioprio);
-		put_io_context(ioc, NULL);
+		put_io_context(ioc);
 	}
 
 	return err;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6c6a1f008065..606cf339bb56 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -399,9 +399,6 @@ struct request_queue {
 	/* Throttle data */
 	struct throtl_data *td;
 #endif
-#ifdef CONFIG_LOCKDEP
-	int			ioc_release_depth;
-#endif
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 7e1371c4bccf..119773eebe31 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -133,7 +133,7 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
 
 struct task_struct;
 #ifdef CONFIG_BLOCK
-void put_io_context(struct io_context *ioc, struct request_queue *locked_q);
+void put_io_context(struct io_context *ioc);
 void exit_io_context(struct task_struct *task);
 struct io_context *get_task_io_context(struct task_struct *task,
 				       gfp_t gfp_flags, int node);
@@ -141,8 +141,7 @@ void ioc_ioprio_changed(struct io_context *ioc, int ioprio);
 void ioc_cgroup_changed(struct io_context *ioc);
 #else
 struct io_context;
-static inline void put_io_context(struct io_context *ioc,
-				  struct request_queue *locked_q) { }
+static inline void put_io_context(struct io_context *ioc) { }
 static inline void exit_io_context(struct task_struct *task) { }
 #endif
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 051f090d40c1..c574aefa8d1b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -890,7 +890,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 			return -ENOMEM;
 
 		new_ioc->ioprio = ioc->ioprio;
-		put_io_context(new_ioc, NULL);
+		put_io_context(new_ioc);
 	}
 #endif
 	return 0;
-- 
cgit v1.2.3


From 050c8ea80e3e90019d9e981c6a117ef614e882ed Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 8 Feb 2012 09:19:38 +0100
Subject: block: separate out blk_rq_merge_ok() and blk_try_merge() from
 elevator functions

blk_rq_merge_ok() is the elevator-neutral part of merge eligibility
test.  blk_try_merge() determines merge direction and expects the
caller to have tested elv_rq_merge_ok() previously.

elv_rq_merge_ok() now wraps blk_rq_merge_ok() and then calls
elv_iosched_allow_merge().  elv_try_merge() is removed and the two
callers are updated to call elv_rq_merge_ok() explicitly followed by
blk_try_merge().  While at it, make rq_merge_ok() functions return
bool.

This is to prepare for plug merge update and doesn't introduce any
behavior change.

This is based on Jens' patch to skip elevator_allow_merge_fn() from
plug merge.

Signed-off-by: Tejun Heo <tj@kernel.org>
LKML-Reference: <4F16F3CA.90904@kernel.dk>
Original-patch-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c         |  4 ++--
 block/blk-merge.c        | 37 ++++++++++++++++++++++++++++++++
 block/blk.h              |  2 ++
 block/elevator.c         | 55 ++++--------------------------------------------
 include/linux/elevator.h |  3 +--
 5 files changed, 46 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 532b3a21b383..fa697bf691eb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1282,10 +1282,10 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
 
 		(*request_count)++;
 
-		if (rq->q != q)
+		if (rq->q != q || !elv_rq_merge_ok(rq, bio))
 			continue;
 
-		el_ret = elv_try_merge(rq, bio);
+		el_ret = blk_try_merge(rq, bio);
 		if (el_ret == ELEVATOR_BACK_MERGE) {
 			ret = bio_attempt_back_merge(q, rq, bio);
 			if (ret)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index cfcc37cb222b..160035f54882 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -471,3 +471,40 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
 {
 	return attempt_merge(q, rq, next);
 }
+
+bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
+{
+	if (!rq_mergeable(rq))
+		return false;
+
+	/* don't merge file system requests and discard requests */
+	if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
+		return false;
+
+	/* don't merge discard requests and secure discard requests */
+	if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
+		return false;
+
+	/* different data direction or already started, don't merge */
+	if (bio_data_dir(bio) != rq_data_dir(rq))
+		return false;
+
+	/* must be same device and not a special request */
+	if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
+		return false;
+
+	/* only merge integrity protected bio into ditto rq */
+	if (bio_integrity(bio) != blk_integrity_rq(rq))
+		return false;
+
+	return true;
+}
+
+int blk_try_merge(struct request *rq, struct bio *bio)
+{
+	if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_sector)
+		return ELEVATOR_BACK_MERGE;
+	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_sector)
+		return ELEVATOR_FRONT_MERGE;
+	return ELEVATOR_NO_MERGE;
+}
diff --git a/block/blk.h b/block/blk.h
index 7efd772336de..9c12f80882b0 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -137,6 +137,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
 				struct request *next);
 void blk_recalc_rq_segments(struct request *rq);
 void blk_rq_set_mixed_merge(struct request *rq);
+bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
+int blk_try_merge(struct request *rq, struct bio *bio);
 
 void blk_queue_congestion_threshold(struct request_queue *q);
 
diff --git a/block/elevator.c b/block/elevator.c
index 91e18f8af9be..f016855a46b0 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -70,39 +70,9 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
 /*
  * can we safely merge with this request?
  */
-int elv_rq_merge_ok(struct request *rq, struct bio *bio)
+bool elv_rq_merge_ok(struct request *rq, struct bio *bio)
 {
-	if (!rq_mergeable(rq))
-		return 0;
-
-	/*
-	 * Don't merge file system requests and discard requests
-	 */
-	if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
-		return 0;
-
-	/*
-	 * Don't merge discard requests and secure discard requests
-	 */
-	if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
-		return 0;
-
-	/*
-	 * different data direction or already started, don't merge
-	 */
-	if (bio_data_dir(bio) != rq_data_dir(rq))
-		return 0;
-
-	/*
-	 * must be same device and not a special request
-	 */
-	if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
-		return 0;
-
-	/*
-	 * only merge integrity protected bio into ditto rq
-	 */
-	if (bio_integrity(bio) != blk_integrity_rq(rq))
+	if (!blk_rq_merge_ok(rq, bio))
 		return 0;
 
 	if (!elv_iosched_allow_merge(rq, bio))
@@ -112,23 +82,6 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 }
 EXPORT_SYMBOL(elv_rq_merge_ok);
 
-int elv_try_merge(struct request *__rq, struct bio *bio)
-{
-	int ret = ELEVATOR_NO_MERGE;
-
-	/*
-	 * we can merge and sequence is ok, check if it's possible
-	 */
-	if (elv_rq_merge_ok(__rq, bio)) {
-		if (blk_rq_pos(__rq) + blk_rq_sectors(__rq) == bio->bi_sector)
-			ret = ELEVATOR_BACK_MERGE;
-		else if (blk_rq_pos(__rq) - bio_sectors(bio) == bio->bi_sector)
-			ret = ELEVATOR_FRONT_MERGE;
-	}
-
-	return ret;
-}
-
 static struct elevator_type *elevator_find(const char *name)
 {
 	struct elevator_type *e;
@@ -478,8 +431,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 	/*
 	 * First try one-hit cache.
 	 */
-	if (q->last_merge) {
-		ret = elv_try_merge(q->last_merge, bio);
+	if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) {
+		ret = blk_try_merge(q->last_merge, bio);
 		if (ret != ELEVATOR_NO_MERGE) {
 			*req = q->last_merge;
 			return ret;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index c24f3d7fbf1e..d6dfb65c8885 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -122,7 +122,6 @@ extern void elv_dispatch_add_tail(struct request_queue *, struct request *);
 extern void elv_add_request(struct request_queue *, struct request *, int);
 extern void __elv_add_request(struct request_queue *, struct request *, int);
 extern int elv_merge(struct request_queue *, struct request **, struct bio *);
-extern int elv_try_merge(struct request *, struct bio *);
 extern void elv_merge_requests(struct request_queue *, struct request *,
 			       struct request *);
 extern void elv_merged_request(struct request_queue *, struct request *, int);
@@ -155,7 +154,7 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
 extern int elevator_init(struct request_queue *, char *);
 extern void elevator_exit(struct elevator_queue *);
 extern int elevator_change(struct request_queue *, const char *);
-extern int elv_rq_merge_ok(struct request *, struct bio *);
+extern bool elv_rq_merge_ok(struct request *, struct bio *);
 
 /*
  * Helper functions.
-- 
cgit v1.2.3


From 07c2bd37350c9b1af71b35d05f16e300a6602948 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 8 Feb 2012 09:19:42 +0100
Subject: block: don't call elevator callbacks for plug merges

Plug merge calls two elevator callbacks outside queue lock -
elevator_allow_merge_fn() and elevator_bio_merged_fn().  Although
attempt_plug_merge() suggests that elevator is guaranteed to be there
through the existing request on the plug list, nothing prevents plug
merge from calling into dying or initializing elevator.

For regular merges, bypass ensures elvpriv count to reach zero, which
in turn prevents merges as all !ELVPRIV requests get REQ_SOFTBARRIER
from forced back insertion.  Plug merge doesn't check ELVPRIV, and, as
the requests haven't gone through elevator insertion yet, it doesn't
have SOFTBARRIER set allowing merges on a bypassed queue.

This, for example, leads to the following crash during elevator
switch.

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
 IP: [<ffffffff813b34e9>] cfq_allow_merge+0x49/0xa0
 PGD 112cbc067 PUD 115d5c067 PMD 0
 Oops: 0000 [#1] PREEMPT SMP
 CPU 1
 Modules linked in: deadline_iosched

 Pid: 819, comm: dd Not tainted 3.3.0-rc2-work+ #76 Bochs Bochs
 RIP: 0010:[<ffffffff813b34e9>]  [<ffffffff813b34e9>] cfq_allow_merge+0x49/0xa0
 RSP: 0018:ffff8801143a38f8  EFLAGS: 00010297
 RAX: 0000000000000000 RBX: ffff88011817ce28 RCX: ffff880116eb6cc0
 RDX: 0000000000000000 RSI: ffff880118056e20 RDI: ffff8801199512f8
 RBP: ffff8801143a3908 R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000000000001 R11: 0000000000000000 R12: ffff880118195708
 R13: ffff880118052aa0 R14: ffff8801143a3d50 R15: ffff880118195708
 FS:  00007f19f82cb700(0000) GS:ffff88011fc80000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
 CR2: 0000000000000008 CR3: 0000000112c6a000 CR4: 00000000000006e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
 Process dd (pid: 819, threadinfo ffff8801143a2000, task ffff880116eb6cc0)
 Stack:
  ffff88011817ce28 ffff880118195708 ffff8801143a3928 ffffffff81391bba
  ffff88011817ce28 ffff880118195708 ffff8801143a3948 ffffffff81391bf1
  ffff88011817ce28 0000000000000000 ffff8801143a39a8 ffffffff81398e3e
 Call Trace:
  [<ffffffff81391bba>] elv_rq_merge_ok+0x4a/0x60
  [<ffffffff81391bf1>] elv_try_merge+0x21/0x40
  [<ffffffff81398e3e>] blk_queue_bio+0x8e/0x390
  [<ffffffff81396a5a>] generic_make_request+0xca/0x100
  [<ffffffff81396b04>] submit_bio+0x74/0x100
  [<ffffffff811d45c2>] __blockdev_direct_IO+0x1ce2/0x3450
  [<ffffffff811d0dc7>] blkdev_direct_IO+0x57/0x60
  [<ffffffff811460b5>] generic_file_aio_read+0x6d5/0x760
  [<ffffffff811986b2>] do_sync_read+0xe2/0x120
  [<ffffffff81199345>] vfs_read+0xc5/0x180
  [<ffffffff81199501>] sys_read+0x51/0x90
  [<ffffffff81aeac12>] system_call_fastpath+0x16/0x1b

There are multiple ways to fix this including making plug merge check
ELVPRIV; however,

* Calling into elevator outside queue lock is confusing and
  error-prone.

* Requests on plug list aren't known to the elevator.  They aren't on
  the elevator yet, so there's no elevator specific state to update.

* Given the nature of plug merges - collecting bio's for the same
  purpose from the same issuer - elevator specific restrictions aren't
  applicable.

So, simply don't call into elevator methods from plug merge by moving
elv_bio_merged() from bio_attempt_*_merge() to blk_queue_bio(), and
using blk_try_merge() in attempt_plug_merge().

This is based on Jens' patch to skip elevator_allow_merge_fn() from
plug merge.

Note that this makes per-cgroup merged stats skip plug merging.

Signed-off-by: Tejun Heo <tj@kernel.org>
LKML-Reference: <4F16F3CA.90904@kernel.dk>
Original-patch-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c         | 19 +++++++++----------
 block/cfq-iosched.c      | 15 ++++-----------
 include/linux/elevator.h |  6 ------
 3 files changed, 13 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index fa697bf691eb..3a78b00edd71 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1212,7 +1212,6 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
 	drive_stat_acct(req, 0);
-	elv_bio_merged(q, req, bio);
 	return true;
 }
 
@@ -1243,7 +1242,6 @@ static bool bio_attempt_front_merge(struct request_queue *q,
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
 	drive_stat_acct(req, 0);
-	elv_bio_merged(q, req, bio);
 	return true;
 }
 
@@ -1257,13 +1255,12 @@ static bool bio_attempt_front_merge(struct request_queue *q,
  * on %current's plugged list.  Returns %true if merge was successful,
  * otherwise %false.
  *
- * This function is called without @q->queue_lock; however, elevator is
- * accessed iff there already are requests on the plugged list which in
- * turn guarantees validity of the elevator.
- *
- * Note that, on successful merge, elevator operation
- * elevator_bio_merged_fn() will be called without queue lock.  Elevator
- * must be ready for this.
+ * Plugging coalesces IOs from the same issuer for the same purpose without
+ * going through @q->queue_lock.  As such it's more of an issuing mechanism
+ * than scheduling, and the request, while may have elvpriv data, is not
+ * added on the elevator at this point.  In addition, we don't have
+ * reliable access to the elevator outside queue lock.  Only check basic
+ * merging parameters without querying the elevator.
  */
 static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
 			       unsigned int *request_count)
@@ -1282,7 +1279,7 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
 
 		(*request_count)++;
 
-		if (rq->q != q || !elv_rq_merge_ok(rq, bio))
+		if (rq->q != q || !blk_rq_merge_ok(rq, bio))
 			continue;
 
 		el_ret = blk_try_merge(rq, bio);
@@ -1347,12 +1344,14 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
 	el_ret = elv_merge(q, &req, bio);
 	if (el_ret == ELEVATOR_BACK_MERGE) {
 		if (bio_attempt_back_merge(q, req, bio)) {
+			elv_bio_merged(q, req, bio);
 			if (!attempt_back_merge(q, req))
 				elv_merged_request(q, req, el_ret);
 			goto out_unlock;
 		}
 	} else if (el_ret == ELEVATOR_FRONT_MERGE) {
 		if (bio_attempt_front_merge(q, req, bio)) {
+			elv_bio_merged(q, req, bio);
 			if (!attempt_front_merge(q, req))
 				elv_merged_request(q, req, el_ret);
 			goto out_unlock;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 5684df6848bc..d0ba50533668 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1699,18 +1699,11 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
 
 	/*
 	 * Lookup the cfqq that this bio will be queued with and allow
-	 * merge only if rq is queued there.  This function can be called
-	 * from plug merge without queue_lock.  In such cases, ioc of @rq
-	 * and %current are guaranteed to be equal.  Avoid lookup which
-	 * requires queue_lock by using @rq's cic.
+	 * merge only if rq is queued there.
 	 */
-	if (current->io_context == RQ_CIC(rq)->icq.ioc) {
-		cic = RQ_CIC(rq);
-	} else {
-		cic = cfq_cic_lookup(cfqd, current->io_context);
-		if (!cic)
-			return false;
-	}
+	cic = cfq_cic_lookup(cfqd, current->io_context);
+	if (!cic)
+		return false;
 
 	cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
 	return cfqq == RQ_CFQQ(rq);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index d6dfb65c8885..7d4e0356f329 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -42,12 +42,6 @@ struct elevator_ops
 	elevator_merged_fn *elevator_merged_fn;
 	elevator_merge_req_fn *elevator_merge_req_fn;
 	elevator_allow_merge_fn *elevator_allow_merge_fn;
-
-	/*
-	 * Used for both plugged list and elevator merging and in the
-	 * former case called without queue_lock.  Read comment on top of
-	 * attempt_plug_merge() for details.
-	 */
 	elevator_bio_merged_fn *elevator_bio_merged_fn;
 
 	elevator_dispatch_fn *elevator_dispatch_fn;
-- 
cgit v1.2.3


From cdccaa9467b982d57b139818d15e1e994feca372 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 8 Feb 2012 20:03:14 +0100
Subject: cdrom: move shared static to cdrom_device_info

The keeplocked variable in the cdrom driver is shared across multiple
drives, but set in per-device ioctls.  Move it to the per-device struct,
avoiding that the setting on one drive affects the driver's behavior
when closing another.

[ Impact: limit udev's confusion to one drive when a CD burning program
  unlocks the CD door at the end of burning. ]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/cdrom/cdrom.c | 12 +++++-------
 include/linux/cdrom.h |  3 ++-
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 8fefe59f52a7..d620b4495745 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -286,8 +286,6 @@
 
 /* used to tell the module to turn on full debugging messages */
 static bool debug;
-/* used to keep tray locked at all times */
-static int keeplocked;
 /* default compatibility mode */
 static bool autoclose=1;
 static bool autoeject;
@@ -1204,7 +1202,7 @@ void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode)
 		cdinfo(CD_CLOSE, "Use count for \"/dev/%s\" now zero\n", cdi->name);
 		cdrom_dvd_rw_close_write(cdi);
 
-		if ((cdo->capability & CDC_LOCK) && !keeplocked) {
+		if ((cdo->capability & CDC_LOCK) && !cdi->keeplocked) {
 			cdinfo(CD_CLOSE, "Unlocking door!\n");
 			cdo->lock_door(cdi, 0);
 		}
@@ -1371,7 +1369,7 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
 	curslot = info->hdr.curslot;
 	kfree(info);
 
-	if (cdi->use_count > 1 || keeplocked) {
+	if (cdi->use_count > 1 || cdi->keeplocked) {
 		if (slot == CDSL_CURRENT) {
 	    		return curslot;
 		} else {
@@ -2289,7 +2287,7 @@ static int cdrom_ioctl_eject(struct cdrom_device_info *cdi)
 
 	if (!CDROM_CAN(CDC_OPEN_TRAY))
 		return -ENOSYS;
-	if (cdi->use_count != 1 || keeplocked)
+	if (cdi->use_count != 1 || cdi->keeplocked)
 		return -EBUSY;
 	if (CDROM_CAN(CDC_LOCK)) {
 		int ret = cdi->ops->lock_door(cdi, 0);
@@ -2316,7 +2314,7 @@ static int cdrom_ioctl_eject_sw(struct cdrom_device_info *cdi,
 
 	if (!CDROM_CAN(CDC_OPEN_TRAY))
 		return -ENOSYS;
-	if (keeplocked)
+	if (cdi->keeplocked)
 		return -EBUSY;
 
 	cdi->options &= ~(CDO_AUTO_CLOSE | CDO_AUTO_EJECT);
@@ -2447,7 +2445,7 @@ static int cdrom_ioctl_lock_door(struct cdrom_device_info *cdi,
 	if (!CDROM_CAN(CDC_LOCK))
 		return -EDRIVE_CANT_DO_THIS;
 
-	keeplocked = arg ? 1 : 0;
+	cdi->keeplocked = arg ? 1 : 0;
 
 	/*
 	 * Don't unlock the door on multiple opens by default, but allow
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index 35eae4b67503..7c48029dffe6 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -952,7 +952,8 @@ struct cdrom_device_info {
     	char name[20];                  /* name of the device type */
 /* per-device flags */
         __u8 sanyo_slot		: 2;	/* Sanyo 3 CD changer support */
-        __u8 reserved		: 6;	/* not used yet */
+        __u8 keeplocked		: 1;	/* CDROM_LOCKDOOR status */
+        __u8 reserved		: 5;	/* not used yet */
 	int cdda_method;		/* see flags */
 	__u8 last_sense;
 	__u8 media_written;		/* dirty flag, DVD+RW bookkeeping */
-- 
cgit v1.2.3


From d9f5343e35d9138432657202afa8e3ddb2ade360 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Thu, 5 Jan 2012 16:28:54 -0800
Subject: USB: Remove duplicate USB 3.0 hub feature #defines.

Somehow we ended up with duplicate hub feature #defines in ch11.h.
Tatyana Brokhman first created the USB 3.0 hub feature macros in 2.6.38
with commit 0eadcc09203349b11ca477ec367079b23d32ab91 "usb: USB3.0 ch11
definitions".  In 2.6.39, I modified a patch from John Youn that added
similar macros in a different place in the same file, and committed
dbe79bbe9dcb22cb3651c46f18943477141ca452 "USB 3.0 Hub Changes".

Some of the #defines used different names for the same values.  Others
used exactly the same names with the same values, like these gems:

 #define USB_PORT_FEAT_BH_PORT_RESET     28
...
 #define USB_PORT_FEAT_BH_PORT_RESET            28

According to my very geeky husband (who looked it up in the C99 spec),
it is allowed to have object-like macros with duplicate names as long as
the replacement list is exactly the same.  However, he recalled that
some compilers will give warnings when they find duplicate macros.  It's
probably best to remove the duplicates in the stable tree, so that the
code compiles for everyone.

The macros are now fixed to move the feature requests that are specific
to USB 3.0 hubs into a new section (out of the USB 2.0 hub feature
section), and use the most common macro name.

This patch should be backported to 2.6.39.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: Tatyana Brokhman <tlinder@codeaurora.org>
Cc: John Youn <johnyoun@synopsys.com>
Cc: Jamey Sharp <jamey@minilop.net>
Cc: stable@vger.kernel.org
---
 include/linux/usb/ch11.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/ch11.h b/include/linux/usb/ch11.h
index 31fdb4c6ee3d..0b83acd3360a 100644
--- a/include/linux/usb/ch11.h
+++ b/include/linux/usb/ch11.h
@@ -61,12 +61,6 @@
 #define USB_PORT_FEAT_TEST              21
 #define USB_PORT_FEAT_INDICATOR         22
 #define USB_PORT_FEAT_C_PORT_L1         23
-#define USB_PORT_FEAT_C_PORT_LINK_STATE	25
-#define USB_PORT_FEAT_C_PORT_CONFIG_ERROR 26
-#define USB_PORT_FEAT_PORT_REMOTE_WAKE_MASK 27
-#define USB_PORT_FEAT_BH_PORT_RESET     28
-#define USB_PORT_FEAT_C_BH_PORT_RESET   29
-#define USB_PORT_FEAT_FORCE_LINKPM_ACCEPT 30
 
 /*
  * Port feature selectors added by USB 3.0 spec.
@@ -75,8 +69,8 @@
 #define USB_PORT_FEAT_LINK_STATE		5
 #define USB_PORT_FEAT_U1_TIMEOUT		23
 #define USB_PORT_FEAT_U2_TIMEOUT		24
-#define USB_PORT_FEAT_C_LINK_STATE		25
-#define USB_PORT_FEAT_C_CONFIG_ERR		26
+#define USB_PORT_FEAT_C_PORT_LINK_STATE		25
+#define USB_PORT_FEAT_C_PORT_CONFIG_ERROR	26
 #define USB_PORT_FEAT_REMOTE_WAKE_MASK		27
 #define USB_PORT_FEAT_BH_PORT_RESET		28
 #define USB_PORT_FEAT_C_BH_PORT_RESET		29
-- 
cgit v1.2.3


From 2c4967f741e87cdd63de7271b97807041dccbf3b Mon Sep 17 00:00:00 2001
From: Sujit Reddy Thumma <sthumma@codeaurora.org>
Date: Sat, 4 Feb 2012 16:14:50 -0500
Subject: mmc: core: Ensure clocks are always enabled before host interaction

Ensure clocks are always enabled before any interaction with the
host controller driver. This makes sure that there is no race
between host execution and the core layer turning off clocks
in different context with clock gating framework.

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Per Forlin <per.forlin@stericsson.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c     | 19 ++++++++++++++++---
 drivers/mmc/core/host.h     | 21 ---------------------
 drivers/mmc/core/sd.c       | 22 ++++++++++++++++++----
 drivers/mmc/core/sdio_irq.c | 10 ++++++++--
 include/linux/mmc/host.h    | 19 +++++++++++++++++++
 5 files changed, 61 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index f545a3e6eb80..b3063b741df3 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -290,8 +290,11 @@ static void mmc_wait_for_req_done(struct mmc_host *host,
 static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq,
 		 bool is_first_req)
 {
-	if (host->ops->pre_req)
+	if (host->ops->pre_req) {
+		mmc_host_clk_hold(host);
 		host->ops->pre_req(host, mrq, is_first_req);
+		mmc_host_clk_release(host);
+	}
 }
 
 /**
@@ -306,8 +309,11 @@ static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq,
 static void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq,
 			 int err)
 {
-	if (host->ops->post_req)
+	if (host->ops->post_req) {
+		mmc_host_clk_hold(host);
 		host->ops->post_req(host, mrq, err);
+		mmc_host_clk_release(host);
+	}
 }
 
 /**
@@ -620,7 +626,9 @@ int mmc_host_enable(struct mmc_host *host)
 		int err;
 
 		host->en_dis_recurs = 1;
+		mmc_host_clk_hold(host);
 		err = host->ops->enable(host);
+		mmc_host_clk_release(host);
 		host->en_dis_recurs = 0;
 
 		if (err) {
@@ -640,7 +648,9 @@ static int mmc_host_do_disable(struct mmc_host *host, int lazy)
 		int err;
 
 		host->en_dis_recurs = 1;
+		mmc_host_clk_hold(host);
 		err = host->ops->disable(host, lazy);
+		mmc_host_clk_release(host);
 		host->en_dis_recurs = 0;
 
 		if (err < 0) {
@@ -1203,8 +1213,11 @@ int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage, bool cmd11
 
 	host->ios.signal_voltage = signal_voltage;
 
-	if (host->ops->start_signal_voltage_switch)
+	if (host->ops->start_signal_voltage_switch) {
+		mmc_host_clk_hold(host);
 		err = host->ops->start_signal_voltage_switch(host, &host->ios);
+		mmc_host_clk_release(host);
+	}
 
 	return err;
 }
diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
index fb8a5cd2e4a1..08a7852ade44 100644
--- a/drivers/mmc/core/host.h
+++ b/drivers/mmc/core/host.h
@@ -14,27 +14,6 @@
 
 int mmc_register_host_class(void);
 void mmc_unregister_host_class(void);
-
-#ifdef CONFIG_MMC_CLKGATE
-void mmc_host_clk_hold(struct mmc_host *host);
-void mmc_host_clk_release(struct mmc_host *host);
-unsigned int mmc_host_clk_rate(struct mmc_host *host);
-
-#else
-static inline void mmc_host_clk_hold(struct mmc_host *host)
-{
-}
-
-static inline void mmc_host_clk_release(struct mmc_host *host)
-{
-}
-
-static inline unsigned int mmc_host_clk_rate(struct mmc_host *host)
-{
-	return host->ios.clock;
-}
-#endif
-
 void mmc_host_deeper_disable(struct work_struct *work);
 
 #endif
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index c63ad03c29c7..5017f9354ce2 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -451,9 +451,11 @@ static int sd_select_driver_type(struct mmc_card *card, u8 *status)
 	 * information and let the hardware specific code
 	 * return what is possible given the options
 	 */
+	mmc_host_clk_hold(card->host);
 	drive_strength = card->host->ops->select_drive_strength(
 		card->sw_caps.uhs_max_dtr,
 		host_drv_type, card_drv_type);
+	mmc_host_clk_release(card->host);
 
 	err = mmc_sd_switch(card, 1, 2, drive_strength, status);
 	if (err)
@@ -660,9 +662,12 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
 		goto out;
 
 	/* SPI mode doesn't define CMD19 */
-	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning)
+	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning) {
+		mmc_host_clk_hold(card->host);
 		err = card->host->ops->execute_tuning(card->host,
 						      MMC_SEND_TUNING_BLOCK);
+		mmc_host_clk_release(card->host);
+	}
 
 out:
 	kfree(status);
@@ -850,8 +855,11 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card,
 	if (!reinit) {
 		int ro = -1;
 
-		if (host->ops->get_ro)
+		if (host->ops->get_ro) {
+			mmc_host_clk_hold(card->host);
 			ro = host->ops->get_ro(host);
+			mmc_host_clk_release(card->host);
+		}
 
 		if (ro < 0) {
 			pr_warning("%s: host does not "
@@ -967,8 +975,11 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 		 * Since initialization is now complete, enable preset
 		 * value registers for UHS-I cards.
 		 */
-		if (host->ops->enable_preset_value)
+		if (host->ops->enable_preset_value) {
+			mmc_host_clk_hold(card->host);
 			host->ops->enable_preset_value(host, true);
+			mmc_host_clk_release(card->host);
+		}
 	} else {
 		/*
 		 * Attempt to change to high-speed (if supported)
@@ -1151,8 +1162,11 @@ int mmc_attach_sd(struct mmc_host *host)
 		return err;
 
 	/* Disable preset value enable if already set since last time */
-	if (host->ops->enable_preset_value)
+	if (host->ops->enable_preset_value) {
+		mmc_host_clk_hold(host);
 		host->ops->enable_preset_value(host, false);
+		mmc_host_clk_release(host);
+	}
 
 	err = mmc_send_app_op_cond(host, 0, &ocr);
 	if (err)
diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c
index 68f81b9ee0fb..f573e7f9f740 100644
--- a/drivers/mmc/core/sdio_irq.c
+++ b/drivers/mmc/core/sdio_irq.c
@@ -146,15 +146,21 @@ static int sdio_irq_thread(void *_host)
 		}
 
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (host->caps & MMC_CAP_SDIO_IRQ)
+		if (host->caps & MMC_CAP_SDIO_IRQ) {
+			mmc_host_clk_hold(host);
 			host->ops->enable_sdio_irq(host, 1);
+			mmc_host_clk_release(host);
+		}
 		if (!kthread_should_stop())
 			schedule_timeout(period);
 		set_current_state(TASK_RUNNING);
 	} while (!kthread_should_stop());
 
-	if (host->caps & MMC_CAP_SDIO_IRQ)
+	if (host->caps & MMC_CAP_SDIO_IRQ) {
+		mmc_host_clk_hold(host);
 		host->ops->enable_sdio_irq(host, 0);
+		mmc_host_clk_release(host);
+	}
 
 	pr_debug("%s: IRQ thread exiting with code %d\n",
 		 mmc_hostname(host), ret);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0beba1e5e1ed..73e5ee8e9ca2 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -444,4 +444,23 @@ static inline int mmc_boot_partition_access(struct mmc_host *host)
 	return !(host->caps2 & MMC_CAP2_BOOTPART_NOACC);
 }
 
+#ifdef CONFIG_MMC_CLKGATE
+void mmc_host_clk_hold(struct mmc_host *host);
+void mmc_host_clk_release(struct mmc_host *host);
+unsigned int mmc_host_clk_rate(struct mmc_host *host);
+
+#else
+static inline void mmc_host_clk_hold(struct mmc_host *host)
+{
+}
+
+static inline void mmc_host_clk_release(struct mmc_host *host)
+{
+}
+
+static inline unsigned int mmc_host_clk_rate(struct mmc_host *host)
+{
+	return host->ios.clock;
+}
+#endif
 #endif /* LINUX_MMC_HOST_H */
-- 
cgit v1.2.3


From 6e8201f57c9359c9c5dc8f9805c15a4392492a10 Mon Sep 17 00:00:00 2001
From: Jaehoon Chung <jh80.chung@samsung.com>
Date: Mon, 16 Jan 2012 17:49:01 +0900
Subject: mmc: core: add the capability for broken voltage

There is an understood mismatch between the voltage the host controller is
set to and the voltage supplied to the card by a fixed voltage regulator.
Teaching the driver to accept the mismatch is overly complicated.  Instead
just accept the regulator's voltage.

This patch adds MMC_CAP2_BROKEN_VOLTAGE.

If the voltage didn't satisfy between min_uV and max_uV, try to change
the voltage in core.c.  When changing the voltage, maybe use
regulator_set_voltage().

In regulator_set_voltage(), check the below condition.

	/* sanity check */
	if (!rdev->desc->ops->set_voltage &&
	    !rdev->desc->ops->set_voltage_sel) {
		ret = -EINVAL;
		goto out;
	}

If some board should use the fixed-regulator, always return -EINVAL.
Then, eMMC didn't initialize always.

So if use a fixed-regulator, we need to add the MMC_CAP2_BROKEN_VOLTAGE.

Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  | 4 ++++
 include/linux/mmc/host.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index b3063b741df3..18661554e79c 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1131,6 +1131,10 @@ int mmc_regulator_set_ocr(struct mmc_host *mmc,
 		 * might not allow this operation
 		 */
 		voltage = regulator_get_voltage(supply);
+
+		if (mmc->caps2 & MMC_CAP2_BROKEN_VOLTAGE)
+			min_uV = max_uV = voltage;
+
 		if (voltage < 0)
 			result = voltage;
 		else if (voltage < min_uV || voltage > max_uV)
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 73e5ee8e9ca2..ee2b0363c040 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -257,6 +257,7 @@ struct mmc_host {
 #define MMC_CAP2_HS200_1_2V_SDR	(1 << 6)        /* can support */
 #define MMC_CAP2_HS200		(MMC_CAP2_HS200_1_8V_SDR | \
 				 MMC_CAP2_HS200_1_2V_SDR)
+#define MMC_CAP2_BROKEN_VOLTAGE	(1 << 7)	/* Use the broken voltage */
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 	unsigned int        power_notify_type;
-- 
cgit v1.2.3


From 3e73c36b4dc224529d0b0c0d5d69c0dacd793c42 Mon Sep 17 00:00:00 2001
From: Girish K S <girish.shivananjappa@linaro.org>
Date: Tue, 31 Jan 2012 15:44:03 +0530
Subject: mmc: core: Fix PowerOff Notify suspend/resume

Modified the mmc_poweroff to resume before sending the poweroff
notification command. In sleep mode only AWAKE and RESET commands are
allowed, so before sending the poweroff notification command resume from
sleep mode and then send the notification command.

PowerOff Notify is tested on a Synopsis Designware Host Controller
(eMMC 4.5). The suspend to RAM and resume works fine.

Signed-off-by: Girish K S <girish.shivananjappa@linaro.org>
Tested-by: Girish K S <girish.shivananjappa@linaro.org>
Reviewed-by: Saugata Das <saugata.das@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  | 26 +++++++++++++++++++-------
 drivers/mmc/core/mmc.c   | 17 ++++++++++++-----
 include/linux/mmc/card.h |  4 ++++
 3 files changed, 35 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 18661554e79c..690255c7d4dc 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1256,6 +1256,7 @@ static void mmc_poweroff_notify(struct mmc_host *host)
 	int err = 0;
 
 	card = host->card;
+	mmc_claim_host(host);
 
 	/*
 	 * Send power notify command only if card
@@ -1286,6 +1287,7 @@ static void mmc_poweroff_notify(struct mmc_host *host)
 		/* Set the card state to no notification after the poweroff */
 		card->poweroff_notify_state = MMC_NO_POWER_NOTIFICATION;
 	}
+	mmc_release_host(host);
 }
 
 /*
@@ -1344,12 +1346,28 @@ static void mmc_power_up(struct mmc_host *host)
 
 void mmc_power_off(struct mmc_host *host)
 {
+	int err = 0;
 	mmc_host_clk_hold(host);
 
 	host->ios.clock = 0;
 	host->ios.vdd = 0;
 
-	mmc_poweroff_notify(host);
+	/*
+	 * For eMMC 4.5 device send AWAKE command before
+	 * POWER_OFF_NOTIFY command, because in sleep state
+	 * eMMC 4.5 devices respond to only RESET and AWAKE cmd
+	 */
+	if (host->card && mmc_card_is_sleep(host->card) &&
+	    host->bus_ops->resume) {
+		err = host->bus_ops->resume(host);
+
+		if (!err)
+			mmc_poweroff_notify(host);
+		else
+			pr_warning("%s: error %d during resume "
+				   "(continue with poweroff sequence)\n",
+				   mmc_hostname(host), err);
+	}
 
 	/*
 	 * Reset ocr mask to be the highest possible voltage supported for
@@ -2403,12 +2421,6 @@ int mmc_suspend_host(struct mmc_host *host)
 		 */
 		if (mmc_try_claim_host(host)) {
 			if (host->bus_ops->suspend) {
-				/*
-				 * For eMMC 4.5 device send notify command
-				 * before sleep, because in sleep state eMMC 4.5
-				 * devices respond to only RESET and AWAKE cmd
-				 */
-				mmc_poweroff_notify(host);
 				err = host->bus_ops->suspend(host);
 			}
 			mmc_do_release_host(host);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index dd3fcac684a3..9be031934e33 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1316,11 +1316,13 @@ static int mmc_suspend(struct mmc_host *host)
 	BUG_ON(!host->card);
 
 	mmc_claim_host(host);
-	if (mmc_card_can_sleep(host))
+	if (mmc_card_can_sleep(host)) {
 		err = mmc_card_sleep(host);
-	else if (!mmc_host_is_spi(host))
+		if (!err)
+			mmc_card_set_sleep(host->card);
+	} else if (!mmc_host_is_spi(host))
 		mmc_deselect_cards(host);
-	host->card->state &= ~MMC_STATE_HIGHSPEED;
+	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
 	mmc_release_host(host);
 
 	return err;
@@ -1340,7 +1342,11 @@ static int mmc_resume(struct mmc_host *host)
 	BUG_ON(!host->card);
 
 	mmc_claim_host(host);
-	err = mmc_init_card(host, host->ocr, host->card);
+	if (mmc_card_is_sleep(host->card)) {
+		err = mmc_card_awake(host);
+		mmc_card_clr_sleep(host->card);
+	} else
+		err = mmc_init_card(host, host->ocr, host->card);
 	mmc_release_host(host);
 
 	return err;
@@ -1350,7 +1356,8 @@ static int mmc_power_restore(struct mmc_host *host)
 {
 	int ret;
 
-	host->card->state &= ~MMC_STATE_HIGHSPEED;
+	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
+	mmc_card_clr_sleep(host->card);
 	mmc_claim_host(host);
 	ret = mmc_init_card(host, host->ocr, host->card);
 	mmc_release_host(host);
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 9f22ba572de0..19a41d1737af 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -217,6 +217,7 @@ struct mmc_card {
 #define MMC_CARD_SDXC		(1<<6)		/* card is SDXC */
 #define MMC_CARD_REMOVED	(1<<7)		/* card has been removed */
 #define MMC_STATE_HIGHSPEED_200	(1<<8)		/* card is in HS200 mode */
+#define MMC_STATE_SLEEP		(1<<9)		/* card is in sleep state */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -382,6 +383,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_sd_card_uhs(c)	((c)->state & MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
 #define mmc_card_removed(c)	((c) && ((c)->state & MMC_CARD_REMOVED))
+#define mmc_card_is_sleep(c)	((c)->state & MMC_STATE_SLEEP)
 
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
 #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
@@ -393,7 +395,9 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
 #define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED)
+#define mmc_card_set_sleep(c)	((c)->state |= MMC_STATE_SLEEP)
 
+#define mmc_card_clr_sleep(c)	((c)->state &= ~MMC_STATE_SLEEP)
 /*
  * Quirk add/remove for MMC products.
  */
-- 
cgit v1.2.3


From f9c2a0dc42a6938ff2a80e55ca2bbd1d5581c72e Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Thu, 9 Feb 2012 14:32:43 +0900
Subject: mmc: dw_mmc: Fix PIO mode with support of highmem

Current PIO mode makes a kernel crash with CONFIG_HIGHMEM.
Highmem pages have a NULL from sg_virt(sg).
This patch fixes the following problem.

Unable to handle kernel NULL pointer dereference at virtual address 00000000
pgd = c0004000
[00000000] *pgd=00000000
Internal error: Oops: 817 [#1] PREEMPT SMP
Modules linked in:
CPU: 0    Not tainted  (3.0.15-01423-gdbf465f #589)
PC is at dw_mci_pull_data32+0x4c/0x9c
LR is at dw_mci_read_data_pio+0x54/0x1f0
pc : [<c0358824>]    lr : [<c035988c>]    psr: 20000193
sp : c0619d48  ip : c0619d70  fp : c0619d6c
r10: 00000000  r9 : 00000002  r8 : 00001000
r7 : 00000200  r6 : 00000000  r5 : e1dd3100  r4 : 00000000
r3 : 65622023  r2 : 0000007f  r1 : eeb96000  r0 : e1dd3100
Flags: nzCv  IRQs off  FIQs on  Mode SVC_32  ISA ARM  Segment
xkernel
Control: 10c5387d  Table: 61e2004a  DAC: 00000015
Process swapper (pid: 0, stack limit = 0xc06182f0)
Stack: (0xc0619d48 to 0xc061a000)
9d40:                   e1dd3100 e1a4f000 00000000 e1dd3100 e1a4f000 00000200
9d60: c0619da4 c0619d70 c035988c c03587e4 c0619d9c e18158f4 e1dd3100 e1dd3100
9d80: 00000020 00000000 00000000 00000020 c06e8a84 00000000 c0619e04 c0619da8
9da0: c0359b24 c0359844 e18158f4 e1dd3164 e1dd3168 e1dd3150 3d02fc79 e1dd3154
9dc0: e1dd3178 00000000 00000020 00000000 e1dd3150 00000000 c10dd7e8 e1a84900
9de0: c061e7cc 00000000 00000000 0000008d c06e8a84 c061e780 c0619e4c c0619e08
9e00: c00c4738 c0359a34 3d02fc79 00000000 c0619e4c c05a1698 c05a1670 c05a165c
9e20: c04de8b0 c061e780 c061e7cc e1a84900 ffffed68 0000008d c0618000 00000000
9e40: c0619e6c c0619e50 c00c48b4 c00c46c8 c061e780 c00423ac c061e7cc ffffed68
9e60: c0619e8c c0619e70 c00c7358 c00c487c 0000008d ffffee38 c0618000 ffffed68
9e80: c0619ea4 c0619e90 c00c4258 c00c72b0 c00423ac ffffee38 c0619ecc c0619ea8
9ea0: c004241c c00c4234 ffffffff f8810000 0000006d 00000002 00000001 7fffffff
9ec0: c0619f44 c0619ed0 c0048bc0 c00423c4 220ae7a9 00000000 386f0d30 0005d3a4
9ee0: c00423ac c10dd0b8 c06f2cd8 c0618000 c0594778 c003a674 7fffffff c0619f44
9f00: 386f0d30 c0619f18 c00a6f94 c005be3c 80000013 ffffffff 386f0d30 0005d3a4
9f20: 386f0d30 0005d2d1 c10dd0a8 c10dd0b8 c06f2cd8 c0618000 c0619f74 c0619f48
9f40: c0345858 c005be00 c00a2440 c0618000 c0618000 c00410d8 c06c1944 c00410fc
9f60: c0594778 c003a674 c0619f9c c0619f78 c004a7e8 c03457b4 c0618000 c06c18f8
9f80: 00000000 c0039c70 c06c18d4 c003a674 c0619fb4 c0619fa0 c04ceafc c004a714
9fa0: c06287b4 c06c18f8 c0619ff4 c0619fb8 c0008b68 c04cea68 c0008578 00000000
9fc0: 00000000 c003a674 00000000 10c5387d c0628658 c003aa78 c062f1c4 4000406a
9fe0: 413fc090 00000000 00000000 c0619ff8 40008044 c0008858 00000000 00000000
Backtrace:
[<c03587d8>] (dw_mci_pull_data32+0x0/0x9c) from [<c035988c>] (dw_mci_read_data_pio+0x54/0x1f0)
 r6:00000200 r5:e1a4f000 r4:e1dd3100
 [<c0359838>] (dw_mci_read_data_pio+0x0/0x1f0) from [<c0359b24>] (dw_mci_interrupt+0xfc/0x4a4)
[<c0359a28>] (dw_mci_interrupt+0x0/0x4a4) from [<c00c4738>] (handle_irq_event_percpu+0x7c/0x1b4)
[<c00c46bc>] (handle_irq_event_percpu+0x0/0x1b4) from [<c00c48b4>] (handle_irq_event+0x44/0x64)
[<c00c4870>] (handle_irq_event+0x0/0x64) from [<c00c7358>] (handle_fasteoi_irq+0xb4/0x124)
 r7:ffffed68 r6:c061e7cc r5:c00423ac r4:c061e780
 [<c00c72a4>] (handle_fasteoi_irq+0x0/0x124) from [<c00c4258>] (generic_handle_irq+0x30/0x38)
 r7:ffffed68 r6:c0618000 r5:ffffee38 r4:0000008d
 [<c00c4228>] (generic_handle_irq+0x0/0x38) from [<c004241c>] (asm_do_IRQ+0x64/0xe0)
 r5:ffffee38 r4:c00423ac
 [<c00423b8>] (asm_do_IRQ+0x0/0xe0) from [<c0048bc0>] (__irq_svc+0x80/0x14c)
Exception stack(0xc0619ed0 to 0xc0619f18)

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Acked-by: Will Newton <will.newton@imgtec.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/dw_mmc.c  | 144 +++++++++++++++++++++++----------------------
 include/linux/mmc/dw_mmc.h |   6 +-
 2 files changed, 79 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 0e342793ff14..8bec1c36b159 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -22,7 +22,6 @@
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-#include <linux/scatterlist.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
@@ -502,8 +501,14 @@ static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data)
 		host->dir_status = DW_MCI_SEND_STATUS;
 
 	if (dw_mci_submit_data_dma(host, data)) {
+		int flags = SG_MITER_ATOMIC;
+		if (host->data->flags & MMC_DATA_READ)
+			flags |= SG_MITER_TO_SG;
+		else
+			flags |= SG_MITER_FROM_SG;
+
+		sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags);
 		host->sg = data->sg;
-		host->pio_offset = 0;
 		host->part_buf_start = 0;
 		host->part_buf_count = 0;
 
@@ -972,6 +977,7 @@ static void dw_mci_tasklet_func(unsigned long priv)
 				 * generates a block interrupt, hence setting
 				 * the scatter-gather pointer to NULL.
 				 */
+				sg_miter_stop(&host->sg_miter);
 				host->sg = NULL;
 				ctrl = mci_readl(host, CTRL);
 				ctrl |= SDMMC_CTRL_FIFO_RESET;
@@ -1311,54 +1317,44 @@ static void dw_mci_pull_data(struct dw_mci *host, void *buf, int cnt)
 
 static void dw_mci_read_data_pio(struct dw_mci *host)
 {
-	struct scatterlist *sg = host->sg;
-	void *buf = sg_virt(sg);
-	unsigned int offset = host->pio_offset;
+	struct sg_mapping_iter *sg_miter = &host->sg_miter;
+	void *buf;
+	unsigned int offset;
 	struct mmc_data	*data = host->data;
 	int shift = host->data_shift;
 	u32 status;
 	unsigned int nbytes = 0, len;
+	unsigned int remain, fcnt;
 
 	do {
-		len = host->part_buf_count +
-			(SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift);
-		if (offset + len <= sg->length) {
+		if (!sg_miter_next(sg_miter))
+			goto done;
+
+		host->sg = sg_miter->__sg;
+		buf = sg_miter->addr;
+		remain = sg_miter->length;
+		offset = 0;
+
+		do {
+			fcnt = (SDMMC_GET_FCNT(mci_readl(host, STATUS))
+					<< shift) + host->part_buf_count;
+			len = min(remain, fcnt);
+			if (!len)
+				break;
 			dw_mci_pull_data(host, (void *)(buf + offset), len);
-
 			offset += len;
 			nbytes += len;
-
-			if (offset == sg->length) {
-				flush_dcache_page(sg_page(sg));
-				host->sg = sg = sg_next(sg);
-				if (!sg)
-					goto done;
-
-				offset = 0;
-				buf = sg_virt(sg);
-			}
-		} else {
-			unsigned int remaining = sg->length - offset;
-			dw_mci_pull_data(host, (void *)(buf + offset),
-					 remaining);
-			nbytes += remaining;
-
-			flush_dcache_page(sg_page(sg));
-			host->sg = sg = sg_next(sg);
-			if (!sg)
-				goto done;
-
-			offset = len - remaining;
-			buf = sg_virt(sg);
-			dw_mci_pull_data(host, buf, offset);
-			nbytes += offset;
-		}
+			remain -= len;
+		} while (remain);
+		sg_miter->consumed = offset;
 
 		status = mci_readl(host, MINTSTS);
 		mci_writel(host, RINTSTS, SDMMC_INT_RXDR);
 		if (status & DW_MCI_DATA_ERROR_FLAGS) {
 			host->data_status = status;
 			data->bytes_xfered += nbytes;
+			sg_miter_stop(sg_miter);
+			host->sg = NULL;
 			smp_wmb();
 
 			set_bit(EVENT_DATA_ERROR, &host->pending_events);
@@ -1367,65 +1363,66 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
 			return;
 		}
 	} while (status & SDMMC_INT_RXDR); /*if the RXDR is ready read again*/
-	host->pio_offset = offset;
 	data->bytes_xfered += nbytes;
+
+	if (!remain) {
+		if (!sg_miter_next(sg_miter))
+			goto done;
+		sg_miter->consumed = 0;
+	}
+	sg_miter_stop(sg_miter);
 	return;
 
 done:
 	data->bytes_xfered += nbytes;
+	sg_miter_stop(sg_miter);
+	host->sg = NULL;
 	smp_wmb();
 	set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
 }
 
 static void dw_mci_write_data_pio(struct dw_mci *host)
 {
-	struct scatterlist *sg = host->sg;
-	void *buf = sg_virt(sg);
-	unsigned int offset = host->pio_offset;
+	struct sg_mapping_iter *sg_miter = &host->sg_miter;
+	void *buf;
+	unsigned int offset;
 	struct mmc_data	*data = host->data;
 	int shift = host->data_shift;
 	u32 status;
 	unsigned int nbytes = 0, len;
+	unsigned int fifo_depth = host->fifo_depth;
+	unsigned int remain, fcnt;
 
 	do {
-		len = ((host->fifo_depth -
-			SDMMC_GET_FCNT(mci_readl(host, STATUS))) << shift)
-			- host->part_buf_count;
-		if (offset + len <= sg->length) {
+		if (!sg_miter_next(sg_miter))
+			goto done;
+
+		host->sg = sg_miter->__sg;
+		buf = sg_miter->addr;
+		remain = sg_miter->length;
+		offset = 0;
+
+		do {
+			fcnt = ((fifo_depth -
+				 SDMMC_GET_FCNT(mci_readl(host, STATUS)))
+					<< shift) - host->part_buf_count;
+			len = min(remain, fcnt);
+			if (!len)
+				break;
 			host->push_data(host, (void *)(buf + offset), len);
-
 			offset += len;
 			nbytes += len;
-			if (offset == sg->length) {
-				host->sg = sg = sg_next(sg);
-				if (!sg)
-					goto done;
-
-				offset = 0;
-				buf = sg_virt(sg);
-			}
-		} else {
-			unsigned int remaining = sg->length - offset;
-
-			host->push_data(host, (void *)(buf + offset),
-					remaining);
-			nbytes += remaining;
-
-			host->sg = sg = sg_next(sg);
-			if (!sg)
-				goto done;
-
-			offset = len - remaining;
-			buf = sg_virt(sg);
-			host->push_data(host, (void *)buf, offset);
-			nbytes += offset;
-		}
+			remain -= len;
+		} while (remain);
+		sg_miter->consumed = offset;
 
 		status = mci_readl(host, MINTSTS);
 		mci_writel(host, RINTSTS, SDMMC_INT_TXDR);
 		if (status & DW_MCI_DATA_ERROR_FLAGS) {
 			host->data_status = status;
 			data->bytes_xfered += nbytes;
+			sg_miter_stop(sg_miter);
+			host->sg = NULL;
 
 			smp_wmb();
 
@@ -1435,12 +1432,20 @@ static void dw_mci_write_data_pio(struct dw_mci *host)
 			return;
 		}
 	} while (status & SDMMC_INT_TXDR); /* if TXDR write again */
-	host->pio_offset = offset;
 	data->bytes_xfered += nbytes;
+
+	if (!remain) {
+		if (!sg_miter_next(sg_miter))
+			goto done;
+		sg_miter->consumed = 0;
+	}
+	sg_miter_stop(sg_miter);
 	return;
 
 done:
 	data->bytes_xfered += nbytes;
+	sg_miter_stop(sg_miter);
+	host->sg = NULL;
 	smp_wmb();
 	set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
 }
@@ -1643,6 +1648,7 @@ static void dw_mci_work_routine_card(struct work_struct *work)
 				 * block interrupt, hence setting the
 				 * scatter-gather pointer to NULL.
 				 */
+				sg_miter_stop(&host->sg_miter);
 				host->sg = NULL;
 
 				ctrl = mci_readl(host, CTRL);
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index e8779c6d1759..aae5d1f1bb39 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -14,6 +14,8 @@
 #ifndef LINUX_MMC_DW_MMC_H
 #define LINUX_MMC_DW_MMC_H
 
+#include <linux/scatterlist.h>
+
 #define MAX_MCI_SLOTS	2
 
 enum dw_mci_state {
@@ -40,7 +42,7 @@ struct mmc_data;
  * @lock: Spinlock protecting the queue and associated data.
  * @regs: Pointer to MMIO registers.
  * @sg: Scatterlist entry currently being processed by PIO code, if any.
- * @pio_offset: Offset into the current scatterlist entry.
+ * @sg_miter: PIO mapping scatterlist iterator.
  * @cur_slot: The slot which is currently using the controller.
  * @mrq: The request currently being processed on @cur_slot,
  *	or NULL if the controller is idle.
@@ -115,7 +117,7 @@ struct dw_mci {
 	void __iomem		*regs;
 
 	struct scatterlist	*sg;
-	unsigned int		pio_offset;
+	struct sg_mapping_iter	sg_miter;
 
 	struct dw_mci_slot	*cur_slot;
 	struct mmc_request	*mrq;
-- 
cgit v1.2.3


From 6b6dc836a195e077e76977b6c020a73de411b46d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 10 Feb 2012 11:03:00 +0100
Subject: vfs: Provide function to get superblock and wait for it to thaw

In quota code we need to find a superblock corresponding to a device and wait
for superblock to be unfrozen. However this waiting has to happen without
s_umount semaphore because that is required for superblock to thaw. So provide
a function in VFS for this to keep dances with s_umount where they belong.

[AV: implementation switched to saner variant]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c         | 22 ++++++++++++++++++++++
 include/linux/fs.h |  1 +
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/fs/super.c b/fs/super.c
index 6015c02296b7..6277ec6cb60a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -633,6 +633,28 @@ rescan:
 
 EXPORT_SYMBOL(get_super);
 
+/**
+ *	get_super_thawed - get thawed superblock of a device
+ *	@bdev: device to get the superblock for
+ *
+ *	Scans the superblock list and finds the superblock of the file system
+ *	mounted on the device. The superblock is returned once it is thawed
+ *	(or immediately if it was not frozen). %NULL is returned if no match
+ *	is found.
+ */
+struct super_block *get_super_thawed(struct block_device *bdev)
+{
+	while (1) {
+		struct super_block *s = get_super(bdev);
+		if (!s || s->s_frozen == SB_UNFROZEN)
+			return s;
+		up_read(&s->s_umount);
+		vfs_check_frozen(s, SB_FREEZE_WRITE);
+		put_super(s);
+	}
+}
+EXPORT_SYMBOL(get_super_thawed);
+
 /**
  * get_active_super - get an active reference to the superblock of a device
  * @bdev: device to get the superblock for
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 386da09f229d..69cd5bb640f5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2496,6 +2496,7 @@ extern void get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern struct super_block *get_super(struct block_device *);
+extern struct super_block *get_super_thawed(struct block_device *);
 extern struct super_block *get_active_super(struct block_device *bdev);
 extern void drop_super(struct super_block *sb);
 extern void iterate_supers(void (*)(struct super_block *, void *), void *);
-- 
cgit v1.2.3


From f2ea0f5f04c97b48c88edccba52b0682fbe45087 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 14 Jan 2012 21:44:49 +0300
Subject: crypto: sha512 - use standard ror64()

Use standard ror64() instead of hand-written.
There is no standard ror64, so create it.

The difference is shift value being "unsigned int" instead of uint64_t
(for which there is no reason). gcc starts to emit native ROR instructions
which it doesn't do for some reason currently. This should make the code
faster.

Patch survives in-tree crypto test and ping flood with hmac(sha512) on.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/sha512_generic.c | 13 ++++---------
 include/linux/bitops.h  | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index f04af931a682..107f6f7be5e1 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -31,11 +31,6 @@ static inline u64 Maj(u64 x, u64 y, u64 z)
         return (x & y) | (z & (x | y));
 }
 
-static inline u64 RORu64(u64 x, u64 y)
-{
-        return (x >> y) | (x << (64 - y));
-}
-
 static const u64 sha512_K[80] = {
         0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL,
         0xe9b5dba58189dbbcULL, 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
@@ -66,10 +61,10 @@ static const u64 sha512_K[80] = {
         0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL,
 };
 
-#define e0(x)       (RORu64(x,28) ^ RORu64(x,34) ^ RORu64(x,39))
-#define e1(x)       (RORu64(x,14) ^ RORu64(x,18) ^ RORu64(x,41))
-#define s0(x)       (RORu64(x, 1) ^ RORu64(x, 8) ^ (x >> 7))
-#define s1(x)       (RORu64(x,19) ^ RORu64(x,61) ^ (x >> 6))
+#define e0(x)       (ror64(x,28) ^ ror64(x,34) ^ ror64(x,39))
+#define e1(x)       (ror64(x,14) ^ ror64(x,18) ^ ror64(x,41))
+#define s0(x)       (ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7))
+#define s1(x)       (ror64(x,19) ^ ror64(x,61) ^ (x >> 6))
 
 static inline void LOAD_OP(int I, u64 *W, const u8 *input)
 {
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a3ef66a2a083..fc8a3ffce320 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -49,6 +49,26 @@ static inline unsigned long hweight_long(unsigned long w)
 	return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
 }
 
+/**
+ * rol64 - rotate a 64-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u64 rol64(__u64 word, unsigned int shift)
+{
+	return (word << shift) | (word >> (64 - shift));
+}
+
+/**
+ * ror64 - rotate a 64-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u64 ror64(__u64 word, unsigned int shift)
+{
+	return (word >> shift) | (word << (64 - shift));
+}
+
 /**
  * rol32 - rotate a 32-bit value left
  * @word: value to rotate
-- 
cgit v1.2.3


From 59cca653a601372e9b4a430d867377a3e4a36d76 Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Date: Thu, 2 Feb 2012 10:46:49 +0200
Subject: digsig: changed type of the timestamp

time_t was used in the signature and key packet headers,
which is typedef of long and is different on 32 and 64 bit architectures.
Signature and key format should be independent of architecture.
Similar to GPG, I have changed the type to uint32_t.

Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/digsig.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/digsig.h b/include/linux/digsig.h
index b01558b15814..6f85a070bb45 100644
--- a/include/linux/digsig.h
+++ b/include/linux/digsig.h
@@ -30,7 +30,7 @@ enum digest_algo {
 
 struct pubkey_hdr {
 	uint8_t		version;	/* key format version */
-	time_t		timestamp;	/* key made, always 0 for now */
+	uint32_t	timestamp;	/* key made, always 0 for now */
 	uint8_t		algo;
 	uint8_t		nmpi;
 	char		mpi[0];
@@ -38,7 +38,7 @@ struct pubkey_hdr {
 
 struct signature_hdr {
 	uint8_t		version;	/* signature format version */
-	time_t		timestamp;	/* signature made */
+	uint32_t	timestamp;	/* signature made */
 	uint8_t		algo;
 	uint8_t		hash;
 	uint8_t		keyid[8];
-- 
cgit v1.2.3


From 88ba136d6635b262f77cc418d536115fb8e4d4ab Mon Sep 17 00:00:00 2001
From: Joerg Willmann <joe@clnt.de>
Date: Tue, 21 Feb 2012 13:26:14 +0100
Subject: netfilter: ebtables: fix alignment problem in ppc

ebt_among extension of ebtables uses __alignof__(_xt_align) while the
corresponding kernel module uses __alignof__(ebt_replace) to determine
the alignment in EBT_ALIGN().

These are the results of these values on different platforms:

x86 x86_64 ppc
__alignof__(_xt_align) 4 8 8
__alignof__(ebt_replace) 4 8 4

ebtables fails to add rules which use the among extension.

I'm using kernel 2.6.33 and ebtables 2.0.10-4

According to Bart De Schuymer, userspace alignment was changed to
_xt_align to fix an alignment issue on a userspace32-kernel64 system
(he thinks it was for an ARM device). So userspace must be right.
The kernel alignment macro needs to change so it also uses _xt_align
instead of ebt_replace. The userspace changes date back from
June 29, 2009.

Signed-off-by: Joerg Willmann <joe@clnt.de>
Signed-off by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge/ebtables.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 8797ed16feb2..4dd5bd6994a8 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -285,8 +285,8 @@ struct ebt_table {
 	struct module *me;
 };
 
-#define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \
-		     ~(__alignof__(struct ebt_replace)-1))
+#define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \
+		     ~(__alignof__(struct _xt_align)-1))
 extern struct ebt_table *ebt_register_table(struct net *net,
 					    const struct ebt_table *table);
 extern void ebt_unregister_table(struct net *net, struct ebt_table *table);
-- 
cgit v1.2.3


From 115c9b81928360d769a76c632bae62d15206a94a Mon Sep 17 00:00:00 2001
From: Greg Rose <gregory.v.rose@intel.com>
Date: Tue, 21 Feb 2012 16:54:48 -0500
Subject: rtnetlink: Fix problem with buffer allocation

Implement a new netlink attribute type IFLA_EXT_MASK.  The mask
is a 32 bit value that can be used to indicate to the kernel that
certain extended ifinfo values are requested by the user application.
At this time the only mask value defined is RTEXT_FILTER_VF to
indicate that the user wants the ifinfo dump to send information
about the VFs belonging to the interface.

This patch fixes a bug in which certain applications do not have
large enough buffers to accommodate the extra information returned
by the kernel with large numbers of SR-IOV virtual functions.
Those applications will not send the new netlink attribute with
the interface info dump request netlink messages so they will
not get unexpectedly large request buffers returned by the kernel.

Modifies the rtnl_calcit function to traverse the list of net
devices and compute the minimum buffer size that can hold the
info dumps of all matching devices based upon the filter passed
in via the new netlink attribute filter mask.  If no filter
mask is sent then the buffer allocation defaults to NLMSG_GOODSIZE.

With this change it is possible to add yet to be defined netlink
attributes to the dump request which should make it fairly extensible
in the future.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h   |  1 +
 include/linux/rtnetlink.h |  3 ++
 include/net/rtnetlink.h   |  2 +-
 net/core/rtnetlink.c      | 78 ++++++++++++++++++++++++++++++++++-------------
 4 files changed, 62 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index c52d4b5f872a..4b24ff453aee 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -137,6 +137,7 @@ enum {
 	IFLA_AF_SPEC,
 	IFLA_GROUP,		/* Group the device belongs to */
 	IFLA_NET_NS_FD,
+	IFLA_EXT_MASK,		/* Extended info mask, VFs, etc */
 	__IFLA_MAX
 };
 
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 8e872ead88b5..577592ea0ea0 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -602,6 +602,9 @@ struct tcamsg {
 #define TCA_ACT_TAB 1 /* attr type must be >=1 */	
 #define TCAA_MAX 1
 
+/* New extended info filters for IFLA_EXT_MASK */
+#define RTEXT_FILTER_VF		(1 << 0)
+
 /* End of information exported to user level */
 
 #ifdef __KERNEL__
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 678f1ffaf843..370293901971 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -6,7 +6,7 @@
 
 typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, void *);
 typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
-typedef u16 (*rtnl_calcit_func)(struct sk_buff *);
+typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *);
 
 extern int	__rtnl_register(int protocol, int msgtype,
 				rtnl_doit_func, rtnl_dumpit_func,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 65aebd450027..606a6e8f3671 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -60,7 +60,6 @@ struct rtnl_link {
 };
 
 static DEFINE_MUTEX(rtnl_mutex);
-static u16 min_ifinfo_dump_size;
 
 void rtnl_lock(void)
 {
@@ -724,10 +723,11 @@ static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
 }
 
 /* All VF info */
-static inline int rtnl_vfinfo_size(const struct net_device *dev)
+static inline int rtnl_vfinfo_size(const struct net_device *dev,
+				   u32 ext_filter_mask)
 {
-	if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
-
+	if (dev->dev.parent && dev_is_pci(dev->dev.parent) &&
+	    (ext_filter_mask & RTEXT_FILTER_VF)) {
 		int num_vfs = dev_num_vf(dev->dev.parent);
 		size_t size = nla_total_size(sizeof(struct nlattr));
 		size += nla_total_size(num_vfs * sizeof(struct nlattr));
@@ -766,7 +766,8 @@ static size_t rtnl_port_size(const struct net_device *dev)
 		return port_self_size;
 }
 
-static noinline size_t if_nlmsg_size(const struct net_device *dev)
+static noinline size_t if_nlmsg_size(const struct net_device *dev,
+				     u32 ext_filter_mask)
 {
 	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
 	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
@@ -784,8 +785,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev)
 	       + nla_total_size(4) /* IFLA_MASTER */
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
-	       + nla_total_size(4) /* IFLA_NUM_VF */
-	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
+	       + nla_total_size(ext_filter_mask
+			        & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+	       + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
 	       + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
 	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
 	       + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
@@ -868,7 +870,7 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
 
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    int type, u32 pid, u32 seq, u32 change,
-			    unsigned int flags)
+			    unsigned int flags, u32 ext_filter_mask)
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
@@ -941,10 +943,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		goto nla_put_failure;
 	copy_rtnl_link_stats64(nla_data(attr), stats);
 
-	if (dev->dev.parent)
+	if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF))
 		NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
 
-	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
+	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
+	    && (ext_filter_mask & RTEXT_FILTER_VF)) {
 		int i;
 
 		struct nlattr *vfinfo, *vf;
@@ -1048,6 +1051,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net_device *dev;
 	struct hlist_head *head;
 	struct hlist_node *node;
+	struct nlattr *tb[IFLA_MAX+1];
+	u32 ext_filter_mask = 0;
 
 	s_h = cb->args[0];
 	s_idx = cb->args[1];
@@ -1055,6 +1060,12 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	rcu_read_lock();
 	cb->seq = net->dev_base_seq;
 
+	nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
+		    ifla_policy);
+
+	if (tb[IFLA_EXT_MASK])
+		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
 		head = &net->dev_index_head[h];
@@ -1064,7 +1075,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 			if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
 					     NETLINK_CB(cb->skb).pid,
 					     cb->nlh->nlmsg_seq, 0,
-					     NLM_F_MULTI) <= 0)
+					     NLM_F_MULTI,
+					     ext_filter_mask) <= 0)
 				goto out;
 
 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -1100,6 +1112,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
 	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
 	[IFLA_AF_SPEC]		= { .type = NLA_NESTED },
+	[IFLA_EXT_MASK]		= { .type = NLA_U32 },
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -1509,8 +1522,6 @@ errout:
 
 	if (send_addr_notify)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
-	min_ifinfo_dump_size = max_t(u16, if_nlmsg_size(dev),
-				     min_ifinfo_dump_size);
 
 	return err;
 }
@@ -1842,6 +1853,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	struct net_device *dev = NULL;
 	struct sk_buff *nskb;
 	int err;
+	u32 ext_filter_mask = 0;
 
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
 	if (err < 0)
@@ -1850,6 +1862,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (tb[IFLA_IFNAME])
 		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
 
+	if (tb[IFLA_EXT_MASK])
+		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+
 	ifm = nlmsg_data(nlh);
 	if (ifm->ifi_index > 0)
 		dev = __dev_get_by_index(net, ifm->ifi_index);
@@ -1861,12 +1876,12 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (dev == NULL)
 		return -ENODEV;
 
-	nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
+	nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
 	if (nskb == NULL)
 		return -ENOBUFS;
 
 	err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid,
-			       nlh->nlmsg_seq, 0, 0);
+			       nlh->nlmsg_seq, 0, 0, ext_filter_mask);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size */
 		WARN_ON(err == -EMSGSIZE);
@@ -1877,8 +1892,31 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	return err;
 }
 
-static u16 rtnl_calcit(struct sk_buff *skb)
+static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
+	struct net *net = sock_net(skb->sk);
+	struct net_device *dev;
+	struct nlattr *tb[IFLA_MAX+1];
+	u32 ext_filter_mask = 0;
+	u16 min_ifinfo_dump_size = 0;
+
+	nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, ifla_policy);
+
+	if (tb[IFLA_EXT_MASK])
+		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+
+	if (!ext_filter_mask)
+		return NLMSG_GOODSIZE;
+	/*
+	 * traverse the list of net devices and compute the minimum
+	 * buffer size based upon the filter mask.
+	 */
+	list_for_each_entry(dev, &net->dev_base_head, dev_list) {
+		min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
+					     if_nlmsg_size(dev,
+						           ext_filter_mask));
+	}
+
 	return min_ifinfo_dump_size;
 }
 
@@ -1913,13 +1951,11 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 	int err = -ENOBUFS;
 	size_t if_info_size;
 
-	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL);
+	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
 
-	min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size);
-
-	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0);
+	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -1977,7 +2013,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			return -EOPNOTSUPP;
 		calcit = rtnl_get_calcit(family, type);
 		if (calcit)
-			min_dump_alloc = calcit(skb);
+			min_dump_alloc = calcit(skb, nlh);
 
 		__rtnl_unlock();
 		rtnl = net->rtnl;
-- 
cgit v1.2.3


From faf309009e2e18d30c032b7d9479f29b91677c37 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 21 Feb 2012 17:24:20 -0800
Subject: sys_poll: fix incorrect type for 'timeout' parameter

The 'poll()' system call timeout parameter is supposed to be 'int', not
'long'.

Now, the reason this matters is that right now 32-bit compat mode is
broken on at least x86-64, because the 32-bit code just calls
'sys_poll()' directly on x86-64, and the 32-bit argument will have been
zero-extended, turning a signed 'int' into a large unsigned 'long'
value.

We could just introduce a 'compat_sys_poll()' function for this, and
that may eventually be what we have to do, but since the actual standard
poll() semantics is *supposed* to be 'int', and since at least on x86-64
glibc sign-extends the argument before invocing the system call (so
nobody can actually use a 64-bit timeout value in user space _anyway_,
even in 64-bit binaries), the simpler solution would seem to be to just
fix the definition of the system call to match what it should have been
from the very start.

If it turns out that somebody somehow circumvents the user-level libc
64-bit sign extension and actually uses a large unsigned 64-bit timeout
despite that not being how poll() is supposed to work, we will need to
do the compat_sys_poll() approach.

Reported-by: Thomas Meyer <thomas@m3y3r.de>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/kernel/compat_wrapper.S | 2 +-
 fs/select.c                       | 2 +-
 include/linux/syscalls.h          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 18c51df9fe06..ff605a39cf43 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -662,7 +662,7 @@ ENTRY(sys32_getresuid16_wrapper)
 ENTRY(sys32_poll_wrapper)
 	llgtr	%r2,%r2			# struct pollfd *
 	llgfr	%r3,%r3			# unsigned int
-	lgfr	%r4,%r4			# long
+	lgfr	%r4,%r4			# int
 	jg	sys_poll		# branch to system call
 
 ENTRY(sys32_setresgid16_wrapper)
diff --git a/fs/select.c b/fs/select.c
index d33418fdc858..e782258d0de3 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -912,7 +912,7 @@ static long do_restart_poll(struct restart_block *restart_block)
 }
 
 SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
-		long, timeout_msecs)
+		int, timeout_msecs)
 {
 	struct timespec end_time, *to = NULL;
 	int ret;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 515669fa3c1d..8ec1153ff57b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -624,7 +624,7 @@ asmlinkage long sys_socketpair(int, int, int, int __user *);
 asmlinkage long sys_socketcall(int call, unsigned long __user *args);
 asmlinkage long sys_listen(int, int);
 asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
-				long timeout);
+				int timeout);
 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			fd_set __user *exp, struct timeval __user *tvp);
 asmlinkage long sys_old_select(struct sel_arg_struct __user *arg);
-- 
cgit v1.2.3


From 03606895cd98c0a628b17324fd7b5ff15db7e3cd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 23 Feb 2012 10:55:02 +0000
Subject: ipsec: be careful of non existing mac headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Niccolo Belli reported ipsec crashes in case we handle a frame without
mac header (atm in his case)

Before copying mac header, better make sure it is present.

Bugzilla reference:  https://bugzilla.kernel.org/show_bug.cgi?id=42809

Reported-by: Niccolò Belli <darkbasic@linuxsystems.it>
Tested-by: Niccolò Belli <darkbasic@linuxsystems.it>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h       | 10 ++++++++++
 net/ipv4/xfrm4_mode_beet.c   |  5 +----
 net/ipv4/xfrm4_mode_tunnel.c |  6 ++----
 net/ipv6/xfrm6_mode_beet.c   |  6 +-----
 net/ipv6/xfrm6_mode_tunnel.c |  6 ++----
 5 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 50db9b04a552..ae86adee3746 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1465,6 +1465,16 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
 }
 #endif /* NET_SKBUFF_DATA_USES_OFFSET */
 
+static inline void skb_mac_header_rebuild(struct sk_buff *skb)
+{
+	if (skb_mac_header_was_set(skb)) {
+		const unsigned char *old_mac = skb_mac_header(skb);
+
+		skb_set_mac_header(skb, -skb->mac_len);
+		memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+	}
+}
+
 static inline int skb_checksum_start_offset(const struct sk_buff *skb)
 {
 	return skb->csum_start - skb_headroom(skb);
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 63418185f524..e3db3f915114 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -110,10 +110,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_push(skb, sizeof(*iph));
 	skb_reset_network_header(skb);
-
-	memmove(skb->data - skb->mac_len, skb_mac_header(skb),
-		skb->mac_len);
-	skb_set_mac_header(skb, -skb->mac_len);
+	skb_mac_header_rebuild(skb);
 
 	xfrm4_beet_make_header(skb);
 
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 534972e114ac..ed4bf11ef9f4 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -66,7 +66,6 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	const unsigned char *old_mac;
 	int err = -EINVAL;
 
 	if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
@@ -84,10 +83,9 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!(x->props.flags & XFRM_STATE_NOECN))
 		ipip_ecn_decapsulate(skb);
 
-	old_mac = skb_mac_header(skb);
-	skb_set_mac_header(skb, -skb->mac_len);
-	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
 	skb_reset_network_header(skb);
+	skb_mac_header_rebuild(skb);
+
 	err = 0;
 
 out:
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index a81ce9450750..9949a356d62c 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -80,7 +80,6 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct ipv6hdr *ip6h;
-	const unsigned char *old_mac;
 	int size = sizeof(struct ipv6hdr);
 	int err;
 
@@ -90,10 +89,7 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	__skb_push(skb, size);
 	skb_reset_network_header(skb);
-
-	old_mac = skb_mac_header(skb);
-	skb_set_mac_header(skb, -skb->mac_len);
-	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+	skb_mac_header_rebuild(skb);
 
 	xfrm6_beet_make_header(skb);
 
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 261e6e6f487e..9f2095b19ad0 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -63,7 +63,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err = -EINVAL;
-	const unsigned char *old_mac;
 
 	if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
 		goto out;
@@ -80,10 +79,9 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!(x->props.flags & XFRM_STATE_NOECN))
 		ipip6_ecn_decapsulate(skb);
 
-	old_mac = skb_mac_header(skb);
-	skb_set_mac_header(skb, -skb->mac_len);
-	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
 	skb_reset_network_header(skb);
+	skb_mac_header_rebuild(skb);
+
 	err = 0;
 
 out:
-- 
cgit v1.2.3


From d80e731ecab420ddcb79ee9d0ac427acbc187b4b Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 24 Feb 2012 20:07:11 +0100
Subject: epoll: introduce POLLFREE to flush ->signalfd_wqh before kfree()

This patch is intentionally incomplete to simplify the review.
It ignores ep_unregister_pollwait() which plays with the same wqh.
See the next change.

epoll assumes that the EPOLL_CTL_ADD'ed file controls everything
f_op->poll() needs. In particular it assumes that the wait queue
can't go away until eventpoll_release(). This is not true in case
of signalfd, the task which does EPOLL_CTL_ADD uses its ->sighand
which is not connected to the file.

This patch adds the special event, POLLFREE, currently only for
epoll. It expects that init_poll_funcptr()'ed hook should do the
necessary cleanup. Perhaps it should be defined as EPOLLFREE in
eventpoll.

__cleanup_sighand() is changed to do wake_up_poll(POLLFREE) if
->signalfd_wqh is not empty, we add the new signalfd_cleanup()
helper.

ep_poll_callback(POLLFREE) simply does list_del_init(task_list).
This make this poll entry inconsistent, but we don't care. If you
share epoll fd which contains our sigfd with another process you
should blame yourself. signalfd is "really special". I simply do
not know how we can define the "right" semantics if it used with
epoll.

The main problem is, epoll calls signalfd_poll() once to establish
the connection with the wait queue, after that signalfd_poll(NULL)
returns the different/inconsistent results depending on who does
EPOLL_CTL_MOD/signalfd_read/etc. IOW: apart from sigmask, signalfd
has nothing to do with the file, it works with the current thread.

In short: this patch is the hack which tries to fix the symptoms.
It also assumes that nobody can take tasklist_lock under epoll
locks, this seems to be true.

Note:

	- we do not have wake_up_all_poll() but wake_up_poll()
	  is fine, poll/epoll doesn't use WQ_FLAG_EXCLUSIVE.

	- signalfd_cleanup() uses POLLHUP along with POLLFREE,
	  we need a couple of simple changes in eventpoll.c to
	  make sure it can't be "lost".

Reported-by: Maxime Bizon <mbizon@freebox.fr>
Cc: <stable@kernel.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventpoll.c             |  4 ++++
 fs/signalfd.c              | 11 +++++++++++
 include/asm-generic/poll.h |  2 ++
 include/linux/signalfd.h   |  5 ++++-
 kernel/fork.c              |  5 ++++-
 5 files changed, 25 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index aabdfc38cf24..34bbfc6dd8dc 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -842,6 +842,10 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 	struct epitem *epi = ep_item_from_wait(wait);
 	struct eventpoll *ep = epi->ep;
 
+	/* the caller holds eppoll_entry->whead->lock */
+	if ((unsigned long)key & POLLFREE)
+		list_del_init(&wait->task_list);
+
 	spin_lock_irqsave(&ep->lock, flags);
 
 	/*
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 492465b451dd..79c1eea98a3a 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -30,6 +30,17 @@
 #include <linux/signalfd.h>
 #include <linux/syscalls.h>
 
+void signalfd_cleanup(struct sighand_struct *sighand)
+{
+	wait_queue_head_t *wqh = &sighand->signalfd_wqh;
+
+	if (likely(!waitqueue_active(wqh)))
+		return;
+
+	/* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
+	wake_up_poll(wqh, POLLHUP | POLLFREE);
+}
+
 struct signalfd_ctx {
 	sigset_t sigmask;
 };
diff --git a/include/asm-generic/poll.h b/include/asm-generic/poll.h
index 44bce836d350..9ce7f44aebd2 100644
--- a/include/asm-generic/poll.h
+++ b/include/asm-generic/poll.h
@@ -28,6 +28,8 @@
 #define POLLRDHUP       0x2000
 #endif
 
+#define POLLFREE	0x4000	/* currently only for epoll */
+
 struct pollfd {
 	int fd;
 	short events;
diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h
index 3ff4961da9b5..247399b2979a 100644
--- a/include/linux/signalfd.h
+++ b/include/linux/signalfd.h
@@ -61,13 +61,16 @@ static inline void signalfd_notify(struct task_struct *tsk, int sig)
 		wake_up(&tsk->sighand->signalfd_wqh);
 }
 
+extern void signalfd_cleanup(struct sighand_struct *sighand);
+
 #else /* CONFIG_SIGNALFD */
 
 static inline void signalfd_notify(struct task_struct *tsk, int sig) { }
 
+static inline void signalfd_cleanup(struct sighand_struct *sighand) { }
+
 #endif /* CONFIG_SIGNALFD */
 
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SIGNALFD_H */
-
diff --git a/kernel/fork.c b/kernel/fork.c
index b77fd559c78e..e2cd3e2a5ae8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -66,6 +66,7 @@
 #include <linux/user-return-notifier.h>
 #include <linux/oom.h>
 #include <linux/khugepaged.h>
+#include <linux/signalfd.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -935,8 +936,10 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 
 void __cleanup_sighand(struct sighand_struct *sighand)
 {
-	if (atomic_dec_and_test(&sighand->count))
+	if (atomic_dec_and_test(&sighand->count)) {
+		signalfd_cleanup(sighand);
 		kmem_cache_free(sighand_cachep, sighand);
+	}
 }
 
 
-- 
cgit v1.2.3


From 3c761ea05a8900a907f32b628611873f6bef24b2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 26 Feb 2012 09:44:55 -0800
Subject: Fix autofs compile without CONFIG_COMPAT

The autofs compat handling fix caused a compile failure when
CONFIG_COMPAT isn't defined.

Instead of adding random #ifdef'fery in autofs, let's just make the
compat helpers earlier to use: without CONFIG_COMPAT, is_compat_task()
just hardcodes to zero.

We could probably do something similar for a number of other cases where
we have #ifdef's in code, but this is the low-hanging fruit.

Reported-and-tested-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compat.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 41c9f6515f46..7e05fcee75a1 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -561,5 +561,9 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
 		unsigned long liovcnt, const struct compat_iovec __user *rvec,
 		unsigned long riovcnt, unsigned long flags);
 
+#else
+
+#define is_compat_task() (0)
+
 #endif /* CONFIG_COMPAT */
 #endif /* _LINUX_COMPAT_H */
-- 
cgit v1.2.3


From c8e252586f8d5de906385d8cf6385fee289a825e Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 2 Mar 2012 10:43:48 -0800
Subject: regset: Prevent null pointer reference on readonly regsets

The regset common infrastructure assumed that regsets would always
have .get and .set methods, but not necessarily .active methods.
Unfortunately people have since written regsets without .set methods.

Rather than putting in stub functions everywhere, handle regsets with
null .get or .set methods explicitly.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@hack.frob.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c        | 2 +-
 include/linux/regset.h | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index bcb884e2d613..07d096c49920 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1421,7 +1421,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 	for (i = 1; i < view->n; ++i) {
 		const struct user_regset *regset = &view->regsets[i];
 		do_thread_regset_writeback(t->task, regset);
-		if (regset->core_note_type &&
+		if (regset->core_note_type && regset->get &&
 		    (!regset->active || regset->active(t->task, regset))) {
 			int ret;
 			size_t size = regset->n * regset->size;
diff --git a/include/linux/regset.h b/include/linux/regset.h
index 8abee6556223..5150fd16ef93 100644
--- a/include/linux/regset.h
+++ b/include/linux/regset.h
@@ -335,6 +335,9 @@ static inline int copy_regset_to_user(struct task_struct *target,
 {
 	const struct user_regset *regset = &view->regsets[setno];
 
+	if (!regset->get)
+		return -EOPNOTSUPP;
+
 	if (!access_ok(VERIFY_WRITE, data, size))
 		return -EIO;
 
@@ -358,6 +361,9 @@ static inline int copy_regset_from_user(struct task_struct *target,
 {
 	const struct user_regset *regset = &view->regsets[setno];
 
+	if (!regset->set)
+		return -EOPNOTSUPP;
+
 	if (!access_ok(VERIFY_READ, data, size))
 		return -EIO;
 
-- 
cgit v1.2.3


From 5189fa19a4b2b4c3bec37c3a019d446148827717 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 2 Mar 2012 10:43:49 -0800
Subject: regset: Return -EFAULT, not -EIO, on host-side memory fault

There is only one error code to return for a bad user-space buffer
pointer passed to a system call in the same address space as the
system call is executed, and that is EFAULT.  Furthermore, the
low-level access routines, which catch most of the faults, return
EFAULT already.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@hack.frob.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/regset.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/regset.h b/include/linux/regset.h
index 5150fd16ef93..686f37327a49 100644
--- a/include/linux/regset.h
+++ b/include/linux/regset.h
@@ -339,7 +339,7 @@ static inline int copy_regset_to_user(struct task_struct *target,
 		return -EOPNOTSUPP;
 
 	if (!access_ok(VERIFY_WRITE, data, size))
-		return -EIO;
+		return -EFAULT;
 
 	return regset->get(target, regset, offset, size, NULL, data);
 }
@@ -365,7 +365,7 @@ static inline int copy_regset_from_user(struct task_struct *target,
 		return -EOPNOTSUPP;
 
 	if (!access_ok(VERIFY_READ, data, size))
-		return -EIO;
+		return -EFAULT;
 
 	return regset->set(target, regset, offset, size, NULL, data);
 }
-- 
cgit v1.2.3


From 8966be90304b394fd6a2c5af7b6b3abe2df3889c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 2 Mar 2012 14:23:30 -0800
Subject: vfs: trivial __d_lookup_rcu() cleanups

These don't change any semantics, but they clean up the code a bit and
mark some arguments appropriately 'const'.

They came up as I was doing the word-at-a-time dcache name accessor
code, and cleaning this up now allows me to send out a smaller relevant
interesting patch for the experimental stuff.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dcache.c            | 13 ++++++++-----
 include/linux/dcache.h |  3 ++-
 2 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index fe19ac13f75f..138be96e25b6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -104,7 +104,7 @@ static unsigned int d_hash_shift __read_mostly;
 
 static struct hlist_bl_head *dentry_hashtable __read_mostly;
 
-static inline struct hlist_bl_head *d_hash(struct dentry *parent,
+static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
 					unsigned long hash)
 {
 	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
@@ -1717,8 +1717,9 @@ EXPORT_SYMBOL(d_add_ci);
  * child is looked up. Thus, an interlocking stepping of sequence lock checks
  * is formed, giving integrity down the path walk.
  */
-struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
-				unsigned *seq, struct inode **inode)
+struct dentry *__d_lookup_rcu(const struct dentry *parent,
+				const struct qstr *name,
+				unsigned *seqp, struct inode **inode)
 {
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
@@ -1748,6 +1749,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
 	 * See Documentation/filesystems/path-lookup.txt for more details.
 	 */
 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
+		unsigned seq;
 		struct inode *i;
 		const char *tname;
 		int tlen;
@@ -1756,7 +1758,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
 			continue;
 
 seqretry:
-		*seq = read_seqcount_begin(&dentry->d_seq);
+		seq = read_seqcount_begin(&dentry->d_seq);
 		if (dentry->d_parent != parent)
 			continue;
 		if (d_unhashed(dentry))
@@ -1771,7 +1773,7 @@ seqretry:
 		 * edge of memory when walking. If we could load this
 		 * atomically some other way, we could drop this check.
 		 */
-		if (read_seqcount_retry(&dentry->d_seq, *seq))
+		if (read_seqcount_retry(&dentry->d_seq, seq))
 			goto seqretry;
 		if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
 			if (parent->d_op->d_compare(parent, *inode,
@@ -1788,6 +1790,7 @@ seqretry:
 		 * order to do anything useful with the returned dentry
 		 * anyway.
 		 */
+		*seqp = seq;
 		*inode = i;
 		return dentry;
 	}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index d64a55b23afd..61b24261e07a 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -309,7 +309,8 @@ extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
 extern struct dentry *d_lookup(struct dentry *, struct qstr *);
 extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
 extern struct dentry *__d_lookup(struct dentry *, struct qstr *);
-extern struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
+extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
+				const struct qstr *name,
 				unsigned *seq, struct inode **inode);
 
 /**
-- 
cgit v1.2.3


From 0145acc202ca613b23b5383e55df3c32a92ad1bf Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 2 Mar 2012 14:32:59 -0800
Subject: vfs: uninline full_name_hash()

.. and also use it in lookup_one_len() rather than open-coding it.

There aren't any performance-critical users, so inlining it is silly.
But it wouldn't matter if it wasn't for the fact that the word-at-a-time
dentry name patches want to conditionally replace the function, and
uninlining it sets the stage for that.

So again, this is a preparatory patch that doesn't change any semantics,
and only prepares for a much cleaner and testable word-at-a-time dentry
name accessor patch.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namei.c             | 13 +++++++++----
 include/linux/dcache.h |  9 +--------
 2 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index a780ea515c47..ec72fa1acb14 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1374,6 +1374,14 @@ static inline int can_lookup(struct inode *inode)
 	return 1;
 }
 
+unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+{
+	unsigned long hash = init_name_hash();
+	while (len--)
+		hash = partial_name_hash(*name++, hash);
+	return end_name_hash(hash);
+}
+
 /*
  * Name resolution.
  * This is the basic name resolution function, turning a pathname into
@@ -1775,24 +1783,21 @@ static struct dentry *lookup_hash(struct nameidata *nd)
 struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
 {
 	struct qstr this;
-	unsigned long hash;
 	unsigned int c;
 
 	WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
 
 	this.name = name;
 	this.len = len;
+	this.hash = full_name_hash(name, len);
 	if (!len)
 		return ERR_PTR(-EACCES);
 
-	hash = init_name_hash();
 	while (len--) {
 		c = *(const unsigned char *)name++;
 		if (c == '/' || c == '\0')
 			return ERR_PTR(-EACCES);
-		hash = partial_name_hash(c, hash);
 	}
-	this.hash = end_name_hash(hash);
 	/*
 	 * See if the low-level filesystem might want
 	 * to use its own hash..
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 61b24261e07a..f1c7eb8461be 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -89,14 +89,7 @@ static inline unsigned long end_name_hash(unsigned long hash)
 }
 
 /* Compute the hash for a name string. */
-static inline unsigned int
-full_name_hash(const unsigned char *name, unsigned int len)
-{
-	unsigned long hash = init_name_hash();
-	while (len--)
-		hash = partial_name_hash(*name++, hash);
-	return end_name_hash(hash);
-}
+extern unsigned int full_name_hash(const unsigned char *, unsigned int);
 
 /*
  * Try to keep struct dentry aligned on 64 byte cachelines (this will
-- 
cgit v1.2.3


From 5707c87f20bca9e76969bb4096149de6ef74cbb9 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 2 Mar 2012 14:47:15 -0800
Subject: vfs: clarify and clean up dentry_cmp()

It did some odd things for unclear reasons.  As this is one of the
functions that gets changed when doing word-at-a-time compares, this is
yet another of the "don't change any semantics, but clean things up so
that subsequent patches don't get obscured by the cleanups".

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dcache.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index f1c7eb8461be..4270bedd2308 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -54,18 +54,17 @@ extern struct dentry_stat_t dentry_stat;
 static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 				const unsigned char *ct, size_t tcount)
 {
-	int ret;
 	if (scount != tcount)
 		return 1;
+
 	do {
-		ret = (*cs != *ct);
-		if (ret)
-			break;
+		if (*cs != *ct)
+			return 1;
 		cs++;
 		ct++;
 		tcount--;
 	} while (tcount);
-	return ret;
+	return 0;
 }
 
 /* Name hashing routines. Initial hash value */
-- 
cgit v1.2.3